    1// SPDX-License-Identifier: GPL-2.0-only
    2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
    3 * Copyright (c) 2016 Facebook
    4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
    5 */
    6#include <uapi/linux/btf.h>
    7#include <linux/bpf-cgroup.h>
    8#include <linux/kernel.h>
    9#include <linux/types.h>
   10#include <linux/slab.h>
   11#include <linux/bpf.h>
   12#include <linux/btf.h>
   13#include <linux/bpf_verifier.h>
   14#include <linux/filter.h>
   15#include <net/netlink.h>
   16#include <linux/file.h>
   17#include <linux/vmalloc.h>
   18#include <linux/stringify.h>
   19#include <linux/bsearch.h>
   20#include <linux/sort.h>
   21#include <linux/perf_event.h>
   22#include <linux/ctype.h>
   23#include <linux/error-injection.h>
   24#include <linux/bpf_lsm.h>
   25#include <linux/btf_ids.h>
   26#include <linux/poison.h>
   27
   28#include "disasm.h"
   29
   30static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
   31#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
   32	[_id] = & _name ## _verifier_ops,
   33#define BPF_MAP_TYPE(_id, _ops)
   34#define BPF_LINK_TYPE(_id, _name)
   35#include <linux/bpf_types.h>
   36#undef BPF_PROG_TYPE
   37#undef BPF_MAP_TYPE
   38#undef BPF_LINK_TYPE
   39};
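/*
 * Illustrative expansion (sketch): assuming <linux/bpf_types.h> contains an
 * entry like
 *
 *	BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter, ...)
 *
 * the BPF_PROG_TYPE() definition above turns it into
 *
 *	[BPF_PROG_TYPE_SOCKET_FILTER] = &sk_filter_verifier_ops,
 *
 * while BPF_MAP_TYPE() and BPF_LINK_TYPE() entries expand to nothing, so the
 * table is indexed by program type only.
 */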
   40
   41/* bpf_check() is a static code analyzer that walks eBPF program
   42 * instruction by instruction and updates register/stack state.
   43 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
   44 *
   45 * The first pass is depth-first-search to check that the program is a DAG.
   46 * It rejects the following programs:
   47 * - larger than BPF_MAXINSNS insns
   48 * - if loop is present (detected via back-edge)
   49 * - unreachable insns exist (shouldn't be a forest. program = one function)
   50 * - out of bounds or malformed jumps
   51 * The second pass is all possible path descent from the 1st insn.
   52 * Since it's analyzing all paths through the program, the length of the
   53 * analysis is limited to 64k insn, which may be hit even if total number of
   54 * insn is less than 4K, but there are too many branches that change stack/regs.
   55 * Number of 'branches to be analyzed' is limited to 1k
   56 *
   57 * On entry to each instruction, each register has a type, and the instruction
   58 * changes the types of the registers depending on instruction semantics.
   59 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
   60 * copied to R1.
   61 *
   62 * All registers are 64-bit.
   63 * R0 - return register
   64 * R1-R5 argument passing registers
   65 * R6-R9 callee saved registers
   66 * R10 - frame pointer read-only
   67 *
   68 * At the start of BPF program the register R1 contains a pointer to bpf_context
   69 * and has type PTR_TO_CTX.
   70 *
   71 * Verifier tracks arithmetic operations on pointers in case:
   72 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
   73 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
   74 * 1st insn copies R10 (which has FRAME_PTR) type into R1
   75 * and 2nd arithmetic instruction is pattern matched to recognize
   76 * that it wants to construct a pointer to some element within stack.
   77 * So after 2nd insn, the register R1 has type PTR_TO_STACK
   78 * (and -20 constant is saved for further stack bounds checking).
   79 * Meaning that this reg is a pointer to stack plus known immediate constant.
   80 *
   81 * Most of the time the registers have SCALAR_VALUE type, which
   82 * means the register has some value, but it's not a valid pointer.
   83 * (like pointer plus pointer becomes SCALAR_VALUE type)
   84 *
   85 * When verifier sees load or store instructions the type of base register
   86 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
   87 * four pointer types recognized by check_mem_access() function.
   88 *
   89 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
   90 * and the range of [ptr, ptr + map's value_size) is accessible.
   91 *
   92 * Registers used to pass values to function calls are checked against
   93 * function argument constraints.
   94 *
   95 * ARG_PTR_TO_MAP_KEY is one such argument constraint.
   96 * It means that the register type passed to this function must be
   97 * PTR_TO_STACK and it will be used inside the function as
   98 * 'pointer to map element key'
   99 *
  100 * For example the argument constraints for bpf_map_lookup_elem():
  101 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
  102 *   .arg1_type = ARG_CONST_MAP_PTR,
  103 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
  104 *
  105 * ret_type says that this function returns 'pointer to map elem value or null'.
  106 * The function expects the 1st argument to be a const pointer to 'struct bpf_map'
  107 * and the 2nd argument to be a pointer to the stack, which will be used inside
  108 * the helper function as a pointer to map element key.
  109 *
  110 * On the kernel side the helper function looks like:
  111 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  112 * {
  113 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
  114 *    void *key = (void *) (unsigned long) r2;
  115 *    void *value;
  116 *
  117 *    here kernel can access 'key' and 'map' pointers safely, knowing that
  118 *    [key, key + map->key_size) bytes are valid and were initialized on
  119 *    the stack of eBPF program.
  120 * }
  121 *
  122 * Corresponding eBPF program may look like:
  123 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
  124 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
  125 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
  126 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
  127 * here verifier looks at prototype of map_lookup_elem() and sees:
  128 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
  129 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
  130 *
  131 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
  132 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
  133 * and were initialized prior to this call.
  134 * If it's ok, then verifier allows this BPF_CALL insn and looks at
  135 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
  136 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
  137 * returns either pointer to map value or NULL.
  138 *
  139 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
  140 * insn, the register holding that pointer in the true branch changes state to
  141 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
  142 * branch. See check_cond_jmp_op().
  143 *
  144 * After the call, R0 is set to the return type of the function and registers R1-R5
  145 * are set to NOT_INIT to indicate that they are no longer readable.
  146 *
  147 * The following reference types represent a potential reference to a kernel
  148 * resource which, after first being allocated, must be checked and freed by
  149 * the BPF program:
  150 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
  151 *
  152 * When the verifier sees a helper call return a reference type, it allocates a
  153 * pointer id for the reference and stores it in the current function state.
  154 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
  155 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
  156 * passes through a NULL-check conditional. For the branch wherein the state is
  157 * changed to CONST_IMM, the verifier releases the reference.
  158 *
  159 * For each helper function that allocates a reference, such as
  160 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
  161 * bpf_sk_release(). When a reference type passes into the release function,
  162 * the verifier also releases the reference. If any unchecked or unreleased
  163 * reference remains at the end of the program, the verifier rejects it.
  164 */
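/*
 * A rough restricted-C equivalent of the eBPF sequence above (a sketch with
 * an assumed map named "my_map"; the helper is used here as seen from the
 * BPF program side):
 *
 *	__u32 key = 0;                          // lives on the BPF stack -> PTR_TO_STACK
 *	long *value = bpf_map_lookup_elem(&my_map, &key); // R0: PTR_TO_MAP_VALUE_OR_NULL
 *	if (value)                              // NULL check; now PTR_TO_MAP_VALUE
 *		*value += 1;                    // access within [value, value + value_size)
 */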
  165
  166/* verifier_state + insn_idx are pushed to stack when branch is encountered */
  167struct bpf_verifier_stack_elem {
  168	/* verifier state is 'st'
  169	 * before processing instruction 'insn_idx'
  170	 * and after processing instruction 'prev_insn_idx'
  171	 */
  172	struct bpf_verifier_state st;
  173	int insn_idx;
  174	int prev_insn_idx;
  175	struct bpf_verifier_stack_elem *next;
  176	/* length of verifier log at the time this state was pushed on stack */
  177	u32 log_pos;
  178};
  179
  180#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
  181#define BPF_COMPLEXITY_LIMIT_STATES	64
  182
  183#define BPF_MAP_KEY_POISON	(1ULL << 63)
  184#define BPF_MAP_KEY_SEEN	(1ULL << 62)
  185
  186#define BPF_MAP_PTR_UNPRIV	1UL
  187#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
  188					  POISON_POINTER_DELTA))
  189#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
  190
  191static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
  192static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
  193
  194static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
  195{
  196	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
  197}
  198
  199static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
  200{
  201	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
  202}
  203
  204static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
  205			      const struct bpf_map *map, bool unpriv)
  206{
  207	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
  208	unpriv |= bpf_map_ptr_unpriv(aux);
  209	aux->map_ptr_state = (unsigned long)map |
  210			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
  211}
  212
  213static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
  214{
  215	return aux->map_key_state & BPF_MAP_KEY_POISON;
  216}
  217
  218static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
  219{
  220	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
  221}
  222
  223static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
  224{
  225	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
  226}
  227
  228static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
  229{
  230	bool poisoned = bpf_map_key_poisoned(aux);
  231
  232	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
  233			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
  234}
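/*
 * Encoding sketch implied by the helpers above: bit 63 of map_key_state is
 * BPF_MAP_KEY_POISON, bit 62 is BPF_MAP_KEY_SEEN, and the remaining bits hold
 * the last constant key value seen.  E.g. bpf_map_key_store(aux, 7) records
 * SEEN | 7; a caller can mark the key as untrackable with
 * bpf_map_key_store(aux, BPF_MAP_KEY_POISON), and the poison bit is then
 * preserved by all later stores.
 */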
  235
  236static bool bpf_pseudo_call(const struct bpf_insn *insn)
  237{
  238	return insn->code == (BPF_JMP | BPF_CALL) &&
  239	       insn->src_reg == BPF_PSEUDO_CALL;
  240}
  241
  242static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
  243{
  244	return insn->code == (BPF_JMP | BPF_CALL) &&
  245	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
  246}
  247
  248struct bpf_call_arg_meta {
  249	struct bpf_map *map_ptr;
  250	bool raw_mode;
  251	bool pkt_access;
  252	u8 release_regno;
  253	int regno;
  254	int access_size;
  255	int mem_size;
  256	u64 msize_max_value;
  257	int ref_obj_id;
  258	int map_uid;
  259	int func_id;
  260	struct btf *btf;
  261	u32 btf_id;
  262	struct btf *ret_btf;
  263	u32 ret_btf_id;
  264	u32 subprogno;
  265	struct btf_field *kptr_field;
  266	u8 uninit_dynptr_regno;
  267};
  268
  269struct btf *btf_vmlinux;
  270
  271static DEFINE_MUTEX(bpf_verifier_lock);
  272
  273static const struct bpf_line_info *
  274find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
  275{
  276	const struct bpf_line_info *linfo;
  277	const struct bpf_prog *prog;
  278	u32 i, nr_linfo;
  279
  280	prog = env->prog;
  281	nr_linfo = prog->aux->nr_linfo;
  282
  283	if (!nr_linfo || insn_off >= prog->len)
  284		return NULL;
  285
  286	linfo = prog->aux->linfo;
  287	for (i = 1; i < nr_linfo; i++)
  288		if (insn_off < linfo[i].insn_off)
  289			break;
  290
  291	return &linfo[i - 1];
  292}
  293
  294void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
  295		       va_list args)
  296{
  297	unsigned int n;
  298
  299	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
  300
  301	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
  302		  "verifier log line truncated - local buffer too short\n");
  303
  304	if (log->level == BPF_LOG_KERNEL) {
  305		bool newline = n > 0 && log->kbuf[n - 1] == '\n';
  306
  307		pr_err("BPF: %s%s", log->kbuf, newline ? "" : "\n");
  308		return;
  309	}
  310
  311	n = min(log->len_total - log->len_used - 1, n);
  312	log->kbuf[n] = '\0';
  313	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
  314		log->len_used += n;
  315	else
  316		log->ubuf = NULL;
  317}
  318
  319static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
  320{
  321	char zero = 0;
  322
  323	if (!bpf_verifier_log_needed(log))
  324		return;
  325
  326	log->len_used = new_pos;
  327	if (put_user(zero, log->ubuf + new_pos))
  328		log->ubuf = NULL;
  329}
  330
  331/* log_level controls verbosity level of eBPF verifier.
  332 * bpf_verifier_log_write() is used to dump the verification trace to the log,
  333 * so the user can figure out what's wrong with the program
  334 */
  335__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
  336					   const char *fmt, ...)
  337{
  338	va_list args;
  339
  340	if (!bpf_verifier_log_needed(&env->log))
  341		return;
  342
  343	va_start(args, fmt);
  344	bpf_verifier_vlog(&env->log, fmt, args);
  345	va_end(args);
  346}
  347EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
  348
  349__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
  350{
  351	struct bpf_verifier_env *env = private_data;
  352	va_list args;
  353
  354	if (!bpf_verifier_log_needed(&env->log))
  355		return;
  356
  357	va_start(args, fmt);
  358	bpf_verifier_vlog(&env->log, fmt, args);
  359	va_end(args);
  360}
  361
  362__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
  363			    const char *fmt, ...)
  364{
  365	va_list args;
  366
  367	if (!bpf_verifier_log_needed(log))
  368		return;
  369
  370	va_start(args, fmt);
  371	bpf_verifier_vlog(log, fmt, args);
  372	va_end(args);
  373}
  374EXPORT_SYMBOL_GPL(bpf_log);
  375
  376static const char *ltrim(const char *s)
  377{
  378	while (isspace(*s))
  379		s++;
  380
  381	return s;
  382}
  383
  384__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
  385					 u32 insn_off,
  386					 const char *prefix_fmt, ...)
  387{
  388	const struct bpf_line_info *linfo;
  389
  390	if (!bpf_verifier_log_needed(&env->log))
  391		return;
  392
  393	linfo = find_linfo(env, insn_off);
  394	if (!linfo || linfo == env->prev_linfo)
  395		return;
  396
  397	if (prefix_fmt) {
  398		va_list args;
  399
  400		va_start(args, prefix_fmt);
  401		bpf_verifier_vlog(&env->log, prefix_fmt, args);
  402		va_end(args);
  403	}
  404
  405	verbose(env, "%s\n",
  406		ltrim(btf_name_by_offset(env->prog->aux->btf,
  407					 linfo->line_off)));
  408
  409	env->prev_linfo = linfo;
  410}
  411
  412static void verbose_invalid_scalar(struct bpf_verifier_env *env,
  413				   struct bpf_reg_state *reg,
  414				   struct tnum *range, const char *ctx,
  415				   const char *reg_name)
  416{
  417	char tn_buf[48];
  418
  419	verbose(env, "At %s the register %s ", ctx, reg_name);
  420	if (!tnum_is_unknown(reg->var_off)) {
  421		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  422		verbose(env, "has value %s", tn_buf);
  423	} else {
  424		verbose(env, "has unknown scalar value");
  425	}
  426	tnum_strn(tn_buf, sizeof(tn_buf), *range);
  427	verbose(env, " should have been in %s\n", tn_buf);
  428}
  429
  430static bool type_is_pkt_pointer(enum bpf_reg_type type)
  431{
  432	type = base_type(type);
  433	return type == PTR_TO_PACKET ||
  434	       type == PTR_TO_PACKET_META;
  435}
  436
  437static bool type_is_sk_pointer(enum bpf_reg_type type)
  438{
  439	return type == PTR_TO_SOCKET ||
  440		type == PTR_TO_SOCK_COMMON ||
  441		type == PTR_TO_TCP_SOCK ||
  442		type == PTR_TO_XDP_SOCK;
  443}
  444
  445static bool reg_type_not_null(enum bpf_reg_type type)
  446{
  447	return type == PTR_TO_SOCKET ||
  448		type == PTR_TO_TCP_SOCK ||
  449		type == PTR_TO_MAP_VALUE ||
  450		type == PTR_TO_MAP_KEY ||
  451		type == PTR_TO_SOCK_COMMON;
  452}
  453
  454static bool type_is_ptr_alloc_obj(u32 type)
  455{
  456	return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
  457}
  458
  459static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
  460{
  461	struct btf_record *rec = NULL;
  462	struct btf_struct_meta *meta;
  463
  464	if (reg->type == PTR_TO_MAP_VALUE) {
  465		rec = reg->map_ptr->record;
  466	} else if (type_is_ptr_alloc_obj(reg->type)) {
  467		meta = btf_find_struct_meta(reg->btf, reg->btf_id);
  468		if (meta)
  469			rec = meta->record;
  470	}
  471	return rec;
  472}
  473
  474static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
  475{
  476	return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
  477}
  478
  479static bool type_is_rdonly_mem(u32 type)
  480{
  481	return type & MEM_RDONLY;
  482}
  483
  484static bool type_may_be_null(u32 type)
  485{
  486	return type & PTR_MAYBE_NULL;
  487}
  488
  489static bool is_acquire_function(enum bpf_func_id func_id,
  490				const struct bpf_map *map)
  491{
  492	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
  493
  494	if (func_id == BPF_FUNC_sk_lookup_tcp ||
  495	    func_id == BPF_FUNC_sk_lookup_udp ||
  496	    func_id == BPF_FUNC_skc_lookup_tcp ||
  497	    func_id == BPF_FUNC_ringbuf_reserve ||
  498	    func_id == BPF_FUNC_kptr_xchg)
  499		return true;
  500
  501	if (func_id == BPF_FUNC_map_lookup_elem &&
  502	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
  503	     map_type == BPF_MAP_TYPE_SOCKHASH))
  504		return true;
  505
  506	return false;
  507}
  508
  509static bool is_ptr_cast_function(enum bpf_func_id func_id)
  510{
  511	return func_id == BPF_FUNC_tcp_sock ||
  512		func_id == BPF_FUNC_sk_fullsock ||
  513		func_id == BPF_FUNC_skc_to_tcp_sock ||
  514		func_id == BPF_FUNC_skc_to_tcp6_sock ||
  515		func_id == BPF_FUNC_skc_to_udp6_sock ||
  516		func_id == BPF_FUNC_skc_to_mptcp_sock ||
  517		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
  518		func_id == BPF_FUNC_skc_to_tcp_request_sock;
  519}
  520
  521static bool is_dynptr_ref_function(enum bpf_func_id func_id)
  522{
  523	return func_id == BPF_FUNC_dynptr_data;
  524}
  525
  526static bool is_callback_calling_function(enum bpf_func_id func_id)
  527{
  528	return func_id == BPF_FUNC_for_each_map_elem ||
  529	       func_id == BPF_FUNC_timer_set_callback ||
  530	       func_id == BPF_FUNC_find_vma ||
  531	       func_id == BPF_FUNC_loop ||
  532	       func_id == BPF_FUNC_user_ringbuf_drain;
  533}
  534
  535static bool is_storage_get_function(enum bpf_func_id func_id)
  536{
  537	return func_id == BPF_FUNC_sk_storage_get ||
  538	       func_id == BPF_FUNC_inode_storage_get ||
  539	       func_id == BPF_FUNC_task_storage_get ||
  540	       func_id == BPF_FUNC_cgrp_storage_get;
  541}
  542
  543static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
  544					const struct bpf_map *map)
  545{
  546	int ref_obj_uses = 0;
  547
  548	if (is_ptr_cast_function(func_id))
  549		ref_obj_uses++;
  550	if (is_acquire_function(func_id, map))
  551		ref_obj_uses++;
  552	if (is_dynptr_ref_function(func_id))
  553		ref_obj_uses++;
  554
  555	return ref_obj_uses > 1;
  556}
  557
  558static bool is_cmpxchg_insn(const struct bpf_insn *insn)
  559{
  560	return BPF_CLASS(insn->code) == BPF_STX &&
  561	       BPF_MODE(insn->code) == BPF_ATOMIC &&
  562	       insn->imm == BPF_CMPXCHG;
  563}
  564
  565/* string representation of 'enum bpf_reg_type'
  566 *
  567 * Note that reg_type_str() can not appear more than once in a single verbose()
  568 * statement.
  569 */
  570static const char *reg_type_str(struct bpf_verifier_env *env,
  571				enum bpf_reg_type type)
  572{
  573	char postfix[16] = {0}, prefix[64] = {0};
  574	static const char * const str[] = {
  575		[NOT_INIT]		= "?",
  576		[SCALAR_VALUE]		= "scalar",
  577		[PTR_TO_CTX]		= "ctx",
  578		[CONST_PTR_TO_MAP]	= "map_ptr",
  579		[PTR_TO_MAP_VALUE]	= "map_value",
  580		[PTR_TO_STACK]		= "fp",
  581		[PTR_TO_PACKET]		= "pkt",
  582		[PTR_TO_PACKET_META]	= "pkt_meta",
  583		[PTR_TO_PACKET_END]	= "pkt_end",
  584		[PTR_TO_FLOW_KEYS]	= "flow_keys",
  585		[PTR_TO_SOCKET]		= "sock",
  586		[PTR_TO_SOCK_COMMON]	= "sock_common",
  587		[PTR_TO_TCP_SOCK]	= "tcp_sock",
  588		[PTR_TO_TP_BUFFER]	= "tp_buffer",
  589		[PTR_TO_XDP_SOCK]	= "xdp_sock",
  590		[PTR_TO_BTF_ID]		= "ptr_",
  591		[PTR_TO_MEM]		= "mem",
  592		[PTR_TO_BUF]		= "buf",
  593		[PTR_TO_FUNC]		= "func",
  594		[PTR_TO_MAP_KEY]	= "map_key",
  595		[CONST_PTR_TO_DYNPTR]	= "dynptr_ptr",
  596	};
  597
  598	if (type & PTR_MAYBE_NULL) {
  599		if (base_type(type) == PTR_TO_BTF_ID)
  600			strncpy(postfix, "or_null_", 16);
  601		else
  602			strncpy(postfix, "_or_null", 16);
  603	}
  604
  605	snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
  606		 type & MEM_RDONLY ? "rdonly_" : "",
  607		 type & MEM_RINGBUF ? "ringbuf_" : "",
  608		 type & MEM_USER ? "user_" : "",
  609		 type & MEM_PERCPU ? "percpu_" : "",
  610		 type & MEM_RCU ? "rcu_" : "",
  611		 type & PTR_UNTRUSTED ? "untrusted_" : "",
  612		 type & PTR_TRUSTED ? "trusted_" : ""
  613	);
  614
  615	snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s",
  616		 prefix, str[base_type(type)], postfix);
  617	return env->type_str_buf;
  618}
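/*
 * Example outputs, derived from the tables above (illustrative):
 *
 *	PTR_TO_MAP_VALUE | PTR_MAYBE_NULL -> "map_value_or_null"
 *	PTR_TO_BUF | MEM_RDONLY           -> "rdonly_buf"
 *	PTR_TO_BTF_ID | PTR_MAYBE_NULL    -> "ptr_or_null_" (the BTF type name
 *						is appended by the caller)
 */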
  619
  620static char slot_type_char[] = {
  621	[STACK_INVALID]	= '?',
  622	[STACK_SPILL]	= 'r',
  623	[STACK_MISC]	= 'm',
  624	[STACK_ZERO]	= '0',
  625	[STACK_DYNPTR]	= 'd',
  626};
  627
  628static void print_liveness(struct bpf_verifier_env *env,
  629			   enum bpf_reg_liveness live)
  630{
  631	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
  632		verbose(env, "_");
  633	if (live & REG_LIVE_READ)
  634		verbose(env, "r");
  635	if (live & REG_LIVE_WRITTEN)
  636		verbose(env, "w");
  637	if (live & REG_LIVE_DONE)
  638		verbose(env, "D");
  639}
  640
  641static int get_spi(s32 off)
  642{
  643	return (-off - 1) / BPF_REG_SIZE;
  644}
  645
  646static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
  647{
  648	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
  649
  650	/* We need to check that slots between [spi - nr_slots + 1, spi] are
  651	 * within [0, allocated_stack).
  652	 *
  653	 * Please note that the spi grows downwards. For example, a dynptr
  654	 * takes the size of two stack slots; the first slot will be at
  655	 * spi and the second slot will be at spi - 1.
  656	 */
  657	return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
  658}
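/*
 * Worked example (illustrative): with BPF_REG_SIZE == 8, a pointer to fp-8
 * gives get_spi(-8) == 0 and a pointer to fp-16 gives get_spi(-16) == 1.
 * A two-slot object such as a dynptr at fp-16 therefore occupies spi 1 and
 * spi 0, and is_spi_bounds_valid(state, 1, 2) only holds once at least
 * 16 bytes of stack have been allocated.
 */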
  659
  660static struct bpf_func_state *func(struct bpf_verifier_env *env,
  661				   const struct bpf_reg_state *reg)
  662{
  663	struct bpf_verifier_state *cur = env->cur_state;
  664
  665	return cur->frame[reg->frameno];
  666}
  667
  668static const char *kernel_type_name(const struct btf* btf, u32 id)
  669{
  670	return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
  671}
  672
  673static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
  674{
  675	env->scratched_regs |= 1U << regno;
  676}
  677
  678static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
  679{
  680	env->scratched_stack_slots |= 1ULL << spi;
  681}
  682
  683static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
  684{
  685	return (env->scratched_regs >> regno) & 1;
  686}
  687
  688static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
  689{
  690	return (env->scratched_stack_slots >> regno) & 1;
  691}
  692
  693static bool verifier_state_scratched(const struct bpf_verifier_env *env)
  694{
  695	return env->scratched_regs || env->scratched_stack_slots;
  696}
  697
  698static void mark_verifier_state_clean(struct bpf_verifier_env *env)
  699{
  700	env->scratched_regs = 0U;
  701	env->scratched_stack_slots = 0ULL;
  702}
  703
  704/* Used for printing the entire verifier state. */
  705static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
  706{
  707	env->scratched_regs = ~0U;
  708	env->scratched_stack_slots = ~0ULL;
  709}
  710
  711static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
  712{
  713	switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
  714	case DYNPTR_TYPE_LOCAL:
  715		return BPF_DYNPTR_TYPE_LOCAL;
  716	case DYNPTR_TYPE_RINGBUF:
  717		return BPF_DYNPTR_TYPE_RINGBUF;
  718	default:
  719		return BPF_DYNPTR_TYPE_INVALID;
  720	}
  721}
  722
  723static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
  724{
  725	return type == BPF_DYNPTR_TYPE_RINGBUF;
  726}
  727
  728static void __mark_dynptr_reg(struct bpf_reg_state *reg,
  729			      enum bpf_dynptr_type type,
  730			      bool first_slot);
  731
  732static void __mark_reg_not_init(const struct bpf_verifier_env *env,
  733				struct bpf_reg_state *reg);
  734
  735static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
  736				   struct bpf_reg_state *sreg2,
  737				   enum bpf_dynptr_type type)
  738{
  739	__mark_dynptr_reg(sreg1, type, true);
  740	__mark_dynptr_reg(sreg2, type, false);
  741}
  742
  743static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
  744			       enum bpf_dynptr_type type)
  745{
  746	__mark_dynptr_reg(reg, type, true);
  747}
  748
  749
  750static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
  751				   enum bpf_arg_type arg_type, int insn_idx)
  752{
  753	struct bpf_func_state *state = func(env, reg);
  754	enum bpf_dynptr_type type;
  755	int spi, i, id;
  756
  757	spi = get_spi(reg->off);
  758
  759	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
  760		return -EINVAL;
  761
  762	for (i = 0; i < BPF_REG_SIZE; i++) {
  763		state->stack[spi].slot_type[i] = STACK_DYNPTR;
  764		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
  765	}
  766
  767	type = arg_to_dynptr_type(arg_type);
  768	if (type == BPF_DYNPTR_TYPE_INVALID)
  769		return -EINVAL;
  770
  771	mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
  772			       &state->stack[spi - 1].spilled_ptr, type);
  773
  774	if (dynptr_type_refcounted(type)) {
  775		/* The id is used to track proper releasing */
  776		id = acquire_reference_state(env, insn_idx);
  777		if (id < 0)
  778			return id;
  779
  780		state->stack[spi].spilled_ptr.ref_obj_id = id;
  781		state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
  782	}
  783
  784	return 0;
  785}
  786
  787static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
  788{
  789	struct bpf_func_state *state = func(env, reg);
  790	int spi, i;
  791
  792	spi = get_spi(reg->off);
  793
  794	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
  795		return -EINVAL;
  796
  797	for (i = 0; i < BPF_REG_SIZE; i++) {
  798		state->stack[spi].slot_type[i] = STACK_INVALID;
  799		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
  800	}
  801
  802	/* Invalidate any slices associated with this dynptr */
  803	if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type))
  804		WARN_ON_ONCE(release_reference(env, state->stack[spi].spilled_ptr.ref_obj_id));
  805
  806	__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
  807	__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
  808	return 0;
  809}
  810
  811static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
  812{
  813	struct bpf_func_state *state = func(env, reg);
  814	int spi, i;
  815
  816	if (reg->type == CONST_PTR_TO_DYNPTR)
  817		return false;
  818
  819	spi = get_spi(reg->off);
  820	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
  821		return true;
  822
  823	for (i = 0; i < BPF_REG_SIZE; i++) {
  824		if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
  825		    state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
  826			return false;
  827	}
  828
  829	return true;
  830}
  831
  832static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
  833{
  834	struct bpf_func_state *state = func(env, reg);
  835	int spi;
  836	int i;
  837
  838	/* This already represents the first slot of an initialized bpf_dynptr */
  839	if (reg->type == CONST_PTR_TO_DYNPTR)
  840		return true;
  841
  842	spi = get_spi(reg->off);
  843	if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
  844	    !state->stack[spi].spilled_ptr.dynptr.first_slot)
  845		return false;
  846
  847	for (i = 0; i < BPF_REG_SIZE; i++) {
  848		if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
  849		    state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
  850			return false;
  851	}
  852
  853	return true;
  854}
  855
  856static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
  857				    enum bpf_arg_type arg_type)
  858{
  859	struct bpf_func_state *state = func(env, reg);
  860	enum bpf_dynptr_type dynptr_type;
  861	int spi;
  862
  863	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
  864	if (arg_type == ARG_PTR_TO_DYNPTR)
  865		return true;
  866
  867	dynptr_type = arg_to_dynptr_type(arg_type);
  868	if (reg->type == CONST_PTR_TO_DYNPTR) {
  869		return reg->dynptr.type == dynptr_type;
  870	} else {
  871		spi = get_spi(reg->off);
  872		return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
  873	}
  874}
  875
  876/* The reg state of a pointer or a bounded scalar was saved when
  877 * it was spilled to the stack.
  878 */
  879static bool is_spilled_reg(const struct bpf_stack_state *stack)
  880{
  881	return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
  882}
  883
  884static void scrub_spilled_slot(u8 *stype)
  885{
  886	if (*stype != STACK_INVALID)
  887		*stype = STACK_MISC;
  888}
  889
  890static void print_verifier_state(struct bpf_verifier_env *env,
  891				 const struct bpf_func_state *state,
  892				 bool print_all)
  893{
  894	const struct bpf_reg_state *reg;
  895	enum bpf_reg_type t;
  896	int i;
  897
  898	if (state->frameno)
  899		verbose(env, " frame%d:", state->frameno);
  900	for (i = 0; i < MAX_BPF_REG; i++) {
  901		reg = &state->regs[i];
  902		t = reg->type;
  903		if (t == NOT_INIT)
  904			continue;
  905		if (!print_all && !reg_scratched(env, i))
  906			continue;
  907		verbose(env, " R%d", i);
  908		print_liveness(env, reg->live);
  909		verbose(env, "=");
  910		if (t == SCALAR_VALUE && reg->precise)
  911			verbose(env, "P");
  912		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
  913		    tnum_is_const(reg->var_off)) {
  914			/* reg->off should be 0 for SCALAR_VALUE */
  915			verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
  916			verbose(env, "%lld", reg->var_off.value + reg->off);
  917		} else {
  918			const char *sep = "";
  919
  920			verbose(env, "%s", reg_type_str(env, t));
  921			if (base_type(t) == PTR_TO_BTF_ID)
  922				verbose(env, "%s", kernel_type_name(reg->btf, reg->btf_id));
  923			verbose(env, "(");
  924/*
  925 * _a stands for append; the name was shortened to avoid multiline statements below.
  926 * This macro is used to output a comma-separated list of attributes.
  927 */
  928#define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
  929
  930			if (reg->id)
  931				verbose_a("id=%d", reg->id);
  932			if (reg->ref_obj_id)
  933				verbose_a("ref_obj_id=%d", reg->ref_obj_id);
  934			if (t != SCALAR_VALUE)
  935				verbose_a("off=%d", reg->off);
  936			if (type_is_pkt_pointer(t))
  937				verbose_a("r=%d", reg->range);
  938			else if (base_type(t) == CONST_PTR_TO_MAP ||
  939				 base_type(t) == PTR_TO_MAP_KEY ||
  940				 base_type(t) == PTR_TO_MAP_VALUE)
  941				verbose_a("ks=%d,vs=%d",
  942					  reg->map_ptr->key_size,
  943					  reg->map_ptr->value_size);
  944			if (tnum_is_const(reg->var_off)) {
  945				/* Typically an immediate SCALAR_VALUE, but
  946				 * could be a pointer whose offset is too big
  947				 * for reg->off
  948				 */
  949				verbose_a("imm=%llx", reg->var_off.value);
  950			} else {
  951				if (reg->smin_value != reg->umin_value &&
  952				    reg->smin_value != S64_MIN)
  953					verbose_a("smin=%lld", (long long)reg->smin_value);
  954				if (reg->smax_value != reg->umax_value &&
  955				    reg->smax_value != S64_MAX)
  956					verbose_a("smax=%lld", (long long)reg->smax_value);
  957				if (reg->umin_value != 0)
  958					verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
  959				if (reg->umax_value != U64_MAX)
  960					verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
  961				if (!tnum_is_unknown(reg->var_off)) {
  962					char tn_buf[48];
  963
  964					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  965					verbose_a("var_off=%s", tn_buf);
  966				}
  967				if (reg->s32_min_value != reg->smin_value &&
  968				    reg->s32_min_value != S32_MIN)
  969					verbose_a("s32_min=%d", (int)(reg->s32_min_value));
  970				if (reg->s32_max_value != reg->smax_value &&
  971				    reg->s32_max_value != S32_MAX)
  972					verbose_a("s32_max=%d", (int)(reg->s32_max_value));
  973				if (reg->u32_min_value != reg->umin_value &&
  974				    reg->u32_min_value != U32_MIN)
  975					verbose_a("u32_min=%d", (int)(reg->u32_min_value));
  976				if (reg->u32_max_value != reg->umax_value &&
  977				    reg->u32_max_value != U32_MAX)
  978					verbose_a("u32_max=%d", (int)(reg->u32_max_value));
  979			}
  980#undef verbose_a
  981
  982			verbose(env, ")");
  983		}
  984	}
  985	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
  986		char types_buf[BPF_REG_SIZE + 1];
  987		bool valid = false;
  988		int j;
  989
  990		for (j = 0; j < BPF_REG_SIZE; j++) {
  991			if (state->stack[i].slot_type[j] != STACK_INVALID)
  992				valid = true;
  993			types_buf[j] = slot_type_char[
  994					state->stack[i].slot_type[j]];
  995		}
  996		types_buf[BPF_REG_SIZE] = 0;
  997		if (!valid)
  998			continue;
  999		if (!print_all && !stack_slot_scratched(env, i))
 1000			continue;
 1001		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 1002		print_liveness(env, state->stack[i].spilled_ptr.live);
 1003		if (is_spilled_reg(&state->stack[i])) {
 1004			reg = &state->stack[i].spilled_ptr;
 1005			t = reg->type;
 1006			verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
 1007			if (t == SCALAR_VALUE && reg->precise)
 1008				verbose(env, "P");
 1009			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 1010				verbose(env, "%lld", reg->var_off.value + reg->off);
 1011		} else {
 1012			verbose(env, "=%s", types_buf);
 1013		}
 1014	}
 1015	if (state->acquired_refs && state->refs[0].id) {
 1016		verbose(env, " refs=%d", state->refs[0].id);
 1017		for (i = 1; i < state->acquired_refs; i++)
 1018			if (state->refs[i].id)
 1019				verbose(env, ",%d", state->refs[i].id);
 1020	}
 1021	if (state->in_callback_fn)
 1022		verbose(env, " cb");
 1023	if (state->in_async_callback_fn)
 1024		verbose(env, " async_cb");
 1025	verbose(env, "\n");
 1026	mark_verifier_state_clean(env);
 1027}
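/*
 * Illustrative output of the function above (format only, the values are
 * made up):
 *
 *	 R1_w=ctx(off=0,imm=0) R2_w=scalar(umax=255,var_off=(0x0; 0xff)) R10=fp0
 *	 fp-8=mmmm????
 */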
 1028
 1029static inline u32 vlog_alignment(u32 pos)
 1030{
 1031	return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
 1032			BPF_LOG_MIN_ALIGNMENT) - pos - 1;
 1033}
 1034
 1035static void print_insn_state(struct bpf_verifier_env *env,
 1036			     const struct bpf_func_state *state)
 1037{
 1038	if (env->prev_log_len && env->prev_log_len == env->log.len_used) {
 1039		/* remove new line character */
 1040		bpf_vlog_reset(&env->log, env->prev_log_len - 1);
 1041		verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_len), ' ');
 1042	} else {
 1043		verbose(env, "%d:", env->insn_idx);
 1044	}
 1045	print_verifier_state(env, state, false);
 1046}
 1047
 1048/* copy array src of length n * size bytes to dst. dst is reallocated if it's too
 1049 * small to hold src. This is different from krealloc since we don't want to preserve
 1050 * the contents of dst.
 1051 *
 1052 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could
 1053 * not be allocated.
 1054 */
 1055static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
 1056{
 1057	size_t alloc_bytes;
 1058	void *orig = dst;
 1059	size_t bytes;
 1060
 1061	if (ZERO_OR_NULL_PTR(src))
 1062		goto out;
 1063
 1064	if (unlikely(check_mul_overflow(n, size, &bytes)))
 1065		return NULL;
 1066
 1067	alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
 1068	dst = krealloc(orig, alloc_bytes, flags);
 1069	if (!dst) {
 1070		kfree(orig);
 1071		return NULL;
 1072	}
 1073
 1074	memcpy(dst, src, bytes);
 1075out:
 1076	return dst ? dst : ZERO_SIZE_PTR;
 1077}
 1078
 1079/* resize an array from old_n items to new_n items. the array is reallocated if it's too
 1080 * small to hold new_n items. new items are zeroed out if the array grows.
 1081 *
 1082 * Contrary to krealloc_array, does not free arr if new_n is zero.
 1083 */
 1084static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
 1085{
 1086	size_t alloc_size;
 1087	void *new_arr;
 1088
 1089	if (!new_n || old_n == new_n)
 1090		goto out;
 1091
 1092	alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
 1093	new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
 1094	if (!new_arr) {
 1095		kfree(arr);
 1096		return NULL;
 1097	}
 1098	arr = new_arr;
 1099
 1100	if (new_n > old_n)
 1101		memset(arr + old_n * size, 0, (new_n - old_n) * size);
 1102
 1103out:
 1104	return arr ? arr : ZERO_SIZE_PTR;
 1105}
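/*
 * Usage sketch (illustrative): realloc_array(refs, 2, 4, sizeof(*refs)) on a
 * 2-entry array keeps the first two entries and zeroes the two new ones;
 * passing new_n == old_n or new_n == 0 returns the array unchanged.
 */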
 1106
 1107static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 1108{
 1109	dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
 1110			       sizeof(struct bpf_reference_state), GFP_KERNEL);
 1111	if (!dst->refs)
 1112		return -ENOMEM;
 1113
 1114	dst->acquired_refs = src->acquired_refs;
 1115	return 0;
 1116}
 1117
 1118static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
 1119{
 1120	size_t n = src->allocated_stack / BPF_REG_SIZE;
 1121
 1122	dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
 1123				GFP_KERNEL);
 1124	if (!dst->stack)
 1125		return -ENOMEM;
 1126
 1127	dst->allocated_stack = src->allocated_stack;
 1128	return 0;
 1129}
 1130
 1131static int resize_reference_state(struct bpf_func_state *state, size_t n)
 1132{
 1133	state->refs = realloc_array(state->refs, state->acquired_refs, n,
 1134				    sizeof(struct bpf_reference_state));
 1135	if (!state->refs)
 1136		return -ENOMEM;
 1137
 1138	state->acquired_refs = n;
 1139	return 0;
 1140}
 1141
 1142static int grow_stack_state(struct bpf_func_state *state, int size)
 1143{
 1144	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
 1145
 1146	if (old_n >= n)
 1147		return 0;
 1148
 1149	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
 1150	if (!state->stack)
 1151		return -ENOMEM;
 1152
 1153	state->allocated_stack = size;
 1154	return 0;
 1155}
 1156
 1157/* Acquire a pointer id from the env and update the state->refs to include
 1158 * this new pointer reference.
 1159 * On success, returns a valid pointer id to associate with the register.
 1160 * On failure, returns a negative errno.
 1161 */
 1162static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 1163{
 1164	struct bpf_func_state *state = cur_func(env);
 1165	int new_ofs = state->acquired_refs;
 1166	int id, err;
 1167
 1168	err = resize_reference_state(state, state->acquired_refs + 1);
 1169	if (err)
 1170		return err;
 1171	id = ++env->id_gen;
 1172	state->refs[new_ofs].id = id;
 1173	state->refs[new_ofs].insn_idx = insn_idx;
 1174	state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
 1175
 1176	return id;
 1177}
 1178
 1179/* release function corresponding to acquire_reference_state(). Idempotent. */
 1180static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 1181{
 1182	int i, last_idx;
 1183
 1184	last_idx = state->acquired_refs - 1;
 1185	for (i = 0; i < state->acquired_refs; i++) {
 1186		if (state->refs[i].id == ptr_id) {
 1187			/* Cannot release caller references in callbacks */
 1188			if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
 1189				return -EINVAL;
 1190			if (last_idx && i != last_idx)
 1191				memcpy(&state->refs[i], &state->refs[last_idx],
 1192				       sizeof(*state->refs));
 1193			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 1194			state->acquired_refs--;
 1195			return 0;
 1196		}
 1197	}
 1198	return -EINVAL;
 1199}
 1200
 1201static void free_func_state(struct bpf_func_state *state)
 1202{
 1203	if (!state)
 1204		return;
 1205	kfree(state->refs);
 1206	kfree(state->stack);
 1207	kfree(state);
 1208}
 1209
 1210static void clear_jmp_history(struct bpf_verifier_state *state)
 1211{
 1212	kfree(state->jmp_history);
 1213	state->jmp_history = NULL;
 1214	state->jmp_history_cnt = 0;
 1215}
 1216
 1217static void free_verifier_state(struct bpf_verifier_state *state,
 1218				bool free_self)
 1219{
 1220	int i;
 1221
 1222	for (i = 0; i <= state->curframe; i++) {
 1223		free_func_state(state->frame[i]);
 1224		state->frame[i] = NULL;
 1225	}
 1226	clear_jmp_history(state);
 1227	if (free_self)
 1228		kfree(state);
 1229}
 1230
 1231/* copy verifier state from src to dst growing dst stack space
 1232 * when necessary to accommodate larger src stack
 1233 */
 1234static int copy_func_state(struct bpf_func_state *dst,
 1235			   const struct bpf_func_state *src)
 1236{
 1237	int err;
 1238
 1239	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 1240	err = copy_reference_state(dst, src);
 1241	if (err)
 1242		return err;
 1243	return copy_stack_state(dst, src);
 1244}
 1245
 1246static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 1247			       const struct bpf_verifier_state *src)
 1248{
 1249	struct bpf_func_state *dst;
 1250	int i, err;
 1251
 1252	dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
 1253					    src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
 1254					    GFP_USER);
 1255	if (!dst_state->jmp_history)
 1256		return -ENOMEM;
 1257	dst_state->jmp_history_cnt = src->jmp_history_cnt;
 1258
 1259	/* if dst has more stack frames than src, free them */
 1260	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 1261		free_func_state(dst_state->frame[i]);
 1262		dst_state->frame[i] = NULL;
 1263	}
 1264	dst_state->speculative = src->speculative;
 1265	dst_state->active_rcu_lock = src->active_rcu_lock;
 1266	dst_state->curframe = src->curframe;
 1267	dst_state->active_lock.ptr = src->active_lock.ptr;
 1268	dst_state->active_lock.id = src->active_lock.id;
 1269	dst_state->branches = src->branches;
 1270	dst_state->parent = src->parent;
 1271	dst_state->first_insn_idx = src->first_insn_idx;
 1272	dst_state->last_insn_idx = src->last_insn_idx;
 1273	for (i = 0; i <= src->curframe; i++) {
 1274		dst = dst_state->frame[i];
 1275		if (!dst) {
 1276			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 1277			if (!dst)
 1278				return -ENOMEM;
 1279			dst_state->frame[i] = dst;
 1280		}
 1281		err = copy_func_state(dst, src->frame[i]);
 1282		if (err)
 1283			return err;
 1284	}
 1285	return 0;
 1286}
 1287
 1288static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 1289{
 1290	while (st) {
 1291		u32 br = --st->branches;
 1292
 1293		/* WARN_ON(br > 1) technically makes sense here,
 1294		 * but see comment in push_stack(), hence:
 1295		 */
 1296		WARN_ONCE((int)br < 0,
 1297			  "BUG update_branch_counts:branches_to_explore=%d\n",
 1298			  br);
 1299		if (br)
 1300			break;
 1301		st = st->parent;
 1302	}
 1303}
 1304
 1305static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 1306		     int *insn_idx, bool pop_log)
 1307{
 1308	struct bpf_verifier_state *cur = env->cur_state;
 1309	struct bpf_verifier_stack_elem *elem, *head = env->head;
 1310	int err;
 1311
 1312	if (env->head == NULL)
 1313		return -ENOENT;
 1314
 1315	if (cur) {
 1316		err = copy_verifier_state(cur, &head->st);
 1317		if (err)
 1318			return err;
 1319	}
 1320	if (pop_log)
 1321		bpf_vlog_reset(&env->log, head->log_pos);
 1322	if (insn_idx)
 1323		*insn_idx = head->insn_idx;
 1324	if (prev_insn_idx)
 1325		*prev_insn_idx = head->prev_insn_idx;
 1326	elem = head->next;
 1327	free_verifier_state(&head->st, false);
 1328	kfree(head);
 1329	env->head = elem;
 1330	env->stack_size--;
 1331	return 0;
 1332}
 1333
 1334static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 1335					     int insn_idx, int prev_insn_idx,
 1336					     bool speculative)
 1337{
 1338	struct bpf_verifier_state *cur = env->cur_state;
 1339	struct bpf_verifier_stack_elem *elem;
 1340	int err;
 1341
 1342	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 1343	if (!elem)
 1344		goto err;
 1345
 1346	elem->insn_idx = insn_idx;
 1347	elem->prev_insn_idx = prev_insn_idx;
 1348	elem->next = env->head;
 1349	elem->log_pos = env->log.len_used;
 1350	env->head = elem;
 1351	env->stack_size++;
 1352	err = copy_verifier_state(&elem->st, cur);
 1353	if (err)
 1354		goto err;
 1355	elem->st.speculative |= speculative;
 1356	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 1357		verbose(env, "The sequence of %d jumps is too complex.\n",
 1358			env->stack_size);
 1359		goto err;
 1360	}
 1361	if (elem->st.parent) {
 1362		++elem->st.parent->branches;
 1363		/* WARN_ON(branches > 2) technically makes sense here,
 1364		 * but
 1365		 * 1. speculative states will bump 'branches' for non-branch
 1366		 * instructions
 1367		 * 2. is_state_visited() heuristics may decide not to create
 1368		 * a new state for a sequence of branches and all such current
 1369		 * and cloned states will be pointing to a single parent state
 1370		 * which might have large 'branches' count.
 1371		 */
 1372	}
 1373	return &elem->st;
 1374err:
 1375	free_verifier_state(env->cur_state, true);
 1376	env->cur_state = NULL;
 1377	/* pop all elements and return */
 1378	while (!pop_stack(env, NULL, NULL, false));
 1379	return NULL;
 1380}
 1381
 1382#define CALLER_SAVED_REGS 6
 1383static const int caller_saved[CALLER_SAVED_REGS] = {
 1384	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 1385};
 1386
 1387/* This helper doesn't clear reg->id */
 1388static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 1389{
 1390	reg->var_off = tnum_const(imm);
 1391	reg->smin_value = (s64)imm;
 1392	reg->smax_value = (s64)imm;
 1393	reg->umin_value = imm;
 1394	reg->umax_value = imm;
 1395
 1396	reg->s32_min_value = (s32)imm;
 1397	reg->s32_max_value = (s32)imm;
 1398	reg->u32_min_value = (u32)imm;
 1399	reg->u32_max_value = (u32)imm;
 1400}
 1401
 1402/* Mark the unknown part of a register (variable offset or scalar value) as
 1403 * known to have the value @imm.
 1404 */
 1405static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 1406{
 1407	/* Clear id, off, and union(map_ptr, range) */
 1408	memset(((u8 *)reg) + sizeof(reg->type), 0,
 1409	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
 1410	___mark_reg_known(reg, imm);
 1411}
 1412
 1413static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
 1414{
 1415	reg->var_off = tnum_const_subreg(reg->var_off, imm);
 1416	reg->s32_min_value = (s32)imm;
 1417	reg->s32_max_value = (s32)imm;
 1418	reg->u32_min_value = (u32)imm;
 1419	reg->u32_max_value = (u32)imm;
 1420}
 1421
 1422/* Mark the 'variable offset' part of a register as zero.  This should be
 1423 * used only on registers holding a pointer type.
 1424 */
 1425static void __mark_reg_known_zero(struct bpf_reg_state *reg)
 1426{
 1427	__mark_reg_known(reg, 0);
 1428}
 1429
 1430static void __mark_reg_const_zero(struct bpf_reg_state *reg)
 1431{
 1432	__mark_reg_known(reg, 0);
 1433	reg->type = SCALAR_VALUE;
 1434}
 1435
 1436static void mark_reg_known_zero(struct bpf_verifier_env *env,
 1437				struct bpf_reg_state *regs, u32 regno)
 1438{
 1439	if (WARN_ON(regno >= MAX_BPF_REG)) {
 1440		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
 1441		/* Something bad happened, let's kill all regs */
 1442		for (regno = 0; regno < MAX_BPF_REG; regno++)
 1443			__mark_reg_not_init(env, regs + regno);
 1444		return;
 1445	}
 1446	__mark_reg_known_zero(regs + regno);
 1447}
 1448
 1449static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
 1450			      bool first_slot)
 1451{
 1452	/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
 1453	 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
 1454	 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
 1455	 */
 1456	__mark_reg_known_zero(reg);
 1457	reg->type = CONST_PTR_TO_DYNPTR;
 1458	reg->dynptr.type = type;
 1459	reg->dynptr.first_slot = first_slot;
 1460}
 1461
 1462static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
 1463{
 1464	if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
 1465		const struct bpf_map *map = reg->map_ptr;
 1466
 1467		if (map->inner_map_meta) {
 1468			reg->type = CONST_PTR_TO_MAP;
 1469			reg->map_ptr = map->inner_map_meta;
 1470			/* transfer reg's id which is unique for every map_lookup_elem
 1471			 * as UID of the inner map.
 1472			 */
 1473			if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
 1474				reg->map_uid = reg->id;
 1475		} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 1476			reg->type = PTR_TO_XDP_SOCK;
 1477		} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
 1478			   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
 1479			reg->type = PTR_TO_SOCKET;
 1480		} else {
 1481			reg->type = PTR_TO_MAP_VALUE;
 1482		}
 1483		return;
 1484	}
 1485
 1486	reg->type &= ~PTR_MAYBE_NULL;
 1487}
 1488
 1489static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
 1490{
 1491	return type_is_pkt_pointer(reg->type);
 1492}
 1493
 1494static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
 1495{
 1496	return reg_is_pkt_pointer(reg) ||
 1497	       reg->type == PTR_TO_PACKET_END;
 1498}
 1499
 1500/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
 1501static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 1502				    enum bpf_reg_type which)
 1503{
 1504	/* The register can already have a range from prior markings.
 1505	 * This is fine as long as it hasn't been advanced from its
 1506	 * origin.
 1507	 */
 1508	return reg->type == which &&
 1509	       reg->id == 0 &&
 1510	       reg->off == 0 &&
 1511	       tnum_equals_const(reg->var_off, 0);
 1512}
 1513
 1514/* Reset the min/max bounds of a register */
 1515static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 1516{
 1517	reg->smin_value = S64_MIN;
 1518	reg->smax_value = S64_MAX;
 1519	reg->umin_value = 0;
 1520	reg->umax_value = U64_MAX;
 1521
 1522	reg->s32_min_value = S32_MIN;
 1523	reg->s32_max_value = S32_MAX;
 1524	reg->u32_min_value = 0;
 1525	reg->u32_max_value = U32_MAX;
 1526}
 1527
 1528static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
 1529{
 1530	reg->smin_value = S64_MIN;
 1531	reg->smax_value = S64_MAX;
 1532	reg->umin_value = 0;
 1533	reg->umax_value = U64_MAX;
 1534}
 1535
 1536static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
 1537{
 1538	reg->s32_min_value = S32_MIN;
 1539	reg->s32_max_value = S32_MAX;
 1540	reg->u32_min_value = 0;
 1541	reg->u32_max_value = U32_MAX;
 1542}
 1543
 1544static void __update_reg32_bounds(struct bpf_reg_state *reg)
 1545{
 1546	struct tnum var32_off = tnum_subreg(reg->var_off);
 1547
 1548	/* min signed is max(sign bit) | min(other bits) */
 1549	reg->s32_min_value = max_t(s32, reg->s32_min_value,
 1550			var32_off.value | (var32_off.mask & S32_MIN));
 1551	/* max signed is min(sign bit) | max(other bits) */
 1552	reg->s32_max_value = min_t(s32, reg->s32_max_value,
 1553			var32_off.value | (var32_off.mask & S32_MAX));
 1554	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
 1555	reg->u32_max_value = min(reg->u32_max_value,
 1556				 (u32)(var32_off.value | var32_off.mask));
 1557}
 1558
 1559static void __update_reg64_bounds(struct bpf_reg_state *reg)
 1560{
 1561	/* min signed is max(sign bit) | min(other bits) */
 1562	reg->smin_value = max_t(s64, reg->smin_value,
 1563				reg->var_off.value | (reg->var_off.mask & S64_MIN));
 1564	/* max signed is min(sign bit) | max(other bits) */
 1565	reg->smax_value = min_t(s64, reg->smax_value,
 1566				reg->var_off.value | (reg->var_off.mask & S64_MAX));
 1567	reg->umin_value = max(reg->umin_value, reg->var_off.value);
 1568	reg->umax_value = min(reg->umax_value,
 1569			      reg->var_off.value | reg->var_off.mask);
 1570}
 1571
 1572static void __update_reg_bounds(struct bpf_reg_state *reg)
 1573{
 1574	__update_reg32_bounds(reg);
 1575	__update_reg64_bounds(reg);
 1576}
 1577
 1578/* Uses signed min/max values to inform unsigned, and vice-versa */
 1579static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
 1580{
 1581	/* Learn sign from signed bounds.
 1582	 * If we cannot cross the sign boundary, then signed and unsigned bounds
 1583	 * are the same, so combine.  This works even in the negative case, e.g.
 1584	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 1585	 */
 1586	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
 1587		reg->s32_min_value = reg->u32_min_value =
 1588			max_t(u32, reg->s32_min_value, reg->u32_min_value);
 1589		reg->s32_max_value = reg->u32_max_value =
 1590			min_t(u32, reg->s32_max_value, reg->u32_max_value);
 1591		return;
 1592	}
 1593	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
 1594	 * boundary, so we must be careful.
 1595	 */
 1596	if ((s32)reg->u32_max_value >= 0) {
 1597		/* Positive.  We can't learn anything from the smin, but smax
 1598		 * is positive, hence safe.
 1599		 */
 1600		reg->s32_min_value = reg->u32_min_value;
 1601		reg->s32_max_value = reg->u32_max_value =
 1602			min_t(u32, reg->s32_max_value, reg->u32_max_value);
 1603	} else if ((s32)reg->u32_min_value < 0) {
 1604		/* Negative.  We can't learn anything from the smax, but smin
 1605		 * is negative, hence safe.
 1606		 */
 1607		reg->s32_min_value = reg->u32_min_value =
 1608			max_t(u32, reg->s32_min_value, reg->u32_min_value);
 1609		reg->s32_max_value = reg->u32_max_value;
 1610	}
 1611}
 1612
 1613static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
 1614{
 1615	/* Learn sign from signed bounds.
 1616	 * If we cannot cross the sign boundary, then signed and unsigned bounds
 1617	 * are the same, so combine.  This works even in the negative case, e.g.
 1618	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 1619	 */
 1620	if (reg->smin_value >= 0 || reg->smax_value < 0) {
 1621		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 1622							  reg->umin_value);
 1623		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 1624							  reg->umax_value);
 1625		return;
 1626	}
 1627	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
 1628	 * boundary, so we must be careful.
 1629	 */
 1630	if ((s64)reg->umax_value >= 0) {
 1631		/* Positive.  We can't learn anything from the smin, but smax
 1632		 * is positive, hence safe.
 1633		 */
 1634		reg->smin_value = reg->umin_value;
 1635		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 1636							  reg->umax_value);
 1637	} else if ((s64)reg->umin_value < 0) {
 1638		/* Negative.  We can't learn anything from the smax, but smin
 1639		 * is negative, hence safe.
 1640		 */
 1641		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 1642							  reg->umin_value);
 1643		reg->smax_value = reg->umax_value;
 1644	}
 1645}
 1646
 1647static void __reg_deduce_bounds(struct bpf_reg_state *reg)
 1648{
 1649	__reg32_deduce_bounds(reg);
 1650	__reg64_deduce_bounds(reg);
 1651}
 1652
 1653/* Attempts to improve var_off based on unsigned min/max information */
 1654static void __reg_bound_offset(struct bpf_reg_state *reg)
 1655{
 1656	struct tnum var64_off = tnum_intersect(reg->var_off,
 1657					       tnum_range(reg->umin_value,
 1658							  reg->umax_value));
 1659	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
 1660						tnum_range(reg->u32_min_value,
 1661							   reg->u32_max_value));
 1662
 1663	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
 1664}
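     /* For illustration (hypothetical values, 64-bit half only): with
      * var_off = (0x0; 0xff), i.e. only the low 8 bits unknown, and
      * umin_value = 0, umax_value = 0x3f, tnum_range(0, 0x3f) yields
      * (0x0; 0x3f) and the intersection tightens var_off to (0x0; 0x3f),
      * so bits 6 and 7 become known zero.
      */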
 1665
 1666static void reg_bounds_sync(struct bpf_reg_state *reg)
 1667{
 1668	/* We might have learned new bounds from the var_off. */
 1669	__update_reg_bounds(reg);
 1670	/* We might have learned something about the sign bit. */
 1671	__reg_deduce_bounds(reg);
 1672	/* We might have learned some bits from the bounds. */
 1673	__reg_bound_offset(reg);
 1674	/* Intersecting with the old var_off might have improved our bounds
 1675	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
 1676	 * then new var_off is (0; 0x7f...fc) which improves our umax.
 1677	 */
 1678	__update_reg_bounds(reg);
 1679}
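     /* A rough end-to-end sketch with made-up numbers: starting from
      * var_off = (0x0; 0xf0) and umin_value = 0, umax_value = 0x7f (e.g.
      * learned from a branch), and assuming the 32-bit bounds mirror the
      * 64-bit ones, the __reg_bound_offset() pass intersects (0x0; 0xf0)
      * with tnum_range(0, 0x7f) = (0x0; 0x7f), giving var_off = (0x0; 0x70),
      * and the trailing __update_reg_bounds() then lowers umax_value to 0x70.
      */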
 1680
 1681static bool __reg32_bound_s64(s32 a)
 1682{
 1683	return a >= 0 && a <= S32_MAX;
 1684}
 1685
 1686static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
 1687{
 1688	reg->umin_value = reg->u32_min_value;
 1689	reg->umax_value = reg->u32_max_value;
 1690
 1691	/* Attempt to pull the 32-bit signed bounds into the 64-bit bounds, but they
 1692	 * must be non-negative; otherwise set worst-case bounds and refine them later
 1693	 * from the tnum.

 1694	 */
 1695	if (__reg32_bound_s64(reg->s32_min_value) &&
 1696	    __reg32_bound_s64(reg->s32_max_value)) {
 1697		reg->smin_value = reg->s32_min_value;
 1698		reg->smax_value = reg->s32_max_value;
 1699	} else {
 1700		reg->smin_value = 0;
 1701		reg->smax_value = U32_MAX;
 1702	}
 1703}
 1704
 1705static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
 1706{
 1707	/* Special case when the 64-bit register has its upper 32 bits
 1708	 * zeroed. This typically happens after a zext or <<32, >>32 sequence,
 1709	 * allowing us to use the 32-bit bounds directly.
 1710	 */
 1711	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
 1712		__reg_assign_32_into_64(reg);
 1713	} else {
 1714		/* Otherwise the best we can do is push the lower 32-bit known and
 1715		 * unknown bits into the register (var_off set from the jmp logic),
 1716		 * then learn as much as possible from the 64-bit tnum's
 1717		 * known and unknown bits. The previous smin/smax bounds are
 1718		 * invalid here because of the jmp32 compare, so mark them unknown
 1719		 * so they do not impact the tnum bounds calculation.
 1720		 */
 1721		__mark_reg64_unbounded(reg);
 1722	}
 1723	reg_bounds_sync(reg);
 1724}
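     /* For example (hypothetical sequence): if the register was last written by
      * a 32-bit ALU op such as
      *   w1 = w2
      * its upper 32 bits are tracked as zero, so tnum_clear_subreg() of its
      * var_off is constant 0 and the 32-bit bounds can be copied straight into
      * the 64-bit bounds via __reg_assign_32_into_64().
      */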
 1725
 1726static bool __reg64_bound_s32(s64 a)
 1727{
 1728	return a >= S32_MIN && a <= S32_MAX;
 1729}
 1730
 1731static bool __reg64_bound_u32(u64 a)
 1732{
 1733	return a >= U32_MIN && a <= U32_MAX;
 1734}
 1735
 1736static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
 1737{
 1738	__mark_reg32_unbounded(reg);
 1739	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
 1740		reg->s32_min_value = (s32)reg->smin_value;
 1741		reg->s32_max_value = (s32)reg->smax_value;
 1742	}
 1743	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
 1744		reg->u32_min_value = (u32)reg->umin_value;
 1745		reg->u32_max_value = (u32)reg->umax_value;
 1746	}
 1747	reg_bounds_sync(reg);
 1748}
 1749
 1750/* Mark a register as having a completely unknown (scalar) value. */
 1751static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 1752			       struct bpf_reg_state *reg)
 1753{
 1754	/*
 1755	 * Clear type, id, off, and union(map_ptr, range) and
 1756	 * padding between 'type' and union
 1757	 */
 1758	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
 1759	reg->type = SCALAR_VALUE;
 1760	reg->var_off = tnum_unknown;
 1761	reg->frameno = 0;
 1762	reg->precise = !env->bpf_capable;
 1763	__mark_reg_unbounded(reg);
 1764}
 1765
 1766static void mark_reg_unknown(struct bpf_verifier_env *env,
 1767			     struct bpf_reg_state *regs, u32 regno)
 1768{
 1769	if (WARN_ON(regno >= MAX_BPF_REG)) {
 1770		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
 1771		/* Something bad happened, let's kill all regs except FP */
 1772		for (regno = 0; regno < BPF_REG_FP; regno++)
 1773			__mark_reg_not_init(env, regs + regno);
 1774		return;
 1775	}
 1776	__mark_reg_unknown(env, regs + regno);
 1777}
 1778
 1779static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 1780				struct bpf_reg_state *reg)
 1781{
 1782	__mark_reg_unknown(env, reg);
 1783	reg->type = NOT_INIT;
 1784}
 1785
 1786static void mark_reg_not_init(struct bpf_verifier_env *env,
 1787			      struct bpf_reg_state *regs, u32 regno)
 1788{
 1789	if (WARN_ON(regno >= MAX_BPF_REG)) {
 1790		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
 1791		/* Something bad happened, let's kill all regs except FP */
 1792		for (regno = 0; regno < BPF_REG_FP; regno++)
 1793			__mark_reg_not_init(env, regs + regno);
 1794		return;
 1795	}
 1796	__mark_reg_not_init(env, regs + regno);
 1797}
 1798
 1799static void mark_btf_ld_reg(struct bpf_verifier_env *env,
 1800			    struct bpf_reg_state *regs, u32 regno,
 1801			    enum bpf_reg_type reg_type,
 1802			    struct btf *btf, u32 btf_id,
 1803			    enum bpf_type_flag flag)
 1804{
 1805	if (reg_type == SCALAR_VALUE) {
 1806		mark_reg_unknown(env, regs, regno);
 1807		return;
 1808	}
 1809	mark_reg_known_zero(env, regs, regno);
 1810	regs[regno].type = PTR_TO_BTF_ID | flag;
 1811	regs[regno].btf = btf;
 1812	regs[regno].btf_id = btf_id;
 1813}
 1814
 1815#define DEF_NOT_SUBREG	(0)
 1816static void init_reg_state(struct bpf_verifier_env *env,
 1817			   struct bpf_func_state *state)
 1818{
 1819	struct bpf_reg_state *regs = state->regs;
 1820	int i;
 1821
 1822	for (i = 0; i < MAX_BPF_REG; i++) {
 1823		mark_reg_not_init(env, regs, i);
 1824		regs[i].live = REG_LIVE_NONE;
 1825		regs[i].parent = NULL;
 1826		regs[i].subreg_def = DEF_NOT_SUBREG;
 1827	}
 1828
 1829	/* frame pointer */
 1830	regs[BPF_REG_FP].type = PTR_TO_STACK;
 1831	mark_reg_known_zero(env, regs, BPF_REG_FP);
 1832	regs[BPF_REG_FP].frameno = state->frameno;
 1833}
 1834
 1835#define BPF_MAIN_FUNC (-1)
 1836static void init_func_state(struct bpf_verifier_env *env,
 1837			    struct bpf_func_state *state,
 1838			    int callsite, int frameno, int subprogno)
 1839{
 1840	state->callsite = callsite;
 1841	state->frameno = frameno;
 1842	state->subprogno = subprogno;
 1843	state->callback_ret_range = tnum_range(0, 0);
 1844	init_reg_state(env, state);
 1845	mark_verifier_state_scratched(env);
 1846}
 1847
 1848/* Similar to push_stack(), but for async callbacks */
 1849static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
 1850						int insn_idx, int prev_insn_idx,
 1851						int subprog)
 1852{
 1853	struct bpf_verifier_stack_elem *elem;
 1854	struct bpf_func_state *frame;
 1855
 1856	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 1857	if (!elem)
 1858		goto err;
 1859
 1860	elem->insn_idx = insn_idx;
 1861	elem->prev_insn_idx = prev_insn_idx;
 1862	elem->next = env->head;
 1863	elem->log_pos = env->log.len_used;
 1864	env->head = elem;
 1865	env->stack_size++;
 1866	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 1867		verbose(env,
 1868			"The sequence of %d jumps is too complex for async cb.\n",
 1869			env->stack_size);
 1870		goto err;
 1871	}
 1872	/* Unlike push_stack() do not copy_verifier_state().
 1873	 * The caller state doesn't matter.
 1874	 * This is async callback. It starts in a fresh stack.
 1875	 * Initialize it similar to do_check_common().
 1876	 */
 1877	elem->st.branches = 1;
 1878	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
 1879	if (!frame)
 1880		goto err;
 1881	init_func_state(env, frame,
 1882			BPF_MAIN_FUNC /* callsite */,
 1883			0 /* frameno within this callchain */,
 1884			subprog /* subprog number within this prog */);
 1885	elem->st.frame[0] = frame;
 1886	return &elem->st;
 1887err:
 1888	free_verifier_state(env->cur_state, true);
 1889	env->cur_state = NULL;
 1890	/* pop all elements and return */
 1891	while (!pop_stack(env, NULL, NULL, false));
 1892	return NULL;
 1893}
 1894
 1895
 1896enum reg_arg_type {
 1897	SRC_OP,		/* register is used as source operand */
 1898	DST_OP,		/* register is used as destination operand */
 1899	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 1900};
 1901
 1902static int cmp_subprogs(const void *a, const void *b)
 1903{
 1904	return ((struct bpf_subprog_info *)a)->start -
 1905	       ((struct bpf_subprog_info *)b)->start;
 1906}
 1907
 1908static int find_subprog(struct bpf_verifier_env *env, int off)
 1909{
 1910	struct bpf_subprog_info *p;
 1911
 1912	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
 1913		    sizeof(env->subprog_info[0]), cmp_subprogs);
 1914	if (!p)
 1915		return -ENOENT;
 1916	return p - env->subprog_info;
 1917
 1918}
 1919
 1920static int add_subprog(struct bpf_verifier_env *env, int off)
 1921{
 1922	int insn_cnt = env->prog->len;
 1923	int ret;
 1924
 1925	if (off >= insn_cnt || off < 0) {
 1926		verbose(env, "call to invalid destination\n");
 1927		return -EINVAL;
 1928	}
 1929	ret = find_subprog(env, off);
 1930	if (ret >= 0)
 1931		return ret;
 1932	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
 1933		verbose(env, "too many subprograms\n");
 1934		return -E2BIG;
 1935	}
 1936	/* determine subprog starts. The end is one before the next starts */
 1937	env->subprog_info[env->subprog_cnt++].start = off;
 1938	sort(env->subprog_info, env->subprog_cnt,
 1939	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
 1940	return env->subprog_cnt - 1;
 1941}
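     /* For illustration (hypothetical layout): a program whose only BPF-to-BPF
      * call sits at insn 5 with imm = 6 targets insn 5 + 6 + 1 = 12, so after
      * add_subprog_and_kfunc() below runs, subprog_info[0].start == 0,
      * subprog_info[1].start == 12, and the extra "fake exit" entry added there
      * has start == prog->len.
      */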
 1942
 1943#define MAX_KFUNC_DESCS 256
 1944#define MAX_KFUNC_BTFS	256
 1945
 1946struct bpf_kfunc_desc {
 1947	struct btf_func_model func_model;
 1948	u32 func_id;
 1949	s32 imm;
 1950	u16 offset;
 1951};
 1952
 1953struct bpf_kfunc_btf {
 1954	struct btf *btf;
 1955	struct module *module;
 1956	u16 offset;
 1957};
 1958
 1959struct bpf_kfunc_desc_tab {
 1960	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
 1961	u32 nr_descs;
 1962};
 1963
 1964struct bpf_kfunc_btf_tab {
 1965	struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
 1966	u32 nr_descs;
 1967};
 1968
 1969static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
 1970{
 1971	const struct bpf_kfunc_desc *d0 = a;
 1972	const struct bpf_kfunc_desc *d1 = b;
 1973
 1974	/* func_id is not greater than BTF_MAX_TYPE */
 1975	return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
 1976}
 1977
 1978static int kfunc_btf_cmp_by_off(const void *a, const void *b)
 1979{
 1980	const struct bpf_kfunc_btf *d0 = a;
 1981	const struct bpf_kfunc_btf *d1 = b;
 1982
 1983	return d0->offset - d1->offset;
 1984}
 1985
 1986static const struct bpf_kfunc_desc *
 1987find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
 1988{
 1989	struct bpf_kfunc_desc desc = {
 1990		.func_id = func_id,
 1991		.offset = offset,
 1992	};
 1993	struct bpf_kfunc_desc_tab *tab;
 1994
 1995	tab = prog->aux->kfunc_tab;
 1996	return bsearch(&desc, tab->descs, tab->nr_descs,
 1997		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
 1998}
 1999
 2000static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
 2001					 s16 offset)
 2002{
 2003	struct bpf_kfunc_btf kf_btf = { .offset = offset };
 2004	struct bpf_kfunc_btf_tab *tab;
 2005	struct bpf_kfunc_btf *b;
 2006	struct module *mod;
 2007	struct btf *btf;
 2008	int btf_fd;
 2009
 2010	tab = env->prog->aux->kfunc_btf_tab;
 2011	b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
 2012		    sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
 2013	if (!b) {
 2014		if (tab->nr_descs == MAX_KFUNC_BTFS) {
 2015			verbose(env, "too many different module BTFs\n");
 2016			return ERR_PTR(-E2BIG);
 2017		}
 2018
 2019		if (bpfptr_is_null(env->fd_array)) {
 2020			verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
 2021			return ERR_PTR(-EPROTO);
 2022		}
 2023
 2024		if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
 2025					    offset * sizeof(btf_fd),
 2026					    sizeof(btf_fd)))
 2027			return ERR_PTR(-EFAULT);
 2028
 2029		btf = btf_get_by_fd(btf_fd);
 2030		if (IS_ERR(btf)) {
 2031			verbose(env, "invalid module BTF fd specified\n");
 2032			return btf;
 2033		}
 2034
 2035		if (!btf_is_module(btf)) {
 2036			verbose(env, "BTF fd for kfunc is not a module BTF\n");
 2037			btf_put(btf);
 2038			return ERR_PTR(-EINVAL);
 2039		}
 2040
 2041		mod = btf_try_get_module(btf);
 2042		if (!mod) {
 2043			btf_put(btf);
 2044			return ERR_PTR(-ENXIO);
 2045		}
 2046
 2047		b = &tab->descs[tab->nr_descs++];
 2048		b->btf = btf;
 2049		b->module = mod;
 2050		b->offset = offset;
 2051
 2052		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 2053		     kfunc_btf_cmp_by_off, NULL);
 2054	}
 2055	return b->btf;
 2056}
 2057
 2058void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
 2059{
 2060	if (!tab)
 2061		return;
 2062
 2063	while (tab->nr_descs--) {
 2064		module_put(tab->descs[tab->nr_descs].module);
 2065		btf_put(tab->descs[tab->nr_descs].btf);
 2066	}
 2067	kfree(tab);
 2068}
 2069
 2070static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
 2071{
 2072	if (offset) {
 2073		if (offset < 0) {
 2074			/* In the future, this could be relaxed to increase the limit
 2075			 * of the fd index into fd_array by interpreting it as a u16.
 2076			 */
 2077			verbose(env, "negative offset disallowed for kernel module function call\n");
 2078			return ERR_PTR(-EINVAL);
 2079		}
 2080
 2081		return __find_kfunc_desc_btf(env, offset);
 2082	}
 2083	return btf_vmlinux ?: ERR_PTR(-ENOENT);
 2084}
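     /* Example (hypothetical fd_array): for a kfunc call insn with off == 2, the
      * verifier reads fd_array[2] supplied at program load time; that fd must
      * refer to a module BTF, and the module is pinned via btf_try_get_module()
      * until bpf_free_kfunc_btf_tab() releases it. off == 0 means the kfunc is
      * resolved against the vmlinux BTF instead.
      */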
 2085
 2086static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
 2087{
 2088	const struct btf_type *func, *func_proto;
 2089	struct bpf_kfunc_btf_tab *btf_tab;
 2090	struct bpf_kfunc_desc_tab *tab;
 2091	struct bpf_prog_aux *prog_aux;
 2092	struct bpf_kfunc_desc *desc;
 2093	const char *func_name;
 2094	struct btf *desc_btf;
 2095	unsigned long call_imm;
 2096	unsigned long addr;
 2097	int err;
 2098
 2099	prog_aux = env->prog->aux;
 2100	tab = prog_aux->kfunc_tab;
 2101	btf_tab = prog_aux->kfunc_btf_tab;
 2102	if (!tab) {
 2103		if (!btf_vmlinux) {
 2104			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
 2105			return -ENOTSUPP;
 2106		}
 2107
 2108		if (!env->prog->jit_requested) {
 2109			verbose(env, "JIT is required for calling kernel function\n");
 2110			return -ENOTSUPP;
 2111		}
 2112
 2113		if (!bpf_jit_supports_kfunc_call()) {
 2114			verbose(env, "JIT does not support calling kernel function\n");
 2115			return -ENOTSUPP;
 2116		}
 2117
 2118		if (!env->prog->gpl_compatible) {
 2119			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
 2120			return -EINVAL;
 2121		}
 2122
 2123		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
 2124		if (!tab)
 2125			return -ENOMEM;
 2126		prog_aux->kfunc_tab = tab;
 2127	}
 2128
 2129	/* func_id == 0 is always invalid, but instead of returning an error, be
 2130	 * conservative and wait until the code elimination pass before returning
 2131	 * an error, so that invalid calls that get pruned out are still allowed in
 2132	 * BPF programs loaded from userspace.  It is also required that the offset
 2133	 * be untouched for such calls.
 2134	 */
 2135	if (!func_id && !offset)
 2136		return 0;
 2137
 2138	if (!btf_tab && offset) {
 2139		btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
 2140		if (!btf_tab)
 2141			return -ENOMEM;
 2142		prog_aux->kfunc_btf_tab = btf_tab;
 2143	}
 2144
 2145	desc_btf = find_kfunc_desc_btf(env, offset);
 2146	if (IS_ERR(desc_btf)) {
 2147		verbose(env, "failed to find BTF for kernel function\n");
 2148		return PTR_ERR(desc_btf);
 2149	}
 2150
 2151	if (find_kfunc_desc(env->prog, func_id, offset))
 2152		return 0;
 2153
 2154	if (tab->nr_descs == MAX_KFUNC_DESCS) {
 2155		verbose(env, "too many different kernel function calls\n");
 2156		return -E2BIG;
 2157	}
 2158
 2159	func = btf_type_by_id(desc_btf, func_id);
 2160	if (!func || !btf_type_is_func(func)) {
 2161		verbose(env, "kernel btf_id %u is not a function\n",
 2162			func_id);
 2163		return -EINVAL;
 2164	}
 2165	func_proto = btf_type_by_id(desc_btf, func->type);
 2166	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
 2167		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
 2168			func_id);
 2169		return -EINVAL;
 2170	}
 2171
 2172	func_name = btf_name_by_offset(desc_btf, func->name_off);
 2173	addr = kallsyms_lookup_name(func_name);
 2174	if (!addr) {
 2175		verbose(env, "cannot find address for kernel function %s\n",
 2176			func_name);
 2177		return -EINVAL;
 2178	}
 2179
 2180	call_imm = BPF_CALL_IMM(addr);
 2181	/* Check whether or not the relative offset overflows desc->imm */
 2182	if ((unsigned long)(s32)call_imm != call_imm) {
 2183		verbose(env, "address of kernel function %s is out of range\n",
 2184			func_name);
 2185		return -EINVAL;
 2186	}
 2187
 2188	desc = &tab->descs[tab->nr_descs++];
 2189	desc->func_id = func_id;
 2190	desc->imm = call_imm;
 2191	desc->offset = offset;
 2192	err = btf_distill_func_proto(&env->log, desc_btf,
 2193				     func_proto, func_name,
 2194				     &desc->func_model);
 2195	if (!err)
 2196		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 2197		     kfunc_desc_cmp_by_id_off, NULL);
 2198	return err;
 2199}
 2200
 2201static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
 2202{
 2203	const struct bpf_kfunc_desc *d0 = a;
 2204	const struct bpf_kfunc_desc *d1 = b;
 2205
 2206	if (d0->imm > d1->imm)
 2207		return 1;
 2208	else if (d0->imm < d1->imm)
 2209		return -1;
 2210	return 0;
 2211}
 2212
 2213static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
 2214{
 2215	struct bpf_kfunc_desc_tab *tab;
 2216
 2217	tab = prog->aux->kfunc_tab;
 2218	if (!tab)
 2219		return;
 2220
 2221	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 2222	     kfunc_desc_cmp_by_imm, NULL);
 2223}
 2224
 2225bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
 2226{
 2227	return !!prog->aux->kfunc_tab;
 2228}
 2229
 2230const struct btf_func_model *
 2231bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 2232			 const struct bpf_insn *insn)
 2233{
 2234	const struct bpf_kfunc_desc desc = {
 2235		.imm = insn->imm,
 2236	};
 2237	const struct bpf_kfunc_desc *res;
 2238	struct bpf_kfunc_desc_tab *tab;
 2239
 2240	tab = prog->aux->kfunc_tab;
 2241	res = bsearch(&desc, tab->descs, tab->nr_descs,
 2242		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
 2243
 2244	return res ? &res->func_model : NULL;
 2245}
 2246
 2247static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
 2248{
 2249	struct bpf_subprog_info *subprog = env->subprog_info;
 2250	struct bpf_insn *insn = env->prog->insnsi;
 2251	int i, ret, insn_cnt = env->prog->len;
 2252
 2253	/* Add entry function. */
 2254	ret = add_subprog(env, 0);
 2255	if (ret)
 2256		return ret;
 2257
 2258	for (i = 0; i < insn_cnt; i++, insn++) {
 2259		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
 2260		    !bpf_pseudo_kfunc_call(insn))
 2261			continue;
 2262
 2263		if (!env->bpf_capable) {
 2264			verbose(env, "loading/calling other bpf or kernel functions is only allowed for CAP_BPF and CAP_SYS_ADMIN\n");
 2265			return -EPERM;
 2266		}
 2267
 2268		if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
 2269			ret = add_subprog(env, i + insn->imm + 1);
 2270		else
 2271			ret = add_kfunc_call(env, insn->imm, insn->off);
 2272
 2273		if (ret < 0)
 2274			return ret;
 2275	}
 2276
 2277	/* Add a fake 'exit' subprog which could simplify subprog iteration
 2278	 * logic. 'subprog_cnt' should not be increased.
 2279	 */
 2280	subprog[env->subprog_cnt].start = insn_cnt;
 2281
 2282	if (env->log.level & BPF_LOG_LEVEL2)
 2283		for (i = 0; i < env->subprog_cnt; i++)
 2284			verbose(env, "func#%d @%d\n", i, subprog[i].start);
 2285
 2286	return 0;
 2287}
 2288
 2289static int check_subprogs(struct bpf_verifier_env *env)
 2290{
 2291	int i, subprog_start, subprog_end, off, cur_subprog = 0;
 2292	struct bpf_subprog_info *subprog = env->subprog_info;
 2293	struct bpf_insn *insn = env->prog->insnsi;
 2294	int insn_cnt = env->prog->len;
 2295
 2296	/* now check that all jumps are within the same subprog */
 2297	subprog_start = subprog[cur_subprog].start;
 2298	subprog_end = subprog[cur_subprog + 1].start;
 2299	for (i = 0; i < insn_cnt; i++) {
 2300		u8 code = insn[i].code;
 2301
 2302		if (code == (BPF_JMP | BPF_CALL) &&
 2303		    insn[i].imm == BPF_FUNC_tail_call &&
 2304		    insn[i].src_reg != BPF_PSEUDO_CALL)
 2305			subprog[cur_subprog].has_tail_call = true;
 2306		if (BPF_CLASS(code) == BPF_LD &&
 2307		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
 2308			subprog[cur_subprog].has_ld_abs = true;
 2309		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
 2310			goto next;
 2311		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 2312			goto next;
 2313		off = i + insn[i].off + 1;
 2314		if (off < subprog_start || off >= subprog_end) {
 2315			verbose(env, "jump out of range from insn %d to %d\n", i, off);
 2316			return -EINVAL;
 2317		}
 2318next:
 2319		if (i == subprog_end - 1) {
 2320			/* to avoid fall-through from one subprog into another
 2321			 * the last insn of the subprog should be either exit
 2322			 * or unconditional jump back
 2323			 */
 2324			if (code != (BPF_JMP | BPF_EXIT) &&
 2325			    code != (BPF_JMP | BPF_JA)) {
 2326				verbose(env, "last insn is not an exit or jmp\n");
 2327				return -EINVAL;
 2328			}
 2329			subprog_start = subprog_end;
 2330			cur_subprog++;
 2331			if (cur_subprog < env->subprog_cnt)
 2332				subprog_end = subprog[cur_subprog + 1].start;
 2333		}
 2334	}
 2335	return 0;
 2336}
 2337
 2338/* Parentage chain of this register (or stack slot) should take care of all
 2339 * issues like callee-saved registers, stack slot allocation time, etc.
 2340 */
 2341static int mark_reg_read(struct bpf_verifier_env *env,
 2342			 const struct bpf_reg_state *state,
 2343			 struct bpf_reg_state *parent, u8 flag)
 2344{
 2345	bool writes = parent == state->parent; /* Observe write marks */
 2346	int cnt = 0;
 2347
 2348	while (parent) {
 2349		/* if read wasn't screened by an earlier write ... */
 2350		if (writes && state->live & REG_LIVE_WRITTEN)
 2351			break;
 2352		if (parent->live & REG_LIVE_DONE) {
 2353			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
 2354				reg_type_str(env, parent->type),
 2355				parent->var_off.value, parent->off);
 2356			return -EFAULT;
 2357		}
 2358		/* The first condition is more likely to be true than the
 2359		 * second, so check it first.
 2360		 */
 2361		if ((parent->live & REG_LIVE_READ) == flag ||
 2362		    parent->live & REG_LIVE_READ64)
 2363			/* The parentage chain never changes and
 2364			 * this parent was already marked as LIVE_READ.
 2365			 * There is no need to keep walking the chain again and
 2366			 * keep re-marking all parents as LIVE_READ.
 2367			 * This case happens when the same register is read
 2368			 * multiple times without writes into it in-between.
 2369			 * Also, if parent has the stronger REG_LIVE_READ64 set,
 2370			 * then no need to set the weak REG_LIVE_READ32.
 2371			 */
 2372			break;
 2373		/* ... then we depend on parent's value */
 2374		parent->live |= flag;
 2375		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
 2376		if (flag == REG_LIVE_READ64)
 2377			parent->live &= ~REG_LIVE_READ32;
 2378		state = parent;
 2379		parent = state->parent;
 2380		writes = true;
 2381		cnt++;
 2382	}
 2383
 2384	if (env->longest_mark_read_walk < cnt)
 2385		env->longest_mark_read_walk = cnt;
 2386	return 0;
 2387}
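     /* For illustration: a 64-bit read of r6 in the current state calls
      * mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64); the loop then
      * walks r6's parentage chain, OR-ing REG_LIVE_READ64 into each parent
      * (clearing the weaker REG_LIVE_READ32), and stops early once the read
      * is screened by a REG_LIVE_WRITTEN mark or the parent already carries
      * an equal or stronger read mark.
      */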
 2388
 2389/* This function is supposed to be used by the following 32-bit optimization
 2390 * code only. It returns TRUE if the source or destination register operates
 2391 * on 64 bits; otherwise it returns FALSE.
 2392 */
 2393static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
 2394		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
 2395{
 2396	u8 code, class, op;
 2397
 2398	code = insn->code;
 2399	class = BPF_CLASS(code);
 2400	op = BPF_OP(code);
 2401	if (class == BPF_JMP) {
 2402		/* BPF_EXIT for "main" will reach here. Return TRUE
 2403		 * conservatively.
 2404		 */
 2405		if (op == BPF_EXIT)
 2406			return true;
 2407		if (op == BPF_CALL) {
 2408			/* A BPF-to-BPF call will reach here because caller-saved
 2409			 * clobbers are marked with DST_OP_NO_MARK, for which we
 2410			 * don't care about the register def because they are
 2411			 * already marked as NOT_INIT anyway.
 2412			 */
 2413			if (insn->src_reg == BPF_PSEUDO_CALL)
 2414				return false;
 2415			/* Helper call will reach here because of arg type
 2416			 * check, conservatively return TRUE.
 2417			 */
 2418			if (t == SRC_OP)
 2419				return true;
 2420
 2421			return false;
 2422		}
 2423	}
 2424
 2425	if (class == BPF_ALU64 || class == BPF_JMP ||
 2426	    /* BPF_END always uses the BPF_ALU class. */
 2427	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
 2428		return true;
 2429
 2430	if (class == BPF_ALU || class == BPF_JMP32)
 2431		return false;
 2432
 2433	if (class == BPF_LDX) {
 2434		if (t != SRC_OP)
 2435			return BPF_SIZE(code) == BPF_DW;
 2436		/* LDX source must be ptr. */
 2437		return true;
 2438	}
 2439
 2440	if (class == BPF_STX) {
 2441		/* BPF_STX (including atomic variants) has multiple source
 2442		 * operands, one of which is a ptr. Check whether the caller is
 2443		 * asking about it.
 2444		 */
 2445		if (t == SRC_OP && reg->type != SCALAR_VALUE)
 2446			return true;
 2447		return BPF_SIZE(code) == BPF_DW;
 2448	}
 2449
 2450	if (class == BPF_LD) {
 2451		u8 mode = BPF_MODE(code);
 2452
 2453		/* LD_IMM64 */
 2454		if (mode == BPF_IMM)
 2455			return true;
 2456
 2457		/* Both LD_IND and LD_ABS return 32-bit data. */
 2458		if (t != SRC_OP)
 2459			return  false;
 2460
 2461		/* Implicit ctx ptr. */
 2462		if (regno == BPF_REG_6)
 2463			return true;
 2464
 2465		/* Explicit source could be any width. */
 2466		return true;
 2467	}
 2468
 2469	if (class == BPF_ST)
 2470		/* The only source register for BPF_ST is a ptr. */
 2471		return true;
 2472
 2473	/* Conservatively return true at default. */
 2474	return true;
 2475}
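     /* For example: "r3 += r4" (BPF_ALU64) is treated as a 64-bit def/use and
      * returns true, while "w3 += w4" (BPF_ALU) only defines the low 32 bits
      * and returns false, which feeds the 32-bit zero-extension bookkeeping
      * in insn_has_def32()/mark_insn_zext() below.
      */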
 2476
 2477/* Return the regno defined by the insn, or -1. */
 2478static int insn_def_regno(const struct bpf_insn *insn)
 2479{
 2480	switch (BPF_CLASS(insn->code)) {
 2481	case BPF_JMP:
 2482	case BPF_JMP32:
 2483	case BPF_ST:
 2484		return -1;
 2485	case BPF_STX:
 2486		if (BPF_MODE(insn->code) == BPF_ATOMIC &&
 2487		    (insn->imm & BPF_FETCH)) {
 2488			if (insn->imm == BPF_CMPXCHG)
 2489				return BPF_REG_0;
 2490			else
 2491				return insn->src_reg;
 2492		} else {
 2493			return -1;
 2494		}
 2495	default:
 2496		return insn->dst_reg;
 2497	}
 2498}
 2499
 2500/* Return TRUE if INSN has defined any 32-bit value explicitly. */
 2501static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
 2502{
 2503	int dst_reg = insn_def_regno(insn);
 2504
 2505	if (dst_reg == -1)
 2506		return false;
 2507
 2508	return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
 2509}
 2510
 2511static void mark_insn_zext(struct bpf_verifier_env *env,
 2512			   struct bpf_reg_state *reg)
 2513{
 2514	s32 def_idx = reg->subreg_def;
 2515
 2516	if (def_idx == DEF_NOT_SUBREG)
 2517		return;
 2518
 2519	env->insn_aux_data[def_idx - 1].zext_dst = true;
 2520	/* The dst will be zero extended, so won't be sub-register anymore. */
 2521	reg->subreg_def = DEF_NOT_SUBREG;
 2522}
 2523
 2524static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 2525			 enum reg_arg_type t)
 2526{
 2527	struct bpf_verifier_state *vstate = env->cur_state;
 2528	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 2529	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
 2530	struct bpf_reg_state *reg, *regs = state->regs;
 2531	bool rw64;
 2532
 2533	if (regno >= MAX_BPF_REG) {
 2534		verbose(env, "R%d is invalid\n", regno);
 2535		return -EINVAL;
 2536	}
 2537
 2538	mark_reg_scratched(env, regno);
 2539
 2540	reg = &regs[regno];
 2541	rw64 = is_reg64(env, insn, regno, reg, t);
 2542	if (t == SRC_OP) {
 2543		/* check whether register used as source operand can be read */
 2544		if (reg->type == NOT_INIT) {
 2545			verbose(env, "R%d !read_ok\n", regno);
 2546			return -EACCES;
 2547		}
 2548		/* We don't need to worry about FP liveness because it's read-only */
 2549		if (regno == BPF_REG_FP)
 2550			return 0;
 2551
 2552		if (rw64)
 2553			mark_insn_zext(env, reg);
 2554
 2555		return mark_reg_read(env, reg, reg->parent,
 2556				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
 2557	} else {
 2558		/* check whether register used as dest operand can be written to */
 2559		if (regno == BPF_REG_FP) {
 2560			verbose(env, "frame pointer is read only\n");
 2561			return -EACCES;
 2562		}
 2563		reg->live |= REG_LIVE_WRITTEN;
 2564		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
 2565		if (t == DST_OP)
 2566			mark_reg_unknown(env, regs, regno);
 2567	}
 2568	return 0;
 2569}
 2570
 2571static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
 2572{
 2573	env->insn_aux_data[idx].jmp_point = true;
 2574}
 2575
 2576static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
 2577{
 2578	return env->insn_aux_data[insn_idx].jmp_point;
 2579}
 2580
 2581/* for any branch, call, exit record the history of jmps in the given state */
 2582static int push_jmp_history(struct bpf_verifier_env *env,
 2583			    struct bpf_verifier_state *cur)
 2584{
 2585	u32 cnt = cur->jmp_history_cnt;
 2586	struct bpf_idx_pair *p;
 2587	size_t alloc_size;
 2588
 2589	if (!is_jmp_point(env, env->insn_idx))
 2590		return 0;
 2591
 2592	cnt++;
 2593	alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
 2594	p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
 2595	if (!p)
 2596		return -ENOMEM;
 2597	p[cnt - 1].idx = env->insn_idx;
 2598	p[cnt - 1].prev_idx = env->prev_insn_idx;
 2599	cur->jmp_history = p;
 2600	cur->jmp_history_cnt = cnt;
 2601	return 0;
 2602}
 2603
 2604/* Backtrack one insn at a time. If idx is not at the top of the recorded
 2605 * history, then the previous instruction came from straight-line execution.
 2606 */
 2607static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
 2608			     u32 *history)
 2609{
 2610	u32 cnt = *history;
 2611
 2612	if (cnt && st->jmp_history[cnt - 1].idx == i) {
 2613		i = st->jmp_history[cnt - 1].prev_idx;
 2614		(*history)--;
 2615	} else {
 2616		i--;
 2617	}
 2618	return i;
 2619}
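     /* Example walk (hypothetical history): with jmp_history = [{idx=10, prev_idx=4}]
      * and *history == 1, asking for the predecessor of insn 10 returns 4 and
      * consumes the history entry; for any other insn index the predecessor is
      * simply i - 1 (straight-line execution).
      */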
 2620
 2621static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
 2622{
 2623	const struct btf_type *func;
 2624	struct btf *desc_btf;
 2625
 2626	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
 2627		return NULL;
 2628
 2629	desc_btf = find_kfunc_desc_btf(data, insn->off);
 2630	if (IS_ERR(desc_btf))
 2631		return "<error>";
 2632
 2633	func = btf_type_by_id(desc_btf, insn->imm);
 2634	return btf_name_by_offset(desc_btf, func->name_off);
 2635}
 2636
 2637/* For given verifier state backtrack_insn() is called from the last insn to
 2638 * the first insn. Its purpose is to compute a bitmask of registers and
 2639 * stack slots that needs precision in the parent verifier state.
 2640 */
 2641static int backtrack_insn(struct bpf_verifier_env *env, int idx,
 2642			  u32 *reg_mask, u64 *stack_mask)
 2643{
 2644	const struct bpf_insn_cbs cbs = {
 2645		.cb_call	= disasm_kfunc_name,
 2646		.cb_print	= verbose,
 2647		.private_data	= env,
 2648	};
 2649	struct bpf_insn *insn = env->prog->insnsi + idx;
 2650	u8 class = BPF_CLASS(insn->code);
 2651	u8 opcode = BPF_OP(insn->code);
 2652	u8 mode = BPF_MODE(insn->code);
 2653	u32 dreg = 1u << insn->dst_reg;
 2654	u32 sreg = 1u << insn->src_reg;
 2655	u32 spi;
 2656
 2657	if (insn->code == 0)
 2658		return 0;
 2659	if (env->log.level & BPF_LOG_LEVEL2) {
 2660		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
 2661		verbose(env, "%d: ", idx);
 2662		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
 2663	}
 2664
 2665	if (class == BPF_ALU || class == BPF_ALU64) {
 2666		if (!(*reg_mask & dreg))
 2667			return 0;
 2668		if (opcode == BPF_MOV) {
 2669			if (BPF_SRC(insn->code) == BPF_X) {
 2670				/* dreg = sreg
 2671				 * dreg needs precision after this insn
 2672				 * sreg needs precision before this insn
 2673				 */
 2674				*reg_mask &= ~dreg;
 2675				*reg_mask |= sreg;
 2676			} else {
 2677				/* dreg = K
 2678				 * dreg needs precision after this insn.
 2679				 * Corresponding register is already marked
 2680				 * as precise=true in this verifier state.
 2681				 * No further markings in parent are necessary
 2682				 */
 2683				*reg_mask &= ~dreg;
 2684			}
 2685		} else {
 2686			if (BPF_SRC(insn->code) == BPF_X) {
 2687				/* dreg += sreg
 2688				 * both dreg and sreg need precision
 2689				 * before this insn
 2690				 */
 2691				*reg_mask |= sreg;
 2692			} /* else dreg += K
 2693			   * dreg still needs precision before this insn
 2694			   */
 2695		}
 2696	} else if (class == BPF_LDX) {
 2697		if (!(*reg_mask & dreg))
 2698			return 0;
 2699		*reg_mask &= ~dreg;
 2700
 2701		/* scalars can only be spilled into stack w/o losing precision.
 2702		 * Load from any other memory can be zero extended.
 2703		 * The desire to keep that precision is already indicated
 2704		 * by 'precise' mark in corresponding register of this state.
 2705		 * No further tracking necessary.
 2706		 */
 2707		if (insn->src_reg != BPF_REG_FP)
 2708			return 0;
 2709
 2710		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
 2711		 * That [fp - off] slot contains a scalar that needs to be
 2712		 * tracked with precision
 2713		 */
 2714		spi = (-insn->off - 1) / BPF_REG_SIZE;
 2715		if (spi >= 64) {
 2716			verbose(env, "BUG spi %d\n", spi);
 2717			WARN_ONCE(1, "verifier backtracking bug");
 2718			return -EFAULT;
 2719		}
 2720		*stack_mask |= 1ull << spi;
 2721	} else if (class == BPF_STX || class == BPF_ST) {
 2722		if (*reg_mask & dreg)
 2723			/* stx & st shouldn't be using _scalar_ dst_reg
 2724			 * to access memory. It means backtracking
 2725			 * encountered a case of pointer subtraction.
 2726			 */
 2727			return -ENOTSUPP;
 2728		/* scalars can only be spilled into stack */
 2729		if (insn->dst_reg != BPF_REG_FP)
 2730			return 0;
 2731		spi = (-insn->off - 1) / BPF_REG_SIZE;
 2732		if (spi >= 64) {
 2733			verbose(env, "BUG spi %d\n", spi);
 2734			WARN_ONCE(1, "verifier backtracking bug");
 2735			return -EFAULT;
 2736		}
 2737		if (!(*stack_mask & (1ull << spi)))
 2738			return 0;
 2739		*stack_mask &= ~(1ull << spi);
 2740		if (class == BPF_STX)
 2741			*reg_mask |= sreg;
 2742	} else if (class == BPF_JMP || class == BPF_JMP32) {
 2743		if (opcode == BPF_CALL) {
 2744			if (insn->src_reg == BPF_PSEUDO_CALL)
 2745				return -ENOTSUPP;
 2746			/* BPF helpers that invoke callback subprogs are
 2747			 * equivalent to BPF_PSEUDO_CALL above
 2748			 */
 2749			if (insn->src_reg == 0 && is_callback_calling_function(insn->imm))
 2750				return -ENOTSUPP;
 2751			/* kfunc with imm==0 is invalid and fixup_kfunc_call will
 2752			 * catch this error later. Make backtracking conservative
 2753			 * with ENOTSUPP.
 2754			 */
 2755			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
 2756				return -ENOTSUPP;
 2757			/* regular helper call sets R0 */
 2758			*reg_mask &= ~1;
 2759			if (*reg_mask & 0x3f) {
 2760				/* if backtracing was looking for registers R1-R5
 2761				 * they should have been found already.
 2762				 */
 2763				verbose(env, "BUG regs %x\n", *reg_mask);
 2764				WARN_ONCE(1, "verifier backtracking bug");
 2765				return -EFAULT;
 2766			}
 2767		} else if (opcode == BPF_EXIT) {
 2768			return -ENOTSUPP;
 2769		}
 2770	} else if (class == BPF_LD) {
 2771		if (!(*reg_mask & dreg))
 2772			return 0;
 2773		*reg_mask &= ~dreg;
 2774		/* It's ld_imm64 or ld_abs or ld_ind.
 2775		 * For ld_imm64 no further tracking of precision
 2776		 * into parent is necessary
 2777		 */
 2778		if (mode == BPF_IND || mode == BPF_ABS)
 2779			/* to be analyzed */
 2780			return -ENOTSUPP;
 2781	}
 2782	return 0;
 2783}
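     /* For illustration of backtrack_insn() above (hypothetical insns): if
      * reg_mask currently tracks r4 and we backtrack "r4 = *(u64 *)(r10 -8)",
      * r4 is dropped from reg_mask and the bit for stack slot fp-8 (spi 0) is
      * set in stack_mask; backtracking the earlier spill
      * "*(u64 *)(r10 -8) = r3" then clears that stack bit and adds r3 to
      * reg_mask, so precision chasing continues through r3's history.
      */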
 2784
 2785/* the scalar precision tracking algorithm:
 2786 * . at the start all registers have precise=false.
 2787 * . scalar ranges are tracked as normal through alu and jmp insns.
 2788 * . once precise value of the scalar register is used in:
 2789 *   .  ptr + scalar alu
 2790 *   . if (scalar cond K|scalar)
 2791 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
 2792 *   backtrack through the verifier states and mark all registers and
 2793 *   stack slots with spilled constants that contributed to these scalar
 2794 *   registers as needing to be precise.
 2795 * . during state pruning two registers (or spilled stack slots)
 2796 *   are equivalent if both are not precise.
 2797 *
 2798 * Note the verifier cannot simply walk register parentage chain,
 2799 * since many different registers and stack slots could have been
 2800 * used to compute a single precise scalar.
 2801 *
 2802 * The approach of starting with precise=true for all registers and then
 2803 * backtrack to mark a register as not precise when the verifier detects
 2804 * that the program doesn't care about the specific value (e.g., when a helper
 2805 * takes a register as an ARG_ANYTHING parameter) is not safe.
 2806 *
 2807 * It's ok to walk single parentage chain of the verifier states.
 2808 * It's possible that this backtracking will go all the way till 1st insn.
 2809 * All other branches will be explored for needing precision later.
 2810 *
 2811 * The backtracking needs to deal with cases like:
 2812 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 2813 * r9 -= r8
 2814 * r5 = r9
 2815 * if r5 > 0x79f goto pc+7
 2816 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 2817 * r5 += 1
 2818 * ...
 2819 * call bpf_perf_event_output#25
 2820 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 2821 *
 2822 * and this case:
 2823 * r6 = 1
 2824 * call foo // uses callee's r6 inside to compute r0
 2825 * r0 += r6
 2826 * if r0 == 0 goto
 2827 *
 2828 * to track above reg_mask/stack_mask needs to be independent for each frame.
 2829 *
 2830 * Also if parent's curframe > frame where backtracking started,
 2831 * the verifier needs to mark registers in both frames, otherwise callees
 2832 * may incorrectly prune callers. This is similar to
 2833 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
 2834 *
 2835 * For now backtracking falls back into conservative marking.
 2836 */
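     /* A small made-up sequence illustrating the algorithm above:
      *   r0 = -8
      *   r6 = r10
      *   r6 += r0
      *   *(u64 *)(r6 + 0) = 0
      * "r6 += r0" is the "ptr + scalar alu" case listed above, so the scalar r0
      * must be precise; backtracking walks back to "r0 = -8" and marks r0
      * precise in the checkpointed parent states, preventing pruning against
      * states where r0 held a different value.
      */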
 2837static void mark_all_scalars_precise(struct bpf_verifier_env *env,
 2838				     struct bpf_verifier_state *st)
 2839{
 2840	struct bpf_func_state *func;
 2841	struct bpf_reg_state *reg;
 2842	int i, j;
 2843
 2844	/* big hammer: mark all scalars precise in this path.
 2845	 * pop_stack may still get !precise scalars.
 2846	 * We also skip current state and go straight to first parent state,
 2847	 * because precision markings in current non-checkpointed state are
 2848	 * not needed. See why in the comment in __mark_chain_precision below.
 2849	 */
 2850	for (st = st->parent; st; st = st->parent) {
 2851		for (i = 0; i <= st->curframe; i++) {
 2852			func = st->frame[i];
 2853			for (j = 0; j < BPF_REG_FP; j++) {
 2854				reg = &func->regs[j];
 2855				if (reg->type != SCALAR_VALUE)
 2856					continue;
 2857				reg->precise = true;
 2858			}
 2859			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
 2860				if (!is_spilled_reg(&func->stack[j]))
 2861					continue;
 2862				reg = &func->stack[j].spilled_ptr;
 2863				if (reg->type != SCALAR_VALUE)
 2864					continue;
 2865				reg->precise = true;
 2866			}
 2867		}
 2868	}
 2869}
 2870
 2871static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 2872{
 2873	struct bpf_func_state *func;
 2874	struct bpf_reg_state *reg;
 2875	int i, j;
 2876
 2877	for (i = 0; i <= st->curframe; i++) {
 2878		func = st->frame[i];
 2879		for (j = 0; j < BPF_REG_FP; j++) {
 2880			reg = &func->regs[j];
 2881			if (reg->type != SCALAR_VALUE)
 2882				continue;
 2883			reg->precise = false;
 2884		}
 2885		for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
 2886			if (!is_spilled_reg(&func->stack[j]))
 2887				continue;
 2888			reg = &func->stack[j].spilled_ptr;
 2889			if (reg->type != SCALAR_VALUE)
 2890				continue;
 2891			reg->precise = false;
 2892		}
 2893	}
 2894}
 2895
 2896/*
 2897 * __mark_chain_precision() backtracks BPF program instruction sequence and
 2898 * chain of verifier states making sure that register *regno* (if regno >= 0)
 2899 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
 2900 * SCALARS, as well as any other registers and slots that contribute to
 2901 * a tracked state of given registers/stack slots, depending on specific BPF
 2902 * assembly instructions (see backtrack_insn() for exact instruction handling
 2903 * logic). This backtracking relies on recorded jmp_history and is able to
 2904 * traverse entire chain of parent states. This process ends only when all the
 2905 * necessary registers/slots and their transitive dependencies are marked as
 2906 * precise.
 2907 *
 2908 * One important and subtle aspect is that precise marks *do not matter* in
 2909 * the currently verified state (current state). It is important to understand
 2910 * why this is the case.
 2911 *
 2912 * First, note that current state is the state that is not yet "checkpointed",
 2913 * i.e., it is not yet put into env->explored_states, and it has no children
 2914 * states yet. It's ephemeral, and can end up either a) being discarded if
 2915 * compatible explored state is found at some point or BPF_EXIT instruction is
 2916 * reached or b) checkpointed and put into env->explored_states, branching out
 2917 * into one or more children states.
 2918 *
 2919 * In the former case, precise markings in current state are completely
 2920 * ignored by state comparison code (see regsafe() for details). Only
 2921 * checkpointed ("old") state precise markings are important, and if old
 2922 * state's register/slot is precise, regsafe() assumes current state's
 2923 * register/slot as precise and checks value ranges exactly and precisely. If
 2924 * states turn out to be compatible, current state's necessary precise
 2925 * markings and any required parent states' precise markings are enforced
 2926 * after the fact with propagate_precision() logic. But it's
 2927 * important to realize that in this case, even after marking current state
 2928 * registers/slots as precise, we immediately discard current state. So what
 2929 * actually matters is any of the precise markings propagated into current
 2930 * state's parent states, which are always checkpointed (due to b) case above).
 2931 * As such, for scenario a) it doesn't matter if current state has precise
 2932 * markings set or not.
 2933 *
 2934 * Now, for the scenario b), checkpointing and forking into child(ren)
 2935 * state(s). Note that before current state gets to checkpointing step, any
 2936 * processed instruction always assumes precise SCALAR register/slot
 2937 * knowledge: if precise value or range is useful to prune jump branch, BPF
 2938 * verifier takes this opportunity enthusiastically. Similarly, when
 2939 * register's value is used to calculate offset or memory address, exact
 2940 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
 2941 * what we mentioned above about state comparison ignoring precise markings
 2942 * during state comparison, BPF verifier ignores and also assumes precise
 2943 * markings *at will* during instruction verification process. But as verifier
 2944 * assumes precision, it also propagates any precision dependencies across
 2945 * parent states, which are not yet finalized, so can be further restricted
 2946 * based on new knowledge gained from restrictions enforced by their children
 2947 * states. This is so that once those parent states are finalized, i.e., when
 2948 * they have no more active children state, state comparison logic in
 2949 * is_state_visited() would enforce strict and precise SCALAR ranges, if
 2950 * required for correctness.
 2951 *
 2952 * the path we took to get to that state is not important. This is a crucial
 2953 * property for state pruning. When a state is checkpointed and finalized at
 2954 * property for state pruning. When state is checkpointed and finalized at
 2955 * some instruction index, it can be correctly and safely used to "short
 2956 * circuit" any *compatible* state that reaches exactly the same instruction
 2957 * index. I.e., if we jumped to that instruction from a completely different
 2958 * code path than original finalized state was derived from, it doesn't
 2959 * matter, current state can be discarded because from that instruction
 2960 * forward having a compatible state will ensure we will safely reach the
 2961 * exit. States describe preconditions for further exploration, but completely
 2962 * forget the history of how we got here.
 2963 *
 2964 * This also means that even if we needed precise SCALAR range to get to
 2965 * finalized state, but from that point forward *that same* SCALAR register is
 2966 * never used in a precise context (i.e., its precise value is not needed for
 2967 * correctness), it's correct and safe to mark such register as "imprecise"
 2968 * (i.e., precise marking set to false). This is what we rely on when we do
 2969 * not set precise marking in current state. If no child state requires
 2970 * precision for any given SCALAR register, it's safe to dictate that it can
 2971 * be imprecise. If any child state does require this register to be precise,
 2972 * we'll mark it precise later retroactively during precise markings
 2973 * propagation from child state to parent states.
 2974 *
 2975 * Skipping precise marking setting in current state is a mild version of
 2976 * relying on the above observation. But we can utilize this property even
 2977 * more aggressively by proactively forgetting any precise marking in the
 2978 * current state (which we inherited from the parent state), right before we
 2979 * checkpoint it and branch off into new child state. This is done by
 2980 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
 2981 * finalized states which help in short circuiting more future states.
 2982 */
 2983static int __mark_chain_precision(struct bpf_verifier_env *env, int frame, int regno,
 2984				  int spi)
 2985{
 2986	struct bpf_verifier_state *st = env->cur_state;
 2987	int first_idx = st->first_insn_idx;
 2988	int last_idx = env->insn_idx;
 2989	struct bpf_func_state *func;
 2990	struct bpf_reg_state *reg;
 2991	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
 2992	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
 2993	bool skip_first = true;
 2994	bool new_marks = false;
 2995	int i, err;
 2996
 2997	if (!env->bpf_capable)
 2998		return 0;
 2999
 3000	/* Do sanity checks against current state of register and/or stack
 3001	 * slot, but don't set precise flag in current state, as precision
 3002	 * tracking in the current state is unnecessary.
 3003	 */
 3004	func = st->frame[frame];
 3005	if (regno >= 0) {
 3006		reg = &func->regs[regno];
 3007		if (reg->type != SCALAR_VALUE) {
 3008			WARN_ONCE(1, "backtracing misuse");
 3009			return -EFAULT;
 3010		}
 3011		new_marks = true;
 3012	}
 3013
 3014	while (spi >= 0) {
 3015		if (!is_spilled_reg(&func->stack[spi])) {
 3016			stack_mask = 0;
 3017			break;
 3018		}
 3019		reg = &func->stack[spi].spilled_ptr;
 3020		if (reg->type != SCALAR_VALUE) {
 3021			stack_mask = 0;
 3022			break;
 3023		}
 3024		new_marks = true;
 3025		break;
 3026	}
 3027
 3028	if (!new_marks)
 3029		return 0;
 3030	if (!reg_mask && !stack_mask)
 3031		return 0;
 3032
 3033	for (;;) {
 3034		DECLARE_BITMAP(mask, 64);
 3035		u32 history = st->jmp_history_cnt;
 3036
 3037		if (env->log.level & BPF_LOG_LEVEL2)
 3038			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
 3039
 3040		if (last_idx < 0) {
 3041			/* we are at the entry into subprog, which
 3042			 * is expected for global funcs, but only if
 3043			 * requested precise registers are R1-R5
 3044			 * (which are global func's input arguments)
 3045			 */
 3046			if (st->curframe == 0 &&
 3047			    st->frame[0]->subprogno > 0 &&
 3048			    st->frame[0]->callsite == BPF_MAIN_FUNC &&
 3049			    stack_mask == 0 && (reg_mask & ~0x3e) == 0) {
 3050				bitmap_from_u64(mask, reg_mask);
 3051				for_each_set_bit(i, mask, 32) {
 3052					reg = &st->frame[0]->regs[i];
 3053					if (reg->type != SCALAR_VALUE) {
 3054						reg_mask &= ~(1u << i);
 3055						continue;
 3056					}
 3057					reg->precise = true;
 3058				}
 3059				return 0;
 3060			}
 3061
 3062			verbose(env, "BUG backtracing func entry subprog %d reg_mask %x stack_mask %llx\n",
 3063				st->frame[0]->subprogno, reg_mask, stack_mask);
 3064			WARN_ONCE(1, "verifier backtracking bug");
 3065			return -EFAULT;
 3066		}
 3067
 3068		for (i = last_idx;;) {
 3069			if (skip_first) {
 3070				err = 0;
 3071				skip_first = false;
 3072			} else {
 3073				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
 3074			}
 3075			if (err == -ENOTSUPP) {
 3076				mark_all_scalars_precise(env, st);
 3077				return 0;
 3078			} else if (err) {
 3079				return err;
 3080			}
 3081			if (!reg_mask && !stack_mask)
 3082				/* Found assignment(s) into tracked register in this state.
 3083				 * Since this state is already marked, just return.
 3084				 * Nothing to be tracked further in the parent state.
 3085				 */
 3086				return 0;
 3087			if (i == first_idx)
 3088				break;
 3089			i = get_prev_insn_idx(st, i, &history);
 3090			if (i >= env->prog->len) {
 3091				/* This can happen if backtracking reached insn 0
 3092				 * and there are still reg_mask or stack_mask
 3093				 * to backtrack.
 3094				 * It means the backtracking missed the spot where
 3095				 * particular register was initialized with a constant.
 3096				 */
 3097				verbose(env, "BUG backtracking idx %d\n", i);
 3098				WARN_ONCE(1, "verifier backtracking bug");
 3099				return -EFAULT;
 3100			}
 3101		}
 3102		st = st->parent;
 3103		if (!st)
 3104			break;
 3105
 3106		new_marks = false;
 3107		func = st->frame[frame];
 3108		bitmap_from_u64(mask, reg_mask);
 3109		for_each_set_bit(i, mask, 32) {
 3110			reg = &func->regs[i];
 3111			if (reg->type != SCALAR_VALUE) {
 3112				reg_mask &= ~(1u << i);
 3113				continue;
 3114			}
 3115			if (!reg->precise)
 3116				new_marks = true;
 3117			reg->precise = true;
 3118		}
 3119
 3120		bitmap_from_u64(mask, stack_mask);
 3121		for_each_set_bit(i, mask, 64) {
 3122			if (i >= func->allocated_stack / BPF_REG_SIZE) {
 3123				/* the sequence of instructions:
 3124				 * 2: (bf) r3 = r10
 3125				 * 3: (7b) *(u64 *)(r3 -8) = r0
 3126				 * 4: (79) r4 = *(u64 *)(r10 -8)
 3127				 * doesn't contain jmps. It's backtracked
 3128				 * as a single block.
 3129				 * During backtracking insn 3 is not recognized as
 3130				 * stack access, so at the end of backtracking
 3131				 * stack slot fp-8 is still marked in stack_mask.
 3132				 * However the parent state may not have accessed
 3133				 * fp-8 and it's "unallocated" stack space.
 3134				 * In such case fallback to conservative.
 3135				 */
 3136				mark_all_scalars_precise(env, st);
 3137				return 0;
 3138			}
 3139
 3140			if (!is_spilled_reg(&func->stack[i])) {
 3141				stack_mask &= ~(1ull << i);
 3142				continue;
 3143			}
 3144			reg = &func->stack[i].spilled_ptr;
 3145			if (reg->type != SCALAR_VALUE) {
 3146				stack_mask &= ~(1ull << i);
 3147				continue;
 3148			}
 3149			if (!reg->precise)
 3150				new_marks = true;
 3151			reg->precise = true;
 3152		}
 3153		if (env->log.level & BPF_LOG_LEVEL2) {
 3154			verbose(env, "parent %s regs=%x stack=%llx marks:",
 3155				new_marks ? "didn't have" : "already had",
 3156				reg_mask, stack_mask);
 3157			print_verifier_state(env, func, true);
 3158		}
 3159
 3160		if (!reg_mask && !stack_mask)
 3161			break;
 3162		if (!new_marks)
 3163			break;
 3164
 3165		last_idx = st->last_insn_idx;
 3166		first_idx = st->first_insn_idx;
 3167	}
 3168	return 0;
 3169}
 3170
 3171int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 3172{
 3173	return __mark_chain_precision(env, env->cur_state->curframe, regno, -1);
 3174}
 3175
 3176static int mark_chain_precision_frame(struct bpf_verifier_env *env, int frame, int regno)
 3177{
 3178	return __mark_chain_precision(env, frame, regno, -1);
 3179}
 3180
 3181static int mark_chain_precision_stack_frame(struct bpf_verifier_env *env, int frame, int spi)
 3182{
 3183	return __mark_chain_precision(env, frame, -1, spi);
 3184}
 3185
 3186static bool is_spillable_regtype(enum bpf_reg_type type)
 3187{
 3188	switch (base_type(type)) {
 3189	case PTR_TO_MAP_VALUE:
 3190	case PTR_TO_STACK:
 3191	case PTR_TO_CTX:
 3192	case PTR_TO_PACKET:
 3193	case PTR_TO_PACKET_META:
 3194	case PTR_TO_PACKET_END:
 3195	case PTR_TO_FLOW_KEYS:
 3196	case CONST_PTR_TO_MAP:
 3197	case PTR_TO_SOCKET:
 3198	case PTR_TO_SOCK_COMMON:
 3199	case PTR_TO_TCP_SOCK:
 3200	case PTR_TO_XDP_SOCK:
 3201	case PTR_TO_BTF_ID:
 3202	case PTR_TO_BUF:
 3203	case PTR_TO_MEM:
 3204	case PTR_TO_FUNC:
 3205	case PTR_TO_MAP_KEY:
 3206		return true;
 3207	default:
 3208		return false;
 3209	}
 3210}
 3211
 3212/* Does this register contain a constant zero? */
 3213static bool register_is_null(struct bpf_reg_state *reg)
 3214{
 3215	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
 3216}
 3217
 3218static bool register_is_const(struct bpf_reg_state *reg)
 3219{
 3220	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
 3221}
 3222
 3223static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
 3224{
 3225	return tnum_is_unknown(reg->var_off) &&
 3226	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
 3227	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
 3228	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
 3229	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
 3230}
 3231
 3232static bool register_is_bounded(struct bpf_reg_state *reg)
 3233{
 3234	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
 3235}
 3236
 3237static bool __is_pointer_value(bool allow_ptr_leaks,
 3238			       const struct bpf_reg_state *reg)
 3239{
 3240	if (allow_ptr_leaks)
 3241		return false;
 3242
 3243	return reg->type != SCALAR_VALUE;
 3244}
 3245
 3246/* Copy src state preserving dst->parent and dst->live fields */
 3247static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
 3248{
 3249	struct bpf_reg_state *parent = dst->parent;
 3250	enum bpf_reg_liveness live = dst->live;
 3251
 3252	*dst = *src;
 3253	dst->parent = parent;
 3254	dst->live = live;
 3255}
 3256
 3257static void save_register_state(struct bpf_func_state *state,
 3258				int spi, struct bpf_reg_state *reg,
 3259				int size)
 3260{
 3261	int i;
 3262
 3263	copy_register_state(&state->stack[spi].spilled_ptr, reg);
 3264	if (size == BPF_REG_SIZE)
 3265		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 3266
 3267	for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
 3268		state->stack[spi].slot_type[i - 1] = STACK_SPILL;
 3269
 3270	/* size < 8 bytes spill */
 3271	for (; i; i--)
 3272		scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
 3273}
 3274
 3275/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
 3276 * stack boundary and alignment are checked in check_mem_access()
 3277 */
 3278static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 3279				       /* stack frame we're writing to */
 3280				       struct bpf_func_state *state,
 3281				       int off, int size, int value_regno,
 3282				       int insn_idx)
 3283{
 3284	struct bpf_func_state *cur; /* state of the current function */
 3285	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
 3286	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
 3287	struct bpf_reg_state *reg = NULL;
 3288
 3289	err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE));
 3290	if (err)
 3291		return err;
 3292	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
 3293	 * so it's aligned access and [off, off + size) are within stack limits
 3294	 */
 3295	if (!env->allow_ptr_leaks &&
 3296	    state->stack[spi].slot_type[0] == STACK_SPILL &&
 3297	    size != BPF_REG_SIZE) {
 3298		verbose(env, "attempt to corrupt spilled pointer on stack\n");
 3299		return -EACCES;
 3300	}
 3301
 3302	cur = env->cur_state->frame[env->cur_state->curframe];
 3303	if (value_regno >= 0)
 3304		reg = &cur->regs[value_regno];
 3305	if (!env->bypass_spec_v4) {
 3306		bool sanitize = reg && is_spillable_regtype(reg->type);
 3307
 3308		for (i = 0; i < size; i++) {
 3309			u8 type = state->stack[spi].slot_type[i];
 3310
 3311			if (type != STACK_MISC && type != STACK_ZERO) {
 3312				sanitize = true;
 3313				break;
 3314			}
 3315		}
 3316
 3317		if (sanitize)
 3318			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
 3319	}
 3320
 3321	mark_stack_slot_scratched(env, spi);
 3322	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
 3323	    !register_is_null(reg) && env->bpf_capable) {
 3324		if (dst_reg != BPF_REG_FP) {
 3325			/* The backtracking logic can only recognize explicit
 3326			 * stack slot addresses like [fp - 8]. Spills of scalars
 3327			 * via other registers have to be conservative.
 3328			 * Backtrack from here and mark all registers as precise
 3329			 * that contributed into 'reg' being a constant.
 3330			 */
 3331			err = mark_chain_precision(env, value_regno);
 3332			if (err)
 3333				return err;
 3334		}
 3335		save_register_state(state, spi, reg, size);
 3336	} else if (reg && is_spillable_regtype(reg->type)) {
 3337		/* register containing pointer is being spilled into stack */
 3338		if (size != BPF_REG_SIZE) {
 3339			verbose_linfo(env, insn_idx, "; ");
 3340			verbose(env, "invalid size of register spill\n");
 3341			return -EACCES;
 3342		}
 3343		if (state != cur && reg->type == PTR_TO_STACK) {
 3344			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
 3345			return -EINVAL;
 3346		}
 3347		save_register_state(state, spi, reg, size);
 3348	} else {
 3349		u8 type = STACK_MISC;
 3350
 3351		/* regular write of data into stack destroys any spilled ptr */
 3352		state->stack[spi].spilled_ptr.type = NOT_INIT;
 3353		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
 3354		if (is_spilled_reg(&state->stack[spi]))
 3355			for (i = 0; i < BPF_REG_SIZE; i++)
 3356				scrub_spilled_slot(&state->stack[spi].slot_type[i]);
 3357
 3358		/* only mark the slot as written if all 8 bytes were written
 3359		 * otherwise read propagation may incorrectly stop too soon
 3360		 * when stack slots are partially written.
 3361		 * This heuristic means that read propagation will be
 3362		 * conservative, since it will add reg_live_read marks
 3363		 * to stack slots all the way to the first state when a program
 3364		 * writes+reads less than 8 bytes
 3365		 */
 3366		if (size == BPF_REG_SIZE)
 3367			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 3368
 3369		/* when we zero initialize stack slots mark them as such */
 3370		if (reg && register_is_null(reg)) {
 3371			/* backtracking doesn't work for STACK_ZERO yet. */
 3372			err = mark_chain_precision(env, value_regno);
 3373			if (err)
 3374				return err;
 3375			type = STACK_ZERO;
 3376		}
 3377
 3378		/* Mark slots affected by this stack write. */
 3379		for (i = 0; i < size; i++)
 3380			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
 3381				type;
 3382	}
 3383	return 0;
 3384}
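
/* Editorial sketch, not part of the original verifier.c: instruction shapes
 * that end up in check_stack_write_fixed_off(). The macros are the standard
 * ones from include/linux/filter.h (already included above); the array only
 * exists to illustrate the three branches of the function.
 */
static __maybe_unused const struct bpf_insn stack_write_shapes[] = {
	/* 8-byte spill of r1 to fp-8: state can be kept via save_register_state() */
	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
	/* 4-byte write to fp-16: a partial write that scrubs any spilled pointer */
	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -16),
	/* store of immediate 0 to fp-24: reaches the function with value_regno == -1 */
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
};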
 3385
 3386/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
 3387 * known to contain a variable offset.
 3388 * This function checks whether the write is permitted and conservatively
 3389 * tracks the effects of the write, considering that each stack slot in the
 3390 * dynamic range is potentially written to.
 3391 *
 3392 * 'off' includes 'regno->off'.
 3393 * 'value_regno' can be -1, meaning that an unknown value is being written to
 3394 * the stack.
 3395 *
 3396 * Spilled pointers in range are not marked as written because we don't know
 3397 * what's going to be actually written. This means that read propagation for
 3398 * future reads cannot be terminated by this write.
 3399 *
 3400 * For privileged programs, uninitialized stack slots are considered
 3401 * initialized by this write (even though we don't know exactly what offsets
 3402 * are going to be written to). The idea is that we don't want the verifier to
 3403 * reject future reads that access slots written to through variable offsets.
 3404 */
 3405static int check_stack_write_var_off(struct bpf_verifier_env *env,
 3406				     /* func where register points to */
 3407				     struct bpf_func_state *state,
 3408				     int ptr_regno, int off, int size,
 3409				     int value_regno, int insn_idx)
 3410{
 3411	struct bpf_func_state *cur; /* state of the current function */
 3412	int min_off, max_off;
 3413	int i, err;
 3414	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
 3415	bool writing_zero = false;
 3416	/* set if the fact that we're writing a zero is used to let any
 3417	 * stack slots remain STACK_ZERO
 3418	 */
 3419	bool zero_used = false;
 3420
 3421	cur = env->cur_state->frame[env->cur_state->curframe];
 3422	ptr_reg = &cur->regs[ptr_regno];
 3423	min_off = ptr_reg->smin_value + off;
 3424	max_off = ptr_reg->smax_value + off + size;
 3425	if (value_regno >= 0)
 3426		value_reg = &cur->regs[value_regno];
 3427	if (value_reg && register_is_null(value_reg))
 3428		writing_zero = true;
 3429
 3430	err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE));
 3431	if (err)
 3432		return err;
 3433
 3434
 3435	/* Variable offset writes destroy any spilled pointers in range. */
 3436	for (i = min_off; i < max_off; i++) {
 3437		u8 new_type, *stype;
 3438		int slot, spi;
 3439
 3440		slot = -i - 1;
 3441		spi = slot / BPF_REG_SIZE;
 3442		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
 3443		mark_stack_slot_scratched(env, spi);
 3444
 3445		if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
 3446			/* Reject the write if the range we may write to has not
 3447			 * been initialized beforehand. If we didn't reject
 3448			 * here, the ptr status would be erased below (even
 3449			 * though not all slots are actually overwritten),
 3450			 * possibly opening the door to leaks.
 3451			 *
 3452			 * We do however catch the STACK_INVALID case below, and
 3453			 * only allow reading possibly uninitialized memory
 3454			 * later for CAP_PERFMON, as the write may not happen to
 3455			 * that slot.
 3456			 */
 3457			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
 3458				insn_idx, i);
 3459			return -EINVAL;
 3460		}
 3461
 3462		/* Erase all spilled pointers. */
 3463		state->stack[spi].spilled_ptr.type = NOT_INIT;
 3464
 3465		/* Update the slot type. */
 3466		new_type = STACK_MISC;
 3467		if (writing_zero && *stype == STACK_ZERO) {
 3468			new_type = STACK_ZERO;
 3469			zero_used = true;
 3470		}
 3471		/* If the slot is STACK_INVALID, we check whether it's OK to
 3472		 * pretend that it will be initialized by this write. The slot
 3473		 * might not actually be written to, and so if we mark it as
 3474		 * initialized future reads might leak uninitialized memory.
 3475		 * For privileged programs, we will accept such reads to slots
 3476		 * that may or may not be written because, if we're reject
 3477		 * that may or may not be written because, if we were to reject
 3478		 */
 3479		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
 3480			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
 3481					insn_idx, i);
 3482			return -EINVAL;
 3483		}
 3484		*stype = new_type;
 3485	}
 3486	if (zero_used) {
 3487		/* backtracking doesn't work for STACK_ZERO yet. */
 3488		err = mark_chain_precision(env, value_regno);
 3489		if (err)
 3490			return err;
 3491	}
 3492	return 0;
 3493}
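
/* Editorial sketch, not part of the original source: the kind of BPF C that
 * produces the variable-offset stack write handled above; 'idx' stands in
 * for any scalar the verifier only knows by its bounds.
 *
 *	char buf[16] = {};	// stack slots become STACK_ZERO/STACK_MISC
 *	idx &= 0xf;		// verifier bounds for idx: [0, 15]
 *	buf[idx] = 0;		// fp-relative store at a non-constant offset
 *
 * check_stack_write_var_off() then walks every slot in [min_off, max_off)
 * and conservatively retypes it, as implemented above.
 */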
 3494
 3495/* When register 'dst_regno' is assigned some values from stack[min_off,
 3496 * max_off), we set the register's type according to the types of the
 3497 * respective stack slots. If all the stack values are known to be zeros, then
 3498 * so is the destination reg. Otherwise, the register is considered to be
 3499 * SCALAR. This function does not deal with register filling; the caller must
 3500 * ensure that all spilled registers in the stack range have been marked as
 3501 * read.
 3502 */
 3503static void mark_reg_stack_read(struct bpf_verifier_env *env,
 3504				/* func where src register points to */
 3505				struct bpf_func_state *ptr_state,
 3506				int min_off, int max_off, int dst_regno)
 3507{
 3508	struct bpf_verifier_state *vstate = env->cur_state;
 3509	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 3510	int i, slot, spi;
 3511	u8 *stype;
 3512	int zeros = 0;
 3513
 3514	for (i = min_off; i < max_off; i++) {
 3515		slot = -i - 1;
 3516		spi = slot / BPF_REG_SIZE;
 3517		stype = ptr_state->stack[spi].slot_type;
 3518		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
 3519			break;
 3520		zeros++;
 3521	}
 3522	if (zeros == max_off - min_off) {
 3523		/* any access_size read into register is zero extended,
 3524		 * so the whole register == const_zero
 3525		 */
 3526		__mark_reg_const_zero(&state->regs[dst_regno]);
 3527		/* backtracking doesn't support STACK_ZERO yet,
 3528		 * so mark it precise here, so that later
 3529		 * backtracking can stop here.
 3530		 * Backtracking may not need this if this register
 3531		 * doesn't participate in pointer adjustment.
 3532		 * Forward propagation of precise flag is not
 3533		 * necessary either. This mark is only to stop
 3534		 * backtracking. Any register that contributed
 3535		 * to const 0 was marked precise before spill.
 3536		 */
 3537		state->regs[dst_regno].precise = true;
 3538	} else {
 3539		/* have read misc data from the stack */
 3540		mark_reg_unknown(env, state->regs, dst_regno);
 3541	}
 3542	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
 3543}
 3544
 3545/* Read the stack at 'off' and put the results into the register indicated by
 3546 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
 3547 * spilled reg.
 3548 *
 3549 * 'dst_regno' can be -1, meaning that the read value is not going to a
 3550 * register.
 3551 *
 3552 * The access is assumed to be within the current stack bounds.
 3553 */
 3554static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
 3555				      /* func where src register points to */
 3556				      struct bpf_func_state *reg_state,
 3557				      int off, int size, int dst_regno)
 3558{
 3559	struct bpf_verifier_state *vstate = env->cur_state;
 3560	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 3561	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
 3562	struct bpf_reg_state *reg;
 3563	u8 *stype, type;
 3564
 3565	stype = reg_state->stack[spi].slot_type;
 3566	reg = &reg_state->stack[spi].spilled_ptr;
 3567
 3568	if (is_spilled_reg(&reg_state->stack[spi])) {
 3569		u8 spill_size = 1;
 3570
 3571		for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
 3572			spill_size++;
 3573
 3574		if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
 3575			if (reg->type != SCALAR_VALUE) {
 3576				verbose_linfo(env, env->insn_idx, "; ");
 3577				verbose(env, "invalid size of register fill\n");
 3578				return -EACCES;
 3579			}
 3580
 3581			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 3582			if (dst_regno < 0)
 3583				return 0;
 3584
 3585			if (!(off % BPF_REG_SIZE) && size == spill_size) {
 3586				/* The earlier check_reg_arg() has decided the
 3587				 * subreg_def for this insn.  Save it first.
 3588				 */
 3589				s32 subreg_def = state->regs[dst_regno].subreg_def;
 3590
 3591				copy_register_state(&state->regs[dst_regno], reg);
 3592				state->regs[dst_regno].subreg_def = subreg_def;
 3593			} else {
 3594				for (i = 0; i < size; i++) {
 3595					type = stype[(slot - i) % BPF_REG_SIZE];
 3596					if (type == STACK_SPILL)
 3597						continue;
 3598					if (type == STACK_MISC)
 3599						continue;
 3600					verbose(env, "invalid read from stack off %d+%d size %d\n",
 3601						off, i, size);
 3602					return -EACCES;
 3603				}
 3604				mark_reg_unknown(env, state->regs, dst_regno);
 3605			}
 3606			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
 3607			return 0;
 3608		}
 3609
 3610		if (dst_regno >= 0) {
 3611			/* restore register state from stack */
 3612			copy_register_state(&state->regs[dst_regno], reg);
 3613			/* mark reg as written since spilled pointer state likely
 3614			 * has its liveness marks cleared by is_state_visited()
 3615			 * which resets stack/reg liveness for state transitions
 3616			 */
 3617			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
 3618		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
 3619			/* If dst_regno==-1, the caller is asking us whether
 3620			 * it is acceptable to use this value as a SCALAR_VALUE
 3621			 * (e.g. for XADD).
 3622			 * We must not allow unprivileged callers to do that
 3623			 * with spilled pointers.
 3624			 */
 3625			verbose(env, "leaking pointer from stack off %d\n",
 3626				off);
 3627			return -EACCES;
 3628		}
 3629		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 3630	} else {
 3631		for (i = 0; i < size; i++) {
 3632			type = stype[(slot - i) % BPF_REG_SIZE];
 3633			if (type == STACK_MISC)
 3634				continue;
 3635			if (type == STACK_ZERO)
 3636				continue;
 3637			verbose(env, "invalid read from stack off %d+%d size %d\n",
 3638				off, i, size);
 3639			return -EACCES;
 3640		}
 3641		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 3642		if (dst_regno >= 0)
 3643			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
 3644	}
 3645	return 0;
 3646}
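
/* Editorial sketch, not part of the original source: a full-width fill that
 * takes the copy_register_state() path above and restores the exact spilled
 * register state (macros from include/linux/filter.h):
 *
 *	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),	// spill r6
 *	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8),	// fill r6, state restored
 *
 * A narrower load from the same slot is only accepted for spilled scalars
 * and ends in mark_reg_unknown(), as handled in the branch above.
 */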
 3647
 3648enum bpf_access_src {
 3649	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
 3650	ACCESS_HELPER = 2,  /* the access is performed by a helper */
 3651};
 3652
 3653static int check_stack_range_initialized(struct bpf_verifier_env *env,
 3654					 int regno, int off, int access_size,
 3655					 bool zero_size_allowed,
 3656					 enum bpf_access_src type,
 3657					 struct bpf_call_arg_meta *meta);
 3658
 3659static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
 3660{
 3661	return cur_regs(env) + regno;
 3662}
 3663
 3664/* Read the stack at 'ptr_regno + off' and put the result into the register
 3665 * 'dst_regno'.
 3666 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
 3667 * but not its variable offset.
 3668 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
 3669 *
 3670 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
 3671 * filling registers (i.e. reads of spilled register cannot be detected when
 3672 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
 3673 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
 3674 * offset; for a fixed offset check_stack_read_fixed_off should be used
 3675 * instead.
 3676 */
 3677static int check_stack_read_var_off(struct bpf_verifier_env *env,
 3678				    int ptr_regno, int off, int size, int dst_regno)
 3679{
 3680	/* The state of the source register. */
 3681	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
 3682	struct bpf_func_state *ptr_state = func(env, reg);
 3683	int err;
 3684	int min_off, max_off;
 3685
 3686	/* Note that we pass a NULL meta, so raw access will not be permitted.
 3687	 */
 3688	err = check_stack_range_initialized(env, ptr_regno, off, size,
 3689					    false, ACCESS_DIRECT, NULL);
 3690	if (err)
 3691		return err;
 3692
 3693	min_off = reg->smin_value + off;
 3694	max_off = reg->smax_value + off;
 3695	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
 3696	return 0;
 3697}
 3698
 3699/* check_stack_read dispatches to check_stack_read_fixed_off or
 3700 * check_stack_read_var_off.
 3701 *
 3702 * The caller must ensure that the offset falls within the allocated stack
 3703 * bounds.
 3704 *
 3705 * 'dst_regno' is a register which will receive the value from the stack. It
 3706 * can be -1, meaning that the read value is not going to a register.
 3707 */
 3708static int check_stack_read(struct bpf_verifier_env *env,
 3709			    int ptr_regno, int off, int size,
 3710			    int dst_regno)
 3711{
 3712	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
 3713	struct bpf_func_state *state = func(env, reg);
 3714	int err;
 3715	/* Some accesses are only permitted with a static offset. */
 3716	bool var_off = !tnum_is_const(reg->var_off);
 3717
 3718	/* The offset is required to be static when reads don't go to a
 3719	 * register, in order to not leak pointers (see
 3720	 * check_stack_read_fixed_off).
 3721	 */
 3722	if (dst_regno < 0 && var_off) {
 3723		char tn_buf[48];
 3724
 3725		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3726		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
 3727			tn_buf, off, size);
 3728		return -EACCES;
 3729	}
 3730	/* Variable offset is prohibited for unprivileged mode for simplicity
 3731	 * since it requires corresponding support in Spectre masking for stack
 3732	 * ALU. See also retrieve_ptr_limit().
 3733	 */
 3734	if (!env->bypass_spec_v1 && var_off) {
 3735		char tn_buf[48];
 3736
 3737		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3738		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
 3739				ptr_regno, tn_buf);
 3740		return -EACCES;
 3741	}
 3742
 3743	if (!var_off) {
 3744		off += reg->var_off.value;
 3745		err = check_stack_read_fixed_off(env, state, off, size,
 3746						 dst_regno);
 3747	} else {
 3748		/* Variable offset stack reads need more conservative handling
 3749		 * than fixed offset ones. Note that dst_regno >= 0 on this
 3750		 * branch.
 3751		 */
 3752		err = check_stack_read_var_off(env, ptr_regno, off, size,
 3753					       dst_regno);
 3754	}
 3755	return err;
 3756}
 3757
 3758
 3759/* check_stack_write dispatches to check_stack_write_fixed_off or
 3760 * check_stack_write_var_off.
 3761 *
 3762 * 'ptr_regno' is the register used as a pointer into the stack.
 3763 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
 3764 * 'value_regno' is the register whose value we're writing to the stack. It can
 3765 * be -1, meaning that we're not writing from a register.
 3766 *
 3767 * The caller must ensure that the offset falls within the maximum stack size.
 3768 */
 3769static int check_stack_write(struct bpf_verifier_env *env,
 3770			     int ptr_regno, int off, int size,
 3771			     int value_regno, int insn_idx)
 3772{
 3773	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
 3774	struct bpf_func_state *state = func(env, reg);
 3775	int err;
 3776
 3777	if (tnum_is_const(reg->var_off)) {
 3778		off += reg->var_off.value;
 3779		err = check_stack_write_fixed_off(env, state, off, size,
 3780						  value_regno, insn_idx);
 3781	} else {
 3782		/* Variable offset stack writes need more conservative handling
 3783		 * than fixed offset ones.
 3784		 */
 3785		err = check_stack_write_var_off(env, state,
 3786						ptr_regno, off, size,
 3787						value_regno, insn_idx);
 3788	}
 3789	return err;
 3790}
 3791
 3792static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
 3793				 int off, int size, enum bpf_access_type type)
 3794{
 3795	struct bpf_reg_state *regs = cur_regs(env);
 3796	struct bpf_map *map = regs[regno].map_ptr;
 3797	u32 cap = bpf_map_flags_to_cap(map);
 3798
 3799	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
 3800		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
 3801			map->value_size, off, size);
 3802		return -EACCES;
 3803	}
 3804
 3805	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
 3806		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
 3807			map->value_size, off, size);
 3808		return -EACCES;
 3809	}
 3810
 3811	return 0;
 3812}
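
/* Editorial sketch, not part of the original source: a BPF-program-side map
 * whose value writes are rejected by check_map_access_type(), because
 * BPF_F_RDONLY_PROG leaves BPF_MAP_CAN_WRITE out of bpf_map_flags_to_cap().
 * Assumes libbpf's bpf_helpers.h declaration macros.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *		__uint(map_flags, BPF_F_RDONLY_PROG);
 *	} rdonly_map SEC(".maps");
 */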
 3813
 3814/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
 3815static int __check_mem_access(struct bpf_verifier_env *env, int regno,
 3816			      int off, int size, u32 mem_size,
 3817			      bool zero_size_allowed)
 3818{
 3819	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
 3820	struct bpf_reg_state *reg;
 3821
 3822	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
 3823		return 0;
 3824
 3825	reg = &cur_regs(env)[regno];
 3826	switch (reg->type) {
 3827	case PTR_TO_MAP_KEY:
 3828		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
 3829			mem_size, off, size);
 3830		break;
 3831	case PTR_TO_MAP_VALUE:
 3832		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
 3833			mem_size, off, size);
 3834		break;
 3835	case PTR_TO_PACKET:
 3836	case PTR_TO_PACKET_META:
 3837	case PTR_TO_PACKET_END:
 3838		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
 3839			off, size, regno, reg->id, off, mem_size);
 3840		break;
 3841	case PTR_TO_MEM:
 3842	default:
 3843		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
 3844			mem_size, off, size);
 3845	}
 3846
 3847	return -EACCES;
 3848}
 3849
 3850/* check read/write into a memory region with possible variable offset */
 3851static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
 3852				   int off, int size, u32 mem_size,
 3853				   bool zero_size_allowed)
 3854{
 3855	struct bpf_verifier_state *vstate = env->cur_state;
 3856	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 3857	struct bpf_reg_state *reg = &state->regs[regno];
 3858	int err;
 3859
 3860	/* We may have adjusted the register pointing to memory region, so we
 3861	 * need to try adding each of min_value and max_value to off
 3862	 * to make sure our theoretical access will be safe.
 3863	 *
 3864	 * The minimum value is only important with signed
 3865	 * comparisons where we can't assume the floor of a
 3866	 * value is 0.  If we are using signed variables for our
 3867	 * index'es we need to make sure that whatever we use
 3868	 * indexes we need to make sure that whatever we use
 3869	 */
 3870	if (reg->smin_value < 0 &&
 3871	    (reg->smin_value == S64_MIN ||
 3872	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
 3873	      reg->smin_value + off < 0)) {
 3874		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
 3875			regno);
 3876		return -EACCES;
 3877	}
 3878	err = __check_mem_access(env, regno, reg->smin_value + off, size,
 3879				 mem_size, zero_size_allowed);
 3880	if (err) {
 3881		verbose(env, "R%d min value is outside of the allowed memory range\n",
 3882			regno);
 3883		return err;
 3884	}
 3885
 3886	/* If we haven't set a max value then we need to bail since we can't be
 3887	 * sure we won't do bad things.
 3888	 * If reg->umax_value + off could overflow, treat that as unbounded too.
 3889	 */
 3890	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
 3891		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
 3892			regno);
 3893		return -EACCES;
 3894	}
 3895	err = __check_mem_access(env, regno, reg->umax_value + off, size,
 3896				 mem_size, zero_size_allowed);
 3897	if (err) {
 3898		verbose(env, "R%d max value is outside of the allowed memory range\n",
 3899			regno);
 3900		return err;
 3901	}
 3902
 3903	return 0;
 3904}
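
/* Editorial sketch, not part of the original source: the bounds-check shape
 * the error messages above ask for, assuming 'off' comes from untrusted
 * input and 'val' points into a 64-byte map value.
 *
 *	if (off < 0 || off > 64 - sizeof(u64))	// gives the verifier smin/umax
 *		return 0;
 *	x = *(u64 *)((char *)val + off);	// both smin_value + off and
 *						// umax_value + off now pass
 *						// __check_mem_access()
 */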
 3905
 3906static int __check_ptr_off_reg(struct bpf_verifier_env *env,
 3907			       const struct bpf_reg_state *reg, int regno,
 3908			       bool fixed_off_ok)
 3909{
 3910	/* Access to this pointer-typed register or passing it to a helper
 3911	 * is only allowed in its original, unmodified form.
 3912	 */
 3913
 3914	if (reg->off < 0) {
 3915		verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
 3916			reg_type_str(env, reg->type), regno, reg->off);
 3917		return -EACCES;
 3918	}
 3919
 3920	if (!fixed_off_ok && reg->off) {
 3921		verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
 3922			reg_type_str(env, reg->type), regno, reg->off);
 3923		return -EACCES;
 3924	}
 3925
 3926	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 3927		char tn_buf[48];
 3928
 3929		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3930		verbose(env, "variable %s access var_off=%s disallowed\n",
 3931			reg_type_str(env, reg->type), tn_buf);
 3932		return -EACCES;
 3933	}
 3934
 3935	return 0;
 3936}
 3937
 3938int check_ptr_off_reg(struct bpf_verifier_env *env,
 3939		      const struct bpf_reg_state *reg, int regno)
 3940{
 3941	return __check_ptr_off_reg(env, reg, regno, false);
 3942}
 3943
 3944static int map_kptr_match_type(struct bpf_verifier_env *env,
 3945			       struct btf_field *kptr_field,
 3946			       struct bpf_reg_state *reg, u32 regno)
 3947{
 3948	const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
 3949	int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED;
 3950	const char *reg_name = "";
 3951
 3952	/* Only unreferenced case accepts untrusted pointers */
 3953	if (kptr_field->type == BPF_KPTR_UNREF)
 3954		perm_flags |= PTR_UNTRUSTED;
 3955
 3956	if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
 3957		goto bad_type;
 3958
 3959	if (!btf_is_kernel(reg->btf)) {
 3960		verbose(env, "R%d must point to kernel BTF\n", regno);
 3961		return -EINVAL;
 3962	}
 3963	/* We need to verify reg->type and reg->btf, before accessing reg->btf */
 3964	reg_name = kernel_type_name(reg->btf, reg->btf_id);
 3965
 3966	/* For ref_ptr case, release function check should ensure we get one
 3967	 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
 3968	 * normal store of unreferenced kptr, we must ensure var_off is zero.
 3969	 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
 3970	 * reg->off and reg->ref_obj_id are not needed here.
 3971	 */
 3972	if (__check_ptr_off_reg(env, reg, regno, true))
 3973		return -EACCES;
 3974
 3975	/* A full type match is needed, as BTF can be vmlinux or module BTF, and
 3976	 * we also need to take into account the reg->off.
 3977	 *
 3978	 * We want to support cases like:
 3979	 *
 3980	 * struct foo {
 3981	 *         struct bar br;
 3982	 *         struct baz bz;
 3983	 * };
 3984	 *
 3985	 * struct foo *v;
 3986	 * v = func();	      // PTR_TO_BTF_ID
 3987	 * val->foo = v;      // reg->off is zero, btf and btf_id match type
 3988	 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
 3989	 *                    // first member type of struct after comparison fails
 3990	 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
 3991	 *                    // to match type
 3992	 *
 3993	 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
 3994	 * is zero. We must also ensure that btf_struct_ids_match does not walk
 3995	 * the struct to match type against first member of struct, i.e. reject
 3996	 * second case from above. Hence, when type is BPF_KPTR_REF, we set
 3997	 * strict mode to true for type match.
 3998	 */
 3999	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
 4000				  kptr_field->kptr.btf, kptr_field->kptr.btf_id,
 4001				  kptr_field->type == BPF_KPTR_REF))
 4002		goto bad_type;
 4003	return 0;
 4004bad_type:
 4005	verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
 4006		reg_type_str(env, reg->type), reg_name);
 4007	verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
 4008	if (kptr_field->type == BPF_KPTR_UNREF)
 4009		verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
 4010			targ_name);
 4011	else
 4012		verbose(env, "\n");
 4013	return -EINVAL;
 4014}
 4015
 4016static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
 4017				 int value_regno, int insn_idx,
 4018				 struct btf_field *kptr_field)
 4019{
 4020	struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
 4021	int class = BPF_CLASS(insn->code);
 4022	struct bpf_reg_state *val_reg;
 4023
 4024	/* Things we already checked for in check_map_access and caller:
 4025	 *  - Reject cases where variable offset may touch kptr
 4026	 *  - size of access (must be BPF_DW)
 4027	 *  - tnum_is_const(reg->var_off)
 4028	 *  - kptr_field->offset == off + reg->var_off.value
 4029	 */
 4030	/* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
 4031	if (BPF_MODE(insn->code) != BPF_MEM) {
 4032		verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
 4033		return -EACCES;
 4034	}
 4035
 4036	/* We only allow loading referenced kptr, since it will be marked as
 4037	 * untrusted, similar to unreferenced kptr.
 4038	 */
 4039	if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
 4040		verbose(env, "store to referenced kptr disallowed\n");
 4041		return -EACCES;
 4042	}
 4043
 4044	if (class == BPF_LDX) {
 4045		val_reg = reg_state(env, value_regno);
 4046		/* We can simply mark the value_regno receiving the pointer
 4047		 * value from map as PTR_TO_BTF_ID, with the correct type.
 4048		 */
 4049		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
 4050				kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
 4051		/* For mark_ptr_or_null_reg */
 4052		val_reg->id = ++env->id_gen;
 4053	} else if (class == BPF_STX) {
 4054		val_reg = reg_state(env, value_regno);
 4055		if (!register_is_null(val_reg) &&
 4056		    map_kptr_match_type(env, kptr_field, val_reg, value_regno))
 4057			return -EACCES;
 4058	} else if (class == BPF_ST) {
 4059		if (insn->imm) {
 4060			verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
 4061				kptr_field->offset);
 4062			return -EACCES;
 4063		}
 4064	} else {
 4065		verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
 4066		return -EACCES;
 4067	}
 4068	return 0;
 4069}
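
/* Editorial sketch, not part of the original source: typical BPF C around a
 * kptr field; the struct, field tag and helper usage below are illustrative
 * and depend on the UAPI/libbpf headers in use.
 *
 *	struct map_val { struct some_kernel_type __kptr *p; };	// hypothetical type
 *
 *	q = v->p;			// BPF_LDX of the kptr: allowed, the result is
 *					// marked untrusted and maybe-NULL above
 *	old = bpf_kptr_xchg(&v->p, new_ref);	// new_ref: a referenced ptr acquired
 *					// elsewhere; direct stores to a referenced
 *					// kptr are rejected above
 */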
 4070
 4071/* check read/write into a map element with possible variable offset */
 4072static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 4073			    int off, int size, bool zero_size_allowed,
 4074			    enum bpf_access_src src)
 4075{
 4076	struct bpf_verifier_state *vstate = env->cur_state;
 4077	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 4078	struct bpf_reg_state *reg = &state->regs[regno];
 4079	struct bpf_map *map = reg->map_ptr;
 4080	struct btf_record *rec;
 4081	int err, i;
 4082
 4083	err = check_mem_region_access(env, regno, off, size, map->value_size,
 4084				      zero_size_allowed);
 4085	if (err)
 4086		return err;
 4087
 4088	if (IS_ERR_OR_NULL(map->record))
 4089		return 0;
 4090	rec = map->record;
 4091	for (i = 0; i < rec->cnt; i++) {
 4092		struct btf_field *field = &rec->fields[i];
 4093		u32 p = field->offset;
 4094
 4095		/* If any part of a field can be touched by load/store, reject
 4096		 * this program. To check that [x1, x2) overlaps with [y1, y2),
 4097		 * it is sufficient to check x1 < y2 && y1 < x2.
 4098		 */
 4099		if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
 4100		    p < reg->umax_value + off + size) {
 4101			switch (field->type) {
 4102			case BPF_KPTR_UNREF:
 4103			case BPF_KPTR_REF:
 4104				if (src != ACCESS_DIRECT) {
 4105					verbose(env, "kptr cannot be accessed indirectly by helper\n");
 4106					return -EACCES;
 4107				}
 4108				if (!tnum_is_const(reg->var_off)) {
 4109					verbose(env, "kptr access cannot have variable offset\n");
 4110					return -EACCES;
 4111				}
 4112				if (p != off + reg->var_off.value) {
 4113					verbose(env, "kptr access misaligned expected=%u off=%llu\n",
 4114						p, off + reg->var_off.value);
 4115					return -EACCES;
 4116				}
 4117				if (size != bpf_size_to_bytes(BPF_DW)) {
 4118					verbose(env, "kptr access size must be BPF_DW\n");
 4119					return -EACCES;
 4120				}
 4121				break;
 4122			default:
 4123				verbose(env, "%s cannot be accessed directly by load/store\n",
 4124					btf_field_type_name(field->type));
 4125				return -EACCES;
 4126			}
 4127		}
 4128	}
 4129	return 0;
 4130}
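
/* Editorial sketch, not part of the original verifier.c: the half-open
 * interval test used in the loop above; [x1, x2) and [y1, y2) intersect
 * exactly when x1 < y2 && y1 < x2.
 */
static bool __maybe_unused ranges_overlap_sketch(u64 x1, u64 x2, u64 y1, u64 y2)
{
	return x1 < y2 && y1 < x2;
}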
 4131
 4132#define MAX_PACKET_OFF 0xffff
 4133
 4134static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 4135				       const struct bpf_call_arg_meta *meta,
 4136				       enum bpf_access_type t)
 4137{
 4138	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 4139
 4140	switch (prog_type) {
 4141	/* Program types only with direct read access go here! */
 4142	case BPF_PROG_TYPE_LWT_IN:
 4143	case BPF_PROG_TYPE_LWT_OUT:
 4144	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 4145	case BPF_PROG_TYPE_SK_REUSEPORT:
 4146	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 4147	case BPF_PROG_TYPE_CGROUP_SKB:
 4148		if (t == BPF_WRITE)
 4149			return false;
 4150		fallthrough;
 4151
 4152	/* Program types with direct read + write access go here! */
 4153	case BPF_PROG_TYPE_SCHED_CLS:
 4154	case BPF_PROG_TYPE_SCHED_ACT:
 4155	case BPF_PROG_TYPE_XDP:
 4156	case BPF_PROG_TYPE_LWT_XMIT:
 4157	case BPF_PROG_TYPE_SK_SKB:
 4158	case BPF_PROG_TYPE_SK_MSG:
 4159		if (meta)
 4160			return meta->pkt_access;
 4161
 4162		env->seen_direct_write = true;
 4163		return true;
 4164
 4165	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 4166		if (t == BPF_WRITE)
 4167			env->seen_direct_write = true;
 4168
 4169		return true;
 4170
 4171	default:
 4172		return false;
 4173	}
 4174}
 4175
 4176static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 4177			       int size, bool zero_size_allowed)
 4178{
 4179	struct bpf_reg_state *regs = cur_regs(env);
 4180	struct bpf_reg_state *reg = &regs[regno];
 4181	int err;
 4182
 4183	/* We may have added a variable offset to the packet pointer; but any
 4184	 * reg->range we have comes after that.  We are only checking the fixed
 4185	 * offset.
 4186	 */
 4187
 4188	/* We don't allow negative numbers, because we aren't tracking enough
 4189	 * detail to prove they're safe.
 4190	 */
 4191	if (reg->smin_value < 0) {
 4192		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
 4193			regno);
 4194		return -EACCES;
 4195	}
 4196
 4197	err = reg->range < 0 ? -EINVAL :
 4198	      __check_mem_access(env, regno, off, size, reg->range,
 4199				 zero_size_allowed);
 4200	if (err) {
 4201		verbose(env, "R%d offset is outside of the packet\n", regno);
 4202		return err;
 4203	}
 4204
 4205	/* __check_mem_access has made sure "off + size - 1" is within u16.
 4206	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
 4207	 * otherwise find_good_pkt_pointers would have refused to set range info
 4208	 * and __check_mem_access would have rejected this pkt access.
 4209	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
 4210	 */
 4211	env->prog->aux->max_pkt_offset =
 4212		max_t(u32, env->prog->aux->max_pkt_offset,
 4213		      off + reg->umax_value + size - 1);
 4214
 4215	return err;
 4216}
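
/* Editorial sketch, not part of the original source: the canonical direct
 * packet access pattern whose comparison result ends up in reg->range and is
 * then validated here (assumes the XDP UAPI and linux/if_ether.h).
 *
 *	void *data     = (void *)(long)ctx->data;
 *	void *data_end = (void *)(long)ctx->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if ((void *)(eth + 1) > data_end)	// find_good_pkt_pointers() sets range
 *		return XDP_DROP;
 *	return eth->h_proto ? XDP_PASS : XDP_DROP;	// access stays within reg->range
 */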
 4217
 4218/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 4219static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 4220			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
 4221			    struct btf **btf, u32 *btf_id)
 4222{
 4223	struct bpf_insn_access_aux info = {
 4224		.reg_type = *reg_type,
 4225		.log = &env->log,
 4226	};
 4227
 4228	if (env->ops->is_valid_access &&
 4229	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
 4230		/* A non zero info.ctx_field_size indicates that this field is a
 4231		 * candidate for later verifier transformation to load the whole
 4232		 * field and then apply a mask when accessed with a narrower
 4233		 * access than actual ctx access size. A zero info.ctx_field_size
 4234		 * will only allow for whole field access and rejects any other
 4235		 * type of narrower access.
 4236		 */
 4237		*reg_type = info.reg_type;
 4238
 4239		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
 4240			*btf = info.btf;
 4241			*btf_id = info.btf_id;
 4242		} else {
 4243			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 4244		}
 4245		/* remember the offset of last byte accessed in ctx */
 4246		if (env->prog->aux->max_ctx_offset < off + size)
 4247			env->prog->aux->max_ctx_offset = off + size;
 4248		return 0;
 4249	}
 4250
 4251	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
 4252	return -EACCES;
 4253}
 4254
 4255static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
 4256				  int size)
 4257{
 4258	if (size < 0 || off < 0 ||
 4259	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
 4260		verbose(env, "invalid access to flow keys off=%d size=%d\n",
 4261			off, size);
 4262		return -EACCES;
 4263	}
 4264	return 0;
 4265}
 4266
 4267static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 4268			     u32 regno, int off, int size,
 4269			     enum bpf_access_type t)
 4270{
 4271	struct bpf_reg_state *regs = cur_regs(env);
 4272	struct bpf_reg_state *reg = &regs[regno];
 4273	struct bpf_insn_access_aux info = {};
 4274	bool valid;
 4275
 4276	if (reg->smin_value < 0) {
 4277		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
 4278			regno);
 4279		return -EACCES;
 4280	}
 4281
 4282	switch (reg->type) {
 4283	case PTR_TO_SOCK_COMMON:
 4284		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
 4285		break;
 4286	case PTR_TO_SOCKET:
 4287		valid = bpf_sock_is_valid_access(off, size, t, &info);
 4288		break;
 4289	case PTR_TO_TCP_SOCK:
 4290		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
 4291		break;
 4292	case PTR_TO_XDP_SOCK:
 4293		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
 4294		break;
 4295	default:
 4296		valid = false;
 4297	}
 4298
 4299
 4300	if (valid) {
 4301		env->insn_aux_data[insn_idx].ctx_field_size =
 4302			info.ctx_field_size;
 4303		return 0;
 4304	}
 4305
 4306	verbose(env, "R%d invalid %s access off=%d size=%d\n",
 4307		regno, reg_type_str(env, reg->type), off, size);
 4308
 4309	return -EACCES;
 4310}
 4311
 4312static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 4313{
 4314	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
 4315}
 4316
 4317static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 4318{
 4319	const struct bpf_reg_state *reg = reg_state(env, regno);
 4320
 4321	return reg->type == PTR_TO_CTX;
 4322}
 4323
 4324static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
 4325{
 4326	const struct bpf_reg_state *reg = reg_state(env, regno);
 4327
 4328	return type_is_sk_pointer(reg->type);
 4329}
 4330
 4331static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
 4332{
 4333	const struct bpf_reg_state *reg = reg_state(env, regno);
 4334
 4335	return type_is_pkt_pointer(reg->type);
 4336}
 4337
 4338static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
 4339{
 4340	const struct bpf_reg_state *reg = reg_state(env, regno);
 4341
 4342	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
 4343	return reg->type == PTR_TO_FLOW_KEYS;
 4344}
 4345
 4346static bool is_trusted_reg(const struct bpf_reg_state *reg)
 4347{
 4348	/* A referenced register is always trusted. */
 4349	if (reg->ref_obj_id)
 4350		return true;
 4351
 4352	/* If a register is not referenced, it is trusted if it has the
 4353	 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
 4354	 * other type modifiers may be safe, but we elect to take an opt-in
 4355	 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
 4356	 * not.
 4357	 *
 4358	 * Eventually, we should make PTR_TRUSTED the single source of truth
 4359	 * for whether a register is trusted.
 4360	 */
 4361	return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
 4362	       !bpf_type_has_unsafe_modifiers(reg->type);
 4363}
 4364
 4365static bool is_rcu_reg(const struct bpf_reg_state *reg)
 4366{
 4367	return reg->type & MEM_RCU;
 4368}
 4369
 4370static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 4371				   const struct bpf_reg_state *reg,
 4372				   int off, int size, bool strict)
 4373{
 4374	struct tnum reg_off;
 4375	int ip_align;
 4376
 4377	/* Byte size accesses are always allowed. */
 4378	if (!strict || size == 1)
 4379		return 0;
 4380
 4381	/* For platforms that do not have a Kconfig enabling
 4382	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
 4383	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
 4384	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
 4385	 * to this code only in strict mode where we want to emulate
 4386	 * the NET_IP_ALIGN==2 checking.  Therefore use an
 4387	 * unconditional IP align value of '2'.
 4388	 */
 4389	ip_align = 2;
 4390
 4391	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
 4392	if (!tnum_is_aligned(reg_off, size)) {
 4393		char tn_buf[48];
 4394
 4395		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 4396		verbose(env,
 4397			"misaligned packet access off %d+%s+%d+%d size %d\n",
 4398			ip_align, tn_buf, reg->off, off, size);
 4399		return -EACCES;
 4400	}
 4401
 4402	return 0;
 4403}
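
/* Editorial note, not part of the original source: a worked example of the
 * check above. With ip_align == 2 and a 14-byte Ethernet header, a 4-byte
 * load of the IPv4 source address sits at 2 + 14 + 12 = 28, which is 4-byte
 * aligned and passes tnum_is_aligned(); shifting the access by one byte
 * makes the sum 29 and triggers the "misaligned packet access" error.
 */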
 4404
 4405static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
 4406				       const struct bpf_reg_state *reg,
 4407				       const char *pointer_desc,
 4408				       int off, int size, bool strict)
 4409{
 4410	struct tnum reg_off;
 4411
 4412	/* Byte size accesses are always allowed. */
 4413	if (!strict || size == 1)
 4414		return 0;
 4415
 4416	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
 4417	if (!tnum_is_aligned(reg_off, size)) {
 4418		char tn_buf[48];
 4419
 4420		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 4421		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
 4422			pointer_desc, tn_buf, reg->off, off, size);
 4423		return -EACCES;
 4424	}
 4425
 4426	return 0;
 4427}
 4428
 4429static int check_ptr_alignment(struct bpf_verifier_env *env,
 4430			       const struct bpf_reg_state *reg, int off,
 4431			       int size, bool strict_alignment_once)
 4432{
 4433	bool strict = env->strict_alignment || strict_alignment_once;
 4434	const char *pointer_desc = "";
 4435
 4436	switch (reg->type) {
 4437	case PTR_TO_PACKET:
 4438	case PTR_TO_PACKET_META:
 4439		/* Special case, because of NET_IP_ALIGN. Given metadata sits
 4440		 * right in front, treat it the very same way.
 4441		 */
 4442		return check_pkt_ptr_alignment(env, reg, off, size, strict);
 4443	case PTR_TO_FLOW_KEYS:
 4444		pointer_desc = "flow keys ";
 4445		break;
 4446	case PTR_TO_MAP_KEY:
 4447		pointer_desc = "key ";
 4448		break;
 4449	case PTR_TO_MAP_VALUE:
 4450		pointer_desc = "value ";
 4451		break;
 4452	case PTR_TO_CTX:
 4453		pointer_desc = "context ";
 4454		break;
 4455	case PTR_TO_STACK:
 4456		pointer_desc = "stack ";
 4457		/* The stack spill tracking logic in check_stack_write_fixed_off()
 4458		 * and check_stack_read_fixed_off() relies on stack accesses being
 4459		 * aligned.
 4460		 */
 4461		strict = true;
 4462		break;
 4463	case PTR_TO_SOCKET:
 4464		pointer_desc = "sock ";
 4465		break;
 4466	case PTR_TO_SOCK_COMMON:
 4467		pointer_desc = "sock_common ";
 4468		break;
 4469	case PTR_TO_TCP_SOCK:
 4470		pointer_desc = "tcp_sock ";
 4471		break;
 4472	case PTR_TO_XDP_SOCK:
 4473		pointer_desc = "xdp_sock ";
 4474		break;
 4475	default:
 4476		break;
 4477	}
 4478	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
 4479					   strict);
 4480}
 4481
 4482static int update_stack_depth(struct bpf_verifier_env *env,
 4483			      const struct bpf_func_state *func,
 4484			      int off)
 4485{
 4486	u16 stack = env->subprog_info[func->subprogno].stack_depth;
 4487
 4488	if (stack >= -off)
 4489		return 0;
 4490
 4491	/* update known max for given subprogram */
 4492	env->subprog_info[func->subprogno].stack_depth = -off;
 4493	return 0;
 4494}
 4495
 4496/* starting from main bpf function walk all instructions of the function
 4497 * and recursively walk all callees that the given function can call.
 4498 * Ignore jump and exit insns.
 4499 * Since recursion is prevented by check_cfg() this algorithm
 4500 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
 4501 */
 4502static int check_max_stack_depth(struct bpf_verifier_env *env)
 4503{
 4504	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
 4505	struct bpf_subprog_info *subprog = env->subprog_info;
 4506	struct bpf_insn *insn = env->prog->insnsi;
 4507	bool tail_call_reachable = false;
 4508	int ret_insn[MAX_CALL_FRAMES];
 4509	int ret_prog[MAX_CALL_FRAMES];
 4510	int j;
 4511
 4512process_func:
 4513	/* protect against potential stack overflow that might happen when
 4514	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
 4515	 * depth for such case down to 256 so that the worst case scenario
 4516	 * depth for such a case down to 256 so that the worst case scenario
 4517	 * would result in an 8k stack size (32, which is the tail call limit,
 4518	 * times 256 = 8k).
 4519	 * To get the idea what might happen, see an example:
 4520	 * func1 -> sub rsp, 128
 4521	 *  subfunc1 -> sub rsp, 256
 4522	 *  tailcall1 -> add rsp, 256
 4523	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
 4524	 *   subfunc2 -> sub rsp, 64
 4525	 *   subfunc22 -> sub rsp, 128
 4526	 *   tailcall2 -> add rsp, 128
 4527	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
 4528	 *
 4529	 * tailcall will unwind the current stack frame but it will not get rid
 4530	 * of caller's stack as shown on the example above.
 4531	 */
 4532	if (idx && subprog[idx].has_tail_call && depth >= 256) {
 4533		verbose(env,
 4534			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
 4535			depth);
 4536		return -EACCES;
 4537	}
 4538	/* round up to 32-bytes, since this is granularity
 4539	 * of interpreter stack size
 4540	 */
 4541	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
 4542	if (depth > MAX_BPF_STACK) {
 4543		verbose(env, "combined stack size of %d calls is %d. Too large\n",
 4544			frame + 1, depth);
 4545		return -EACCES;
 4546	}
 4547continue_func:
 4548	subprog_end = subprog[idx + 1].start;
 4549	for (; i < subprog_end; i++) {
 4550		int next_insn;
 4551
 4552		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
 4553			continue;
 4554		/* remember insn and function to return to */
 4555		ret_insn[frame] = i + 1;
 4556		ret_prog[frame] = idx;
 4557
 4558		/* find the callee */
 4559		next_insn = i + insn[i].imm + 1;
 4560		idx = find_subprog(env, next_insn);
 4561		if (idx < 0) {
 4562			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
 4563				  next_insn);
 4564			return -EFAULT;
 4565		}
 4566		if (subprog[idx].is_async_cb) {
 4567			if (subprog[idx].has_tail_call) {
 4568				verbose(env, "verifier bug. subprog has tail_call and async cb\n");
 4569				return -EFAULT;
 4570			}
 4571			 /* async callbacks don't increase bpf prog stack size */
 4572			continue;
 4573		}
 4574		i = next_insn;
 4575
 4576		if (subprog[idx].has_tail_call)
 4577			tail_call_reachable = true;
 4578
 4579		frame++;
 4580		if (frame >= MAX_CALL_FRAMES) {
 4581			verbose(env, "the call stack of %d frames is too deep !\n",
 4582				frame);
 4583			return -E2BIG;
 4584		}
 4585		goto process_func;
 4586	}
 4587	/* if tail call got detected across bpf2bpf calls then mark each of the
 4588	 * currently present subprog frames as tail call reachable subprogs;
 4589	 * this info will be utilized by JIT so that we will be preserving the
 4590	 * tail call counter throughout bpf2bpf calls combined with tailcalls
 4591	 */
 4592	if (tail_call_reachable)
 4593		for (j = 0; j < frame; j++)
 4594			subprog[ret_prog[j]].tail_call_reachable = true;
 4595	if (subprog[0].tail_call_reachable)
 4596		env->prog->aux->tail_call_reachable = true;
 4597
 4598	/* end of for() loop means the last insn of the 'subprog'
 4599	 * was reached. Doesn't matter whether it was JA or EXIT
 4600	 */
 4601	if (frame == 0)
 4602		return 0;
 4603	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
 4604	frame--;
 4605	i = ret_insn[frame];
 4606	idx = ret_prog[frame];
 4607	goto continue_func;
 4608}
 4609
 4610#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 4611static int get_callee_stack_depth(struct bpf_verifier_env *env,
 4612				  const struct bpf_insn *insn, int idx)
 4613{
 4614	int start = idx + insn->imm + 1, subprog;
 4615
 4616	subprog = find_subprog(env, start);
 4617	if (subprog < 0) {
 4618		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
 4619			  start);
 4620		return -EFAULT;
 4621	}
 4622	return env->subprog_info[subprog].stack_depth;
 4623}
 4624#endif
 4625
 4626static int __check_buffer_access(struct bpf_verifier_env *env,
 4627				 const char *buf_info,
 4628				 const struct bpf_reg_state *reg,
 4629				 int regno, int off, int size)
 4630{
 4631	if (off < 0) {
 4632		verbose(env,
 4633			"R%d invalid %s buffer access: off=%d, size=%d\n",
 4634			regno, buf_info, off, size);
 4635		return -EACCES;
 4636	}
 4637	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 4638		char tn_buf[48];
 4639
 4640		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 4641		verbose(env,
 4642			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
 4643			regno, off, tn_buf);
 4644		return -EACCES;
 4645	}
 4646
 4647	return 0;
 4648}
 4649
 4650static int check_tp_buffer_access(struct bpf_verifier_env *env,
 4651				  const struct bpf_reg_state *reg,
 4652				  int regno, int off, int size)
 4653{
 4654	int err;
 4655
 4656	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
 4657	if (err)
 4658		return err;
 4659
 4660	if (off + size > env->prog->aux->max_tp_access)
 4661		env->prog->aux->max_tp_access = off + size;
 4662
 4663	return 0;
 4664}
 4665
 4666static int check_buffer_access(struct bpf_verifier_env *env,
 4667			       const struct bpf_reg_state *reg,
 4668			       int regno, int off, int size,
 4669			       bool zero_size_allowed,
 4670			       u32 *max_access)
 4671{
 4672	const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
 4673	int err;
 4674
 4675	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
 4676	if (err)
 4677		return err;
 4678
 4679	if (off + size > *max_access)
 4680		*max_access = off + size;
 4681
 4682	return 0;
 4683}
 4684
 4685/* BPF architecture zero extends alu32 ops into 64-bit registers */
 4686static void zext_32_to_64(struct bpf_reg_state *reg)
 4687{
 4688	reg->var_off = tnum_subreg(reg->var_off);
 4689	__reg_assign_32_into_64(reg);
 4690}
 4691
 4692/* truncate register to smaller size (in bytes)
 4693 * must be called with size < BPF_REG_SIZE
 4694 */
 4695static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 4696{
 4697	u64 mask;
 4698
 4699	/* clear high bits in bit representation */
 4700	reg->var_off = tnum_cast(reg->var_off, size);
 4701
 4702	/* fix arithmetic bounds */
 4703	mask = ((u64)1 << (size * 8)) - 1;
 4704	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
 4705		reg->umin_value &= mask;
 4706		reg->umax_value &= mask;
 4707	} else {
 4708		reg->umin_value = 0;
 4709		reg->umax_value = mask;
 4710	}
 4711	reg->smin_value = reg->umin_value;
 4712	reg->smax_value = reg->umax_value;
 4713
 4714	/* If size is smaller than the 32-bit register width, the 32-bit
 4715	 * register values are also truncated, so we push the 64-bit bounds
 4716	 * into the 32-bit bounds. The above were already truncated to < 32 bits.
 4717	 */
 4718	if (size >= 4)
 4719		return;
 4720	__reg_combine_64_into_32(reg);
 4721}
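
/* Editorial note, not part of the original source: with size == 2 the mask
 * above is 0xffff. Bounds [0x10003, 0x10ff0] share their high bits, so the
 * truncated bounds stay tight at [0x3, 0xff0]; bounds [0xfff0, 0x10010]
 * straddle a 64K boundary, so the result falls back to [0, 0xffff].
 */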
 4722
 4723static bool bpf_map_is_rdonly(const struct bpf_map *map)
 4724{
 4725	/* A map is considered read-only if the following conditions are true:
 4726	 *
 4727	 * 1) BPF program side cannot change any of the map content. The
 4728	 *    BPF_F_RDONLY_PROG flag holds throughout the lifetime of a map
 4729	 *    and was set at map creation time.
 4730	 * 2) The map value(s) have been initialized from user space by a
 4731	 *    loader and then "frozen", such that no new map update/delete
 4732	 *    operations from syscall side are possible for the rest of
 4733	 *    the map's lifetime from that point onwards.
 4734	 * 3) Any parallel/pending map update/delete operations from syscall
 4735	 *    side have been completed. Only after that point, it's safe to
 4736	 *    assume that map value(s) are immutable.
 4737	 */
 4738	return (map->map_flags & BPF_F_RDONLY_PROG) &&
 4739	       READ_ONCE(map->frozen) &&
 4740	       !bpf_map_write_active(map);
 4741}
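
/* Editorial sketch, not part of the original source: user-space steps that
 * make the three conditions above hold, assuming libbpf >= 0.7 wrappers.
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RDONLY_PROG);
 *	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "rd", 4, 8, 1, &opts);	// (1)
 *	// ... fill in the values with bpf_map_update_elem() ...
 *	bpf_map_freeze(fd);	// (2): no further syscall-side writes
 *	// (3) holds once any in-flight syscall-side updates have drained
 */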
 4742
 4743static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
 4744{
 4745	void *ptr;
 4746	u64 addr;
 4747	int err;
 4748
 4749	err = map->ops->map_direct_value_addr(map, &addr, off);
 4750	if (err)
 4751		return err;
 4752	ptr = (void *)(long)addr + off;
 4753
 4754	switch (size) {
 4755	case sizeof(u8):
 4756		*val = (u64)*(u8 *)ptr;
 4757		break;
 4758	case sizeof(u16):
 4759		*val = (u64)*(u16 *)ptr;
 4760		break;
 4761	case sizeof(u32):
 4762		*val = (u64)*(u32 *)ptr;
 4763		break;
 4764	case sizeof(u64):
 4765		*val = *(u64 *)ptr;
 4766		break;
 4767	default:
 4768		return -EINVAL;
 4769	}
 4770	return 0;
 4771}
 4772
 4773static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 4774				   struct bpf_reg_state *regs,
 4775				   int regno, int off, int size,
 4776				   enum bpf_access_type atype,
 4777				   int value_regno)
 4778{
 4779	struct bpf_reg_state *reg = regs + regno;
 4780	const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
 4781	const char *tname = btf_name_by_offset(reg->btf, t->name_off);
 4782	enum bpf_type_flag flag = 0;
 4783	u32 btf_id;
 4784	int ret;
 4785
 4786	if (!env->allow_ptr_leaks) {
 4787		verbose(env,
 4788			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
 4789			tname);
 4790		return -EPERM;
 4791	}
 4792	if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
 4793		verbose(env,
 4794			"Cannot access kernel 'struct %s' from non-GPL compatible program\n",
 4795			tname);
 4796		return -EINVAL;
 4797	}
 4798	if (off < 0) {
 4799		verbose(env,
 4800			"R%d is ptr_%s invalid negative access: off=%d\n",
 4801			regno, tname, off);
 4802		return -EACCES;
 4803	}
 4804	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 4805		char tn_buf[48];
 4806
 4807		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 4808		verbose(env,
 4809			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
 4810			regno, tname, off, tn_buf);
 4811		return -EACCES;
 4812	}
 4813
 4814	if (reg->type & MEM_USER) {
 4815		verbose(env,
 4816			"R%d is ptr_%s access user memory: off=%d\n",
 4817			regno, tname, off);
 4818		return -EACCES;
 4819	}
 4820
 4821	if (reg->type & MEM_PERCPU) {
 4822		verbose(env,
 4823			"R%d is ptr_%s access percpu memory: off=%d\n",
 4824			regno, tname, off);
 4825		return -EACCES;
 4826	}
 4827
 4828	if (env->ops->btf_struct_access && !type_is_alloc(reg->type)) {
 4829		if (!btf_is_kernel(reg->btf)) {
 4830			verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
 4831			return -EFAULT;
 4832		}
 4833		ret = env->ops->btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
 4834	} else {
 4835		/* Writes are permitted with default btf_struct_access for
 4836		 * program allocated objects (which always have ref_obj_id > 0),
 4837		 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
 4838		 */
 4839		if (atype != BPF_READ && reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
 4840			verbose(env, "only read is supported\n");
 4841			return -EACCES;
 4842		}
 4843
 4844		if (type_is_alloc(reg->type) && !reg->ref_obj_id) {
 4845			verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
 4846			return -EFAULT;
 4847		}
 4848
 4849		ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag);
 4850	}
 4851
 4852	if (ret < 0)
 4853		return ret;
 4854
 4855	/* If this is an untrusted pointer, all pointers formed by walking it
 4856	 * also inherit the untrusted flag.
 4857	 */
 4858	if (type_flag(reg->type) & PTR_UNTRUSTED)
 4859		flag |= PTR_UNTRUSTED;
 4860
 4861	/* By default any pointer obtained from walking a trusted pointer is
 4862	 * no longer trusted except the rcu case below.
 4863	 */
 4864	flag &= ~PTR_TRUSTED;
 4865
 4866	if (flag & MEM_RCU) {
 4867		/* Mark value register as MEM_RCU only if it is protected by
 4868		 * bpf_rcu_read_lock() and the ptr reg is rcu or trusted. MEM_RCU
 4869		 * itself can already indicate trustedness inside the rcu
 4870		 * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
 4871		 * it could be null in some cases.
 4872		 */
 4873		if (!env->cur_state->active_rcu_lock ||
 4874		    !(is_trusted_reg(reg) || is_rcu_reg(reg)))
 4875			flag &= ~MEM_RCU;
 4876		else
 4877			flag |= PTR_MAYBE_NULL;
 4878	} else if (reg->type & MEM_RCU) {
 4879		/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
 4880		 * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
 4881		 */
 4882		flag |= PTR_UNTRUSTED;
 4883	}
 4884
 4885	if (atype == BPF_READ && value_regno >= 0)
 4886		mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
 4887
 4888	return 0;
 4889}
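
/* A rough BPF-C sketch of the pointer walks verified above (assuming a tracing
 * program where the task pointer is trusted; the field choice is only
 * illustrative):
 *
 *	struct task_struct *task = bpf_get_current_task_btf();
 *	struct mm_struct *mm = task->mm;	// each load goes through the
 *	unsigned long ss = mm->start_stack;	// checks above
 *
 * Per the rules above, the pointer loaded from task->mm no longer carries
 * PTR_TRUSTED; it would keep MEM_RCU (plus PTR_MAYBE_NULL) only if the field
 * were tagged __rcu and the walk happened under bpf_rcu_read_lock().
 */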
 4890
 4891static int check_ptr_to_map_access(struct bpf_verifier_env *env,
 4892				   struct bpf_reg_state *regs,
 4893				   int regno, int off, int size,
 4894				   enum bpf_access_type atype,
 4895				   int value_regno)
 4896{
 4897	struct bpf_reg_state *reg = regs + regno;
 4898	struct bpf_map *map = reg->map_ptr;
 4899	struct bpf_reg_state map_reg;
 4900	enum bpf_type_flag flag = 0;
 4901	const struct btf_type *t;
 4902	const char *tname;
 4903	u32 btf_id;
 4904	int ret;
 4905
 4906	if (!btf_vmlinux) {
 4907		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
 4908		return -ENOTSUPP;
 4909	}
 4910
 4911	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
 4912		verbose(env, "map_ptr access not supported for map type %d\n",
 4913			map->map_type);
 4914		return -ENOTSUPP;
 4915	}
 4916
 4917	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
 4918	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
 4919
 4920	if (!env->allow_ptr_leaks) {
 4921		verbose(env,
 4922			"'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
 4923			tname);
 4924		return -EPERM;
 4925	}
 4926
 4927	if (off < 0) {
 4928		verbose(env, "R%d is %s invalid negative access: off=%d\n",
 4929			regno, tname, off);
 4930		return -EACCES;
 4931	}
 4932
 4933	if (atype != BPF_READ) {
 4934		verbose(env, "only read from %s is supported\n", tname);
 4935		return -EACCES;
 4936	}
 4937
 4938	/* Simulate access to a PTR_TO_BTF_ID */
 4939	memset(&map_reg, 0, sizeof(map_reg));
 4940	mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
 4941	ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag);
 4942	if (ret < 0)
 4943		return ret;
 4944
 4945	if (value_regno >= 0)
 4946		mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
 4947
 4948	return 0;
 4949}
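
/* A minimal sketch of what this path allows (map name illustrative, and only
 * with CONFIG_DEBUG_INFO_BTF plus CAP_PERFMON/CAP_SYS_ADMIN as checked above):
 * a program may read, but never write, fields of the map object itself, e.g.:
 *
 *	__u32 max = ((struct bpf_map *)&my_map)->max_entries;
 */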
 4950
 4951/* Check that the stack access at the given offset is within bounds. The
 4952 * maximum valid offset is -1.
 4953 *
 4954 * The minimum valid offset is -MAX_BPF_STACK for writes, and
 4955 * -state->allocated_stack for reads.
 4956 */
 4957static int check_stack_slot_within_bounds(int off,
 4958					  struct bpf_func_state *state,
 4959					  enum bpf_access_type t)
 4960{
 4961	int min_valid_off;
 4962
 4963	if (t == BPF_WRITE)
 4964		min_valid_off = -MAX_BPF_STACK;
 4965	else
 4966		min_valid_off = -state->allocated_stack;
 4967
 4968	if (off < min_valid_off || off > -1)
 4969		return -EACCES;
 4970	return 0;
 4971}
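
/* For illustration, in the insn-macro style used elsewhere in this file
 * (MAX_BPF_STACK is 512): the first store below is the lowest in-bounds
 * 8-byte write, the second fails the minimum-offset check above:
 *
 *	BPF_ST_MEM(BPF_DW, BPF_REG_10, -512, 0),
 *	BPF_ST_MEM(BPF_DW, BPF_REG_10, -520, 0),	// off < -MAX_BPF_STACK
 */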
 4972
 4973/* Check that the stack access at 'regno + off' falls within the maximum stack
 4974 * bounds.
 4975 *
 4976 * 'off' includes 'regno->off', but not its dynamic part (if any).
 4977 */
 4978static int check_stack_access_within_bounds(
 4979		struct bpf_verifier_env *env,
 4980		int regno, int off, int access_size,
 4981		enum bpf_access_src src, enum bpf_access_type type)
 4982{
 4983	struct bpf_reg_state *regs = cur_regs(env);
 4984	struct bpf_reg_state *reg = regs + regno;
 4985	struct bpf_func_state *state = func(env, reg);
 4986	int min_off, max_off;
 4987	int err;
 4988	char *err_extra;
 4989
 4990	if (src == ACCESS_HELPER)
 4991		/* We don't know if helpers are reading or writing (or both). */
 4992		err_extra = " indirect access to";
 4993	else if (type == BPF_READ)
 4994		err_extra = " read from";
 4995	else
 4996		err_extra = " write to";
 4997
 4998	if (tnum_is_const(reg->var_off)) {
 4999		min_off = reg->var_off.value + off;
 5000		if (access_size > 0)
 5001			max_off = min_off + access_size - 1;
 5002		else
 5003			max_off = min_off;
 5004	} else {
 5005		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
 5006		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
 5007			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
 5008				err_extra, regno);
 5009			return -EACCES;
 5010		}
 5011		min_off = reg->smin_value + off;
 5012		if (access_size > 0)
 5013			max_off = reg->smax_value + off + access_size - 1;
 5014		else
 5015			max_off = min_off;
 5016	}
 5017
 5018	err = check_stack_slot_within_bounds(min_off, state, type);
 5019	if (!err)
 5020		err = check_stack_slot_within_bounds(max_off, state, type);
 5021
 5022	if (err) {
 5023		if (tnum_is_const(reg->var_off)) {
 5024			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
 5025				err_extra, regno, off, access_size);
 5026		} else {
 5027			char tn_buf[48];
 5028
 5029			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 5030			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
 5031				err_extra, regno, tn_buf, access_size);
 5032		}
 5033	}
 5034	return err;
 5035}
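
/* Worked example of the variable-offset bounds above: for an 8-byte write
 * through fp plus a scalar known to be in [-16, -8], min_off/max_off come out
 * as -16/-1 and the access is accepted; widening the scalar to [-16, 0] makes
 * max_off = 7, which violates the "off > -1" bound and triggers the
 * variable-offset error message above.
 */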
 5036
 5037/* check whether memory at (regno + off) is accessible for t = (read | write)
 5038 * if t==write, value_regno is a register whose value is stored into memory
 5039 * if t==read, value_regno is a register which will receive the value from memory
 5040 * if t==write && value_regno==-1, some unknown value is stored into memory
 5041 * if t==read && value_regno==-1, don't care what we read from memory
 5042 */
 5043static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
 5044			    int off, int bpf_size, enum bpf_access_type t,
 5045			    int value_regno, bool strict_alignment_once)
 5046{
 5047	struct bpf_reg_state *regs = cur_regs(env);
 5048	struct bpf_reg_state *reg = regs + regno;
 5049	struct bpf_func_state *state;
 5050	int size, err = 0;
 5051
 5052	size = bpf_size_to_bytes(bpf_size);
 5053	if (size < 0)
 5054		return size;
 5055
 5056	/* alignment checks will add in reg->off themselves */
 5057	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
 5058	if (err)
 5059		return err;
 5060
 5061	/* for access checks, reg->off is just part of off */
 5062	off += reg->off;
 5063
 5064	if (reg->type == PTR_TO_MAP_KEY) {
 5065		if (t == BPF_WRITE) {
 5066			verbose(env, "write to change key R%d not allowed\n", regno);
 5067			return -EACCES;
 5068		}
 5069
 5070		err = check_mem_region_access(env, regno, off, size,
 5071					      reg->map_ptr->key_size, false);
 5072		if (err)
 5073			return err;
 5074		if (value_regno >= 0)
 5075			mark_reg_unknown(env, regs, value_regno);
 5076	} else if (reg->type == PTR_TO_MAP_VALUE) {
 5077		struct btf_field *kptr_field = NULL;
 5078
 5079		if (t == BPF_WRITE && value_regno >= 0 &&
 5080		    is_pointer_value(env, value_regno)) {
 5081			verbose(env, "R%d leaks addr into map\n", value_regno);
 5082			return -EACCES;
 5083		}
 5084		err = check_map_access_type(env, regno, off, size, t);
 5085		if (err)
 5086			return err;
 5087		err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
 5088		if (err)
 5089			return err;
 5090		if (tnum_is_const(reg->var_off))
 5091			kptr_field = btf_record_find(reg->map_ptr->record,
 5092						     off + reg->var_off.value, BPF_KPTR);
 5093		if (kptr_field) {
 5094			err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
 5095		} else if (t == BPF_READ && value_regno >= 0) {
 5096			struct bpf_map *map = reg->map_ptr;
 5097
 5098			/* if map is read-only, track its contents as scalars */
 5099			if (tnum_is_const(reg->var_off) &&
 5100			    bpf_map_is_rdonly(map) &&
 5101			    map->ops->map_direct_value_addr) {
 5102				int map_off = off + reg->var_off.value;
 5103				u64 val = 0;
 5104
 5105				err = bpf_map_direct_read(map, map_off, size,
 5106							  &val);
 5107				if (err)
 5108					return err;
 5109
 5110				regs[value_regno].type = SCALAR_VALUE;
 5111				__mark_reg_known(&regs[value_regno], val);
 5112			} else {
 5113				mark_reg_unknown(env, regs, value_regno);
 5114			}
 5115		}
 5116	} else if (base_type(reg->type) == PTR_TO_MEM) {
 5117		bool rdonly_mem = type_is_rdonly_mem(reg->type);
 5118
 5119		if (type_may_be_null(reg->type)) {
 5120			verbose(env, "R%d invalid mem access '%s'\n", regno,
 5121				reg_type_str(env, reg->type));
 5122			return -EACCES;
 5123		}
 5124
 5125		if (t == BPF_WRITE && rdonly_mem) {
 5126			verbose(env, "R%d cannot write into %s\n",
 5127				regno, reg_type_str(env, reg->type));
 5128			return -EACCES;
 5129		}
 5130
 5131		if (t == BPF_WRITE && value_regno >= 0 &&
 5132		    is_pointer_value(env, value_regno)) {
 5133			verbose(env, "R%d leaks addr into mem\n", value_regno);
 5134			return -EACCES;
 5135		}
 5136
 5137		err = check_mem_region_access(env, regno, off, size,
 5138					      reg->mem_size, false);
 5139		if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
 5140			mark_reg_unknown(env, regs, value_regno);
 5141	} else if (reg->type == PTR_TO_CTX) {
 5142		enum bpf_reg_type reg_type = SCALAR_VALUE;
 5143		struct btf *btf = NULL;
 5144		u32 btf_id = 0;
 5145
 5146		if (t == BPF_WRITE && value_regno >= 0 &&
 5147		    is_pointer_value(env, value_regno)) {
 5148			verbose(env, "R%d leaks addr into ctx\n", value_regno);
 5149			return -EACCES;
 5150		}
 5151
 5152		err = check_ptr_off_reg(env, reg, regno);
 5153		if (err < 0)
 5154			return err;
 5155
 5156		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
 5157				       &btf_id);
 5158		if (err)
 5159			verbose_linfo(env, insn_idx, "; ");
 5160		if (!err && t == BPF_READ && value_regno >= 0) {
 5161			/* ctx access returns either a scalar, or a
 5162			 * PTR_TO_PACKET[_META,_END]. In the latter
 5163			 * case, we know the offset is zero.
 5164			 */
 5165			if (reg_type == SCALAR_VALUE) {
 5166				mark_reg_unknown(env, regs, value_regno);
 5167			} else {
 5168				mark_reg_known_zero(env, regs,
 5169						    value_regno);
 5170				if (type_may_be_null(reg_type))
 5171					regs[value_regno].id = ++env->id_gen;
 5172				/* A load of a ctx field could have a different
 5173				 * actual load size than the one encoded in the
 5174				 * insn. When the dst is a PTR, it is for sure
 5175				 * not a sub-register.
 5176				 */
 5177				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
 5178				if (base_type(reg_type) == PTR_TO_BTF_ID) {
 5179					regs[value_regno].btf = btf;
 5180					regs[value_regno].btf_id = btf_id;
 5181				}
 5182			}
 5183			regs[value_regno].type = reg_type;
 5184		}
 5185
 5186	} else if (reg->type == PTR_TO_STACK) {
 5187		/* Basic bounds checks. */
 5188		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
 5189		if (err)
 5190			return err;
 5191
 5192		state = func(env, reg);
 5193		err = update_stack_depth(env, state, off);
 5194		if (err)
 5195			return err;
 5196
 5197		if (t == BPF_READ)
 5198			err = check_stack_read(env, regno, off, size,
 5199					       value_regno);
 5200		else
 5201			err = check_stack_write(env, regno, off, size,
 5202						value_regno, insn_idx);
 5203	} else if (reg_is_pkt_pointer(reg)) {
 5204		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
 5205			verbose(env, "cannot write into packet\n");
 5206			return -EACCES;
 5207		}
 5208		if (t == BPF_WRITE && value_regno >= 0 &&
 5209		    is_pointer_value(env, value_regno)) {
 5210			verbose(env, "R%d leaks addr into packet\n",
 5211				value_regno);
 5212			return -EACCES;
 5213		}
 5214		err = check_packet_access(env, regno, off, size, false);
 5215		if (!err && t == BPF_READ && value_regno >= 0)
 5216			mark_reg_unknown(env, regs, value_regno);
 5217	} else if (reg->type == PTR_TO_FLOW_KEYS) {
 5218		if (t == BPF_WRITE && value_regno >= 0 &&
 5219		    is_pointer_value(env, value_regno)) {
 5220			verbose(env, "R%d leaks addr into flow keys\n",
 5221				value_regno);
 5222			return -EACCES;
 5223		}
 5224
 5225		err = check_flow_keys_access(env, off, size);
 5226		if (!err && t == BPF_READ && value_regno >= 0)
 5227			mark_reg_unknown(env, regs, value_regno);
 5228	} else if (type_is_sk_pointer(reg->type)) {
 5229		if (t == BPF_WRITE) {
 5230			verbose(env, "R%d cannot write into %s\n",
 5231				regno, reg_type_str(env, reg->type));
 5232			return -EACCES;
 5233		}
 5234		err = check_sock_access(env, insn_idx, regno, off, size, t);
 5235		if (!err && value_regno >= 0)
 5236			mark_reg_unknown(env, regs, value_regno);
 5237	} else if (reg->type == PTR_TO_TP_BUFFER) {
 5238		err = check_tp_buffer_access(env, reg, regno, off, size);
 5239		if (!err && t == BPF_READ && value_regno >= 0)
 5240			mark_reg_unknown(env, regs, value_regno);
 5241	} else if (base_type(reg->type) == PTR_TO_BTF_ID &&
 5242		   !type_may_be_null(reg->type)) {
 5243		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
 5244					      value_regno);
 5245	} else if (reg->type == CONST_PTR_TO_MAP) {
 5246		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
 5247					      value_regno);
 5248	} else if (base_type(reg->type) == PTR_TO_BUF) {
 5249		bool rdonly_mem = type_is_rdonly_mem(reg->type);
 5250		u32 *max_access;
 5251
 5252		if (rdonly_mem) {
 5253			if (t == BPF_WRITE) {
 5254				verbose(env, "R%d cannot write into %s\n",
 5255					regno, reg_type_str(env, reg->type));
 5256				return -EACCES;
 5257			}
 5258			max_access = &env->prog->aux->max_rdonly_access;
 5259		} else {
 5260			max_access = &env->prog->aux->max_rdwr_access;
 5261		}
 5262
 5263		err = check_buffer_access(env, reg, regno, off, size, false,
 5264					  max_access);
 5265
 5266		if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
 5267			mark_reg_unknown(env, regs, value_regno);
 5268	} else {
 5269		verbose(env, "R%d invalid mem access '%s'\n", regno,
 5270			reg_type_str(env, reg->type));
 5271		return -EACCES;
 5272	}
 5273
 5274	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
 5275	    regs[value_regno].type == SCALAR_VALUE) {
 5276		/* b/h/w load zero-extends, mark upper bits as known 0 */
 5277		coerce_reg_to_size(&regs[value_regno], size);
 5278	}
 5279	return err;
 5280}
 5281
 5282static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
 5283{
 5284	int load_reg;
 5285	int err;
 5286
 5287	switch (insn->imm) {
 5288	case BPF_ADD:
 5289	case BPF_ADD | BPF_FETCH:
 5290	case BPF_AND:
 5291	case BPF_AND | BPF_FETCH:
 5292	case BPF_OR:
 5293	case BPF_OR | BPF_FETCH:
 5294	case BPF_XOR:
 5295	case BPF_XOR | BPF_FETCH:
 5296	case BPF_XCHG:
 5297	case BPF_CMPXCHG:
 5298		break;
 5299	default:
 5300		verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
 5301		return -EINVAL;
 5302	}
 5303
 5304	if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
 5305		verbose(env, "invalid atomic operand size\n");
 5306		return -EINVAL;
 5307	}
 5308
 5309	/* check src1 operand */
 5310	err = check_reg_arg(env, insn->src_reg, SRC_OP);
 5311	if (err)
 5312		return err;
 5313
 5314	/* check src2 operand */
 5315	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 5316	if (err)
 5317		return err;
 5318
 5319	if (insn->imm == BPF_CMPXCHG) {
 5320		/* Check comparison of R0 with memory location */
 5321		const u32 aux_reg = BPF_REG_0;
 5322
 5323		err = check_reg_arg(env, aux_reg, SRC_OP);
 5324		if (err)
 5325			return err;
 5326
 5327		if (is_pointer_value(env, aux_reg)) {
 5328			verbose(env, "R%d leaks addr into mem\n", aux_reg);
 5329			return -EACCES;
 5330		}
 5331	}
 5332
 5333	if (is_pointer_value(env, insn->src_reg)) {
 5334		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
 5335		return -EACCES;
 5336	}
 5337
 5338	if (is_ctx_reg(env, insn->dst_reg) ||
 5339	    is_pkt_reg(env, insn->dst_reg) ||
 5340	    is_flow_key_reg(env, insn->dst_reg) ||
 5341	    is_sk_reg(env, insn->dst_reg)) {
 5342		verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
 5343			insn->dst_reg,
 5344			reg_type_str(env, reg_state(env, insn->dst_reg)->type));
 5345		return -EACCES;
 5346	}
 5347
 5348	if (insn->imm & BPF_FETCH) {
 5349		if (insn->imm == BPF_CMPXCHG)
 5350			load_reg = BPF_REG_0;
 5351		else
 5352			load_reg = insn->src_reg;
 5353
 5354		/* check and record load of old value */
 5355		err = check_reg_arg(env, load_reg, DST_OP);
 5356		if (err)
 5357			return err;
 5358	} else {
 5359		/* This instruction accesses a memory location but doesn't
 5360		 * actually load the old value into a register.
 5361		 */
 5362		load_reg = -1;
 5363	}
 5364
 5365	/* Check whether we can read the memory, with a second call for the
 5366	 * fetch case to simulate the register fill.
 5367	 */
 5368	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 5369			       BPF_SIZE(insn->code), BPF_READ, -1, true);
 5370	if (!err && load_reg >= 0)
 5371		err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 5372				       BPF_SIZE(insn->code), BPF_READ, load_reg,
 5373				       true);
 5374	if (err)
 5375		return err;
 5376
 5377	/* Check whether we can write into the same memory. */
 5378	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 5379			       BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 5380	if (err)
 5381		return err;
 5382
 5383	return 0;
 5384}
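
/* Sketch of the accesses simulated above, in the insn-macro style used
 * elsewhere in this file (assuming R1 still holds the context pointer):
 *
 *	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),		// init the stack slot
 *	BPF_MOV64_IMM(BPF_REG_2, 1),
 *	BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8),
 *	BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_1, BPF_REG_2, 0),
 *
 * The fetch-add on the stack slot passes both the simulated read and write
 * checks and marks R2 with the (unknown) old value; the last insn is rejected
 * by the "BPF_ATOMIC stores into R1 ctx is not allowed" check.
 */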
 5385
 5386/* When register 'regno' is used to read the stack (either directly or through
 5387 * a helper function) make sure that it's within stack boundary and, depending
 5388 * on the access type, that all elements of the stack are initialized.
 5389 *
 5390 * 'off' includes 'regno->off', but not its dynamic part (if any).
 5391 *
 5392 * All registers that have been spilled on the stack in the slots within the
 5393 * read offsets are marked as read.
 5394 */
 5395static int check_stack_range_initialized(
 5396		struct bpf_verifier_env *env, int regno, int off,
 5397		int access_size, bool zero_size_allowed,
 5398		enum bpf_access_src type, struct bpf_call_arg_meta *meta)
 5399{
 5400	struct bpf_reg_state *reg = reg_state(env, regno);
 5401	struct bpf_func_state *state = func(env, reg);
 5402	int err, min_off, max_off, i, j, slot, spi;
 5403	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
 5404	enum bpf_access_type bounds_check_type;
 5405	/* Some accesses can write anything into the stack, others are
 5406	 * read-only.
 5407	 */
 5408	bool clobber = false;
 5409
 5410	if (access_size == 0 && !zero_size_allowed) {
 5411		verbose(env, "invalid zero-sized read\n");
 5412		return -EACCES;
 5413	}
 5414
 5415	if (type == ACCESS_HELPER) {
 5416		/* The bounds checks for writes are more permissive than for
 5417		 * reads. However, if raw_mode is not set, we'll do extra
 5418		 * checks below.
 5419		 */
 5420		bounds_check_type = BPF_WRITE;
 5421		clobber = true;
 5422	} else {
 5423		bounds_check_type = BPF_READ;
 5424	}
 5425	err = check_stack_access_within_bounds(env, regno, off, access_size,
 5426					       type, bounds_check_type);
 5427	if (err)
 5428		return err;
 5429
 5430
 5431	if (tnum_is_const(reg->var_off)) {
 5432		min_off = max_off = reg->var_off.value + off;
 5433	} else {
 5434		/* Variable offset is prohibited for unprivileged mode for
 5435		 * simplicity since it requires corresponding support in
 5436		 * Spectre masking for stack ALU.
 5437		 * See also retrieve_ptr_limit().
 5438		 */
 5439		if (!env->bypass_spec_v1) {
 5440			char tn_buf[48];
 5441
 5442			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 5443			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
 5444				regno, err_extra, tn_buf);
 5445			return -EACCES;
 5446		}
 5447		/* Only an initialized buffer on the stack may be accessed with a
 5448		 * variable offset. With an uninitialized buffer it's hard to
 5449		 * guarantee that the whole memory range is marked as initialized
 5450		 * on helper return, since the exact bounds are unknown, which
 5451		 * could leak uninitialized stack memory.
 5452		 */
 5453		if (meta && meta->raw_mode)
 5454			meta = NULL;
 5455
 5456		min_off = reg->smin_value + off;
 5457		max_off = reg->smax_value + off;
 5458	}
 5459
 5460	if (meta && meta->raw_mode) {
 5461		meta->access_size = access_size;
 5462		meta->regno = regno;
 5463		return 0;
 5464	}
 5465
 5466	for (i = min_off; i < max_off + access_size; i++) {
 5467		u8 *stype;
 5468
 5469		slot = -i - 1;
 5470		spi = slot / BPF_REG_SIZE;
 5471		if (state->allocated_stack <= slot)
 5472			goto err;
 5473		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
 5474		if (*stype == STACK_MISC)
 5475			goto mark;
 5476		if (*stype == STACK_ZERO) {
 5477			if (clobber) {
 5478				/* helper can write anything into the stack */
 5479				*stype = STACK_MISC;
 5480			}
 5481			goto mark;
 5482		}
 5483
 5484		if (is_spilled_reg(&state->stack[spi]) &&
 5485		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
 5486		     env->allow_ptr_leaks)) {
 5487			if (clobber) {
 5488				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
 5489				for (j = 0; j < BPF_REG_SIZE; j++)
 5490					scrub_spilled_slot(&state->stack[spi].slot_type[j]);
 5491			}
 5492			goto mark;
 5493		}
 5494
 5495err:
 5496		if (tnum_is_const(reg->var_off)) {
 5497			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
 5498				err_extra, regno, min_off, i - min_off, access_size);
 5499		} else {
 5500			char tn_buf[48];
 5501
 5502			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 5503			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
 5504				err_extra, regno, tn_buf, i - min_off, access_size);
 5505		}
 5506		return -EACCES;
 5507mark:
 5508		/* reading any byte out of 8-byte 'spill_slot' will cause
 5509		 * the whole slot to be marked as 'read'
 5510		 */
 5511		mark_reg_read(env, &state->stack[spi].spilled_ptr,
 5512			      state->stack[spi].spilled_ptr.parent,
 5513			      REG_LIVE_READ64);
 5514		/* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
 5515		 * be sure whether the stack slot is written to or not. Hence, we
 5516		 * must still conservatively propagate reads upwards even if the
 5517		 * helper may write to the entire memory range.
 5518		 */
 5519	}
 5520	return update_stack_depth(env, state, min_off);
 5521}
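
/* A rough BPF-C sketch of what this enforces (helper choice is illustrative
 * and 'rb' is assumed to be a ringbuf map): a stack buffer handed to a helper
 * that may read it must be fully initialized, while raw_mode (MEM_UNINIT)
 * destination arguments may point at uninitialized stack:
 *
 *	char buf[16];
 *	bpf_get_current_comm(buf, sizeof(buf));		// ok: write-only dst
 *	bpf_ringbuf_output(&rb, buf, sizeof(buf), 0);	// ok here: buf was just filled
 */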
 5522
 5523static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 5524				   int access_size, bool zero_size_allowed,
 5525				   struct bpf_call_arg_meta *meta)
 5526{
 5527	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 5528	u32 *max_access;
 5529
 5530	switch (base_type(reg->type)) {
 5531	case PTR_TO_PACKET:
 5532	case PTR_TO_PACKET_META:
 5533		return check_packet_access(env, regno, reg->off, access_size,
 5534					   zero_size_allowed);
 5535	case PTR_TO_MAP_KEY:
 5536		if (meta && meta->raw_mode) {
 5537			verbose(env, "R%d cannot write into %s\n", regno,
 5538				reg_type_str(env, reg->type));
 5539			return -EACCES;
 5540		}
 5541		return check_mem_region_access(env, regno, reg->off, access_size,
 5542					       reg->map_ptr->key_size, false);
 5543	case PTR_TO_MAP_VALUE:
 5544		if (check_map_access_type(env, regno, reg->off, access_size,
 5545					  meta && meta->raw_mode ? BPF_WRITE :
 5546					  BPF_READ))
 5547			return -EACCES;
 5548		return check_map_access(env, regno, reg->off, access_size,
 5549					zero_size_allowed, ACCESS_HELPER);
 5550	case PTR_TO_MEM:
 5551		if (type_is_rdonly_mem(reg->type)) {
 5552			if (meta && meta->raw_mode) {
 5553				verbose(env, "R%d cannot write into %s\n", regno,
 5554					reg_type_str(env, reg->type));
 5555				return -EACCES;
 5556			}
 5557		}
 5558		return check_mem_region_access(env, regno, reg->off,
 5559					       access_size, reg->mem_size,
 5560					       zero_size_allowed);
 5561	case PTR_TO_BUF:
 5562		if (type_is_rdonly_mem(reg->type)) {
 5563			if (meta && meta->raw_mode) {
 5564				verbose(env, "R%d cannot write into %s\n", regno,
 5565					reg_type_str(env, reg->type));
 5566				return -EACCES;
 5567			}
 5568
 5569			max_access = &env->prog->aux->max_rdonly_access;
 5570		} else {
 5571			max_access = &env->prog->aux->max_rdwr_access;
 5572		}
 5573		return check_buffer_access(env, reg, regno, reg->off,
 5574					   access_size, zero_size_allowed,
 5575					   max_access);
 5576	case PTR_TO_STACK:
 5577		return check_stack_range_initialized(
 5578				env,
 5579				regno, reg->off, access_size,
 5580				zero_size_allowed, ACCESS_HELPER, meta);
 5581	case PTR_TO_CTX:
 5582		/* In case the function doesn't know how to access the context
 5583		 * (because we are in a program of type SYSCALL, for example), we
 5584		 * cannot statically check its size.
 5585		 * Dynamically check it now.
 5586		 */
 5587		if (!env->ops->convert_ctx_access) {
 5588			enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
 5589			int offset = access_size - 1;
 5590
 5591			/* Allow zero-byte read from PTR_TO_CTX */
 5592			if (access_size == 0)
 5593				return zero_size_allowed ? 0 : -EACCES;
 5594
 5595			return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
 5596						atype, -1, false);
 5597		}
 5598
 5599		fallthrough;
 5600	default: /* scalar_value or invalid ptr */
 5601		/* Allow zero-byte read from NULL, regardless of pointer type */
 5602		if (zero_size_allowed && access_size == 0 &&
 5603		    register_is_null(reg))
 5604			return 0;
 5605
 5606		verbose(env, "R%d type=%s ", regno,
 5607			reg_type_str(env, reg->type));
 5608		verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
 5609		return -EACCES;
 5610	}
 5611}
 5612
 5613static int check_mem_size_reg(struct bpf_verifier_env *env,
 5614			      struct bpf_reg_state *reg, u32 regno,
 5615			      bool zero_size_allowed,
 5616			      struct bpf_call_arg_meta *meta)
 5617{
 5618	int err;
 5619
 5620	/* This is used to refine r0 return value bounds for helpers
 5621	 * that enforce this value as an upper bound on return values.
 5622	 * See do_refine_retval_range() for helpers that can refine
 5623	 * the return value. The C type of the size argument is u32, so we
 5624	 * pull the register bound from umax_value; if it can be negative,
 5625	 * the verifier errors out. Only upper bounds can be learned because
 5626	 * retval is an int type and negative retvals are allowed.
 5627	 */
 5628	meta->msize_max_value = reg->umax_value;
 5629
 5630	/* The register is SCALAR_VALUE; the access check
 5631	 * happens using its boundaries.
 5632	 */
 5633	if (!tnum_is_const(reg->var_off))
 5634		/* For unprivileged variable accesses, disable raw
 5635		 * mode so that the program is required to
 5636		 * initialize all the memory that the helper could
 5637		 * just partially fill up.
 5638		 */
 5639		meta = NULL;
 5640
 5641	if (reg->smin_value < 0) {
 5642		verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
 5643			regno);
 5644		return -EACCES;
 5645	}
 5646
 5647	if (reg->umin_value == 0) {
 5648		err = check_helper_mem_access(env, regno - 1, 0,
 5649					      zero_size_allowed,
 5650					      meta);
 5651		if (err)
 5652			return err;
 5653	}
 5654
 5655	if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
 5656		verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
 5657			regno);
 5658		return -EACCES;
 5659	}
 5660	err = check_helper_mem_access(env, regno - 1,
 5661				      reg->umax_value,
 5662				      zero_size_allowed, meta);
 5663	if (!err)
 5664		err = mark_chain_precision(env, regno);
 5665	return err;
 5666}
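
/* A minimal BPF-C sketch of the size-register rules above (buf, src and len
 * are illustrative; len is some unknown scalar, e.g. read from a map value):
 * the length must be proven non-negative and bounded before the helper call,
 * otherwise the "unbounded memory access" error above fires:
 *
 *	if (len > sizeof(buf))
 *		return 0;
 *	bpf_probe_read_kernel(buf, len, src);	// len pairs with ARG_CONST_SIZE_OR_ZERO
 */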
 5667
 5668int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 5669		   u32 regno, u32 mem_size)
 5670{
 5671	bool may_be_null = type_may_be_null(reg->type);
 5672	struct bpf_reg_state saved_reg;
 5673	struct bpf_call_arg_meta meta;
 5674	int err;
 5675
 5676	if (register_is_null(reg))
 5677		return 0;
 5678
 5679	memset(&meta, 0, sizeof(meta));
 5680	/* Assuming that the register contains a value, check if the memory
 5681	 * access is safe. Temporarily save and restore the register's state as
 5682	 * the conversion shouldn't be visible to a caller.
 5683	 */
 5684	if (may_be_null) {
 5685		saved_reg = *reg;
 5686		mark_ptr_not_null_reg(reg);
 5687	}
 5688
 5689	err = check_helper_mem_access(env, regno, mem_size, true, &meta);
 5690	/* Check access for BPF_WRITE */
 5691	meta.raw_mode = true;
 5692	err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
 5693
 5694	if (may_be_null)
 5695		*reg = saved_reg;
 5696
 5697	return err;
 5698}
 5699
 5700static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
 5701				    u32 regno)
 5702{
 5703	struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
 5704	bool may_be_null = type_may_be_null(mem_reg->type);
 5705	struct bpf_reg_state saved_reg;
 5706	struct bpf_call_arg_meta meta;
 5707	int err;
 5708
 5709	WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
 5710
 5711	memset(&meta, 0, sizeof(meta));
 5712
 5713	if (may_be_null) {
 5714		saved_reg = *mem_reg;
 5715		mark_ptr_not_null_reg(mem_reg);
 5716	}
 5717
 5718	err = check_mem_size_reg(env, reg, regno, true, &meta);
 5719	/* Check access for BPF_WRITE */
 5720	meta.raw_mode = true;
 5721	err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
 5722
 5723	if (may_be_null)
 5724		*mem_reg = saved_reg;
 5725	return err;
 5726}
 5727
 5728/* Implementation details:
 5729 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
 5730 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
 5731 * Two bpf_map_lookups (even with the same key) will have different reg->id.
 5732 * Two separate bpf_obj_new will also have different reg->id.
 5733 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
 5734 * clears reg->id after value_or_null->value transition, since the verifier only
 5735 * cares about the range of access to valid map value pointer and doesn't care
 5736 * about actual address of the map element.
 5737 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
 5738 * reg->id > 0 after value_or_null->value transition. By doing so
 5739 * two bpf_map_lookups will be considered two different pointers that
 5740 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
 5741 * returned from bpf_obj_new.
 5742 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 5743 * dead-locks.
 5744 * deadlocks.
 5745 * reg_is_refcounted() logic. The verifier needs to remember only
 5746 * one spin_lock instead of array of acquired_refs.
 5747 * cur_state->active_lock remembers which map value element or allocated
 5748 * object got locked and clears it after bpf_spin_unlock.
 5749 */
 5750static int process_spin_lock(struct bpf_verifier_env *env, int regno,
 5751			     bool is_lock)
 5752{
 5753	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 5754	struct bpf_verifier_state *cur = env->cur_state;
 5755	bool is_const = tnum_is_const(reg->var_off);
 5756	u64 val = reg->var_off.value;
 5757	struct bpf_map *map = NULL;
 5758	struct btf *btf = NULL;
 5759	struct btf_record *rec;
 5760
 5761	if (!is_const) {
 5762		verbose(env,
 5763			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
 5764			regno);
 5765		return -EINVAL;
 5766	}
 5767	if (reg->type == PTR_TO_MAP_VALUE) {
 5768		map = reg->map_ptr;
 5769		if (!map->btf) {
 5770			verbose(env,
 5771				"map '%s' has to have BTF in order to use bpf_spin_lock\n",
 5772				map->name);
 5773			return -EINVAL;
 5774		}
 5775	} else {
 5776		btf = reg->btf;
 5777	}
 5778
 5779	rec = reg_btf_record(reg);
 5780	if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
 5781		verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
 5782			map ? map->name : "kptr");
 5783		return -EINVAL;
 5784	}
 5785	if (rec->spin_lock_off != val + reg->off) {
 5786		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
 5787			val + reg->off, rec->spin_lock_off);
 5788		return -EINVAL;
 5789	}
 5790	if (is_lock) {
 5791		if (cur->active_lock.ptr) {
 5792			verbose(env,
 5793				"Locking two bpf_spin_locks is not allowed\n");
 5794			return -EINVAL;
 5795		}
 5796		if (map)
 5797			cur->active_lock.ptr = map;
 5798		else
 5799			cur->active_lock.ptr = btf;
 5800		cur->active_lock.id = reg->id;
 5801	} else {
 5802		struct bpf_func_state *fstate = cur_func(env);
 5803		void *ptr;
 5804		int i;
 5805
 5806		if (map)
 5807			ptr = map;
 5808		else
 5809			ptr = btf;
 5810
 5811		if (!cur->active_lock.ptr) {
 5812			verbose(env, "bpf_spin_unlock without taking a lock\n");
 5813			return -EINVAL;
 5814		}
 5815		if (cur->active_lock.ptr != ptr ||
 5816		    cur->active_lock.id != reg->id) {
 5817			verbose(env, "bpf_spin_unlock of different lock\n");
 5818			return -EINVAL;
 5819		}
 5820		cur->active_lock.ptr = NULL;
 5821		cur->active_lock.id = 0;
 5822
 5823		for (i = fstate->acquired_refs - 1; i >= 0; i--) {
 5824			int err;
 5825
 5826			/* Complain on error because this reference state cannot
 5827			 * be freed before this point, as bpf_spin_lock critical
 5828			 * section does not allow functions that release the
 5829			 * allocated object immediately.
 5830			 */
 5831			if (!fstate->refs[i].release_on_unlock)
 5832				continue;
 5833			err = release_reference(env, fstate->refs[i].id);
 5834			if (err) {
 5835				verbose(env, "failed to release release_on_unlock reference");
 5836				return err;
 5837			}
 5838		}
 5839	}
 5840	return 0;
 5841}
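
/* A rough BPF-C sketch of the usage validated above (map/value names are
 * illustrative; the map value's BTF must describe the lock):
 *
 *	struct val { int data; struct bpf_spin_lock lock; };
 *	...
 *	struct val *v = bpf_map_lookup_elem(&vals, &key);
 *	if (!v)
 *		return 0;
 *	bpf_spin_lock(&v->lock);
 *	v->data++;
 *	bpf_spin_unlock(&v->lock);
 *
 * Taking a second lock before the unlock would hit the "Locking two
 * bpf_spin_locks" error above.
 */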
 5842
 5843static int process_timer_func(struct bpf_verifier_env *env, int regno,
 5844			      struct bpf_call_arg_meta *meta)
 5845{
 5846	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 5847	bool is_const = tnum_is_const(reg->var_off);
 5848	struct bpf_map *map = reg->map_ptr;
 5849	u64 val = reg->var_off.value;
 5850
 5851	if (!is_const) {
 5852		verbose(env,
 5853			"R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
 5854			regno);
 5855		return -EINVAL;
 5856	}
 5857	if (!map->btf) {
 5858		verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
 5859			map->name);
 5860		return -EINVAL;
 5861	}
 5862	if (!btf_record_has_field(map->record, BPF_TIMER)) {
 5863		verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
 5864		return -EINVAL;
 5865	}
 5866	if (map->record->timer_off != val + reg->off) {
 5867		verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
 5868			val + reg->off, map->record->timer_off);
 5869		return -EINVAL;
 5870	}
 5871	if (meta->map_ptr) {
 5872		verbose(env, "verifier bug. Two map pointers in a timer helper\n");
 5873		return -EFAULT;
 5874	}
 5875	meta->map_uid = reg->map_uid;
 5876	meta->map_ptr = map;
 5877	return 0;
 5878}
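
/* A rough BPF-C sketch of the layout checked above (map/value/callback names
 * are illustrative): the bpf_timer must sit at its BTF-recorded offset in the
 * map value, and the same map must be passed to bpf_timer_init():
 *
 *	struct elem { struct bpf_timer t; };
 *	...
 *	struct elem *e = bpf_map_lookup_elem(&timers, &key);
 *	if (!e)
 *		return 0;
 *	bpf_timer_init(&e->t, &timers, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&e->t, timer_cb);
 *	bpf_timer_start(&e->t, 0, 0);
 */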
 5879
 5880static int process_kptr_func(struct bpf_verifier_env *env, int regno,
 5881			     struct bpf_call_arg_meta *meta)
 5882{
 5883	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 5884	struct bpf_map *map_ptr = reg->map_ptr;
 5885	struct btf_field *kptr_field;
 5886	u32 kptr_off;
 5887
 5888	if (!tnum_is_const(reg->var_off)) {
 5889		verbose(env,
 5890			"R%d doesn't have constant offset. kptr has to be at the constant offset\n",
 5891			regno);
 5892		return -EINVAL;
 5893	}
 5894	if (!map_ptr->btf) {
 5895		verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
 5896			map_ptr->name);
 5897		return -EINVAL;
 5898	}
 5899	if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
 5900		verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
 5901		return -EINVAL;
 5902	}
 5903
 5904	meta->map_ptr = map_ptr;
 5905	kptr_off = reg->off + reg->var_off.value;
 5906	kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
 5907	if (!kptr_field) {
 5908		verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
 5909		return -EACCES;
 5910	}
 5911	if (kptr_field->type != BPF_KPTR_REF) {
 5912		verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
 5913		return -EACCES;
 5914	}
 5915	meta->kptr_field = kptr_field;
 5916	return 0;
 5917}
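
/* A rough BPF-C sketch of what bpf_kptr_xchg() operates on (type and field
 * names are illustrative; the field must be declared with the referenced-kptr
 * BTF type tag so that it is recorded as BPF_KPTR_REF):
 *
 *	struct val { struct some_kernel_type __kptr_ref *p; };
 *	...
 *	old = bpf_kptr_xchg(&v->p, new);
 */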
 5918
 5919/* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
 5920 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
 5921 *
 5922 * In both cases we deal with the first 8 bytes, but need to mark the next 8
 5923 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
 5924 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
 5925 *
 5926 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
 5927 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
 5928 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
 5929 * mutate the view of the dynptr and also possibly destroy it. In the latter
 5930 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
 5931 * memory that dynptr points to.
 5932 *
 5933 * The verifier will keep track of both levels of mutation (bpf_dynptr's in
 5934 * reg->type and the memory's in reg->dynptr.type), but there is no support for
 5935 * readonly dynptr view yet, hence only the first case is tracked and checked.
 5936 *
 5937 * This is consistent with how C applies the const modifier to a struct object,
 5938 * where the pointer itself inside bpf_dynptr becomes const but not what it
 5939 * points to.
 5940 *
 5941 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
 5942 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
 5943 */
 5944int process_dynptr_func(struct bpf_verifier_env *env, int regno,
 5945			enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
 5946{
 5947	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 5948
 5949	/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
 5950	 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
 5951	 */
 5952	if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
 5953		verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
 5954		return -EFAULT;
 5955	}
 5956	/* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
 5957	 * check_func_arg_reg_off's logic. We only need to check offset
 5958	 * alignment for PTR_TO_STACK.
 5959	 */
 5960	if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
 5961		verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
 5962		return -EINVAL;
 5963	}
 5964	/*  MEM_UNINIT - Points to memory that is an appropriate candidate for
 5965	 *		 constructing a mutable bpf_dynptr object.
 5966	 *
 5967	 *		 Currently, this is only possible with PTR_TO_STACK
 5968	 *		 pointing to a region of at least 16 bytes which doesn't
 5969	 *		 contain an existing bpf_dynptr.
 5970	 *
 5971	 *  MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
 5972	 *		 mutated or destroyed. However, the memory it points to
 5973	 *		 may be mutated.
 5974	 *
 5975	 *  None       - Points to an initialized dynptr that can be mutated and
 5976	 *		 destroyed, including mutation of the memory it points
 5977	 *		 to.
 5978	 */
 5979	if (arg_type & MEM_UNINIT) {
 5980		if (!is_dynptr_reg_valid_uninit(env, reg)) {
 5981			verbose(env, "Dynptr has to be an uninitialized dynptr\n");
 5982			return -EINVAL;
 5983		}
 5984
 5985		/* We only support one dynptr being uninitialized at the moment,
 5986		 * which is sufficient for the helper functions we have right now.
 5987		 */
 5988		if (meta->uninit_dynptr_regno) {
 5989			verbose(env, "verifier internal error: multiple uninitialized dynptr args\n");
 5990			return -EFAULT;
 5991		}
 5992
 5993		meta->uninit_dynptr_regno = regno;
 5994	} else /* MEM_RDONLY and None case from above */ {
 5995		/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
 5996		if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
 5997			verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
 5998			return -EINVAL;
 5999		}
 6000
 6001		if (!is_dynptr_reg_valid_init(env, reg)) {
 6002			verbose(env,
 6003				"Expected an initialized dynptr as arg #%d\n",
 6004				regno);
 6005			return -EINVAL;
 6006		}
 6007
 6008		/* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
 6009		if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
 6010			const char *err_extra = "";
 6011
 6012			switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
 6013			case DYNPTR_TYPE_LOCAL:
 6014				err_extra = "local";
 6015				break;
 6016			case DYNPTR_TYPE_RINGBUF:
 6017				err_extra = "ringbuf";
 6018				break;
 6019			default:
 6020				err_extra = "<unknown>";
 6021				break;
 6022			}
 6023			verbose(env,
 6024				"Expected a dynptr of type %s as arg #%d\n",
 6025				err_extra, regno);
 6026			return -EINVAL;
 6027		}
 6028	}
 6029	return 0;
 6030}
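
/* A rough BPF-C sketch of the uninitialized vs. initialized cases above
 * (ringbuf map 'rb', 'buf' and the sizes are illustrative):
 *
 *	struct bpf_dynptr dptr;				// stack slots, uninit
 *	bpf_ringbuf_reserve_dynptr(&rb, 64, 0, &dptr);	// MEM_UNINIT arg constructs it
 *	bpf_dynptr_write(&dptr, 0, buf, 8, 0);		// initialized dynptr from here on
 *	bpf_ringbuf_submit_dynptr(&dptr, 0);		// releases it
 */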
 6031
 6032static bool arg_type_is_mem_size(enum bpf_arg_type type)
 6033{
 6034	return type == ARG_CONST_SIZE ||
 6035	       type == ARG_CONST_SIZE_OR_ZERO;
 6036}
 6037
 6038static bool arg_type_is_release(enum bpf_arg_type type)
 6039{
 6040	return type & OBJ_RELEASE;
 6041}
 6042
 6043static bool arg_type_is_dynptr(enum bpf_arg_type type)
 6044{
 6045	return base_type(type) == ARG_PTR_TO_DYNPTR;
 6046}
 6047
 6048static int int_ptr_type_to_size(enum bpf_arg_type type)
 6049{
 6050	if (type == ARG_PTR_TO_INT)
 6051		return sizeof(u32);
 6052	else if (type == ARG_PTR_TO_LONG)
 6053		return sizeof(u64);
 6054
 6055	return -EINVAL;
 6056}
 6057
 6058static int resolve_map_arg_type(struct bpf_verifier_env *env,
 6059				 const struct bpf_call_arg_meta *meta,
 6060				 enum bpf_arg_type *arg_type)
 6061{
 6062	if (!meta->map_ptr) {
 6063		/* kernel subsystem misconfigured verifier */
 6064		verbose(env, "invalid map_ptr to access map->type\n");
 6065		return -EACCES;
 6066	}
 6067
 6068	switch (meta->map_ptr->map_type) {
 6069	case BPF_MAP_TYPE_SOCKMAP:
 6070	case BPF_MAP_TYPE_SOCKHASH:
 6071		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
 6072			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
 6073		} else {
 6074			verbose(env, "invalid arg_type for sockmap/sockhash\n");
 6075			return -EINVAL;
 6076		}
 6077		break;
 6078	case BPF_MAP_TYPE_BLOOM_FILTER:
 6079		if (meta->func_id == BPF_FUNC_map_peek_elem)
 6080			*arg_type = ARG_PTR_TO_MAP_VALUE;
 6081		break;
 6082	default:
 6083		break;
 6084	}
 6085	return 0;
 6086}
 6087
 6088struct bpf_reg_types {
 6089	const enum bpf_reg_type types[10];
 6090	u32 *btf_id;
 6091};
 6092
 6093static const struct bpf_reg_types sock_types = {
 6094	.types = {
 6095		PTR_TO_SOCK_COMMON,
 6096		PTR_TO_SOCKET,
 6097		PTR_TO_TCP_SOCK,
 6098		PTR_TO_XDP_SOCK,
 6099	},
 6100};
 6101
 6102#ifdef CONFIG_NET
 6103static const struct bpf_reg_types btf_id_sock_common_types = {
 6104	.types = {
 6105		PTR_TO_SOCK_COMMON,
 6106		PTR_TO_SOCKET,
 6107		PTR_TO_TCP_SOCK,
 6108		PTR_TO_XDP_SOCK,
 6109		PTR_TO_BTF_ID,
 6110		PTR_TO_BTF_ID | PTR_TRUSTED,
 6111	},
 6112	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
 6113};
 6114#endif
 6115
 6116static const struct bpf_reg_types mem_types = {
 6117	.types = {
 6118		PTR_TO_STACK,
 6119		PTR_TO_PACKET,
 6120		PTR_TO_PACKET_META,
 6121		PTR_TO_MAP_KEY,
 6122		PTR_TO_MAP_VALUE,
 6123		PTR_TO_MEM,
 6124		PTR_TO_MEM | MEM_RINGBUF,
 6125		PTR_TO_BUF,
 6126	},
 6127};
 6128
 6129static const struct bpf_reg_types int_ptr_types = {
 6130	.types = {
 6131		PTR_TO_STACK,
 6132		PTR_TO_PACKET,
 6133		PTR_TO_PACKET_META,
 6134		PTR_TO_MAP_KEY,
 6135		PTR_TO_MAP_VALUE,
 6136	},
 6137};
 6138
 6139static const struct bpf_reg_types spin_lock_types = {
 6140	.types = {
 6141		PTR_TO_MAP_VALUE,
 6142		PTR_TO_BTF_ID | MEM_ALLOC,
 6143	}
 6144};
 6145
 6146static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
 6147static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
 6148static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
 6149static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
 6150static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
 6151static const struct bpf_reg_types btf_ptr_types = {
 6152	.types = {
 6153		PTR_TO_BTF_ID,
 6154		PTR_TO_BTF_ID | PTR_TRUSTED,
 6155		PTR_TO_BTF_ID | MEM_RCU,
 6156	},
 6157};
 6158static const struct bpf_reg_types percpu_btf_ptr_types = {
 6159	.types = {
 6160		PTR_TO_BTF_ID | MEM_PERCPU,
 6161		PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
 6162	}
 6163};
 6164static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
 6165static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
 6166static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
 6167static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
 6168static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
 6169static const struct bpf_reg_types dynptr_types = {
 6170	.types = {
 6171		PTR_TO_STACK,
 6172		CONST_PTR_TO_DYNPTR,
 6173	}
 6174};
 6175
 6176static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 6177	[ARG_PTR_TO_MAP_KEY]		= &mem_types,
 6178	[ARG_PTR_TO_MAP_VALUE]		= &mem_types,
 6179	[ARG_CONST_SIZE]		= &scalar_types,
 6180	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
 6181	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
 6182	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
 6183	[ARG_PTR_TO_CTX]		= &context_types,
 6184	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
 6185#ifdef CONFIG_NET
 6186	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
 6187#endif
 6188	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
 6189	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
 6190	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
 6191	[ARG_PTR_TO_MEM]		= &mem_types,
 6192	[ARG_PTR_TO_RINGBUF_MEM]	= &ringbuf_mem_types,
 6193	[ARG_PTR_TO_INT]		= &int_ptr_types,
 6194	[ARG_PTR_TO_LONG]		= &int_ptr_types,
 6195	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
 6196	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
 6197	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
 6198	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
 6199	[ARG_PTR_TO_TIMER]		= &timer_types,
 6200	[ARG_PTR_TO_KPTR]		= &kptr_types,
 6201	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
 6202};
 6203
 6204static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
 6205			  enum bpf_arg_type arg_type,
 6206			  const u32 *arg_btf_id,
 6207			  struct bpf_call_arg_meta *meta)
 6208{
 6209	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 6210	enum bpf_reg_type expected, type = reg->type;
 6211	const struct bpf_reg_types *compatible;
 6212	int i, j;
 6213
 6214	compatible = compatible_reg_types[base_type(arg_type)];
 6215	if (!compatible) {
 6216		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
 6217		return -EFAULT;
 6218	}
 6219
 6220	/* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
 6221	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
 6222	 *
 6223	 * Same for MAYBE_NULL:
 6224	 *
 6225	 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
 6226	 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
 6227	 *
 6228	 * Therefore we fold these flags depending on the arg_type before comparison.
 6229	 */
 6230	if (arg_type & MEM_RDONLY)
 6231		type &= ~MEM_RDONLY;
 6232	if (arg_type & PTR_MAYBE_NULL)
 6233		type &= ~PTR_MAYBE_NULL;
 6234
 6235	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
 6236		expected = compatible->types[i];
 6237		if (expected == NOT_INIT)
 6238			break;
 6239
 6240		if (type == expected)
 6241			goto found;
 6242	}
 6243
 6244	verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
 6245	for (j = 0; j + 1 < i; j++)
 6246		verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
 6247	verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
 6248	return -EACCES;
 6249
 6250found:
 6251	if (reg->type == PTR_TO_BTF_ID || reg->type & PTR_TRUSTED) {
 6252		/* For bpf_sk_release, it needs to match against first member
 6253		 * 'struct sock_common', hence make an exception for it. This
 6254		 * allows bpf_sk_release to work for multiple socket types.
 6255		 */
 6256		bool strict_type_match = arg_type_is_release(arg_type) &&
 6257					 meta->func_id != BPF_FUNC_sk_release;
 6258
 6259		if (!arg_btf_id) {
 6260			if (!compatible->btf_id) {
 6261				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
 6262				return -EFAULT;
 6263			}
 6264			arg_btf_id = compatible->btf_id;
 6265		}
 6266
 6267		if (meta->func_id == BPF_FUNC_kptr_xchg) {
 6268			if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
 6269				return -EACCES;
 6270		} else {
 6271			if (arg_btf_id == BPF_PTR_POISON) {
 6272				verbose(env, "verifier internal error:");
 6273				verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
 6274					regno);
 6275				return -EACCES;
 6276			}
 6277
 6278			if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
 6279						  btf_vmlinux, *arg_btf_id,
 6280						  strict_type_match)) {
 6281				verbose(env, "R%d is of type %s but %s is expected\n",
 6282					regno, kernel_type_name(reg->btf, reg->btf_id),
 6283					kernel_type_name(btf_vmlinux, *arg_btf_id));
 6284				return -EACCES;
 6285			}
 6286		}
 6287	} else if (type_is_alloc(reg->type)) {
 6288		if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock) {
 6289			verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
 6290			return -EFAULT;
 6291		}
 6292	}
 6293
 6294	return 0;
 6295}
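
/* For illustration of the folding above: an argument declared as
 * ARG_PTR_TO_MEM | MEM_RDONLY accepts both PTR_TO_MEM and
 * PTR_TO_MEM | MEM_RDONLY registers, while plain ARG_PTR_TO_MEM rejects the
 * read-only flavour, producing a message along the lines of:
 *
 *	R2 type=rdonly_mem expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf
 */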
 6296
 6297int check_func_arg_reg_off(struct bpf_verifier_env *env,
 6298			   const struct bpf_reg_state *reg, int regno,
 6299			   enum bpf_arg_type arg_type)
 6300{
 6301	u32 type = reg->type;
 6302
 6303	/* When referenced register is passed to release function, its fixed
 6304	 * offset must be 0.
 6305	 *
 6306	 * We will check that the arg_type_is_release reg has a ref_obj_id when
 6307	 * storing meta->release_regno.
 6308	 */
 6309	if (arg_type_is_release(arg_type)) {
 6310		/* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
 6311		 * may not directly point to the object being released, but to
 6312		 * a dynptr pointing to such an object, which might be at some
 6313		 * offset on the stack. In that case, we simply fall back to the
 6314		 * default handling.
 6315		 */
 6316		if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
 6317			return 0;
 6318		/* Doing check_ptr_off_reg check for the offset will catch this
 6319		 * because fixed_off_ok is false, but checking here allows us
 6320		 * to give the user a better error message.
 6321		 */
 6322		if (reg->off) {
 6323			verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
 6324				regno);
 6325			return -EINVAL;
 6326		}
 6327		return __check_ptr_off_reg(env, reg, regno, false);
 6328	}
 6329
 6330	switch (type) {
 6331	/* Pointer types where both fixed and variable offset is explicitly allowed: */
 6332	case PTR_TO_STACK:
 6333	case PTR_TO_PACKET:
 6334	case PTR_TO_PACKET_META:
 6335	case PTR_TO_MAP_KEY:
 6336	case PTR_TO_MAP_VALUE:
 6337	case PTR_TO_MEM:
 6338	case PTR_TO_MEM | MEM_RDONLY:
 6339	case PTR_TO_MEM | MEM_RINGBUF:
 6340	case PTR_TO_BUF:
 6341	case PTR_TO_BUF | MEM_RDONLY:
 6342	case SCALAR_VALUE:
 6343		return 0;
 6344	/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
 6345	 * fixed offset.
 6346	 */
 6347	case PTR_TO_BTF_ID:
 6348	case PTR_TO_BTF_ID | MEM_ALLOC:
 6349	case PTR_TO_BTF_ID | PTR_TRUSTED:
 6350	case PTR_TO_BTF_ID | MEM_RCU:
 6351	case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
 6352		/* When referenced PTR_TO_BTF_ID is passed to release function,
 6353		 * its fixed offset must be 0. In the other cases, fixed offset
 6354		 * can be non-zero. This was already checked above. So pass
 6355		 * fixed_off_ok as true to allow fixed offset for all other
 6356		 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
 6357		 * still need to do checks instead of returning.
 6358		 */
 6359		return __check_ptr_off_reg(env, reg, regno, true);
 6360	default:
 6361		return __check_ptr_off_reg(env, reg, regno, false);
 6362	}
 6363}
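
/* A rough BPF-C sketch of the release-argument rule above (ctx and tuple are
 * assumed to be set up by the surrounding program): the referenced pointer
 * must come back with a zero fixed offset:
 *
 *	struct bpf_sock *sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple), -1, 0);
 *	if (!sk)
 *		return 0;
 *	bpf_sk_release(sk);	// ok: reg->off == 0
 *
 * Releasing through e.g. (void *)sk + 4 instead would trip the
 * "must have zero offset when passed to release func" error above.
 */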
 6364
 6365static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 6366{
 6367	struct bpf_func_state *state = func(env, reg);
 6368	int spi;
 6369
 6370	if (reg->type == CONST_PTR_TO_DYNPTR)
 6371		return reg->ref_obj_id;
 6372
 6373	spi = get_spi(reg->off);
 6374	return state->stack[spi].spilled_ptr.ref_obj_id;
 6375}
 6376
 6377static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 6378			  struct bpf_call_arg_meta *meta,
 6379			  const struct bpf_func_proto *fn)
 6380{
 6381	u32 regno = BPF_REG_1 + arg;
 6382	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 6383	enum bpf_arg_type arg_type = fn->arg_type[arg];
 6384	enum bpf_reg_type type = reg->type;
 6385	u32 *arg_btf_id = NULL;
 6386	int err = 0;
 6387
 6388	if (arg_type == ARG_DONTCARE)
 6389		return 0;
 6390
 6391	err = check_reg_arg(env, regno, SRC_OP);
 6392	if (err)
 6393		return err;
 6394
 6395	if (arg_type == ARG_ANYTHING) {
 6396		if (is_pointer_value(env, regno)) {
 6397			verbose(env, "R%d leaks addr into helper function\n",
 6398				regno);
 6399			return -EACCES;
 6400		}
 6401		return 0;
 6402	}
 6403
 6404	if (type_is_pkt_pointer(type) &&
 6405	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
 6406		verbose(env, "helper access to the packet is not allowed\n");
 6407		return -EACCES;
 6408	}
 6409
 6410	if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
 6411		err = resolve_map_arg_type(env, meta, &arg_type);
 6412		if (err)
 6413			return err;
 6414	}
 6415
 6416	if (register_is_null(reg) && type_may_be_null(arg_type))
 6417		/* A NULL register has a SCALAR_VALUE type, so skip
 6418		 * type checking.
 6419		 */
 6420		goto skip_type_check;
 6421
 6422	/* arg_btf_id and arg_size are in a union. */
 6423	if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
 6424	    base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
 6425		arg_btf_id = fn->arg_btf_id[arg];
 6426
 6427	err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
 6428	if (err)
 6429		return err;
 6430
 6431	err = check_func_arg_reg_off(env, reg, regno, arg_type);
 6432	if (err)
 6433		return err;
 6434
 6435skip_type_check:
 6436	if (arg_type_is_release(arg_type)) {
 6437		if (arg_type_is_dynptr(arg_type)) {
 6438			struct bpf_func_state *state = func(env, reg);
 6439			int spi;
 6440
 6441			/* Only dynptr created on stack can be released, thus
 6442			 * the get_spi and stack state checks for spilled_ptr
 6443			 * should only be done before process_dynptr_func for
 6444			 * PTR_TO_STACK.
 6445			 */
 6446			if (reg->type == PTR_TO_STACK) {
 6447				spi = get_spi(reg->off);
 6448				if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
 6449				    !state->stack[spi].spilled_ptr.ref_obj_id) {
 6450					verbose(env, "arg %d is an unacquired reference\n", regno);
 6451					return -EINVAL;
 6452				}
 6453			} else {
 6454				verbose(env, "cannot release unowned const bpf_dynptr\n");
 6455				return -EINVAL;
 6456			}
 6457		} else if (!reg->ref_obj_id && !register_is_null(reg)) {
 6458			verbose(env, "R%d must be referenced when passed to release function\n",
 6459				regno);
 6460			return -EINVAL;
 6461		}
 6462		if (meta->release_regno) {
 6463			verbose(env, "verifier internal error: more than one release argument\n");
 6464			return -EFAULT;
 6465		}
 6466		meta->release_regno = regno;
 6467	}
 6468
 6469	if (reg->ref_obj_id) {
 6470		if (meta->ref_obj_id) {
 6471			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
 6472				regno, reg->ref_obj_id,
 6473				meta->ref_obj_id);
 6474			return -EFAULT;
 6475		}
 6476		meta->ref_obj_id = reg->ref_obj_id;
 6477	}
 6478
 6479	switch (base_type(arg_type)) {
 6480	case ARG_CONST_MAP_PTR:
 6481		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
 6482		if (meta->map_ptr) {
 6483			/* Use map_uid (which is unique id of inner map) to reject:
 6484			 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
 6485			 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
 6486			 * if (inner_map1 && inner_map2) {
 6487			 *     timer = bpf_map_lookup_elem(inner_map1);
 6488			 *     if (timer)
 6489			 *         // mismatch would have been allowed
 6490			 *         bpf_timer_init(timer, inner_map2);
 6491			 * }
 6492			 *
 6493			 * Comparing map_ptr is enough to distinguish normal and outer maps.
 6494			 */
 6495			if (meta->map_ptr != reg->map_ptr ||
 6496			    meta->map_uid != reg->map_uid) {
 6497				verbose(env,
 6498					"timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
 6499					meta->map_uid, reg->map_uid);
 6500				return -EINVAL;
 6501			}
 6502		}
 6503		meta->map_ptr = reg->map_ptr;
 6504		meta->map_uid = reg->map_uid;
 6505		break;
 6506	case ARG_PTR_TO_MAP_KEY:
 6507		/* bpf_map_xxx(..., map_ptr, ..., key) call:
 6508		 * check that [key, key + map->key_size) are within
 6509		 * stack limits and initialized
 6510		 */
 6511		if (!meta->map_ptr) {
 6512			/* in the function declaration, map_ptr must come before
 6513			 * map_key, so that it's verified and known before
 6514			 * we have to check map_key here. Otherwise it means
 6515			 * that the kernel subsystem misconfigured the verifier.
 6516			 */
 6517			verbose(env, "invalid map_ptr to access map->key\n");
 6518			return -EACCES;
 6519		}
 6520		err = check_helper_mem_access(env, regno,
 6521					      meta->map_ptr->key_size, false,
 6522					      NULL);
 6523		break;
 6524	case ARG_PTR_TO_MAP_VALUE:
 6525		if (type_may_be_null(arg_type) && register_is_null(reg))
 6526			return 0;
 6527
 6528		/* bpf_map_xxx(..., map_ptr, ..., value) call:
 6529		 * check [value, value + map->value_size) validity
 6530		 */
 6531		if (!meta->map_ptr) {
 6532			/* kernel subsystem misconfigured verifier */
 6533			verbose(env, "invalid map_ptr to access map->value\n");
 6534			return -EACCES;
 6535		}
 6536		meta->raw_mode = arg_type & MEM_UNINIT;
 6537		err = check_helper_mem_access(env, regno,
 6538					      meta->map_ptr->value_size, false,
 6539					      meta);
 6540		break;
 6541	case ARG_PTR_TO_PERCPU_BTF_ID:
 6542		if (!reg->btf_id) {
 6543			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
 6544			return -EACCES;
 6545		}
 6546		meta->ret_btf = reg->btf;
 6547		meta->ret_btf_id = reg->btf_id;
 6548		break;
 6549	case ARG_PTR_TO_SPIN_LOCK:
 6550		if (meta->func_id == BPF_FUNC_spin_lock) {
 6551			err = process_spin_lock(env, regno, true);
 6552			if (err)
 6553				return err;
 6554		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
 6555			err = process_spin_lock(env, regno, false);
 6556			if (err)
 6557				return err;
 6558		} else {
 6559			verbose(env, "verifier internal error\n");
 6560			return -EFAULT;
 6561		}
 6562		break;
 6563	case ARG_PTR_TO_TIMER:
 6564		err = process_timer_func(env, regno, meta);
 6565		if (err)
 6566			return err;
 6567		break;
 6568	case ARG_PTR_TO_FUNC:
 6569		meta->subprogno = reg->subprogno;
 6570		break;
 6571	case ARG_PTR_TO_MEM:
 6572		/* The access to this pointer is only checked when we hit the
 6573		 * next is_mem_size argument below.
 6574		 */
 6575		meta->raw_mode = arg_type & MEM_UNINIT;
 6576		if (arg_type & MEM_FIXED_SIZE) {
 6577			err = check_helper_mem_access(env, regno,
 6578						      fn->arg_size[arg], false,
 6579						      meta);
 6580		}
 6581		break;
 6582	case ARG_CONST_SIZE:
 6583		err = check_mem_size_reg(env, reg, regno, false, meta);
 6584		break;
 6585	case ARG_CONST_SIZE_OR_ZERO:
 6586		err = check_mem_size_reg(env, reg, regno, true, meta);
 6587		break;
 6588	case ARG_PTR_TO_DYNPTR:
 6589		err = process_dynptr_func(env, regno, arg_type, meta);
 6590		if (err)
 6591			return err;
 6592		break;
 6593	case ARG_CONST_ALLOC_SIZE_OR_ZERO:
 6594		if (!tnum_is_const(reg->var_off)) {
 6595			verbose(env, "R%d is not a known constant\n",
 6596				regno);
 6597			return -EACCES;
 6598		}
 6599		meta->mem_size = reg->var_off.value;
 6600		err = mark_chain_precision(env, regno);
 6601		if (err)
 6602			return err;
 6603		break;
 6604	case ARG_PTR_TO_INT:
 6605	case ARG_PTR_TO_LONG:
 6606	{
 6607		int size = int_ptr_type_to_size(arg_type);
 6608
 6609		err = check_helper_mem_access(env, regno, size, false, meta);
 6610		if (err)
 6611			return err;
 6612		err = check_ptr_alignment(env, reg, 0, size, true);
 6613		break;
 6614	}
 6615	case ARG_PTR_TO_CONST_STR:
 6616	{
 6617		struct bpf_map *map = reg->map_ptr;
 6618		int map_off;
 6619		u64 map_addr;
 6620		char *str_ptr;
 6621
 6622		if (!bpf_map_is_rdonly(map)) {
 6623			verbose(env, "R%d does not point to a read-only map\n", regno);
 6624			return -EACCES;
 6625		}
 6626
 6627		if (!tnum_is_const(reg->var_off)) {
 6628			verbose(env, "R%d is not a constant address\n", regno);
 6629			return -EACCES;
 6630		}
 6631
 6632		if (!map->ops->map_direct_value_addr) {
 6633			verbose(env, "no direct value access support for this map type\n");
 6634			return -EACCES;
 6635		}
 6636
 6637		err = check_map_access(env, regno, reg->off,
 6638				       map->value_size - reg->off, false,
 6639				       ACCESS_HELPER);
 6640		if (err)
 6641			return err;
 6642
 6643		map_off = reg->off + reg->var_off.value;
 6644		err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
 6645		if (err) {
 6646			verbose(env, "direct value access on string failed\n");
 6647			return err;
 6648		}
 6649
 6650		str_ptr = (char *)(long)(map_addr);
 6651		if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
 6652			verbose(env, "string is not zero-terminated\n");
 6653			return -EINVAL;
 6654		}
 6655		break;
 6656	}
 6657	case ARG_PTR_TO_KPTR:
 6658		err = process_kptr_func(env, regno, meta);
 6659		if (err)
 6660			return err;
 6661		break;
 6662	}
 6663
 6664	return err;
 6665}
 6666
 6667static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
 6668{
 6669	enum bpf_attach_type eatype = env->prog->expected_attach_type;
 6670	enum bpf_prog_type type = resolve_prog_type(env->prog);
 6671
 6672	if (func_id != BPF_FUNC_map_update_elem)
 6673		return false;
 6674
 6675	/* It's not possible to get access to a locked struct sock in these
 6676	 * contexts, so updating is safe.
 6677	 */
 6678	switch (type) {
 6679	case BPF_PROG_TYPE_TRACING:
 6680		if (eatype == BPF_TRACE_ITER)
 6681			return true;
 6682		break;
 6683	case BPF_PROG_TYPE_SOCKET_FILTER:
 6684	case BPF_PROG_TYPE_SCHED_CLS:
 6685	case BPF_PROG_TYPE_SCHED_ACT:
 6686	case BPF_PROG_TYPE_XDP:
 6687	case BPF_PROG_TYPE_SK_REUSEPORT:
 6688	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 6689	case BPF_PROG_TYPE_SK_LOOKUP:
 6690		return true;
 6691	default:
 6692		break;
 6693	}
 6694
 6695	verbose(env, "cannot update sockmap in this context\n");
 6696	return false;
 6697}
 6698
 6699static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
 6700{
 6701	return env->prog->jit_requested &&
 6702	       bpf_jit_supports_subprog_tailcalls();
 6703}
 6704
 6705static int check_map_func_compatibility(struct bpf_verifier_env *env,
 6706					struct bpf_map *map, int func_id)
 6707{
 6708	if (!map)
 6709		return 0;
 6710
 6711	/* We need a two way check, first is from map perspective ... */
 6712	switch (map->map_type) {
 6713	case BPF_MAP_TYPE_PROG_ARRAY:
 6714		if (func_id != BPF_FUNC_tail_call)
 6715			goto error;
 6716		break;
 6717	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 6718		if (func_id != BPF_FUNC_perf_event_read &&
 6719		    func_id != BPF_FUNC_perf_event_output &&
 6720		    func_id != BPF_FUNC_skb_output &&
 6721		    func_id != BPF_FUNC_perf_event_read_value &&
 6722		    func_id != BPF_FUNC_xdp_output)
 6723			goto error;
 6724		break;
 6725	case BPF_MAP_TYPE_RINGBUF:
 6726		if (func_id != BPF_FUNC_ringbuf_output &&
 6727		    func_id != BPF_FUNC_ringbuf_reserve &&
 6728		    func_id != BPF_FUNC_ringbuf_query &&
 6729		    func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
 6730		    func_id != BPF_FUNC_ringbuf_submit_dynptr &&
 6731		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
 6732			goto error;
 6733		break;
 6734	case BPF_MAP_TYPE_USER_RINGBUF:
 6735		if (func_id != BPF_FUNC_user_ringbuf_drain)
 6736			goto error;
 6737		break;
 6738	case BPF_MAP_TYPE_STACK_TRACE:
 6739		if (func_id != BPF_FUNC_get_stackid)
 6740			goto error;
 6741		break;
 6742	case BPF_MAP_TYPE_CGROUP_ARRAY:
 6743		if (func_id != BPF_FUNC_skb_under_cgroup &&
 6744		    func_id != BPF_FUNC_current_task_under_cgroup)
 6745			goto error;
 6746		break;
 6747	case BPF_MAP_TYPE_CGROUP_STORAGE:
 6748	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
 6749		if (func_id != BPF_FUNC_get_local_storage)
 6750			goto error;
 6751		break;
 6752	case BPF_MAP_TYPE_DEVMAP:
 6753	case BPF_MAP_TYPE_DEVMAP_HASH:
 6754		if (func_id != BPF_FUNC_redirect_map &&
 6755		    func_id != BPF_FUNC_map_lookup_elem)
 6756			goto error;
 6757		break;
 6758	/* Restrict the bpf side of cpumap and xskmap; open them up when
 6759	 * use-cases appear.
 6760	 */
 6761	case BPF_MAP_TYPE_CPUMAP:
 6762		if (func_id != BPF_FUNC_redirect_map)
 6763			goto error;
 6764		break;
 6765	case BPF_MAP_TYPE_XSKMAP:
 6766		if (func_id != BPF_FUNC_redirect_map &&
 6767		    func_id != BPF_FUNC_map_lookup_elem)
 6768			goto error;
 6769		break;
 6770	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 6771	case BPF_MAP_TYPE_HASH_OF_MAPS:
 6772		if (func_id != BPF_FUNC_map_lookup_elem)
 6773			goto error;
 6774		break;
 6775	case BPF_MAP_TYPE_SOCKMAP:
 6776		if (func_id != BPF_FUNC_sk_redirect_map &&
 6777		    func_id != BPF_FUNC_sock_map_update &&
 6778		    func_id != BPF_FUNC_map_delete_elem &&
 6779		    func_id != BPF_FUNC_msg_redirect_map &&
 6780		    func_id != BPF_FUNC_sk_select_reuseport &&
 6781		    func_id != BPF_FUNC_map_lookup_elem &&
 6782		    !may_update_sockmap(env, func_id))
 6783			goto error;
 6784		break;
 6785	case BPF_MAP_TYPE_SOCKHASH:
 6786		if (func_id != BPF_FUNC_sk_redirect_hash &&
 6787		    func_id != BPF_FUNC_sock_hash_update &&
 6788		    func_id != BPF_FUNC_map_delete_elem &&
 6789		    func_id != BPF_FUNC_msg_redirect_hash &&
 6790		    func_id != BPF_FUNC_sk_select_reuseport &&
 6791		    func_id != BPF_FUNC_map_lookup_elem &&
 6792		    !may_update_sockmap(env, func_id))
 6793			goto error;
 6794		break;
 6795	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
 6796		if (func_id != BPF_FUNC_sk_select_reuseport)
 6797			goto error;
 6798		break;
 6799	case BPF_MAP_TYPE_QUEUE:
 6800	case BPF_MAP_TYPE_STACK:
 6801		if (func_id != BPF_FUNC_map_peek_elem &&
 6802		    func_id != BPF_FUNC_map_pop_elem &&
 6803		    func_id != BPF_FUNC_map_push_elem)
 6804			goto error;
 6805		break;
 6806	case BPF_MAP_TYPE_SK_STORAGE:
 6807		if (func_id != BPF_FUNC_sk_storage_get &&
 6808		    func_id != BPF_FUNC_sk_storage_delete)
 6809			goto error;
 6810		break;
 6811	case BPF_MAP_TYPE_INODE_STORAGE:
 6812		if (func_id != BPF_FUNC_inode_storage_get &&
 6813		    func_id != BPF_FUNC_inode_storage_delete)
 6814			goto error;
 6815		break;
 6816	case BPF_MAP_TYPE_TASK_STORAGE:
 6817		if (func_id != BPF_FUNC_task_storage_get &&
 6818		    func_id != BPF_FUNC_task_storage_delete)
 6819			goto error;
 6820		break;
 6821	case BPF_MAP_TYPE_CGRP_STORAGE:
 6822		if (func_id != BPF_FUNC_cgrp_storage_get &&
 6823		    func_id != BPF_FUNC_cgrp_storage_delete)
 6824			goto error;
 6825		break;
 6826	case BPF_MAP_TYPE_BLOOM_FILTER:
 6827		if (func_id != BPF_FUNC_map_peek_elem &&
 6828		    func_id != BPF_FUNC_map_push_elem)
 6829			goto error;
 6830		break;
 6831	default:
 6832		break;
 6833	}
 6834
 6835	/* ... and second from the function itself. */
 6836	switch (func_id) {
 6837	case BPF_FUNC_tail_call:
 6838		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
 6839			goto error;
 6840		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
 6841			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
 6842			return -EINVAL;
 6843		}
 6844		break;
 6845	case BPF_FUNC_perf_event_read:
 6846	case BPF_FUNC_perf_event_output:
 6847	case BPF_FUNC_perf_event_read_value:
 6848	case BPF_FUNC_skb_output:
 6849	case BPF_FUNC_xdp_output:
 6850		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 6851			goto error;
 6852		break;
 6853	case BPF_FUNC_ringbuf_output:
 6854	case BPF_FUNC_ringbuf_reserve:
 6855	case BPF_FUNC_ringbuf_query:
 6856	case BPF_FUNC_ringbuf_reserve_dynptr:
 6857	case BPF_FUNC_ringbuf_submit_dynptr:
 6858	case BPF_FUNC_ringbuf_discard_dynptr:
 6859		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
 6860			goto error;
 6861		break;
 6862	case BPF_FUNC_user_ringbuf_drain:
 6863		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
 6864			goto error;
 6865		break;
 6866	case BPF_FUNC_get_stackid:
 6867		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 6868			goto error;
 6869		break;
 6870	case BPF_FUNC_current_task_under_cgroup:
 6871	case BPF_FUNC_skb_under_cgroup:
 6872		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 6873			goto error;
 6874		break;
 6875	case BPF_FUNC_redirect_map:
 6876		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
 6877		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
 6878		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
 6879		    map->map_type != BPF_MAP_TYPE_XSKMAP)
 6880			goto error;
 6881		break;
 6882	case BPF_FUNC_sk_redirect_map:
 6883	case BPF_FUNC_msg_redirect_map:
 6884	case BPF_FUNC_sock_map_update:
 6885		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
 6886			goto error;
 6887		break;
 6888	case BPF_FUNC_sk_redirect_hash:
 6889	case BPF_FUNC_msg_redirect_hash:
 6890	case BPF_FUNC_sock_hash_update:
 6891		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
 6892			goto error;
 6893		break;
 6894	case BPF_FUNC_get_local_storage:
 6895		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 6896		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
 6897			goto error;
 6898		break;
 6899	case BPF_FUNC_sk_select_reuseport:
 6900		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
 6901		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
 6902		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
 6903			goto error;
 6904		break;
 6905	case BPF_FUNC_map_pop_elem:
 6906		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
 6907		    map->map_type != BPF_MAP_TYPE_STACK)
 6908			goto error;
 6909		break;
 6910	case BPF_FUNC_map_peek_elem:
 6911	case BPF_FUNC_map_push_elem:
 6912		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
 6913		    map->map_type != BPF_MAP_TYPE_STACK &&
 6914		    map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
 6915			goto error;
 6916		break;
 6917	case BPF_FUNC_map_lookup_percpu_elem:
 6918		if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
 6919		    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
 6920		    map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
 6921			goto error;
 6922		break;
 6923	case BPF_FUNC_sk_storage_get:
 6924	case BPF_FUNC_sk_storage_delete:
 6925		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 6926			goto error;
 6927		break;
 6928	case BPF_FUNC_inode_storage_get:
 6929	case BPF_FUNC_inode_storage_delete:
 6930		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
 6931			goto error;
 6932		break;
 6933	case BPF_FUNC_task_storage_get:
 6934	case BPF_FUNC_task_storage_delete:
 6935		if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
 6936			goto error;
 6937		break;
 6938	case BPF_FUNC_cgrp_storage_get:
 6939	case BPF_FUNC_cgrp_storage_delete:
 6940		if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
 6941			goto error;
 6942		break;
 6943	default:
 6944		break;
 6945	}
 6946
 6947	return 0;
 6948error:
 6949	verbose(env, "cannot pass map_type %d into func %s#%d\n",
 6950		map->map_type, func_id_name(func_id), func_id);
 6951	return -EINVAL;
 6952}
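
/* For illustration, a pairing that the two switches above reject
 * (hypothetical BPF-program-side snippet; the map name is a placeholder):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} not_a_prog_array SEC(".maps");
 *
 *	bpf_tail_call(ctx, &not_a_prog_array, 0);
 *
 * The map-perspective switch lets a hash map through (default case), but the
 * helper-perspective switch requires BPF_MAP_TYPE_PROG_ARRAY for
 * bpf_tail_call(), so the call is rejected with the "cannot pass map_type"
 * error above.
 */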
 6953
 6954static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
 6955{
 6956	int count = 0;
 6957
 6958	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
 6959		count++;
 6960	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
 6961		count++;
 6962	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
 6963		count++;
 6964	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
 6965		count++;
 6966	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
 6967		count++;
 6968
 6969	/* We only support one arg being in raw mode at the moment,
 6970	 * which is sufficient for the helper functions we have
 6971	 * right now.
 6972	 */
 6973	return count <= 1;
 6974}
 6975
 6976static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
 6977{
 6978	bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
 6979	bool has_size = fn->arg_size[arg] != 0;
 6980	bool is_next_size = false;
 6981
 6982	if (arg + 1 < ARRAY_SIZE(fn->arg_type))
 6983		is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
 6984
 6985	if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
 6986		return is_next_size;
 6987
 6988	return has_size == is_next_size || is_next_size == is_fixed;
 6989}
 6990
 6991static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
 6992{
 6993	/* bpf_xxx(..., buf, len) call will access 'len'
 6994	 * bytes from memory 'buf'. Both arg types need
 6995	 * to be paired, so make sure there's no buggy
 6996	 * helper function specification.
 6997	 */
 6998	if (arg_type_is_mem_size(fn->arg1_type) ||
 6999	    check_args_pair_invalid(fn, 0) ||
 7000	    check_args_pair_invalid(fn, 1) ||
 7001	    check_args_pair_invalid(fn, 2) ||
 7002	    check_args_pair_invalid(fn, 3) ||
 7003	    check_args_pair_invalid(fn, 4))
 7004		return false;
 7005
 7006	return true;
 7007}
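
/* For reference, an example of the buf/len pairing this check expects,
 * shaped like the existing bpf_probe_read_kernel() proto:
 *
 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,	// buf to fill
 *	.arg2_type = ARG_CONST_SIZE_OR_ZERO,	// byte count, sizes arg1
 *	.arg3_type = ARG_ANYTHING,		// unsafe source address
 *
 * A proto whose first argument is a mem size, or whose ARG_PTR_TO_MEM is not
 * followed by a size argument (unless it is MEM_FIXED_SIZE with arg_size
 * set), fails this check and the helper is treated as misconfigured.
 */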
 7008
 7009static bool check_btf_id_ok(const struct bpf_func_proto *fn)
 7010{
 7011	int i;
 7012
 7013	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
 7014		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
 7015			return !!fn->arg_btf_id[i];
 7016		if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
 7017			return fn->arg_btf_id[i] == BPF_PTR_POISON;
 7018		if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
 7019		    /* arg_btf_id and arg_size are in a union. */
 7020		    (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
 7021		     !(fn->arg_type[i] & MEM_FIXED_SIZE)))
 7022			return false;
 7023	}
 7024
 7025	return true;
 7026}
 7027
 7028static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
 7029{
 7030	return check_raw_mode_ok(fn) &&
 7031	       check_arg_pair_ok(fn) &&
 7032	       check_btf_id_ok(fn) ? 0 : -EINVAL;
 7033}
 7034
 7035/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
 7036 * are now invalid, so turn them into unknown SCALAR_VALUE.
 7037 */
 7038static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 7039{
 7040	struct bpf_func_state *state;
 7041	struct bpf_reg_state *reg;
 7042
 7043	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
 7044		if (reg_is_pkt_pointer_any(reg))
 7045			__mark_reg_unknown(env, reg);
 7046	}));
 7047}
 7048
 7049enum {
 7050	AT_PKT_END = -1,
 7051	BEYOND_PKT_END = -2,
 7052};
 7053
 7054static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
 7055{
 7056	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 7057	struct bpf_reg_state *reg = &state->regs[regn];
 7058
 7059	if (reg->type != PTR_TO_PACKET)
 7060		/* PTR_TO_PACKET_META is not supported yet */
 7061		return;
 7062
 7063	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
 7064	 * How far beyond pkt_end it goes is unknown.
 7065	 * if (!range_open) it's the case of pkt >= pkt_end
 7066	 * if (range_open) it's the case of pkt > pkt_end
 7067	 * hence this pointer is at least 1 byte bigger than pkt_end
 7068	 */
 7069	if (range_open)
 7070		reg->range = BEYOND_PKT_END;
 7071	else
 7072		reg->range = AT_PKT_END;
 7073}
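
/* Illustrative source of such a comparison in a BPF program (the canonical
 * direct packet access bounds check; header and verdict are placeholders):
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *
 *	if (data + sizeof(struct ethhdr) > data_end)
 *		return TC_ACT_OK;
 *	// fall-through branch: at least sizeof(struct ethhdr) bytes available
 *
 * mark_pkt_end() records, for the branch where the comparison holds, that the
 * packet pointer sits at or beyond pkt_end.
 */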
 7074
 7075/* The pointer with the specified id has released its reference to kernel
 7076 * resources. Identify all copies of the same pointer and clear the reference.
 7077 */
 7078static int release_reference(struct bpf_verifier_env *env,
 7079			     int ref_obj_id)
 7080{
 7081	struct bpf_func_state *state;
 7082	struct bpf_reg_state *reg;
 7083	int err;
 7084
 7085	err = release_reference_state(cur_func(env), ref_obj_id);
 7086	if (err)
 7087		return err;
 7088
 7089	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
 7090		if (reg->ref_obj_id == ref_obj_id) {
 7091			if (!env->allow_ptr_leaks)
 7092				__mark_reg_not_init(env, reg);
 7093			else
 7094				__mark_reg_unknown(env, reg);
 7095		}
 7096	}));
 7097
 7098	return 0;
 7099}
 7100
 7101static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 7102				    struct bpf_reg_state *regs)
 7103{
 7104	int i;
 7105
 7106	/* after the call registers r0 - r5 were scratched */
 7107	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 7108		mark_reg_not_init(env, regs, caller_saved[i]);
 7109		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 7110	}
 7111}
 7112
 7113typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
 7114				   struct bpf_func_state *caller,
 7115				   struct bpf_func_state *callee,
 7116				   int insn_idx);
 7117
 7118static int set_callee_state(struct bpf_verifier_env *env,
 7119			    struct bpf_func_state *caller,
 7120			    struct bpf_func_state *callee, int insn_idx);
 7121
 7122static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 7123			     int *insn_idx, int subprog,
 7124			     set_callee_state_fn set_callee_state_cb)
 7125{
 7126	struct bpf_verifier_state *state = env->cur_state;
 7127	struct bpf_func_info_aux *func_info_aux;
 7128	struct bpf_func_state *caller, *callee;
 7129	int err;
 7130	bool is_global = false;
 7131
 7132	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
 7133		verbose(env, "the call stack of %d frames is too deep\n",
 7134			state->curframe + 2);
 7135		return -E2BIG;
 7136	}
 7137
 7138	caller = state->frame[state->curframe];
 7139	if (state->frame[state->curframe + 1]) {
 7140		verbose(env, "verifier bug. Frame %d already allocated\n",
 7141			state->curframe + 1);
 7142		return -EFAULT;
 7143	}
 7144
 7145	func_info_aux = env->prog->aux->func_info_aux;
 7146	if (func_info_aux)
 7147		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
 7148	err = btf_check_subprog_call(env, subprog, caller->regs);
 7149	if (err == -EFAULT)
 7150		return err;
 7151	if (is_global) {
 7152		if (err) {
 7153			verbose(env, "Caller passes invalid args into func#%d\n",
 7154				subprog);
 7155			return err;
 7156		} else {
 7157			if (env->log.level & BPF_LOG_LEVEL)
 7158				verbose(env,
 7159					"Func#%d is global and valid. Skipping.\n",
 7160					subprog);
 7161			clear_caller_saved_regs(env, caller->regs);
 7162
 7163			/* All global functions return a 64-bit SCALAR_VALUE */
 7164			mark_reg_unknown(env, caller->regs, BPF_REG_0);
 7165			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
 7166
 7167			/* continue with next insn after call */
 7168			return 0;
 7169		}
 7170	}
 7171
 7172	/* set_callee_state is used for direct subprog calls, but we are
 7173	 * interested in validating only BPF helpers that can call subprogs as
 7174	 * callbacks
 7175	 */
 7176	if (set_callee_state_cb != set_callee_state && !is_callback_calling_function(insn->imm)) {
 7177		verbose(env, "verifier bug: helper %s#%d is not marked as callback-calling\n",
 7178			func_id_name(insn->imm), insn->imm);
 7179		return -EFAULT;
 7180	}
 7181
 7182	if (insn->code == (BPF_JMP | BPF_CALL) &&
 7183	    insn->src_reg == 0 &&
 7184	    insn->imm == BPF_FUNC_timer_set_callback) {
 7185		struct bpf_verifier_state *async_cb;
 7186
 7187		/* there is no real recursion here. timer callbacks are async */
 7188		env->subprog_info[subprog].is_async_cb = true;
 7189		async_cb = push_async_cb(env, env->subprog_info[subprog].start,
 7190					 *insn_idx, subprog);
 7191		if (!async_cb)
 7192			return -EFAULT;
 7193		callee = async_cb->frame[0];
 7194		callee->async_entry_cnt = caller->async_entry_cnt + 1;
 7195
 7196		/* Convert bpf_timer_set_callback() args into timer callback args */
 7197		err = set_callee_state_cb(env, caller, callee, *insn_idx);
 7198		if (err)
 7199			return err;
 7200
 7201		clear_caller_saved_regs(env, caller->regs);
 7202		mark_reg_unknown(env, caller->regs, BPF_REG_0);
 7203		caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
 7204		/* continue with next insn after call */
 7205		return 0;
 7206	}
 7207
 7208	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
 7209	if (!callee)
 7210		return -ENOMEM;
 7211	state->frame[state->curframe + 1] = callee;
 7212
 7213	/* callee cannot access r0, r6 - r9 for reading and has to write
 7214	 * into its own stack before reading from it.
 7215	 * callee can read/write into caller's stack
 7216	 */
 7217	init_func_state(env, callee,
 7218			/* remember the callsite, it will be used by bpf_exit */
 7219			*insn_idx /* callsite */,
 7220			state->curframe + 1 /* frameno within this callchain */,
 7221			subprog /* subprog number within this prog */);
 7222
 7223	/* Transfer references to the callee */
 7224	err = copy_reference_state(callee, caller);
 7225	if (err)
 7226		goto err_out;
 7227
 7228	err = set_callee_state_cb(env, caller, callee, *insn_idx);
 7229	if (err)
 7230		goto err_out;
 7231
 7232	clear_caller_saved_regs(env, caller->regs);
 7233
 7234	/* only increment it after check_reg_arg() finished */
 7235	state->curframe++;
 7236
 7237	/* and go analyze first insn of the callee */
 7238	*insn_idx = env->subprog_info[subprog].start - 1;
 7239
 7240	if (env->log.level & BPF_LOG_LEVEL) {
 7241		verbose(env, "caller:\n");
 7242		print_verifier_state(env, caller, true);
 7243		verbose(env, "callee:\n");
 7244		print_verifier_state(env, callee, true);
 7245	}
 7246	return 0;
 7247
 7248err_out:
 7249	free_func_state(callee);
 7250	state->frame[state->curframe + 1] = NULL;
 7251	return err;
 7252}
 7253
 7254int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 7255				   struct bpf_func_state *caller,
 7256				   struct bpf_func_state *callee)
 7257{
 7258	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
 7259	 *      void *callback_ctx, u64 flags);
 7260	 * callback_fn(struct bpf_map *map, void *key, void *value,
 7261	 *      void *callback_ctx);
 7262	 */
 7263	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
 7264
 7265	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
 7266	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
 7267	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
 7268
 7269	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
 7270	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
 7271	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
 7272
 7273	/* pointer to stack or null */
 7274	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
 7275
 7276	/* unused */
 7277	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 7278	return 0;
 7279}
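
/* Illustrative BPF-program-side counterpart of the register setup above
 * (map, callback and context names are placeholders):
 *
 *	static long count_elem(struct bpf_map *map, __u32 *key, __u64 *val,
 *			       struct cb_ctx *ctx)
 *	{
 *		ctx->n++;
 *		return 0;	// 0 = continue iterating, 1 = stop
 *	}
 *
 *	struct cb_ctx ctx = {};
 *	bpf_for_each_map_elem(&my_map, count_elem, &ctx, 0);
 *
 * The callback's R1 is the map, R2/R3 are key/value pointers derived from it,
 * R4 is the caller's callback_ctx (R3 of the helper call) and R5 is unused.
 */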
 7280
 7281static int set_callee_state(struct bpf_verifier_env *env,
 7282			    struct bpf_func_state *caller,
 7283			    struct bpf_func_state *callee, int insn_idx)
 7284{
 7285	int i;
 7286
 7287	/* copy r1 - r5 args that callee can access.  The copy includes parent
 7288	 * pointers, which connects us up to the liveness chain
 7289	 */
 7290	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
 7291		callee->regs[i] = caller->regs[i];
 7292	return 0;
 7293}
 7294
 7295static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 7296			   int *insn_idx)
 7297{
 7298	int subprog, target_insn;
 7299
 7300	target_insn = *insn_idx + insn->imm + 1;
 7301	subprog = find_subprog(env, target_insn);
 7302	if (subprog < 0) {
 7303		verbose(env, "verifier bug. No program starts at insn %d\n",
 7304			target_insn);
 7305		return -EFAULT;
 7306	}
 7307
 7308	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
 7309}
 7310
 7311static int set_map_elem_callback_state(struct bpf_verifier_env *env,
 7312				       struct bpf_func_state *caller,
 7313				       struct bpf_func_state *callee,
 7314				       int insn_idx)
 7315{
 7316	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
 7317	struct bpf_map *map;
 7318	int err;
 7319
 7320	if (bpf_map_ptr_poisoned(insn_aux)) {
 7321		verbose(env, "tail_call abusing map_ptr\n");
 7322		return -EINVAL;
 7323	}
 7324
 7325	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
 7326	if (!map->ops->map_set_for_each_callback_args ||
 7327	    !map->ops->map_for_each_callback) {
 7328		verbose(env, "callback function not allowed for map\n");
 7329		return -ENOTSUPP;
 7330	}
 7331
 7332	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
 7333	if (err)
 7334		return err;
 7335
 7336	callee->in_callback_fn = true;
 7337	callee->callback_ret_range = tnum_range(0, 1);
 7338	return 0;
 7339}
 7340
 7341static int set_loop_callback_state(struct bpf_verifier_env *env,
 7342				   struct bpf_func_state *caller,
 7343				   struct bpf_func_state *callee,
 7344				   int insn_idx)
 7345{
 7346	/* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
 7347	 *	    u64 flags);
 7348	 * callback_fn(u32 index, void *callback_ctx);
 7349	 */
 7350	callee->regs[BPF_REG_1].type = SCALAR_VALUE;
 7351	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 7352
 7353	/* unused */
 7354	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
 7355	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
 7356	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 7357
 7358	callee->in_callback_fn = true;
 7359	callee->callback_ret_range = tnum_range(0, 1);
 7360	return 0;
 7361}
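
/* Illustrative BPF-program-side use of bpf_loop() matching the setup above
 * (callback and context names are placeholders):
 *
 *	static long step(__u32 index, struct loop_ctx *ctx)
 *	{
 *		ctx->sum += index;
 *		return 0;	// 0 = continue, 1 = break out of the loop
 *	}
 *
 *	bpf_loop(100, step, &ctx, 0);
 *
 * The callback sees the iteration index in R1 and callback_ctx in R2; the
 * remaining argument registers are deliberately left uninitialized.
 */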
 7362
 7363static int set_timer_callback_state(struct bpf_verifier_env *env,
 7364				    struct bpf_func_state *caller,
 7365				    struct bpf_func_state *callee,
 7366				    int insn_idx)
 7367{
 7368	struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
 7369
 7370	/* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
 7371	 * callback_fn(struct bpf_map *map, void *key, void *value);
 7372	 */
 7373	callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
 7374	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
 7375	callee->regs[BPF_REG_1].map_ptr = map_ptr;
 7376
 7377	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
 7378	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
 7379	callee->regs[BPF_REG_2].map_ptr = map_ptr;
 7380
 7381	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
 7382	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
 7383	callee->regs[BPF_REG_3].map_ptr = map_ptr;
 7384
 7385	/* unused */
 7386	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
 7387	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 7388	callee->in_async_callback_fn = true;
 7389	callee->callback_ret_range = tnum_range(0, 1);
 7390	return 0;
 7391}
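
/* Illustrative BPF-program-side sequence that reaches this path (assumes the
 * map value embeds a struct bpf_timer; all names are placeholders):
 *
 *	static int timer_cb(void *map, __u32 *key, struct elem *val)
 *	{
 *		...
 *		return 0;
 *	}
 *
 *	bpf_timer_init(&val->timer, &my_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&val->timer, timer_cb);
 *	bpf_timer_start(&val->timer, 1000000, 0);	// nsecs, flags
 *
 * When the timer fires, the callback runs asynchronously with the map, key
 * and value of the element owning the timer in R1-R3, as set up above.
 */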
 7392
 7393static int set_find_vma_callback_state(struct bpf_verifier_env *env,
 7394				       struct bpf_func_state *caller,
 7395				       struct bpf_func_state *callee,
 7396				       int insn_idx)
 7397{
 7398	/* bpf_find_vma(struct task_struct *task, u64 addr,
 7399	 *               void *callback_fn, void *callback_ctx, u64 flags)
 7400	 * (callback_fn)(struct task_struct *task,
 7401	 *               struct vm_area_struct *vma, void *callback_ctx);
 7402	 */
 7403	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
 7404
 7405	callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
 7406	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
 7407	callee->regs[BPF_REG_2].btf = btf_vmlinux;
 7408	callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
 7409
 7410	/* pointer to stack or null */
 7411	callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
 7412
 7413	/* unused */
 7414	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
 7415	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 7416	callee->in_callback_fn = true;
 7417	callee->callback_ret_range = tnum_range(0, 1);
 7418	return 0;
 7419}
 7420
 7421static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
 7422					   struct bpf_func_state *caller,
 7423					   struct bpf_func_state *callee,
 7424					   int insn_idx)
 7425{
 7426	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
 7427	 *			  void *callback_ctx, u64 flags);
 7428	 * callback_fn(const struct bpf_dynptr *dynptr, void *callback_ctx);
 7429	 */
 7430	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
 7431	mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
 7432	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
 7433
 7434	/* unused */
 7435	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
 7436	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
 7437	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 7438
 7439	callee->in_callback_fn = true;
 7440	callee->callback_ret_range = tnum_range(0, 1);
 7441	return 0;
 7442}
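
/* Illustrative BPF-program-side use of bpf_user_ringbuf_drain() (sample
 * struct, callback and map names are placeholders):
 *
 *	static long handle_sample(struct bpf_dynptr *dynptr, void *ctx)
 *	{
 *		struct sample s;
 *
 *		if (bpf_dynptr_read(&s, sizeof(s), dynptr, 0, 0))
 *			return 1;	// stop draining
 *		...
 *		return 0;	// keep draining
 *	}
 *
 *	bpf_user_ringbuf_drain(&user_rb, handle_sample, NULL, 0);
 */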
 7443
 7444static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 7445{
 7446	struct bpf_verifier_state *state = env->cur_state;
 7447	struct bpf_func_state *caller, *callee;
 7448	struct bpf_reg_state *r0;
 7449	int err;
 7450
 7451	callee = state->frame[state->curframe];
 7452	r0 = &callee->regs[BPF_REG_0];
 7453	if (r0->type == PTR_TO_STACK) {
 7454		/* technically it's ok to return caller's stack pointer
 7455		 * (or caller's caller's pointer) back to the caller,
 7456		 * since these pointers are valid. Only current stack
 7457		 * pointer will be invalid as soon as function exits,
 7458		 * but let's be conservative
 7459		 */
 7460		verbose(env, "cannot return stack pointer to the caller\n");
 7461		return -EINVAL;
 7462	}
 7463
 7464	caller = state->frame[state->curframe - 1];
 7465	if (callee->in_callback_fn) {
 7466		/* enforce R0 return value range [0, 1]. */
 7467		struct tnum range = callee->callback_ret_range;
 7468
 7469		if (r0->type != SCALAR_VALUE) {
 7470			verbose(env, "R0 not a scalar value\n");
 7471			return -EACCES;
 7472		}
 7473		if (!tnum_in(range, r0->var_off)) {
 7474			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
 7475			return -EINVAL;
 7476		}
 7477	} else {
 7478		/* return to the caller whatever r0 had in the callee */
 7479		caller->regs[BPF_REG_0] = *r0;
 7480	}
 7481
 7482	/* callback_fn frame should have released its own additions to parent's
 7483	 * reference state at this point, or check_reference_leak would
 7484	 * complain, hence it must be the same as the caller. There is no need
 7485	 * to copy it back.
 7486	 */
 7487	if (!callee->in_callback_fn) {
 7488		/* Transfer references to the caller */
 7489		err = copy_reference_state(caller, callee);
 7490		if (err)
 7491			return err;
 7492	}
 7493
 7494	*insn_idx = callee->callsite + 1;
 7495	if (env->log.level & BPF_LOG_LEVEL) {
 7496		verbose(env, "returning from callee:\n");
 7497		print_verifier_state(env, callee, true);
 7498		verbose(env, "to caller at %d:\n", *insn_idx);
 7499		print_verifier_state(env, caller, true);
 7500	}
 7501	/* clear everything in the callee */
 7502	free_func_state(callee);
 7503	state->frame[state->curframe--] = NULL;
 7504	return 0;
 7505}
 7506
 7507static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 7508				   int func_id,
 7509				   struct bpf_call_arg_meta *meta)
 7510{
 7511	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
 7512
 7513	if (ret_type != RET_INTEGER ||
 7514	    (func_id != BPF_FUNC_get_stack &&
 7515	     func_id != BPF_FUNC_get_task_stack &&
 7516	     func_id != BPF_FUNC_probe_read_str &&
 7517	     func_id != BPF_FUNC_probe_read_kernel_str &&
 7518	     func_id != BPF_FUNC_probe_read_user_str))
 7519		return;
 7520
 7521	ret_reg->smax_value = meta->msize_max_value;
 7522	ret_reg->s32_max_value = meta->msize_max_value;
 7523	ret_reg->smin_value = -MAX_ERRNO;
 7524	ret_reg->s32_min_value = -MAX_ERRNO;
 7525	reg_bounds_sync(ret_reg);
 7526}
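
/* Example of why the refinement matters (illustrative):
 *
 *	long n = bpf_get_stack(ctx, buf, sizeof(buf), 0);
 *
 * leaves R0 known to lie in [-MAX_ERRNO, sizeof(buf)], so after the program
 * checks 'n > 0' it can pass 'n' as the length of another helper access to
 * 'buf' without a separate upper-bound test against sizeof(buf).
 */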
 7527
 7528static int
 7529record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 7530		int func_id, int insn_idx)
 7531{
 7532	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 7533	struct bpf_map *map = meta->map_ptr;
 7534
 7535	if (func_id != BPF_FUNC_tail_call &&
 7536	    func_id != BPF_FUNC_map_lookup_elem &&
 7537	    func_id != BPF_FUNC_map_update_elem &&
 7538	    func_id != BPF_FUNC_map_delete_elem &&
 7539	    func_id != BPF_FUNC_map_push_elem &&
 7540	    func_id != BPF_FUNC_map_pop_elem &&
 7541	    func_id != BPF_FUNC_map_peek_elem &&
 7542	    func_id != BPF_FUNC_for_each_map_elem &&
 7543	    func_id != BPF_FUNC_redirect_map &&
 7544	    func_id != BPF_FUNC_map_lookup_percpu_elem)
 7545		return 0;
 7546
 7547	if (map == NULL) {
 7548		verbose(env, "kernel subsystem misconfigured verifier\n");
 7549		return -EINVAL;
 7550	}
 7551
 7552	/* In case of read-only, some additional restrictions
 7553	 * need to be applied in order to prevent altering the
 7554	 * state of the map from program side.
 7555	 */
 7556	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
 7557	    (func_id == BPF_FUNC_map_delete_elem ||
 7558	     func_id == BPF_FUNC_map_update_elem ||
 7559	     func_id == BPF_FUNC_map_push_elem ||
 7560	     func_id == BPF_FUNC_map_pop_elem)) {
 7561		verbose(env, "write into map forbidden\n");
 7562		return -EACCES;
 7563	}
 7564
 7565	if (!BPF_MAP_PTR(aux->map_ptr_state))
 7566		bpf_map_ptr_store(aux, meta->map_ptr,
 7567				  !meta->map_ptr->bypass_spec_v1);
 7568	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
 7569		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
 7570				  !meta->map_ptr->bypass_spec_v1);
 7571	return 0;
 7572}
 7573
 7574static int
 7575record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 7576		int func_id, int insn_idx)
 7577{
 7578	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 7579	struct bpf_reg_state *regs = cur_regs(env), *reg;
 7580	struct bpf_map *map = meta->map_ptr;
 7581	u64 val, max;
 7582	int err;
 7583
 7584	if (func_id != BPF_FUNC_tail_call)
 7585		return 0;
 7586	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
 7587		verbose(env, "kernel subsystem misconfigured verifier\n");
 7588		return -EINVAL;
 7589	}
 7590
 7591	reg = &regs[BPF_REG_3];
 7592	val = reg->var_off.value;
 7593	max = map->max_entries;
 7594
 7595	if (!(register_is_const(reg) && val < max)) {
 7596		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 7597		return 0;
 7598	}
 7599
 7600	err = mark_chain_precision(env, BPF_REG_3);
 7601	if (err)
 7602		return err;
 7603	if (bpf_map_key_unseen(aux))
 7604		bpf_map_key_store(aux, val);
 7605	else if (!bpf_map_key_poisoned(aux) &&
 7606		  bpf_map_key_immediate(aux) != val)
 7607		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 7608	return 0;
 7609}
 7610
 7611static int check_reference_leak(struct bpf_verifier_env *env)
 7612{
 7613	struct bpf_func_state *state = cur_func(env);
 7614	bool refs_lingering = false;
 7615	int i;
 7616
 7617	if (state->frameno && !state->in_callback_fn)
 7618		return 0;
 7619
 7620	for (i = 0; i < state->acquired_refs; i++) {
 7621		if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
 7622			continue;
 7623		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
 7624			state->refs[i].id, state->refs[i].insn_idx);
 7625		refs_lingering = true;
 7626	}
 7627	return refs_lingering ? -EINVAL : 0;
 7628}
 7629
 7630static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 7631				   struct bpf_reg_state *regs)
 7632{
 7633	struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
 7634	struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
 7635	struct bpf_map *fmt_map = fmt_reg->map_ptr;
 7636	int err, fmt_map_off, num_args;
 7637	u64 fmt_addr;
 7638	char *fmt;
 7639
 7640	/* data must be an array of u64 */
 7641	if (data_len_reg->var_off.value % 8)
 7642		return -EINVAL;
 7643	num_args = data_len_reg->var_off.value / 8;
 7644
 7645	/* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
 7646	 * and map_direct_value_addr is set.
 7647	 */
 7648	fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
 7649	err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
 7650						  fmt_map_off);
 7651	if (err) {
 7652		verbose(env, "verifier bug\n");
 7653		return -EFAULT;
 7654	}
 7655	fmt = (char *)(long)fmt_addr + fmt_map_off;
 7656
 7657	/* We are also guaranteed that fmt+fmt_map_off is NUL-terminated, so we
 7658	 * can focus on validating the format specifiers.
 7659	 */
 7660	err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, NULL, num_args);
 7661	if (err < 0)
 7662		verbose(env, "Invalid format string\n");
 7663
 7664	return err;
 7665}
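
/* Illustrative BPF-program-side call satisfying the checks above: the format
 * string must live in read-only map memory (a static const string ends up in
 * .rodata) and the data array must consist of u64 entries:
 *
 *	static const char fmt[] = "pid=%d comm=%s";
 *	__u64 args[] = { pid, (long)comm };
 *	char buf[64];
 *
 *	bpf_snprintf(buf, sizeof(buf), fmt, args, sizeof(args));
 *
 * sizeof(args) is a multiple of 8, so the num_args calculation above passes.
 */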
 7666
 7667static int check_get_func_ip(struct bpf_verifier_env *env)
 7668{
 7669	enum bpf_prog_type type = resolve_prog_type(env->prog);
 7670	int func_id = BPF_FUNC_get_func_ip;
 7671
 7672	if (type == BPF_PROG_TYPE_TRACING) {
 7673		if (!bpf_prog_has_trampoline(env->prog)) {
 7674			verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
 7675				func_id_name(func_id), func_id);
 7676			return -ENOTSUPP;
 7677		}
 7678		return 0;
 7679	} else if (type == BPF_PROG_TYPE_KPROBE) {
 7680		return 0;
 7681	}
 7682
 7683	verbose(env, "func %s#%d not supported for program type %d\n",
 7684		func_id_name(func_id), func_id, type);
 7685	return -ENOTSUPP;
 7686}
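
/* Illustrative program types where bpf_get_func_ip() is accepted (the attach
 * target is a placeholder):
 *
 *	SEC("fentry/do_unlinkat")	// TRACING, needs a trampoline
 *	SEC("kprobe/do_unlinkat")	// KPROBE
 *
 * In both cases the program may call:
 *
 *	__u64 ip = bpf_get_func_ip(ctx);
 */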
 7687
 7688static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
 7689{
 7690	return &env->insn_aux_data[env->insn_idx];
 7691}
 7692
 7693static bool loop_flag_is_zero(struct bpf_verifier_env *env)
 7694{
 7695	struct bpf_reg_state *regs = cur_regs(env);
 7696	struct bpf_reg_state *reg = &regs[BPF_REG_4];
 7697	bool reg_is_null = register_is_null(reg);
 7698
 7699	if (reg_is_null)
 7700		mark_chain_precision(env, BPF_REG_4);
 7701
 7702	return reg_is_null;
 7703}
 7704
 7705static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
 7706{
 7707	struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
 7708
 7709	if (!state->initialized) {
 7710		state->initialized = 1;
 7711		state->fit_for_inline = loop_flag_is_zero(env);
 7712		state->callback_subprogno = subprogno;
 7713		return;
 7714	}
 7715
 7716	if (!state->fit_for_inline)
 7717		return;
 7718
 7719	state->fit_for_inline = (loop_flag_is_zero(env) &&
 7720				 state->callback_subprogno == subprogno);
 7721}
 7722
 7723static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 7724			     int *insn_idx_p)
 7725{
 7726	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
 7727	const struct bpf_func_proto *fn = NULL;
 7728	enum bpf_return_type ret_type;
 7729	enum bpf_type_flag ret_flag;
 7730	struct bpf_reg_state *regs;
 7731	struct bpf_call_arg_meta meta;
 7732	int insn_idx = *insn_idx_p;
 7733	bool changes_data;
 7734	int i, err, func_id;
 7735
 7736	/* find function prototype */
 7737	func_id = insn->imm;
 7738	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
 7739		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
 7740			func_id);
 7741		return -EINVAL;
 7742	}
 7743
 7744	if (env->ops->get_func_proto)
 7745		fn = env->ops->get_func_proto(func_id, env->prog);
 7746	if (!fn) {
 7747		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
 7748			func_id);
 7749		return -EINVAL;
 7750	}
 7751
 7752	/* eBPF programs must be GPL compatible to use GPL-ed functions */
 7753	if (!env->prog->gpl_compatible && fn->gpl_only) {
 7754		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
 7755		return -EINVAL;
 7756	}
 7757
 7758	if (fn->allowed && !fn->allowed(env->prog)) {
 7759		verbose(env, "helper call is not allowed in probe\n");
 7760		return -EINVAL;
 7761	}
 7762
 7763	if (!env->prog->aux->sleepable && fn->might_sleep) {
 7764		verbose(env, "helper call might sleep in a non-sleepable prog\n");
 7765		return -EINVAL;
 7766	}
 7767
 7768	/* With LD_ABS/IND some JITs save/restore skb from r1. */
 7769	changes_data = bpf_helper_changes_pkt_data(fn->func);
 7770	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
 7771		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
 7772			func_id_name(func_id), func_id);
 7773		return -EINVAL;
 7774	}
 7775
 7776	memset(&meta, 0, sizeof(meta));
 7777	meta.pkt_access = fn->pkt_access;
 7778
 7779	err = check_func_proto(fn, func_id);
 7780	if (err) {
 7781		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
 7782			func_id_name(func_id), func_id);
 7783		return err;
 7784	}
 7785
 7786	if (env->cur_state->active_rcu_lock) {
 7787		if (fn->might_sleep) {
 7788			verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
 7789				func_id_name(func_id), func_id);
 7790			return -EINVAL;
 7791		}
 7792
 7793		if (env->prog->aux->sleepable && is_storage_get_function(func_id))
 7794			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
 7795	}
 7796
 7797	meta.func_id = func_id;
 7798	/* check args */
 7799	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 7800		err = check_func_arg(env, i, &meta, fn);
 7801		if (err)
 7802			return err;
 7803	}
 7804
 7805	err = record_func_map(env, &meta, func_id, insn_idx);
 7806	if (err)
 7807		return err;
 7808
 7809	err = record_func_key(env, &meta, func_id, insn_idx);
 7810	if (err)
 7811		return err;
 7812
 7813	/* Mark slots with STACK_MISC in case of raw mode, stack offset
 7814	 * is inferred from register state.
 7815	 */
 7816	for (i = 0; i < meta.access_size; i++) {
 7817		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
 7818				       BPF_WRITE, -1, false);
 7819		if (err)
 7820			return err;
 7821	}
 7822
 7823	regs = cur_regs(env);
 7824
 7825	/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
 7826	 * be reinitialized by any dynptr helper. Hence, mark_stack_slots_dynptr
 7827	 * is safe to do directly.
 7828	 */
 7829	if (meta.uninit_dynptr_regno) {
 7830		if (regs[meta.uninit_dynptr_regno].type == CONST_PTR_TO_DYNPTR) {
 7831			verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be initialized\n");
 7832			return -EFAULT;
 7833		}
 7834		/* we write BPF_DW bits (8 bytes) at a time */
 7835		for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
 7836			err = check_mem_access(env, insn_idx, meta.uninit_dynptr_regno,
 7837					       i, BPF_DW, BPF_WRITE, -1, false);
 7838			if (err)
 7839				return err;
 7840		}
 7841
 7842		err = mark_stack_slots_dynptr(env, &regs[meta.uninit_dynptr_regno],
 7843					      fn->arg_type[meta.uninit_dynptr_regno - BPF_REG_1],
 7844					      insn_idx);
 7845		if (err)
 7846			return err;
 7847	}
 7848
 7849	if (meta.release_regno) {
 7850		err = -EINVAL;
 7851		/* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
 7852		 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
 7853		 * is safe to do directly.
 7854		 */
 7855		if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
 7856			if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
 7857				verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
 7858				return -EFAULT;
 7859			}
 7860			err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
 7861		} else if (meta.ref_obj_id) {
 7862			err = release_reference(env, meta.ref_obj_id);
 7863		} else if (register_is_null(&regs[meta.release_regno])) {
 7864			/* meta.ref_obj_id can only be 0 if the register meant to be
 7865			 * released is NULL; release_regno is always an arg register (> R0).
 7866			 */
 7867			err = 0;
 7868		}
 7869		if (err) {
 7870			verbose(env, "func %s#%d reference has not been acquired before\n",
 7871				func_id_name(func_id), func_id);
 7872			return err;
 7873		}
 7874	}
 7875
 7876	switch (func_id) {
 7877	case BPF_FUNC_tail_call:
 7878		err = check_reference_leak(env);
 7879		if (err) {
 7880			verbose(env, "tail_call would lead to reference leak\n");
 7881			return err;
 7882		}
 7883		break;
 7884	case BPF_FUNC_get_local_storage:
 7885		/* check that the flags argument in get_local_storage(map, flags) is 0;
 7886		 * this is required because get_local_storage() can't return an error.
 7887		 */
 7888		if (!register_is_null(&regs[BPF_REG_2])) {
 7889			verbose(env, "get_local_storage() doesn't support non-zero flags\n");
 7890			return -EINVAL;
 7891		}
 7892		break;
 7893	case BPF_FUNC_for_each_map_elem:
 7894		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 7895					set_map_elem_callback_state);
 7896		break;
 7897	case BPF_FUNC_timer_set_callback:
 7898		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 7899					set_timer_callback_state);
 7900		break;
 7901	case BPF_FUNC_find_vma:
 7902		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 7903					set_find_vma_callback_state);
 7904		break;
 7905	case BPF_FUNC_snprintf:
 7906		err = check_bpf_snprintf_call(env, regs);
 7907		break;
 7908	case BPF_FUNC_loop:
 7909		update_loop_inline_state(env, meta.subprogno);
 7910		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 7911					set_loop_callback_state);
 7912		break;
 7913	case BPF_FUNC_dynptr_from_mem:
 7914		if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
 7915			verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
 7916				reg_type_str(env, regs[BPF_REG_1].type));
 7917			return -EACCES;
 7918		}
 7919		break;
 7920	case BPF_FUNC_set_retval:
 7921		if (prog_type == BPF_PROG_TYPE_LSM &&
 7922		    env->prog->expected_attach_type == BPF_LSM_CGROUP) {
 7923			if (!env->prog->aux->attach_func_proto->type) {
 7924				/* Make sure programs that attach to void
 7925				 * hooks don't try to modify return value.
 7926				 */
 7927				verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
 7928				return -EINVAL;
 7929			}
 7930		}
 7931		break;
 7932	case BPF_FUNC_dynptr_data:
 7933		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 7934			if (arg_type_is_dynptr(fn->arg_type[i])) {
 7935				struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
 7936
 7937				if (meta.ref_obj_id) {
 7938					verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
 7939					return -EFAULT;
 7940				}
 7941
 7942				meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
 7943				break;
 7944			}
 7945		}
 7946		if (i == MAX_BPF_FUNC_REG_ARGS) {
 7947			verbose(env, "verifier internal error: no dynptr in bpf_dynptr_data()\n");
 7948			return -EFAULT;
 7949		}
 7950		break;
 7951	case BPF_FUNC_user_ringbuf_drain:
 7952		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 7953					set_user_ringbuf_callback_state);
 7954		break;
 7955	}
 7956
 7957	if (err)
 7958		return err;
 7959
 7960	/* reset caller saved regs */
 7961	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 7962		mark_reg_not_init(env, regs, caller_saved[i]);
 7963		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 7964	}
 7965
 7966	/* helper call returns 64-bit value. */
 7967	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
 7968
 7969	/* update return register (already marked as written above) */
 7970	ret_type = fn->ret_type;
 7971	ret_flag = type_flag(ret_type);
 7972
 7973	switch (base_type(ret_type)) {
 7974	case RET_INTEGER:
 7975		/* sets type to SCALAR_VALUE */
 7976		mark_reg_unknown(env, regs, BPF_REG_0);
 7977		break;
 7978	case RET_VOID:
 7979		regs[BPF_REG_0].type = NOT_INIT;
 7980		break;
 7981	case RET_PTR_TO_MAP_VALUE:
 7982		/* There is no offset yet applied, variable or fixed */
 7983		mark_reg_known_zero(env, regs, BPF_REG_0);
 7984		/* remember map_ptr, so that check_map_access()
 7985		 * can check 'value_size' boundary of memory access
 7986		 * to map element returned from bpf_map_lookup_elem()
 7987		 */
 7988		if (meta.map_ptr == NULL) {
 7989			verbose(env,
 7990				"kernel subsystem misconfigured verifier\n");
 7991			return -EINVAL;
 7992		}
 7993		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 7994		regs[BPF_REG_0].map_uid = meta.map_uid;
 7995		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
 7996		if (!type_may_be_null(ret_type) &&
 7997		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
 7998			regs[BPF_REG_0].id = ++env->id_gen;
 7999		}
 8000		break;
 8001	case RET_PTR_TO_SOCKET:
 8002		mark_reg_known_zero(env, regs, BPF_REG_0);
 8003		regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
 8004		break;
 8005	case RET_PTR_TO_SOCK_COMMON:
 8006		mark_reg_known_zero(env, regs, BPF_REG_0);
 8007		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
 8008		break;
 8009	case RET_PTR_TO_TCP_SOCK:
 8010		mark_reg_known_zero(env, regs, BPF_REG_0);
 8011		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
 8012		break;
 8013	case RET_PTR_TO_MEM:
 8014		mark_reg_known_zero(env, regs, BPF_REG_0);
 8015		regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
 8016		regs[BPF_REG_0].mem_size = meta.mem_size;
 8017		break;
 8018	case RET_PTR_TO_MEM_OR_BTF_ID:
 8019	{
 8020		const struct btf_type *t;
 8021
 8022		mark_reg_known_zero(env, regs, BPF_REG_0);
 8023		t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
 8024		if (!btf_type_is_struct(t)) {
 8025			u32 tsize;
 8026			const struct btf_type *ret;
 8027			const char *tname;
 8028
 8029			/* resolve the type size of ksym. */
 8030			ret = btf_resolve_size(meta.ret_btf, t, &tsize);
 8031			if (IS_ERR(ret)) {
 8032				tname = btf_name_by_offset(meta.ret_btf, t->name_off);
 8033				verbose(env, "unable to resolve the size of type '%s': %ld\n",
 8034					tname, PTR_ERR(ret));
 8035				return -EINVAL;
 8036			}
 8037			regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
 8038			regs[BPF_REG_0].mem_size = tsize;
 8039		} else {
 8040			/* MEM_RDONLY may be carried from ret_flag, but it
 8041			 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
 8042			 * it will confuse the check of PTR_TO_BTF_ID in
 8043			 * check_mem_access().
 8044			 */
 8045			ret_flag &= ~MEM_RDONLY;
 8046
 8047			regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
 8048			regs[BPF_REG_0].btf = meta.ret_btf;
 8049			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 8050		}
 8051		break;
 8052	}
 8053	case RET_PTR_TO_BTF_ID:
 8054	{
 8055		struct btf *ret_btf;
 8056		int ret_btf_id;
 8057
 8058		mark_reg_known_zero(env, regs, BPF_REG_0);
 8059		regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
 8060		if (func_id == BPF_FUNC_kptr_xchg) {
 8061			ret_btf = meta.kptr_field->kptr.btf;
 8062			ret_btf_id = meta.kptr_field->kptr.btf_id;
 8063		} else {
 8064			if (fn->ret_btf_id == BPF_PTR_POISON) {
 8065				verbose(env, "verifier internal error:");
 8066				verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
 8067					func_id_name(func_id));
 8068				return -EINVAL;
 8069			}
 8070			ret_btf = btf_vmlinux;
 8071			ret_btf_id = *fn->ret_btf_id;
 8072		}
 8073		if (ret_btf_id == 0) {
 8074			verbose(env, "invalid return type %u of func %s#%d\n",
 8075				base_type(ret_type), func_id_name(func_id),
 8076				func_id);
 8077			return -EINVAL;
 8078		}
 8079		regs[BPF_REG_0].btf = ret_btf;
 8080		regs[BPF_REG_0].btf_id = ret_btf_id;
 8081		break;
 8082	}
 8083	default:
 8084		verbose(env, "unknown return type %u of func %s#%d\n",
 8085			base_type(ret_type), func_id_name(func_id), func_id);
 8086		return -EINVAL;
 8087	}
 8088
 8089	if (type_may_be_null(regs[BPF_REG_0].type))
 8090		regs[BPF_REG_0].id = ++env->id_gen;
 8091
 8092	if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
 8093		verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
 8094			func_id_name(func_id), func_id);
 8095		return -EFAULT;
 8096	}
 8097
 8098	if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
 8099		/* For release_reference() */
 8100		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
 8101	} else if (is_acquire_function(func_id, meta.map_ptr)) {
 8102		int id = acquire_reference_state(env, insn_idx);
 8103
 8104		if (id < 0)
 8105			return id;
 8106		/* For mark_ptr_or_null_reg() */
 8107		regs[BPF_REG_0].id = id;
 8108		/* For release_reference() */
 8109		regs[BPF_REG_0].ref_obj_id = id;
 8110	}
 8111
 8112	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
 8113
 8114	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
 8115	if (err)
 8116		return err;
 8117
 8118	if ((func_id == BPF_FUNC_get_stack ||
 8119	     func_id == BPF_FUNC_get_task_stack) &&
 8120	    !env->prog->has_callchain_buf) {
 8121		const char *err_str;
 8122
 8123#ifdef CONFIG_PERF_EVENTS
 8124		err = get_callchain_buffers(sysctl_perf_event_max_stack);
 8125		err_str = "cannot get callchain buffer for func %s#%d\n";
 8126#else
 8127		err = -ENOTSUPP;
 8128		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
 8129#endif
 8130		if (err) {
 8131			verbose(env, err_str, func_id_name(func_id), func_id);
 8132			return err;
 8133		}
 8134
 8135		env->prog->has_callchain_buf = true;
 8136	}
 8137
 8138	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
 8139		env->prog->call_get_stack = true;
 8140
 8141	if (func_id == BPF_FUNC_get_func_ip) {
 8142		if (check_get_func_ip(env))
 8143			return -ENOTSUPP;
 8144		env->prog->call_get_func_ip = true;
 8145	}
 8146
 8147	if (changes_data)
 8148		clear_all_pkt_pointers(env);
 8149	return 0;
 8150}
 8151
 8152/* mark_btf_func_reg_size() is used when the reg size is determined by
 8153 * the size of the BTF func_proto's return value or argument.
 8154 */
 8155static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
 8156				   size_t reg_size)
 8157{
 8158	struct bpf_reg_state *reg = &cur_regs(env)[regno];
 8159
 8160	if (regno == BPF_REG_0) {
 8161		/* Function return value */
 8162		reg->live |= REG_LIVE_WRITTEN;
 8163		reg->subreg_def = reg_size == sizeof(u64) ?
 8164			DEF_NOT_SUBREG : env->insn_idx + 1;
 8165	} else {
 8166		/* Function argument */
 8167		if (reg_size == sizeof(u64)) {
 8168			mark_insn_zext(env, reg);
 8169			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 8170		} else {
 8171			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
 8172		}
 8173	}
 8174}
 8175
 8176struct bpf_kfunc_call_arg_meta {
 8177	/* In parameters */
 8178	struct btf *btf;
 8179	u32 func_id;
 8180	u32 kfunc_flags;
 8181	const struct btf_type *func_proto;
 8182	const char *func_name;
 8183	/* Out parameters */
 8184	u32 ref_obj_id;
 8185	u8 release_regno;
 8186	bool r0_rdonly;
 8187	u32 ret_btf_id;
 8188	u64 r0_size;
 8189	struct {
 8190		u64 value;
 8191		bool found;
 8192	} arg_constant;
 8193	struct {
 8194		struct btf *btf;
 8195		u32 btf_id;
 8196	} arg_obj_drop;
 8197	struct {
 8198		struct btf_field *field;
 8199	} arg_list_head;
 8200};
 8201
 8202static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
 8203{
 8204	return meta->kfunc_flags & KF_ACQUIRE;
 8205}
 8206
 8207static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
 8208{
 8209	return meta->kfunc_flags & KF_RET_NULL;
 8210}
 8211
 8212static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
 8213{
 8214	return meta->kfunc_flags & KF_RELEASE;
 8215}
 8216
 8217static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
 8218{
 8219	return meta->kfunc_flags & KF_TRUSTED_ARGS;
 8220}
 8221
 8222static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
 8223{
 8224	return meta->kfunc_flags & KF_SLEEPABLE;
 8225}
 8226
 8227static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
 8228{
 8229	return meta->kfunc_flags & KF_DESTRUCTIVE;
 8230}
 8231
 8232static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
 8233{
 8234	return meta->kfunc_flags & KF_RCU;
 8235}
 8236
 8237static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
 8238{
 8239	return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
 8240}
 8241
 8242static bool __kfunc_param_match_suffix(const struct btf *btf,
 8243				       const struct btf_param *arg,
 8244				       const char *suffix)
 8245{
 8246	int suffix_len = strlen(suffix), len;
 8247	const char *param_name;
 8248
 8249	/* In the future, this can be ported to use BTF tagging */
 8250	param_name = btf_name_by_offset(btf, arg->name_off);
 8251	if (str_is_empty(param_name))
 8252		return false;
 8253	len = strlen(param_name);
 8254	if (len < suffix_len)
 8255		return false;
 8256	param_name += len - suffix_len;
 8257	return !strncmp(param_name, suffix, suffix_len);
 8258}
 8259
 8260static bool is_kfunc_arg_mem_size(const struct btf *btf,
 8261				  const struct btf_param *arg,
 8262				  const struct bpf_reg_state *reg)
 8263{
 8264	const struct btf_type *t;
 8265
 8266	t = btf_type_skip_modifiers(btf, arg->type, NULL);
 8267	if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
 8268		return false;
 8269
 8270	return __kfunc_param_match_suffix(btf, arg, "__sz");
 8271}
 8272
 8273static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
 8274{
 8275	return __kfunc_param_match_suffix(btf, arg, "__k");
 8276}
 8277
 8278static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
 8279{
 8280	return __kfunc_param_match_suffix(btf, arg, "__ign");
 8281}
 8282
 8283static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
 8284{
 8285	return __kfunc_param_match_suffix(btf, arg, "__alloc");
 8286}
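
     /* Example (illustrative sketch; the kfunc below is hypothetical): the
      * name suffixes matched by the helpers above change how the verifier
      * treats a kfunc's arguments.
      *
      *    int bpf_example_fill(struct foo *obj__alloc, void *data, u32 data__sz,
      *                         u64 flags__k, void *unused__ign);
      *
      * obj__alloc  - must point to an object allocated with bpf_obj_new
      *               (see KF_ARG_PTR_TO_ALLOC_BTF_ID below).
      * data__sz    - marks the scalar as the byte size of the preceding 'data'
      *               pointer, so both are checked together as a mem/len pair.
      * flags__k    - must be a known constant scalar at verification time.
      * unused__ign - is skipped entirely by the argument checks.
      */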
 8287
 8288static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
 8289					  const struct btf_param *arg,
 8290					  const char *name)
 8291{
 8292	int len, target_len = strlen(name);
 8293	const char *param_name;
 8294
 8295	param_name = btf_name_by_offset(btf, arg->name_off);
 8296	if (str_is_empty(param_name))
 8297		return false;
 8298	len = strlen(param_name);
 8299	if (len != target_len)
 8300		return false;
 8301	if (strcmp(param_name, name))
 8302		return false;
 8303
 8304	return true;
 8305}
 8306
 8307enum {
 8308	KF_ARG_DYNPTR_ID,
 8309	KF_ARG_LIST_HEAD_ID,
 8310	KF_ARG_LIST_NODE_ID,
 8311};
 8312
 8313BTF_ID_LIST(kf_arg_btf_ids)
 8314BTF_ID(struct, bpf_dynptr_kern)
 8315BTF_ID(struct, bpf_list_head)
 8316BTF_ID(struct, bpf_list_node)
 8317
 8318static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
 8319				    const struct btf_param *arg, int type)
 8320{
 8321	const struct btf_type *t;
 8322	u32 res_id;
 8323
 8324	t = btf_type_skip_modifiers(btf, arg->type, NULL);
 8325	if (!t)
 8326		return false;
 8327	if (!btf_type_is_ptr(t))
 8328		return false;
 8329	t = btf_type_skip_modifiers(btf, t->type, &res_id);
 8330	if (!t)
 8331		return false;
 8332	return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
 8333}
 8334
 8335static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
 8336{
 8337	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
 8338}
 8339
 8340static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
 8341{
 8342	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
 8343}
 8344
 8345static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
 8346{
 8347	return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
 8348}
 8349
 8350/* Returns true if the struct is composed only of scalars; up to 4 levels of nesting allowed */
 8351static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
 8352					const struct btf *btf,
 8353					const struct btf_type *t, int rec)
 8354{
 8355	const struct btf_type *member_type;
 8356	const struct btf_member *member;
 8357	u32 i;
 8358
 8359	if (!btf_type_is_struct(t))
 8360		return false;
 8361
 8362	for_each_member(i, t, member) {
 8363		const struct btf_array *array;
 8364
 8365		member_type = btf_type_skip_modifiers(btf, member->type, NULL);
 8366		if (btf_type_is_struct(member_type)) {
 8367			if (rec >= 3) {
 8368				verbose(env, "max struct nesting depth exceeded\n");
 8369				return false;
 8370			}
 8371			if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
 8372				return false;
 8373			continue;
 8374		}
 8375		if (btf_type_is_array(member_type)) {
 8376			array = btf_array(member_type);
 8377			if (!array->nelems)
 8378				return false;
 8379			member_type = btf_type_skip_modifiers(btf, array->type, NULL);
 8380			if (!btf_type_is_scalar(member_type))
 8381				return false;
 8382			continue;
 8383		}
 8384		if (!btf_type_is_scalar(member_type))
 8385			return false;
 8386	}
 8387	return true;
 8388}
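
     /* Example (illustrative only): struct layouts accepted and rejected by
      * __btf_type_is_scalar_struct() above.
      *
      *    struct accepted {
      *            u32 a;
      *            u64 vals[4];                    // array of scalars is fine
      *            struct { u16 b; u8 c; } nested; // nesting depth within limit
      *    };
      *
      *    struct rejected {
      *            u32 a;
      *            void *p;                        // pointer member is not a scalar
      *    };
      */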
 8389
 8390
 8391static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
 8392#ifdef CONFIG_NET
 8393	[PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
 8394	[PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
 8395	[PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
 8396#endif
 8397};
 8398
 8399enum kfunc_ptr_arg_type {
 8400	KF_ARG_PTR_TO_CTX,
 8401	KF_ARG_PTR_TO_ALLOC_BTF_ID,  /* Allocated object */
 8402	KF_ARG_PTR_TO_KPTR,	     /* PTR_TO_KPTR but type specific */
 8403	KF_ARG_PTR_TO_DYNPTR,
 8404	KF_ARG_PTR_TO_LIST_HEAD,
 8405	KF_ARG_PTR_TO_LIST_NODE,
 8406	KF_ARG_PTR_TO_BTF_ID,	     /* Also covers reg2btf_ids conversions */
 8407	KF_ARG_PTR_TO_MEM,
 8408	KF_ARG_PTR_TO_MEM_SIZE,	     /* Size derived from next argument, skip it */
 8409};
 8410
 8411enum special_kfunc_type {
 8412	KF_bpf_obj_new_impl,
 8413	KF_bpf_obj_drop_impl,
 8414	KF_bpf_list_push_front,
 8415	KF_bpf_list_push_back,
 8416	KF_bpf_list_pop_front,
 8417	KF_bpf_list_pop_back,
 8418	KF_bpf_cast_to_kern_ctx,
 8419	KF_bpf_rdonly_cast,
 8420	KF_bpf_rcu_read_lock,
 8421	KF_bpf_rcu_read_unlock,
 8422};
 8423
 8424BTF_SET_START(special_kfunc_set)
 8425BTF_ID(func, bpf_obj_new_impl)
 8426BTF_ID(func, bpf_obj_drop_impl)
 8427BTF_ID(func, bpf_list_push_front)
 8428BTF_ID(func, bpf_list_push_back)
 8429BTF_ID(func, bpf_list_pop_front)
 8430BTF_ID(func, bpf_list_pop_back)
 8431BTF_ID(func, bpf_cast_to_kern_ctx)
 8432BTF_ID(func, bpf_rdonly_cast)
 8433BTF_SET_END(special_kfunc_set)
 8434
 8435BTF_ID_LIST(special_kfunc_list)
 8436BTF_ID(func, bpf_obj_new_impl)
 8437BTF_ID(func, bpf_obj_drop_impl)
 8438BTF_ID(func, bpf_list_push_front)
 8439BTF_ID(func, bpf_list_push_back)
 8440BTF_ID(func, bpf_list_pop_front)
 8441BTF_ID(func, bpf_list_pop_back)
 8442BTF_ID(func, bpf_cast_to_kern_ctx)
 8443BTF_ID(func, bpf_rdonly_cast)
 8444BTF_ID(func, bpf_rcu_read_lock)
 8445BTF_ID(func, bpf_rcu_read_unlock)
 8446
 8447static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
 8448{
 8449	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
 8450}
 8451
 8452static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
 8453{
 8454	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
 8455}
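
     /* Example (illustrative BPF-side sketch; assumes the program declares the
      * two kfuncs above as usual kfunc prototypes): RCU-tagged pointers are
      * meant to be walked only between the lock/unlock pair. Once
      * bpf_rcu_read_unlock() is processed, check_kfunc_call() below downgrades
      * every MEM_RCU register to PTR_UNTRUSTED.
      *
      *    bpf_rcu_read_lock();
      *    // loads of __rcu-tagged pointer fields here may be marked MEM_RCU
      *    // and can be passed to KF_RCU kfuncs
      *    bpf_rcu_read_unlock();
      *    // the same registers are now untrusted and can no longer be passed
      *    // to kfuncs expecting trusted or RCU pointers
      */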
 8456
 8457static enum kfunc_ptr_arg_type
 8458get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 8459		       struct bpf_kfunc_call_arg_meta *meta,
 8460		       const struct btf_type *t, const struct btf_type *ref_t,
 8461		       const char *ref_tname, const struct btf_param *args,
 8462		       int argno, int nargs)
 8463{
 8464	u32 regno = argno + 1;
 8465	struct bpf_reg_state *regs = cur_regs(env);
 8466	struct bpf_reg_state *reg = &regs[regno];
 8467	bool arg_mem_size = false;
 8468
 8469	if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
 8470		return KF_ARG_PTR_TO_CTX;
 8471
 8472	/* In this function, we classify the kfunc argument based on its BTF type,
 8473	 * leaving the rest of the verification with respect to the register
 8474	 * type to our caller. When a set of conditions holds for the BTF type of
 8475	 * an argument, we resolve it to a known kfunc_ptr_arg_type.
 8476	 */
 8477	if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
 8478		return KF_ARG_PTR_TO_CTX;
 8479
 8480	if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
 8481		return KF_ARG_PTR_TO_ALLOC_BTF_ID;
 8482
 8483	if (is_kfunc_arg_kptr_get(meta, argno)) {
 8484		if (!btf_type_is_ptr(ref_t)) {
 8485			verbose(env, "arg#0 BTF type must be a double pointer for kptr_get kfunc\n");
 8486			return -EINVAL;
 8487		}
 8488		ref_t = btf_type_by_id(meta->btf, ref_t->type);
 8489		ref_tname = btf_name_by_offset(meta->btf, ref_t->name_off);
 8490		if (!btf_type_is_struct(ref_t)) {
 8491			verbose(env, "kernel function %s args#0 pointer type %s %s is not supported\n",
 8492				meta->func_name, btf_type_str(ref_t), ref_tname);
 8493			return -EINVAL;
 8494		}
 8495		return KF_ARG_PTR_TO_KPTR;
 8496	}
 8497
 8498	if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
 8499		return KF_ARG_PTR_TO_DYNPTR;
 8500
 8501	if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
 8502		return KF_ARG_PTR_TO_LIST_HEAD;
 8503
 8504	if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
 8505		return KF_ARG_PTR_TO_LIST_NODE;
 8506
 8507	if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
 8508		if (!btf_type_is_struct(ref_t)) {
 8509			verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
 8510				meta->func_name, argno, btf_type_str(ref_t), ref_tname);
 8511			return -EINVAL;
 8512		}
 8513		return KF_ARG_PTR_TO_BTF_ID;
 8514	}
 8515
 8516	if (argno + 1 < nargs && is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]))
 8517		arg_mem_size = true;
 8518
 8519	/* This is the catch-all argument type for the register types supported by
 8520	 * check_helper_mem_access. However, we only allow it when the argument type
 8521	 * is a pointer to scalar, or a struct composed (recursively) of scalars.
 8522	 * When arg_mem_size is true, the pointer can also be void *.
 8523	 */
 8524	if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
 8525	    (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
 8526		verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
 8527			argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
 8528		return -EINVAL;
 8529	}
 8530	return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
 8531}
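
     /* Example (hypothetical kfunc, shown only to illustrate the classification
      * above):
      *
      *    int bpf_example_kfunc(struct __sk_buff *skb, struct foo *obj__alloc,
      *                          struct bpf_dynptr_kern *p, void *data, u32 data__sz);
      *
      * called from a BPF_PROG_TYPE_SCHED_CLS program would have its pointer
      * arguments resolved to KF_ARG_PTR_TO_CTX, KF_ARG_PTR_TO_ALLOC_BTF_ID,
      * KF_ARG_PTR_TO_DYNPTR and KF_ARG_PTR_TO_MEM_SIZE (a memory pointer whose
      * size comes from the trailing data__sz scalar).
      */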
 8532
 8533static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
 8534					struct bpf_reg_state *reg,
 8535					const struct btf_type *ref_t,
 8536					const char *ref_tname, u32 ref_id,
 8537					struct bpf_kfunc_call_arg_meta *meta,
 8538					int argno)
 8539{
 8540	const struct btf_type *reg_ref_t;
 8541	bool strict_type_match = false;
 8542	const struct btf *reg_btf;
 8543	const char *reg_ref_tname;
 8544	u32 reg_ref_id;
 8545
 8546	if (base_type(reg->type) == PTR_TO_BTF_ID) {
 8547		reg_btf = reg->btf;
 8548		reg_ref_id = reg->btf_id;
 8549	} else {
 8550		reg_btf = btf_vmlinux;
 8551		reg_ref_id = *reg2btf_ids[base_type(reg->type)];
 8552	}
 8553
 8554	if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
 8555		strict_type_match = true;
 8556
 8557	reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
 8558	reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
 8559	if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
 8560		verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
 8561			meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
 8562			btf_type_str(reg_ref_t), reg_ref_tname);
 8563		return -EINVAL;
 8564	}
 8565	return 0;
 8566}
 8567
 8568static int process_kf_arg_ptr_to_kptr(struct bpf_verifier_env *env,
 8569				      struct bpf_reg_state *reg,
 8570				      const struct btf_type *ref_t,
 8571				      const char *ref_tname,
 8572				      struct bpf_kfunc_call_arg_meta *meta,
 8573				      int argno)
 8574{
 8575	struct btf_field *kptr_field;
 8576
 8577	/* check_func_arg_reg_off allows var_off for
 8578	 * PTR_TO_MAP_VALUE, but we need a fixed offset to look up
 8579	 * the kptr field.
 8580	 */
 8581	if (!tnum_is_const(reg->var_off)) {
 8582		verbose(env, "arg#0 must have constant offset\n");
 8583		return -EINVAL;
 8584	}
 8585
 8586	kptr_field = btf_record_find(reg->map_ptr->record, reg->off + reg->var_off.value, BPF_KPTR);
 8587	if (!kptr_field || kptr_field->type != BPF_KPTR_REF) {
 8588		verbose(env, "arg#0 no referenced kptr at map value offset=%llu\n",
 8589			reg->off + reg->var_off.value);
 8590		return -EINVAL;
 8591	}
 8592
 8593	if (!btf_struct_ids_match(&env->log, meta->btf, ref_t->type, 0, kptr_field->kptr.btf,
 8594				  kptr_field->kptr.btf_id, true)) {
 8595		verbose(env, "kernel function %s args#%d expected pointer to %s %s\n",
 8596			meta->func_name, argno, btf_type_str(ref_t), ref_tname);
 8597		return -EINVAL;
 8598	}
 8599	return 0;
 8600}
 8601
 8602static int ref_set_release_on_unlock(struct bpf_verifier_env *env, u32 ref_obj_id)
 8603{
 8604	struct bpf_func_state *state = cur_func(env);
 8605	struct bpf_reg_state *reg;
 8606	int i;
 8607
 8608	/* Inside a bpf_spin_lock critical section only list_push and list_pop
 8609	 * may be called; BPF subprogs and global functions are not allowed.
 8610	 * This means that references cannot be released inside the critical
 8611	 * section, though they may still be added to the reference state, and
 8612	 * that acquired_refs are never copied out to a different frame, since
 8613	 * BPF-to-BPF calls don't work inside bpf_spin_lock critical sections.
 8614	 */
 8615	if (!ref_obj_id) {
 8616		verbose(env, "verifier internal error: ref_obj_id is zero for release_on_unlock\n");
 8617		return -EFAULT;
 8618	}
 8619	for (i = 0; i < state->acquired_refs; i++) {
 8620		if (state->refs[i].id == ref_obj_id) {
 8621			if (state->refs[i].release_on_unlock) {
 8622				verbose(env, "verifier internal error: expected false release_on_unlock");
 8623				return -EFAULT;
 8624			}
 8625			state->refs[i].release_on_unlock = true;
 8626			/* Now mark every register sharing the same ref_obj_id as untrusted */
 8627			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
 8628				if (reg->ref_obj_id == ref_obj_id)
 8629					reg->type |= PTR_UNTRUSTED;
 8630			}));
 8631			return 0;
 8632		}
 8633	}
 8634	verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
 8635	return -EFAULT;
 8636}
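
     /* Example (illustrative BPF-side sketch; the bpf_obj_new()/bpf_spin_lock()
      * wrappers provided by BPF-side headers are assumed, and 'lock' and 'head'
      * are assumed to live in the same map value):
      *
      *    n = bpf_obj_new(typeof(*n));
      *    if (!n)
      *            return 0;
      *    bpf_spin_lock(&lock);
      *    bpf_list_push_front(&head, &n->node);  // n is now release_on_unlock;
      *                                           // all aliases of n turn untrusted
      *    bpf_spin_unlock(&lock);                // reference released here
      */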
 8637
 8638/* Implementation details:
 8639 *
 8640 * Each register points to some region of memory, which we define as an
 8641 * allocation. Each allocation may embed a bpf_spin_lock which protects any
 8642 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) that are part of
 8643 * the same allocation. The lock and the data it protects are colocated in
 8644 * the same memory region.
 8645 *
 8646 * Hence, every time a register holds a pointer value pointing to such an
 8647 * allocation, the verifier preserves a unique reg->id for it.
 8648 *
 8649 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
 8650 * bpf_spin_lock is called.
 8651 *
 8652 * To enable this, lock state in the verifier captures two values:
 8653 *	active_lock.ptr = Register's type specific pointer
 8654 *	active_lock.id  = A unique ID for each register pointer value
 8655 *
 8656 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
 8657 * supported register types.
 8658 *
 8659 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
 8660 * allocated objects is the reg->btf pointer.
 8661 *
 8662 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
 8663 * can establish the provenance of the map value statically for each distinct
 8664 * lookup into such maps. They always contain a single map value, so assigning
 8665 * unique IDs to each pseudo load would pessimize the algorithm and reject valid programs.
 8666 *
 8667 * So, in case of global variables, they use array maps with max_entries = 1,
 8668 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
 8669 * into the same map value as max_entries is 1, as described above).
 8670 *
 8671 * In case of inner map lookups, the inner map pointer has same map_ptr as the
 8672 * outer map pointer (in verifier context), but each lookup into an inner map
 8673 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
 8674 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
 8675 * will get different reg->id assigned to each lookup, hence different
 8676 * active_lock.id.
 8677 *
 8678 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
 8679 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
 8680 * returned from bpf_obj_new. Each allocation receives a new reg->id.
 8681 */
 8682static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
 8683{
 8684	void *ptr;
 8685	u32 id;
 8686
 8687	switch ((int)reg->type) {
 8688	case PTR_TO_MAP_VALUE:
 8689		ptr = reg->map_ptr;
 8690		break;
 8691	case PTR_TO_BTF_ID | MEM_ALLOC:
 8692	case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
 8693		ptr = reg->btf;
 8694		break;
 8695	default:
 8696		verbose(env, "verifier internal error: unknown reg type for lock check\n");
 8697		return -EFAULT;
 8698	}
 8699	id = reg->id;
 8700
 8701	if (!env->cur_state->active_lock.ptr)
 8702		return -EINVAL;
 8703	if (env->cur_state->active_lock.ptr != ptr ||
 8704	    env->cur_state->active_lock.id != id) {
 8705		verbose(env, "held lock and object are not in the same allocation\n");
 8706		return -EINVAL;
 8707	}
 8708	return 0;
 8709}
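
     /* Example (illustrative BPF-side sketch; the __contains() annotation and
      * map definition macros from BPF-side headers are assumed): a map value
      * that colocates the lock with the list it protects, matching the
      * "Implementation details" comment above.
      *
      *    struct elem {
      *            struct bpf_spin_lock lock;
      *            struct bpf_list_head head __contains(item, node);
      *    };
      *
      * After bpf_spin_lock(&e->lock), list operations on &e->head pass the
      * check above, because the (ptr, id) recorded in active_lock at lock time
      * match the ones derived from the register holding &e->head. Locking one
      * element and operating on another element's list is rejected with
      * "held lock and object are not in the same allocation".
      */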
 8710
 8711static bool is_bpf_list_api_kfunc(u32 btf_id)
 8712{
 8713	return btf_id == special_kfunc_list[KF_bpf_list_push_front] ||
 8714	       btf_id == special_kfunc_list[KF_bpf_list_push_back] ||
 8715	       btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
 8716	       btf_id == special_kfunc_list[KF_bpf_list_pop_back];
 8717}
 8718
 8719static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
 8720					   struct bpf_reg_state *reg, u32 regno,
 8721					   struct bpf_kfunc_call_arg_meta *meta)
 8722{
 8723	struct btf_field *field;
 8724	struct btf_record *rec;
 8725	u32 list_head_off;
 8726
 8727	if (meta->btf != btf_vmlinux || !is_bpf_list_api_kfunc(meta->func_id)) {
 8728		verbose(env, "verifier internal error: bpf_list_head argument for unknown kfunc\n");
 8729		return -EFAULT;
 8730	}
 8731
 8732	if (!tnum_is_const(reg->var_off)) {
 8733		verbose(env,
 8734			"R%d doesn't have constant offset. bpf_list_head has to be at the constant offset\n",
 8735			regno);
 8736		return -EINVAL;
 8737	}
 8738
 8739	rec = reg_btf_record(reg);
 8740	list_head_off = reg->off + reg->var_off.value;
 8741	field = btf_record_find(rec, list_head_off, BPF_LIST_HEAD);
 8742	if (!field) {
 8743		verbose(env, "bpf_list_head not found at offset=%u\n", list_head_off);
 8744		return -EINVAL;
 8745	}
 8746
 8747	/* All functions require bpf_list_head to be protected using a bpf_spin_lock */
 8748	if (check_reg_allocation_locked(env, reg)) {
 8749		verbose(env, "bpf_spin_lock at off=%d must be held for bpf_list_head\n",
 8750			rec->spin_lock_off);
 8751		return -EINVAL;
 8752	}
 8753
 8754	if (meta->arg_list_head.field) {
 8755		verbose(env, "verifier internal error: repeating bpf_list_head arg\n");
 8756		return -EFAULT;
 8757	}
 8758	meta->arg_list_head.field = field;
 8759	return 0;
 8760}
 8761
 8762static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
 8763					   struct bpf_reg_state *reg, u32 regno,
 8764					   struct bpf_kfunc_call_arg_meta *meta)
 8765{
 8766	const struct btf_type *et, *t;
 8767	struct btf_field *field;
 8768	struct btf_record *rec;
 8769	u32 list_node_off;
 8770
 8771	if (meta->btf != btf_vmlinux ||
 8772	    (meta->func_id != special_kfunc_list[KF_bpf_list_push_front] &&
 8773	     meta->func_id != special_kfunc_list[KF_bpf_list_push_back])) {
 8774		verbose(env, "verifier internal error: bpf_list_node argument for unknown kfunc\n");
 8775		return -EFAULT;
 8776	}
 8777
 8778	if (!tnum_is_const(reg->var_off)) {
 8779		verbose(env,
 8780			"R%d doesn't have constant offset. bpf_list_node has to be at the constant offset\n",
 8781			regno);
 8782		return -EINVAL;
 8783	}
 8784
 8785	rec = reg_btf_record(reg);
 8786	list_node_off = reg->off + reg->var_off.value;
 8787	field = btf_record_find(rec, list_node_off, BPF_LIST_NODE);
 8788	if (!field || field->offset != list_node_off) {
 8789		verbose(env, "bpf_list_node not found at offset=%u\n", list_node_off);
 8790		return -EINVAL;
 8791	}
 8792
 8793	field = meta->arg_list_head.field;
 8794
 8795	et = btf_type_by_id(field->list_head.btf, field->list_head.value_btf_id);
 8796	t = btf_type_by_id(reg->btf, reg->btf_id);
 8797	if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->list_head.btf,
 8798				  field->list_head.value_btf_id, true)) {
 8799		verbose(env, "operation on bpf_list_head expects arg#1 bpf_list_node at offset=%d "
 8800			"in struct %s, but arg is at offset=%d in struct %s\n",
 8801			field->list_head.node_offset, btf_name_by_offset(field->list_head.btf, et->name_off),
 8802			list_node_off, btf_name_by_offset(reg->btf, t->name_off));
 8803		return -EINVAL;
 8804	}
 8805
 8806	if (list_node_off != field->list_head.node_offset) {
 8807		verbose(env, "arg#1 offset=%d, but expected bpf_list_node at offset=%d in struct %s\n",
 8808			list_node_off, field->list_head.node_offset,
 8809			btf_name_by_offset(field->list_head.btf, et->name_off));
 8810		return -EINVAL;
 8811	}
 8812	/* Set arg#1 for expiration after unlock */
 8813	return ref_set_release_on_unlock(env, reg->ref_obj_id);
 8814}
 8815
 8816static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta)
 8817{
 8818	const char *func_name = meta->func_name, *ref_tname;
 8819	const struct btf *btf = meta->btf;
 8820	const struct btf_param *args;
 8821	u32 i, nargs;
 8822	int ret;
 8823
 8824	args = (const struct btf_param *)(meta->func_proto + 1);
 8825	nargs = btf_type_vlen(meta->func_proto);
 8826	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
 8827		verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
 8828			MAX_BPF_FUNC_REG_ARGS);
 8829		return -EINVAL;
 8830	}
 8831
 8832	/* Check that BTF function arguments match actual types that the
 8833	 * verifier sees.
 8834	 */
 8835	for (i = 0; i < nargs; i++) {
 8836		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
 8837		const struct btf_type *t, *ref_t, *resolve_ret;
 8838		enum bpf_arg_type arg_type = ARG_DONTCARE;
 8839		u32 regno = i + 1, ref_id, type_size;
 8840		bool is_ret_buf_sz = false;
 8841		int kf_arg_type;
 8842
 8843		t = btf_type_skip_modifiers(btf, args[i].type, NULL);
 8844
 8845		if (is_kfunc_arg_ignore(btf, &args[i]))
 8846			continue;
 8847
 8848		if (btf_type_is_scalar(t)) {
 8849			if (reg->type != SCALAR_VALUE) {
 8850				verbose(env, "R%d is not a scalar\n", regno);
 8851				return -EINVAL;
 8852			}
 8853
 8854			if (is_kfunc_arg_constant(meta->btf, &args[i])) {
 8855				if (meta->arg_constant.found) {
 8856					verbose(env, "verifier internal error: only one constant argument permitted\n");
 8857					return -EFAULT;
 8858				}
 8859				if (!tnum_is_const(reg->var_off)) {
 8860					verbose(env, "R%d must be a known constant\n", regno);
 8861					return -EINVAL;
 8862				}
 8863				ret = mark_chain_precision(env, regno);
 8864				if (ret < 0)
 8865					return ret;
 8866				meta->arg_constant.found = true;
 8867				meta->arg_constant.value = reg->var_off.value;
 8868			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
 8869				meta->r0_rdonly = true;
 8870				is_ret_buf_sz = true;
 8871			} else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
 8872				is_ret_buf_sz = true;
 8873			}
 8874
 8875			if (is_ret_buf_sz) {
 8876				if (meta->r0_size) {
 8877					verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
 8878					return -EINVAL;
 8879				}
 8880
 8881				if (!tnum_is_const(reg->var_off)) {
 8882					verbose(env, "R%d is not a const\n", regno);
 8883					return -EINVAL;
 8884				}
 8885
 8886				meta->r0_size = reg->var_off.value;
 8887				ret = mark_chain_precision(env, regno);
 8888				if (ret)
 8889					return ret;
 8890			}
 8891			continue;
 8892		}
 8893
 8894		if (!btf_type_is_ptr(t)) {
 8895			verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
 8896			return -EINVAL;
 8897		}
 8898
 8899		if (reg->ref_obj_id) {
 8900			if (is_kfunc_release(meta) && meta->ref_obj_id) {
 8901				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
 8902					regno, reg->ref_obj_id,
 8903					meta->ref_obj_id);
 8904				return -EFAULT;
 8905			}
 8906			meta->ref_obj_id = reg->ref_obj_id;
 8907			if (is_kfunc_release(meta))
 8908				meta->release_regno = regno;
 8909		}
 8910
 8911		ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
 8912		ref_tname = btf_name_by_offset(btf, ref_t->name_off);
 8913
 8914		kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
 8915		if (kf_arg_type < 0)
 8916			return kf_arg_type;
 8917
 8918		switch (kf_arg_type) {
 8919		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
 8920		case KF_ARG_PTR_TO_BTF_ID:
 8921			if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
 8922				break;
 8923
 8924			if (!is_trusted_reg(reg)) {
 8925				if (!is_kfunc_rcu(meta)) {
 8926					verbose(env, "R%d must be referenced or trusted\n", regno);
 8927					return -EINVAL;
 8928				}
 8929				if (!is_rcu_reg(reg)) {
 8930					verbose(env, "R%d must be a rcu pointer\n", regno);
 8931					return -EINVAL;
 8932				}
 8933			}
 8934
 8935			fallthrough;
 8936		case KF_ARG_PTR_TO_CTX:
 8937			/* Trusted arguments have the same offset checks as release arguments */
 8938			arg_type |= OBJ_RELEASE;
 8939			break;
 8940		case KF_ARG_PTR_TO_KPTR:
 8941		case KF_ARG_PTR_TO_DYNPTR:
 8942		case KF_ARG_PTR_TO_LIST_HEAD:
 8943		case KF_ARG_PTR_TO_LIST_NODE:
 8944		case KF_ARG_PTR_TO_MEM:
 8945		case KF_ARG_PTR_TO_MEM_SIZE:
 8946			/* Trusted by default */
 8947			break;
 8948		default:
 8949			WARN_ON_ONCE(1);
 8950			return -EFAULT;
 8951		}
 8952
 8953		if (is_kfunc_release(meta) && reg->ref_obj_id)
 8954			arg_type |= OBJ_RELEASE;
 8955		ret = check_func_arg_reg_off(env, reg, regno, arg_type);
 8956		if (ret < 0)
 8957			return ret;
 8958
 8959		switch (kf_arg_type) {
 8960		case KF_ARG_PTR_TO_CTX:
 8961			if (reg->type != PTR_TO_CTX) {
 8962				verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
 8963				return -EINVAL;
 8964			}
 8965
 8966			if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
 8967				ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
 8968				if (ret < 0)
 8969					return -EINVAL;
 8970				meta->ret_btf_id  = ret;
 8971			}
 8972			break;
 8973		case KF_ARG_PTR_TO_ALLOC_BTF_ID:
 8974			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
 8975				verbose(env, "arg#%d expected pointer to allocated object\n", i);
 8976				return -EINVAL;
 8977			}
 8978			if (!reg->ref_obj_id) {
 8979				verbose(env, "allocated object must be referenced\n");
 8980				return -EINVAL;
 8981			}
 8982			if (meta->btf == btf_vmlinux &&
 8983			    meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
 8984				meta->arg_obj_drop.btf = reg->btf;
 8985				meta->arg_obj_drop.btf_id = reg->btf_id;
 8986			}
 8987			break;
 8988		case KF_ARG_PTR_TO_KPTR:
 8989			if (reg->type != PTR_TO_MAP_VALUE) {
 8990				verbose(env, "arg#0 expected pointer to map value\n");
 8991				return -EINVAL;
 8992			}
 8993			ret = process_kf_arg_ptr_to_kptr(env, reg, ref_t, ref_tname, meta, i);
 8994			if (ret < 0)
 8995				return ret;
 8996			break;
 8997		case KF_ARG_PTR_TO_DYNPTR:
 8998			if (reg->type != PTR_TO_STACK &&
 8999			    reg->type != CONST_PTR_TO_DYNPTR) {
 9000				verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
 9001				return -EINVAL;
 9002			}
 9003
 9004			ret = process_dynptr_func(env, regno, ARG_PTR_TO_DYNPTR | MEM_RDONLY, NULL);
 9005			if (ret < 0)
 9006				return ret;
 9007			break;
 9008		case KF_ARG_PTR_TO_LIST_HEAD:
 9009			if (reg->type != PTR_TO_MAP_VALUE &&
 9010			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
 9011				verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
 9012				return -EINVAL;
 9013			}
 9014			if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
 9015				verbose(env, "allocated object must be referenced\n");
 9016				return -EINVAL;
 9017			}
 9018			ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
 9019			if (ret < 0)
 9020				return ret;
 9021			break;
 9022		case KF_ARG_PTR_TO_LIST_NODE:
 9023			if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
 9024				verbose(env, "arg#%d expected pointer to allocated object\n", i);
 9025				return -EINVAL;
 9026			}
 9027			if (!reg->ref_obj_id) {
 9028				verbose(env, "allocated object must be referenced\n");
 9029				return -EINVAL;
 9030			}
 9031			ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
 9032			if (ret < 0)
 9033				return ret;
 9034			break;
 9035		case KF_ARG_PTR_TO_BTF_ID:
 9036			/* Only base_type is checked, further checks are done here */
 9037			if ((base_type(reg->type) != PTR_TO_BTF_ID ||
 9038			     (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
 9039			    !reg2btf_ids[base_type(reg->type)]) {
 9040				verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
 9041				verbose(env, "expected %s or socket\n",
 9042					reg_type_str(env, base_type(reg->type) |
 9043							  (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
 9044				return -EINVAL;
 9045			}
 9046			ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
 9047			if (ret < 0)
 9048				return ret;
 9049			break;
 9050		case KF_ARG_PTR_TO_MEM:
 9051			resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
 9052			if (IS_ERR(resolve_ret)) {
 9053				verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
 9054					i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
 9055				return -EINVAL;
 9056			}
 9057			ret = check_mem_reg(env, reg, regno, type_size);
 9058			if (ret < 0)
 9059				return ret;
 9060			break;
 9061		case KF_ARG_PTR_TO_MEM_SIZE:
 9062			ret = check_kfunc_mem_size_reg(env, &regs[regno + 1], regno + 1);
 9063			if (ret < 0) {
 9064				verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
 9065				return ret;
 9066			}
 9067			/* Skip next '__sz' argument */
 9068			i++;
 9069			break;
 9070		}
 9071	}
 9072
 9073	if (is_kfunc_release(meta) && !meta->release_regno) {
 9074		verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
 9075			func_name);
 9076		return -EINVAL;
 9077	}
 9078
 9079	return 0;
 9080}
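
     /* Example (illustrative; bpf_task_acquire()/bpf_task_release() stand in
      * for any KF_ACQUIRE/KF_RELEASE kfunc pair): how release_regno is used.
      *
      *    acquired = bpf_task_acquire(task);
      *    ...
      *    bpf_task_release(acquired);
      *
      * The argument to the release kfunc carries a ref_obj_id, so the loop
      * above records it in meta->release_regno and check_kfunc_call() drops
      * the corresponding reference; passing an unreferenced pointer instead
      * fails with "release kernel function ... expects refcounted
      * PTR_TO_BTF_ID".
      */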
 9081
 9082static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 9083			    int *insn_idx_p)
 9084{
 9085	const struct btf_type *t, *func, *func_proto, *ptr_type;
 9086	struct bpf_reg_state *regs = cur_regs(env);
 9087	const char *func_name, *ptr_type_name;
 9088	bool sleepable, rcu_lock, rcu_unlock;
 9089	struct bpf_kfunc_call_arg_meta meta;
 9090	u32 i, nargs, func_id, ptr_type_id;
 9091	int err, insn_idx = *insn_idx_p;
 9092	const struct btf_param *args;
 9093	const struct btf_type *ret_t;
 9094	struct btf *desc_btf;
 9095	u32 *kfunc_flags;
 9096
 9097	/* skip for now, but return error when we find this in fixup_kfunc_call */
 9098	if (!insn->imm)
 9099		return 0;
 9100
 9101	desc_btf = find_kfunc_desc_btf(env, insn->off);
 9102	if (IS_ERR(desc_btf))
 9103		return PTR_ERR(desc_btf);
 9104
 9105	func_id = insn->imm;
 9106	func = btf_type_by_id(desc_btf, func_id);
 9107	func_name = btf_name_by_offset(desc_btf, func->name_off);
 9108	func_proto = btf_type_by_id(desc_btf, func->type);
 9109
 9110	kfunc_flags = btf_kfunc_id_set_contains(desc_btf, resolve_prog_type(env->prog), func_id);
 9111	if (!kfunc_flags) {
 9112		verbose(env, "calling kernel function %s is not allowed\n",
 9113			func_name);
 9114		return -EACCES;
 9115	}
 9116
 9117	/* Prepare kfunc call metadata */
 9118	memset(&meta, 0, sizeof(meta));
 9119	meta.btf = desc_btf;
 9120	meta.func_id = func_id;
 9121	meta.kfunc_flags = *kfunc_flags;
 9122	meta.func_proto = func_proto;
 9123	meta.func_name = func_name;
 9124
 9125	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
 9126		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
 9127		return -EACCES;
 9128	}
 9129
 9130	sleepable = is_kfunc_sleepable(&meta);
 9131	if (sleepable && !env->prog->aux->sleepable) {
 9132		verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
 9133		return -EACCES;
 9134	}
 9135
 9136	rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
 9137	rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
 9138	if ((rcu_lock || rcu_unlock) && !env->rcu_tag_supported) {
 9139		verbose(env, "no vmlinux btf rcu tag support for kfunc %s\n", func_name);
 9140		return -EACCES;
 9141	}
 9142
 9143	if (env->cur_state->active_rcu_lock) {
 9144		struct bpf_func_state *state;
 9145		struct bpf_reg_state *reg;
 9146
 9147		if (rcu_lock) {
 9148			verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
 9149			return -EINVAL;
 9150		} else if (rcu_unlock) {
 9151			bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
 9152				if (reg->type & MEM_RCU) {
 9153					reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
 9154					reg->type |= PTR_UNTRUSTED;
 9155				}
 9156			}));
 9157			env->cur_state->active_rcu_lock = false;
 9158		} else if (sleepable) {
 9159			verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
 9160			return -EACCES;
 9161		}
 9162	} else if (rcu_lock) {
 9163		env->cur_state->active_rcu_lock = true;
 9164	} else if (rcu_unlock) {
 9165		verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
 9166		return -EINVAL;
 9167	}
 9168
 9169	/* Check the arguments */
 9170	err = check_kfunc_args(env, &meta);
 9171	if (err < 0)
 9172		return err;
 9173	/* For a release kfunc, check_kfunc_args() records the register number of
 9174	 * the refcounted PTR_TO_BTF_ID in bpf_kfunc_call_arg_meta; do the release now.
 9175	 */
 9176	if (meta.release_regno) {
 9177		err = release_reference(env, regs[meta.release_regno].ref_obj_id);
 9178		if (err) {
 9179			verbose(env, "kfunc %s#%d reference has not been acquired before\n",
 9180				func_name, func_id);
 9181			return err;
 9182		}
 9183	}
 9184
 9185	for (i = 0; i < CALLER_SAVED_REGS; i++)
 9186		mark_reg_not_init(env, regs, caller_saved[i]);
 9187
 9188	/* Check return type */
 9189	t = btf_type_skip_modifiers(desc_btf, func_proto->type, NULL);
 9190
 9191	if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
 9192		/* Only exception is bpf_obj_new_impl */
 9193		if (meta.btf != btf_vmlinux || meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl]) {
 9194			verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
 9195			return -EINVAL;
 9196		}
 9197	}
 9198
 9199	if (btf_type_is_scalar(t)) {
 9200		mark_reg_unknown(env, regs, BPF_REG_0);
 9201		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
 9202	} else if (btf_type_is_ptr(t)) {
 9203		ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
 9204
 9205		if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
 9206			if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
 9207				struct btf *ret_btf;
 9208				u32 ret_btf_id;
 9209
 9210				if (unlikely(!bpf_global_ma_set))
 9211					return -ENOMEM;
 9212
 9213				if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
 9214					verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
 9215					return -EINVAL;
 9216				}
 9217
 9218				ret_btf = env->prog->aux->btf;
 9219				ret_btf_id = meta.arg_constant.value;
 9220
 9221				/* This may be NULL due to user not supplying a BTF */
 9222				if (!ret_btf) {
 9223					verbose(env, "bpf_obj_new requires prog BTF\n");
 9224					return -EINVAL;
 9225				}
 9226
 9227				ret_t = btf_type_by_id(ret_btf, ret_btf_id);
 9228				if (!ret_t || !__btf_type_is_struct(ret_t)) {
 9229					verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
 9230					return -EINVAL;
 9231				}
 9232
 9233				mark_reg_known_zero(env, regs, BPF_REG_0);
 9234				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 9235				regs[BPF_REG_0].btf = ret_btf;
 9236				regs[BPF_REG_0].btf_id = ret_btf_id;
 9237
 9238				env->insn_aux_data[insn_idx].obj_new_size = ret_t->size;
 9239				env->insn_aux_data[insn_idx].kptr_struct_meta =
 9240					btf_find_struct_meta(ret_btf, ret_btf_id);
 9241			} else if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
 9242				env->insn_aux_data[insn_idx].kptr_struct_meta =
 9243					btf_find_struct_meta(meta.arg_obj_drop.btf,
 9244							     meta.arg_obj_drop.btf_id);
 9245			} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
 9246				   meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
 9247				struct btf_field *field = meta.arg_list_head.field;
 9248
 9249				mark_reg_known_zero(env, regs, BPF_REG_0);
 9250				regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
 9251				regs[BPF_REG_0].btf = field->list_head.btf;
 9252				regs[BPF_REG_0].btf_id = field->list_head.value_btf_id;
 9253				regs[BPF_REG_0].off = field->list_head.node_offset;
 9254			} else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
 9255				mark_reg_known_zero(env, regs, BPF_REG_0);
 9256				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
 9257				regs[BPF_REG_0].btf = desc_btf;
 9258				regs[BPF_REG_0].btf_id = meta.ret_btf_id;
 9259			} else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 9260				ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
 9261				if (!ret_t || !btf_type_is_struct(ret_t)) {
 9262					verbose(env,
 9263						"kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
 9264					return -EINVAL;
 9265				}
 9266
 9267				mark_reg_known_zero(env, regs, BPF_REG_0);
 9268				regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
 9269				regs[BPF_REG_0].btf = desc_btf;
 9270				regs[BPF_REG_0].btf_id = meta.arg_constant.value;
 9271			} else {
 9272				verbose(env, "kernel function %s unhandled dynamic return type\n",
 9273					meta.func_name);
 9274				return -EFAULT;
 9275			}
 9276		} else if (!__btf_type_is_struct(ptr_type)) {
 9277			if (!meta.r0_size) {
 9278				ptr_type_name = btf_name_by_offset(desc_btf,
 9279								   ptr_type->name_off);
 9280				verbose(env,
 9281					"kernel function %s returns pointer type %s %s is not supported\n",
 9282					func_name,
 9283					btf_type_str(ptr_type),
 9284					ptr_type_name);
 9285				return -EINVAL;
 9286			}
 9287
 9288			mark_reg_known_zero(env, regs, BPF_REG_0);
 9289			regs[BPF_REG_0].type = PTR_TO_MEM;
 9290			regs[BPF_REG_0].mem_size = meta.r0_size;
 9291
 9292			if (meta.r0_rdonly)
 9293				regs[BPF_REG_0].type |= MEM_RDONLY;
 9294
 9295			/* Ensures we don't access the memory after a release_reference() */
 9296			if (meta.ref_obj_id)
 9297				regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
 9298		} else {
 9299			mark_reg_known_zero(env, regs, BPF_REG_0);
 9300			regs[BPF_REG_0].btf = desc_btf;
 9301			regs[BPF_REG_0].type = PTR_TO_BTF_ID;
 9302			regs[BPF_REG_0].btf_id = ptr_type_id;
 9303		}
 9304
 9305		if (is_kfunc_ret_null(&meta)) {
 9306			regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
 9307			/* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
 9308			regs[BPF_REG_0].id = ++env->id_gen;
 9309		}
 9310		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
 9311		if (is_kfunc_acquire(&meta)) {
 9312			int id = acquire_reference_state(env, insn_idx);
 9313
 9314			if (id < 0)
 9315				return id;
 9316			if (is_kfunc_ret_null(&meta))
 9317				regs[BPF_REG_0].id = id;
 9318			regs[BPF_REG_0].ref_obj_id = id;
 9319		}
 9320		if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
 9321			regs[BPF_REG_0].id = ++env->id_gen;
 9322	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 9323
 9324	nargs = btf_type_vlen(func_proto);
 9325	args = (const struct btf_param *)(func_proto + 1);
 9326	for (i = 0; i < nargs; i++) {
 9327		u32 regno = i + 1;
 9328
 9329		t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
 9330		if (btf_type_is_ptr(t))
 9331			mark_btf_func_reg_size(env, regno, sizeof(void *));
 9332		else
 9333			/* scalar. ensured by check_kfunc_args() */
 9334			mark_btf_func_reg_size(env, regno, t->size);
 9335	}
 9336
 9337	return 0;
 9338}
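
     /* Example (illustrative BPF-side sketch): how the bpf_obj_new_impl() /
      * bpf_obj_drop_impl() special cases above are reached. The
      * bpf_obj_new()/bpf_obj_drop() convenience macros (assumed from BPF-side
      * headers) pass the program-local BTF type ID as the hidden constant
      * argument recorded in meta.arg_constant:
      *
      *    struct foo *f = bpf_obj_new(struct foo);
      *    // R0 is PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL against prog BTF
      *    if (!f)
      *            return 0;
      *    bpf_obj_drop(f);        // releases the acquired reference
      */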
 9339
 9340static bool signed_add_overflows(s64 a, s64 b)
 9341{
 9342	/* Do the add in u64, where overflow is well-defined */
 9343	s64 res = (s64)((u64)a + (u64)b);
 9344
 9345	if (b < 0)
 9346		return res > a;
 9347	return res < a;
 9348}
 9349
 9350static bool signed_add32_overflows(s32 a, s32 b)
 9351{
 9352	/* Do the add in u32, where overflow is well-defined */
 9353	s32 res = (s32)((u32)a + (u32)b);
 9354
 9355	if (b < 0)
 9356		return res > a;
 9357	return res < a;
 9358}
 9359
 9360static bool signed_sub_overflows(s64 a, s64 b)
 9361{
 9362	/* Do the sub in u64, where overflow is well-defined */
 9363	s64 res = (s64)((u64)a - (u64)b);
 9364
 9365	if (b < 0)
 9366		return res < a;
 9367	return res > a;
 9368}
 9369
 9370static bool signed_sub32_overflows(s32 a, s32 b)
 9371{
 9372	/* Do the sub in u32, where overflow is well-defined */
 9373	s32 res = (s32)((u32)a - (u32)b);
 9374
 9375	if (b < 0)
 9376		return res < a;
 9377	return res > a;
 9378}
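
     /* Example: signed_add_overflows(S64_MAX, 1) is true because the wrapped
      * result (S64_MIN) is smaller than the first operand, and
      * signed_sub_overflows(S64_MIN, 1) is true because the wrapped result
      * (S64_MAX) is larger. The 32-bit variants behave the same way on s32.
      */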
 9379
 9380static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 9381				  const struct bpf_reg_state *reg,
 9382				  enum bpf_reg_type type)
 9383{
 9384	bool known = tnum_is_const(reg->var_off);
 9385	s64 val = reg->var_off.value;
 9386	s64 smin = reg->smin_value;
 9387
 9388	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
 9389		verbose(env, "math between %s pointer and %lld is not allowed\n",
 9390			reg_type_str(env, type), val);
 9391		return false;
 9392	}
 9393
 9394	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
 9395		verbose(env, "%s pointer offset %d is not allowed\n",
 9396			reg_type_str(env, type), reg->off);
 9397		return false;
 9398	}
 9399
 9400	if (smin == S64_MIN) {
 9401		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
 9402			reg_type_str(env, type));
 9403		return false;
 9404	}
 9405
 9406	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
 9407		verbose(env, "value %lld makes %s pointer be out of bounds\n",
 9408			smin, reg_type_str(env, type));
 9409		return false;
 9410	}
 9411
 9412	return true;
 9413}
 9414
 9415enum {
 9416	REASON_BOUNDS	= -1,
 9417	REASON_TYPE	= -2,
 9418	REASON_PATHS	= -3,
 9419	REASON_LIMIT	= -4,
 9420	REASON_STACK	= -5,
 9421};
 9422
 9423static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 9424			      u32 *alu_limit, bool mask_to_left)
 9425{
 9426	u32 max = 0, ptr_limit = 0;
 9427
 9428	switch (ptr_reg->type) {
 9429	case PTR_TO_STACK:
 9430		/* Offset 0 is out-of-bounds, but an acceptable starting point
 9431		 * for masking in the left direction; see BPF_REG_FP. Also, an
 9432		 * unknown scalar offset, where we would need to deal with
 9433		 * min/max bounds, is currently prohibited for unprivileged.
 9434		 */
 9435		max = MAX_BPF_STACK + mask_to_left;
 9436		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
 9437		break;
 9438	case PTR_TO_MAP_VALUE:
 9439		max = ptr_reg->map_ptr->value_size;
 9440		ptr_limit = (mask_to_left ?
 9441			     ptr_reg->smin_value :
 9442			     ptr_reg->umax_value) + ptr_reg->off;
 9443		break;
 9444	default:
 9445		return REASON_TYPE;
 9446	}
 9447
 9448	if (ptr_limit >= max)
 9449		return REASON_LIMIT;
 9450	*alu_limit = ptr_limit;
 9451	return 0;
 9452}
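
     /* Example: a PTR_TO_STACK register with off == -64 and a constant
      * var_off of 0 yields ptr_limit == 64, i.e. at most 64 bytes may be
      * masked off in the direction being sanitized. A PTR_TO_MAP_VALUE
      * register with off == 0 into a 64-byte value and umax_value == 16
      * yields ptr_limit == 16 when not masking to the left.
      */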
 9453
 9454static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
 9455				    const struct bpf_insn *insn)
 9456{
 9457	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
 9458}
 9459
 9460static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
 9461				       u32 alu_state, u32 alu_limit)
 9462{
 9463	/* If we arrived here from different branches with different
 9464	 * state or limits to sanitize, then this won't work.
 9465	 */
 9466	if (aux->alu_state &&
 9467	    (aux->alu_state != alu_state ||
 9468	     aux->alu_limit != alu_limit))
 9469		return REASON_PATHS;
 9470
 9471	/* Corresponding fixup done in do_misc_fixups(). */
 9472	aux->alu_state = alu_state;
 9473	aux->alu_limit = alu_limit;
 9474	return 0;
 9475}
 9476
 9477static int sanitize_val_alu(struct bpf_verifier_env *env,
 9478			    struct bpf_insn *insn)
 9479{
 9480	struct bpf_insn_aux_data *aux = cur_aux(env);
 9481
 9482	if (can_skip_alu_sanitation(env, insn))
 9483		return 0;
 9484
 9485	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
 9486}
 9487
 9488static bool sanitize_needed(u8 opcode)
 9489{
 9490	return opcode == BPF_ADD || opcode == BPF_SUB;
 9491}
 9492
 9493struct bpf_sanitize_info {
 9494	struct bpf_insn_aux_data aux;
 9495	bool mask_to_left;
 9496};
 9497
 9498static struct bpf_verifier_state *
 9499sanitize_speculative_path(struct bpf_verifier_env *env,
 9500			  const struct bpf_insn *insn,
 9501			  u32 next_idx, u32 curr_idx)
 9502{
 9503	struct bpf_verifier_state *branch;
 9504	struct bpf_reg_state *regs;
 9505
 9506	branch = push_stack(env, next_idx, curr_idx, true);
 9507	if (branch && insn) {
 9508		regs = branch->frame[branch->curframe]->regs;
 9509		if (BPF_SRC(insn->code) == BPF_K) {
 9510			mark_reg_unknown(env, regs, insn->dst_reg);
 9511		} else if (BPF_SRC(insn->code) == BPF_X) {
 9512			mark_reg_unknown(env, regs, insn->dst_reg);
 9513			mark_reg_unknown(env, regs, insn->src_reg);
 9514		}
 9515	}
 9516	return branch;
 9517}
 9518
 9519static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 9520			    struct bpf_insn *insn,
 9521			    const struct bpf_reg_state *ptr_reg,
 9522			    const struct bpf_reg_state *off_reg,
 9523			    struct bpf_reg_state *dst_reg,
 9524			    struct bpf_sanitize_info *info,
 9525			    const bool commit_window)
 9526{
 9527	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
 9528	struct bpf_verifier_state *vstate = env->cur_state;
 9529	bool off_is_imm = tnum_is_const(off_reg->var_off);
 9530	bool off_is_neg = off_reg->smin_value < 0;
 9531	bool ptr_is_dst_reg = ptr_reg == dst_reg;
 9532	u8 opcode = BPF_OP(insn->code);
 9533	u32 alu_state, alu_limit;
 9534	struct bpf_reg_state tmp;
 9535	bool ret;
 9536	int err;
 9537
 9538	if (can_skip_alu_sanitation(env, insn))
 9539		return 0;
 9540
 9541	/* We already marked aux for masking from non-speculative
 9542	 * paths, which is how we got here in the first place. We only
 9543	 * care about exploring bad accesses from here.
 9544	 */
 9545	if (vstate->speculative)
 9546		goto do_sim;
 9547
 9548	if (!commit_window) {
 9549		if (!tnum_is_const(off_reg->var_off) &&
 9550		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
 9551			return REASON_BOUNDS;
 9552
 9553		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
 9554				     (opcode == BPF_SUB && !off_is_neg);
 9555	}
 9556
 9557	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
 9558	if (err < 0)
 9559		return err;
 9560
 9561	if (commit_window) {
 9562		/* In commit phase we narrow the masking window based on
 9563		 * the observed pointer move after the simulated operation.
 9564		 */
 9565		alu_state = info->aux.alu_state;
 9566		alu_limit = abs(info->aux.alu_limit - alu_limit);
 9567	} else {
 9568		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
 9569		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
 9570		alu_state |= ptr_is_dst_reg ?
 9571			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
 9572
 9573		/* Limit pruning on unknown scalars to enable deep search for
 9574		 * potential masking differences from other program paths.
 9575		 */
 9576		if (!off_is_imm)
 9577			env->explore_alu_limits = true;
 9578	}
 9579
 9580	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
 9581	if (err < 0)
 9582		return err;
 9583do_sim:
 9584	/* If we're in commit phase, we're done here given we already
 9585	 * pushed the truncated dst_reg into the speculative verification
 9586	 * stack.
 9587	 *
 9588	 * Also, when the register is a known constant, we rewrite the register-based
 9589	 * operation to an immediate-based one, and thus do not need masking (and as
 9590	 * a consequence, do not need to simulate the zero-truncation either).
 9591	 */
 9592	if (commit_window || off_is_imm)
 9593		return 0;
 9594
 9595	/* Simulate and find potential out-of-bounds access under
 9596	 * speculative execution from truncation as a result of
 9597	 * masking when off was not within expected range. If off
 9598	 * sits in dst, then we temporarily need to move ptr there
 9599	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
 9600	 * for cases where we use K-based arithmetic in one direction
 9601	 * and truncated reg-based in the other in order to explore
 9602	 * bad access.
 9603	 */
 9604	if (!ptr_is_dst_reg) {
 9605		tmp = *dst_reg;
 9606		copy_register_state(dst_reg, ptr_reg);
 9607	}
 9608	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
 9609					env->insn_idx);
 9610	if (!ptr_is_dst_reg && ret)
 9611		*dst_reg = tmp;
 9612	return !ret ? REASON_STACK : 0;
 9613}
 9614
 9615static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
 9616{
 9617	struct bpf_verifier_state *vstate = env->cur_state;
 9618
 9619	/* If we simulate paths under speculation, we don't update the
 9620	 * insn as 'seen' such that when we verify unreachable paths in
 9621	 * the non-speculative domain, sanitize_dead_code() can still
 9622	 * rewrite/sanitize them.
 9623	 */
 9624	if (!vstate->speculative)
 9625		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
 9626}
 9627
 9628static int sanitize_err(struct bpf_verifier_env *env,
 9629			const struct bpf_insn *insn, int reason,
 9630			const struct bpf_reg_state *off_reg,
 9631			const struct bpf_reg_state *dst_reg)
 9632{
 9633	static const char *err = "pointer arithmetic with it prohibited for !root";
 9634	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
 9635	u32 dst = insn->dst_reg, src = insn->src_reg;
 9636
 9637	switch (reason) {
 9638	case REASON_BOUNDS:
 9639		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
 9640			off_reg == dst_reg ? dst : src, err);
 9641		break;
 9642	case REASON_TYPE:
 9643		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
 9644			off_reg == dst_reg ? src : dst, err);
 9645		break;
 9646	case REASON_PATHS:
 9647		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
 9648			dst, op, err);
 9649		break;
 9650	case REASON_LIMIT:
 9651		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
 9652			dst, op, err);
 9653		break;
 9654	case REASON_STACK:
 9655		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
 9656			dst, err);
 9657		break;
 9658	default:
 9659		verbose(env, "verifier internal error: unknown reason (%d)\n",
 9660			reason);
 9661		break;
 9662	}
 9663
 9664	return -EACCES;
 9665}
 9666
 9667/* check that stack access falls within stack limits and that 'reg' doesn't
 9668 * have a variable offset.
 9669 *
 9670 * Variable offset is prohibited for unprivileged mode for simplicity since it
 9671 * requires corresponding support in Spectre masking for stack ALU.  See also
 9672 * retrieve_ptr_limit().
 9673 *
 9674 *
 9675 * 'off' includes 'reg->off'.
 9676 */
 9677static int check_stack_access_for_ptr_arithmetic(
 9678				struct bpf_verifier_env *env,
 9679				int regno,
 9680				const struct bpf_reg_state *reg,
 9681				int off)
 9682{
 9683	if (!tnum_is_const(reg->var_off)) {
 9684		char tn_buf[48];
 9685
 9686		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 9687		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
 9688			regno, tn_buf, off);
 9689		return -EACCES;
 9690	}
 9691
 9692	if (off >= 0 || off < -MAX_BPF_STACK) {
 9693		verbose(env, "R%d stack pointer arithmetic goes out of range, "
 9694			"prohibited for !root; off=%d\n", regno, off);
 9695		return -EACCES;
 9696	}
 9697
 9698	return 0;
 9699}
 9700
 9701static int sanitize_check_bounds(struct bpf_verifier_env *env,
 9702				 const struct bpf_insn *insn,
 9703				 const struct bpf_reg_state *dst_reg)
 9704{
 9705	u32 dst = insn->dst_reg;
 9706
 9707	/* For unprivileged we require that resulting offset must be in bounds
 9708	 * in order to be able to sanitize access later on.
 9709	 */
 9710	if (env->bypass_spec_v1)
 9711		return 0;
 9712
 9713	switch (dst_reg->type) {
 9714	case PTR_TO_STACK:
 9715		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
 9716					dst_reg->off + dst_reg->var_off.value))
 9717			return -EACCES;
 9718		break;
 9719	case PTR_TO_MAP_VALUE:
 9720		if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
 9721			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
 9722				"prohibited for !root\n", dst);
 9723			return -EACCES;
 9724		}
 9725		break;
 9726	default:
 9727		break;
 9728	}
 9729
 9730	return 0;
 9731}
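
     /* Example: for an unprivileged program and a 64-byte map value, a
      * constant 'val += 128' on a PTR_TO_MAP_VALUE register is rejected here
      * ("pointer arithmetic of map value goes out of range") even before any
      * load or store, because the resulting offset could not be sanitized
      * later on.
      */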
 9732
 9733/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
 9734 * The caller should also handle the BPF_MOV case separately.
 9735 * If we return -EACCES, the caller may want to try again, treating the pointer
 9736 * as a scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
 9737 */
 9738static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 9739				   struct bpf_insn *insn,
 9740				   const struct bpf_reg_state *ptr_reg,
 9741				   const struct bpf_reg_state *off_reg)
 9742{
 9743	struct bpf_verifier_state *vstate = env->cur_state;
 9744	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 9745	struct bpf_reg_state *regs = state->regs, *dst_reg;
 9746	bool known = tnum_is_const(off_reg->var_off);
 9747	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
 9748	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
 9749	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
 9750	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
 9751	struct bpf_sanitize_info info = {};
 9752	u8 opcode = BPF_OP(insn->code);
 9753	u32 dst = insn->dst_reg;
 9754	int ret;
 9755
 9756	dst_reg = &regs[dst];
 9757
 9758	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
 9759	    smin_val > smax_val || umin_val > umax_val) {
 9760		/* Taint dst register if offset had invalid bounds derived from
 9761		 * e.g. dead branches.
 9762		 */
 9763		__mark_reg_unknown(env, dst_reg);
 9764		return 0;
 9765	}
 9766
 9767	if (BPF_CLASS(insn->code) != BPF_ALU64) {
 9768		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
 9769		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
 9770			__mark_reg_unknown(env, dst_reg);
 9771			return 0;
 9772		}
 9773
 9774		verbose(env,
 9775			"R%d 32-bit pointer arithmetic prohibited\n",
 9776			dst);
 9777		return -EACCES;
 9778	}
 9779
 9780	if (ptr_reg->type & PTR_MAYBE_NULL) {
 9781		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
 9782			dst, reg_type_str(env, ptr_reg->type));
 9783		return -EACCES;
 9784	}
 9785
 9786	switch (base_type(ptr_reg->type)) {
 9787	case CONST_PTR_TO_MAP:
 9788		/* smin_val represents the known value */
 9789		if (known && smin_val == 0 && opcode == BPF_ADD)
 9790			break;
 9791		fallthrough;
 9792	case PTR_TO_PACKET_END:
 9793	case PTR_TO_SOCKET:
 9794	case PTR_TO_SOCK_COMMON:
 9795	case PTR_TO_TCP_SOCK:
 9796	case PTR_TO_XDP_SOCK:
 9797		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 9798			dst, reg_type_str(env, ptr_reg->type));
 9799		return -EACCES;
 9800	default:
 9801		break;
 9802	}
 9803
 9804	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
 9805	 * The id may be overwritten later if we create a new variable offset.
 9806	 */
 9807	dst_reg->type = ptr_reg->type;
 9808	dst_reg->id = ptr_reg->id;
 9809
 9810	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
 9811	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
 9812		return -EINVAL;
 9813
 9814	/* pointer types do not carry 32-bit bounds at the moment. */
 9815	__mark_reg32_unbounded(dst_reg);
 9816
 9817	if (sanitize_needed(opcode)) {
 9818		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
 9819				       &info, false);
 9820		if (ret < 0)
 9821			return sanitize_err(env, insn, ret, off_reg, dst_reg);
 9822	}
 9823
 9824	switch (opcode) {
 9825	case BPF_ADD:
 9826		/* We can take a fixed offset as long as it doesn't overflow
 9827		 * the s32 'off' field
 9828		 */
 9829		if (known && (ptr_reg->off + smin_val ==
 9830			      (s64)(s32)(ptr_reg->off + smin_val))) {
 9831			/* pointer += K.  Accumulate it into fixed offset */
 9832			dst_reg->smin_value = smin_ptr;
 9833			dst_reg->smax_value = smax_ptr;
 9834			dst_reg->umin_value = umin_ptr;
 9835			dst_reg->umax_value = umax_ptr;
 9836			dst_reg->var_off = ptr_reg->var_off;
 9837			dst_reg->off = ptr_reg->off + smin_val;
 9838			dst_reg->raw = ptr_reg->raw;
 9839			break;
 9840		}
 9841		/* A new variable offset is created.  Note that off_reg->off
 9842		 * == 0, since it's a scalar.
 9843		 * dst_reg gets the pointer type and since some positive
 9844		 * integer value was added to the pointer, give it a new 'id'
 9845		 * if it's a PTR_TO_PACKET.
 9846		 * This creates a new 'base' pointer, off_reg (variable) gets
 9847		 * added into the variable offset, and we copy the fixed offset
 9848		 * from ptr_reg.
 9849		 */
 9850		if (signed_add_overflows(smin_ptr, smin_val) ||
 9851		    signed_add_overflows(smax_ptr, smax_val)) {
 9852			dst_reg->smin_value = S64_MIN;
 9853			dst_reg->smax_value = S64_MAX;
 9854		} else {
 9855			dst_reg->smin_value = smin_ptr + smin_val;
 9856			dst_reg->smax_value = smax_ptr + smax_val;
 9857		}
 9858		if (umin_ptr + umin_val < umin_ptr ||
 9859		    umax_ptr + umax_val < umax_ptr) {
 9860			dst_reg->umin_value = 0;
 9861			dst_reg->umax_value = U64_MAX;
 9862		} else {
 9863			dst_reg->umin_value = umin_ptr + umin_val;
 9864			dst_reg->umax_value = umax_ptr + umax_val;
 9865		}
 9866		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
 9867		dst_reg->off = ptr_reg->off;
 9868		dst_reg->raw = ptr_reg->raw;
 9869		if (reg_is_pkt_pointer(ptr_reg)) {
 9870			dst_reg->id = ++env->id_gen;
 9871			/* something was added to pkt_ptr, set range to zero */
 9872			memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
 9873		}
 9874		break;
 9875	case BPF_SUB:
 9876		if (dst_reg == off_reg) {
 9877			/* scalar -= pointer.  Creates an unknown scalar */
 9878			verbose(env, "R%d tried to subtract pointer from scalar\n",
 9879				dst);
 9880			return -EACCES;
 9881		}
 9882		/* We don't allow subtraction from FP, because (according to
 9883		 * test_verifier.c test "invalid fp arithmetic") JITs might not
 9884		 * be able to deal with it.
 9885		 */
 9886		if (ptr_reg->type == PTR_TO_STACK) {
 9887			verbose(env, "R%d subtraction from stack pointer prohibited\n",
 9888				dst);
 9889			return -EACCES;
 9890		}
 9891		if (known && (ptr_reg->off - smin_val ==
 9892			      (s64)(s32)(ptr_reg->off - smin_val))) {
 9893			/* pointer -= K.  Subtract it from fixed offset */
 9894			dst_reg->smin_value = smin_ptr;
 9895			dst_reg->smax_value = smax_ptr;
 9896			dst_reg->umin_value = umin_ptr;
 9897			dst_reg->umax_value = umax_ptr;
 9898			dst_reg->var_off = ptr_reg->var_off;
 9899			dst_reg->id = ptr_reg->id;
 9900			dst_reg->off = ptr_reg->off - smin_val;
 9901			dst_reg->raw = ptr_reg->raw;
 9902			break;
 9903		}
 9904		/* A new variable offset is created.  If the subtrahend is known
 9905		 * nonnegative, then any reg->range we had before is still good.
 9906		 */
 9907		if (signed_sub_overflows(smin_ptr, smax_val) ||
 9908		    signed_sub_overflows(smax_ptr, smin_val)) {
 9909			/* Overflow possible, we know nothing */
 9910			dst_reg->smin_value = S64_MIN;
 9911			dst_reg->smax_value = S64_MAX;
 9912		} else {
 9913			dst_reg->smin_value = smin_ptr - smax_val;
 9914			dst_reg->smax_value = smax_ptr - smin_val;
 9915		}
 9916		if (umin_ptr < umax_val) {
 9917			/* Overflow possible, we know nothing */
 9918			dst_reg->umin_value = 0;
 9919			dst_reg->umax_value = U64_MAX;
 9920		} else {
 9921			/* Cannot overflow (as long as bounds are consistent) */
 9922			dst_reg->umin_value = umin_ptr - umax_val;
 9923			dst_reg->umax_value = umax_ptr - umin_val;
 9924		}
 9925		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
 9926		dst_reg->off = ptr_reg->off;
 9927		dst_reg->raw = ptr_reg->raw;
 9928		if (reg_is_pkt_pointer(ptr_reg)) {
 9929			dst_reg->id = ++env->id_gen;
 9931			/* a negative subtrahend may have moved pkt_ptr forward, so clear the range */
 9931			if (smin_val < 0)
 9932				memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
 9933		}
 9934		break;
 9935	case BPF_AND:
 9936	case BPF_OR:
 9937	case BPF_XOR:
 9938		/* bitwise ops on pointers are troublesome, prohibit. */
 9939		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
 9940			dst, bpf_alu_string[opcode >> 4]);
 9941		return -EACCES;
 9942	default:
 9943		/* other operators (e.g. MUL,LSH) produce non-pointer results */
 9944		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
 9945			dst, bpf_alu_string[opcode >> 4]);
 9946		return -EACCES;
 9947	}
 9948
 9949	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
 9950		return -EINVAL;
 9951	reg_bounds_sync(dst_reg);
 9952	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
 9953		return -EACCES;
 9954	if (sanitize_needed(opcode)) {
 9955		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
 9956				       &info, true);
 9957		if (ret < 0)
 9958			return sanitize_err(env, insn, ret, off_reg, dst_reg);
 9959	}
 9960
 9961	return 0;
 9962}
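
/* Worked example (illustrative): with ptr_reg = fp-8 (PTR_TO_STACK) and a
 * known off_reg of -8, BPF_ADD above folds the constant into the fixed offset
 * and the destination becomes fp-16 with var_off unchanged.  With an unknown
 * scalar in [0, 64] instead, the signed/unsigned bounds are added with
 * overflow checks, var_off becomes tnum_add(ptr var_off, scalar var_off), and
 * for packet pointers a fresh id is assigned and the range is cleared.
 */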
 9963
 9964static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
 9965				 struct bpf_reg_state *src_reg)
 9966{
 9967	s32 smin_val = src_reg->s32_min_value;
 9968	s32 smax_val = src_reg->s32_max_value;
 9969	u32 umin_val = src_reg->u32_min_value;
 9970	u32 umax_val = src_reg->u32_max_value;
 9971
 9972	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
 9973	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
 9974		dst_reg->s32_min_value = S32_MIN;
 9975		dst_reg->s32_max_value = S32_MAX;
 9976	} else {
 9977		dst_reg->s32_min_value += smin_val;
 9978		dst_reg->s32_max_value += smax_val;
 9979	}
 9980	if (dst_reg->u32_min_value + umin_val < umin_val ||
 9981	    dst_reg->u32_max_value + umax_val < umax_val) {
 9982		dst_reg->u32_min_value = 0;
 9983		dst_reg->u32_max_value = U32_MAX;
 9984	} else {
 9985		dst_reg->u32_min_value += umin_val;
 9986		dst_reg->u32_max_value += umax_val;
 9987	}
 9988}
 9989
 9990static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
 9991			       struct bpf_reg_state *src_reg)
 9992{
 9993	s64 smin_val = src_reg->smin_value;
 9994	s64 smax_val = src_reg->smax_value;
 9995	u64 umin_val = src_reg->umin_value;
 9996	u64 umax_val = src_reg->umax_value;
 9997
 9998	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
 9999	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
10000		dst_reg->smin_value = S64_MIN;
10001		dst_reg->smax_value = S64_MAX;
10002	} else {
10003		dst_reg->smin_value += smin_val;
10004		dst_reg->smax_value += smax_val;
10005	}
10006	if (dst_reg->umin_value + umin_val < umin_val ||
10007	    dst_reg->umax_value + umax_val < umax_val) {
10008		dst_reg->umin_value = 0;
10009		dst_reg->umax_value = U64_MAX;
10010	} else {
10011		dst_reg->umin_value += umin_val;
10012		dst_reg->umax_value += umax_val;
10013	}
10014}
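
/* Worked example (illustrative): dst in [10, 20] plus src in [1, 5] gives
 * dst in [11, 25] for both the signed and unsigned bounds.  If instead
 * dst->umax_value == U64_MAX - 1 and src umax_val == 2, the unsigned add
 * wraps, so the unsigned bounds collapse to [0, U64_MAX]; the
 * signed_add_overflows() checks handle smin/smax the same way.
 */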
10015
10016static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
10017				 struct bpf_reg_state *src_reg)
10018{
10019	s32 smin_val = src_reg->s32_min_value;
10020	s32 smax_val = src_reg->s32_max_value;
10021	u32 umin_val = src_reg->u32_min_value;
10022	u32 umax_val = src_reg->u32_max_value;
10023
10024	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
10025	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
10026		/* Overflow possible, we know nothing */
10027		dst_reg->s32_min_value = S32_MIN;
10028		dst_reg->s32_max_value = S32_MAX;
10029	} else {
10030		dst_reg->s32_min_value -= smax_val;
10031		dst_reg->s32_max_value -= smin_val;
10032	}
10033	if (dst_reg->u32_min_value < umax_val) {
10034		/* Overflow possible, we know nothing */
10035		dst_reg->u32_min_value = 0;
10036		dst_reg->u32_max_value = U32_MAX;
10037	} else {
10038		/* Cannot overflow (as long as bounds are consistent) */
10039		dst_reg->u32_min_value -= umax_val;
10040		dst_reg->u32_max_value -= umin_val;
10041	}
10042}
10043
10044static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
10045			       struct bpf_reg_state *src_reg)
10046{
10047	s64 smin_val = src_reg->smin_value;
10048	s64 smax_val = src_reg->smax_value;
10049	u64 umin_val = src_reg->umin_value;
10050	u64 umax_val = src_reg->umax_value;
10051
10052	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
10053	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
10054		/* Overflow possible, we know nothing */
10055		dst_reg->smin_value = S64_MIN;
10056		dst_reg->smax_value = S64_MAX;
10057	} else {
10058		dst_reg->smin_value -= smax_val;
10059		dst_reg->smax_value -= smin_val;
10060	}
10061	if (dst_reg->umin_value < umax_val) {
10062		/* Overflow possible, we know nothing */
10063		dst_reg->umin_value = 0;
10064		dst_reg->umax_value = U64_MAX;
10065	} else {
10066		/* Cannot overflow (as long as bounds are consistent) */
10067		dst_reg->umin_value -= umax_val;
10068		dst_reg->umax_value -= umin_val;
10069	}
10070}
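
/* Worked example (illustrative): dst in [10, 20] minus src in [1, 5] gives
 * smin = 10 - 5 = 5 and smax = 20 - 1 = 19, i.e. dst in [5, 19].  If
 * dst->umin_value < src umax_val (say dst in [3, 20], src in [1, 5]), the
 * unsigned subtraction may wrap, so the unsigned bounds collapse to
 * [0, U64_MAX] and are only refined later from var_off by reg_bounds_sync().
 */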
10071
10072static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
10073				 struct bpf_reg_state *src_reg)
10074{
10075	s32 smin_val = src_reg->s32_min_value;
10076	u32 umin_val = src_reg->u32_min_value;
10077	u32 umax_val = src_reg->u32_max_value;
10078
10079	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
10080		/* Ain't nobody got time to multiply that sign */
10081		__mark_reg32_unbounded(dst_reg);
10082		return;
10083	}
10084	/* Both values are positive, so we can work with unsigned and
10085	 * copy the result to signed (unless it exceeds S32_MAX).
10086	 */
10087	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
10088		/* Potential overflow, we know nothing */
10089		__mark_reg32_unbounded(dst_reg);
10090		return;
10091	}
10092	dst_reg->u32_min_value *= umin_val;
10093	dst_reg->u32_max_value *= umax_val;
10094	if (dst_reg->u32_max_value > S32_MAX) {
10095		/* Overflow possible, we know nothing */
10096		dst_reg->s32_min_value = S32_MIN;
10097		dst_reg->s32_max_value = S32_MAX;
10098	} else {
10099		dst_reg->s32_min_value = dst_reg->u32_min_value;
10100		dst_reg->s32_max_value = dst_reg->u32_max_value;
10101	}
10102}
10103
10104static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
10105			       struct bpf_reg_state *src_reg)
10106{
10107	s64 smin_val = src_reg->smin_value;
10108	u64 umin_val = src_reg->umin_value;
10109	u64 umax_val = src_reg->umax_value;
10110
10111	if (smin_val < 0 || dst_reg->smin_value < 0) {
10112		/* Ain't nobody got time to multiply that sign */
10113		__mark_reg64_unbounded(dst_reg);
10114		return;
10115	}
10116	/* Both values are positive, so we can work with unsigned and
10117	 * copy the result to signed (unless it exceeds S64_MAX).
10118	 */
10119	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
10120		/* Potential overflow, we know nothing */
10121		__mark_reg64_unbounded(dst_reg);
10122		return;
10123	}
10124	dst_reg->umin_value *= umin_val;
10125	dst_reg->umax_value *= umax_val;
10126	if (dst_reg->umax_value > S64_MAX) {
10127		/* Overflow possible, we know nothing */
10128		dst_reg->smin_value = S64_MIN;
10129		dst_reg->smax_value = S64_MAX;
10130	} else {
10131		dst_reg->smin_value = dst_reg->umin_value;
10132		dst_reg->smax_value = dst_reg->umax_value;
10133	}
10134}
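
/* Worked example (illustrative): dst in [2, 10] times src in [3, 4], both
 * nonnegative and both maxima <= U32_MAX, gives unsigned bounds [6, 40],
 * which are copied to the signed bounds since 40 <= S64_MAX.  If either
 * operand may be negative or either umax exceeds U32_MAX, the product may
 * overflow and the register is marked unbounded instead.
 */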
10135
10136static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
10137				 struct bpf_reg_state *src_reg)
10138{
10139	bool src_known = tnum_subreg_is_const(src_reg->var_off);
10140	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
10141	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
10142	s32 smin_val = src_reg->s32_min_value;
10143	u32 umax_val = src_reg->u32_max_value;
10144
10145	if (src_known && dst_known) {
10146		__mark_reg32_known(dst_reg, var32_off.value);
10147		return;
10148	}
10149
10150	/* We get our minimum from the var_off, since that's inherently
10151	 * bitwise.  Our maximum is the minimum of the operands' maxima.
10152	 */
10153	dst_reg->u32_min_value = var32_off.value;
10154	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
10155	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
10156		/* Lose signed bounds when ANDing negative numbers,
10157		 * ain't nobody got time for that.
10158		 */
10159		dst_reg->s32_min_value = S32_MIN;
10160		dst_reg->s32_max_value = S32_MAX;
10161	} else {
10162		/* ANDing two positives gives a positive, so safe to
10163		 * cast result into s32.
10164		 */
10165		dst_reg->s32_min_value = dst_reg->u32_min_value;
10166		dst_reg->s32_max_value = dst_reg->u32_max_value;
10167	}
10168}
10169
10170static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
10171			       struct bpf_reg_state *src_reg)
10172{
10173	bool src_known = tnum_is_const(src_reg->var_off);
10174	bool dst_known = tnum_is_const(dst_reg->var_off);
10175	s64 smin_val = src_reg->smin_value;
10176	u64 umax_val = src_reg->umax_value;
10177
10178	if (src_known && dst_known) {
10179		__mark_reg_known(dst_reg, dst_reg->var_off.value);
10180		return;
10181	}
10182
10183	/* We get our minimum from the var_off, since that's inherently
10184	 * bitwise.  Our maximum is the minimum of the operands' maxima.
10185	 */
10186	dst_reg->umin_value = dst_reg->var_off.value;
10187	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
10188	if (dst_reg->smin_value < 0 || smin_val < 0) {
10189		/* Lose signed bounds when ANDing negative numbers,
10190		 * ain't nobody got time for that.
10191		 */
10192		dst_reg->smin_value = S64_MIN;
10193		dst_reg->smax_value = S64_MAX;
10194	} else {
10195		/* ANDing two positives gives a positive, so safe to
10196		 * cast result into s64.
10197		 */
10198		dst_reg->smin_value = dst_reg->umin_value;
10199		dst_reg->smax_value = dst_reg->umax_value;
10200	}
10201	/* We may learn something more from the var_off */
10202	__update_reg_bounds(dst_reg);
10203}
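
/* Worked example (illustrative): a fully unknown scalar bounded to [0, 255]
 * ANDed with the constant 0x0f: tnum_and() in the caller yields var_off
 * {value=0, mask=0xf}, so umin = var_off.value = 0 and
 * umax = min(255, 0xf) = 0xf.  Both operands are nonnegative, so the signed
 * bounds are copied from the unsigned ones.
 */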
10204
10205static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
10206				struct bpf_reg_state *src_reg)
10207{
10208	bool src_known = tnum_subreg_is_const(src_reg->var_off);
10209	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
10210	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
10211	s32 smin_val = src_reg->s32_min_value;
10212	u32 umin_val = src_reg->u32_min_value;
10213
10214	if (src_known && dst_known) {
10215		__mark_reg32_known(dst_reg, var32_off.value);
10216		return;
10217	}
10218
10219	/* We get our maximum from the var_off, and our minimum is the
10220	 * maximum of the operands' minima
10221	 */
10222	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
10223	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
10224	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
10225		/* Lose signed bounds when ORing negative numbers,
10226		 * ain't nobody got time for that.
10227		 */
10228		dst_reg->s32_min_value = S32_MIN;
10229		dst_reg->s32_max_value = S32_MAX;
10230	} else {
10231		/* ORing two positives gives a positive, so safe to
10232		 * cast result into s32.
10233		 */
10234		dst_reg->s32_min_value = dst_reg->u32_min_value;
10235		dst_reg->s32_max_value = dst_reg->u32_max_value;
10236	}
10237}
10238
10239static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
10240			      struct bpf_reg_state *src_reg)
10241{
10242	bool src_known = tnum_is_const(src_reg->var_off);
10243	bool dst_known = tnum_is_const(dst_reg->var_off);
10244	s64 smin_val = src_reg->smin_value;
10245	u64 umin_val = src_reg->umin_value;
10246
10247	if (src_known && dst_known) {
10248		__mark_reg_known(dst_reg, dst_reg->var_off.value);
10249		return;
10250	}
10251
10252	/* We get our maximum from the var_off, and our minimum is the
10253	 * maximum of the operands' minima
10254	 */
10255	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
10256	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
10257	if (dst_reg->smin_value < 0 || smin_val < 0) {
10258		/* Lose signed bounds when ORing negative numbers,
10259		 * ain't nobody got time for that.
10260		 */
10261		dst_reg->smin_value = S64_MIN;
10262		dst_reg->smax_value = S64_MAX;
10263	} else {
10264		/* ORing two positives gives a positive, so safe to
10265		 * cast result into s64.
10266		 */
10267		dst_reg->smin_value = dst_reg->umin_value;
10268		dst_reg->smax_value = dst_reg->umax_value;
10269	}
10270	/* We may learn something more from the var_off */
10271	__update_reg_bounds(dst_reg);
10272}
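
/* Worked example (illustrative): dst with var_off {value=0, mask=0xc} (one of
 * 0, 4, 8, 12) ORed with the constant 0x3: tnum_or() in the caller yields
 * {value=0x3, mask=0xc}, so umin = max(0, 3) = 3 and
 * umax = value | mask = 0xf.  If either operand may be negative, the signed
 * bounds collapse to [S64_MIN, S64_MAX].
 */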
10273
10274static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
10275				 struct bpf_reg_state *src_reg)
10276{
10277	bool src_known = tnum_subreg_is_const(src_reg->var_off);
10278	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
10279	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
10280	s32 smin_val = src_reg->s32_min_value;
10281
10282	if (src_known && dst_known) {
10283		__mark_reg32_known(dst_reg, var32_off.value);
10284		return;
10285	}
10286
10287	/* We get both minimum and maximum from the var32_off. */
10288	dst_reg->u32_min_value = var32_off.value;
10289	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
10290
10291	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
10292		/* XORing two positive sign numbers gives a positive,
10293		 * so safe to cast u32 result into s32.
10294		 */
10295		dst_reg->s32_min_value = dst_reg->u32_min_value;
10296		dst_reg->s32_max_value = dst_reg->u32_max_value;
10297	} else {
10298		dst_reg->s32_min_value = S32_MIN;
10299		dst_reg->s32_max_value = S32_MAX;
10300	}
10301}
10302
10303static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
10304			       struct bpf_reg_state *src_reg)
10305{
10306	bool src_known = tnum_is_const(src_reg->var_off);
10307	bool dst_known = tnum_is_const(dst_reg->var_off);
10308	s64 smin_val = src_reg->smin_value;
10309
10310	if (src_known && dst_known) {
10311		/* dst_reg->var_off.value has been updated earlier */
10312		__mark_reg_known(dst_reg, dst_reg->var_off.value);
10313		return;
10314	}
10315
10316	/* We get both minimum and maximum from the var_off. */
10317	dst_reg->umin_value = dst_reg->var_off.value;
10318	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
10319
10320	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
10321		/* XORing two positive sign numbers gives a positive,
10322		 * so safe to cast u64 result into s64.
10323		 */
10324		dst_reg->smin_value = dst_reg->umin_value;
10325		dst_reg->smax_value = dst_reg->umax_value;
10326	} else {
10327		dst_reg->smin_value = S64_MIN;
10328		dst_reg->smax_value = S64_MAX;
10329	}
10330
10331	__update_reg_bounds(dst_reg);
10332}
10333
10334static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
10335				   u64 umin_val, u64 umax_val)
10336{
10337	/* We lose all sign bit information (except what we can pick
10338	 * up from var_off)
10339	 */
10340	dst_reg->s32_min_value = S32_MIN;
10341	dst_reg->s32_max_value = S32_MAX;
10342	/* If we might shift our top bit out, then we know nothing */
10343	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
10344		dst_reg->u32_min_value = 0;
10345		dst_reg->u32_max_value = U32_MAX;
10346	} else {
10347		dst_reg->u32_min_value <<= umin_val;
10348		dst_reg->u32_max_value <<= umax_val;
10349	}
10350}
10351
10352static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
10353				 struct bpf_reg_state *src_reg)
10354{
10355	u32 umax_val = src_reg->u32_max_value;
10356	u32 umin_val = src_reg->u32_min_value;
10357	/* u32 alu operation will zext upper bits */
10358	struct tnum subreg = tnum_subreg(dst_reg->var_off);
10359
10360	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
10361	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
10362	/* Not strictly required, but to be careful mark the reg64 bounds as
10363	 * unknown so that we are forced to pick them up from the tnum and
10364	 * zero-extend later; if some path skips this step we are still safe.
10365	 */
10366	__mark_reg64_unbounded(dst_reg);
10367	__update_reg32_bounds(dst_reg);
10368}
10369
10370static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
10371				   u64 umin_val, u64 umax_val)
10372{
10373	/* Special case <<32 because it is a common compiler pattern to sign
10374	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
10375	 * positive we know this shift will also be positive so we can track
10376	 * bounds correctly. Otherwise we lose all sign bit information except
10377	 * what we can pick up from var_off. Perhaps we can generalize this
10378	 * later to shifts of any length.
10379	 */
10380	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
10381		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
10382	else
10383		dst_reg->smax_value = S64_MAX;
10384
10385	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
10386		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
10387	else
10388		dst_reg->smin_value = S64_MIN;
10389
10390	/* If we might shift our top bit out, then we know nothing */
10391	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
10392		dst_reg->umin_value = 0;
10393		dst_reg->umax_value = U64_MAX;
10394	} else {
10395		dst_reg->umin_value <<= umin_val;
10396		dst_reg->umax_value <<= umax_val;
10397	}
10398}
10399
10400static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
10401			       struct bpf_reg_state *src_reg)
10402{
10403	u64 umax_val = src_reg->umax_value;
10404	u64 umin_val = src_reg->umin_value;
10405
10406	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
10407	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
10408	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
10409
10410	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
10411	/* We may learn something more from the var_off */
10412	__update_reg_bounds(dst_reg);
10413}
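
/* Worked example (illustrative): a register with 32-bit bounds [0, 100]
 * shifted left by the constant 32 keeps usable signed 64-bit bounds,
 * smin = 0 and smax = 100ULL << 32, matching the "<<32 s>>32" sign-extension
 * pattern handled above.  For other shift amounts the signed bounds are
 * dropped, and the unsigned bounds collapse to [0, U64_MAX] whenever the top
 * bit might be shifted out.
 */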
10414
10415static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
10416				 struct bpf_reg_state *src_reg)
10417{
10418	struct tnum subreg = tnum_subreg(dst_reg->var_off);
10419	u32 umax_val = src_reg->u32_max_value;
10420	u32 umin_val = src_reg->u32_min_value;
10421
10422	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
10423	 * be negative, then either:
10424	 * 1) src_reg might be zero, so the sign bit of the result is
10425	 *    unknown, so we lose our signed bounds
10426	 * 2) it's known negative, thus the unsigned bounds capture the
10427	 *    signed bounds
10428	 * 3) the signed bounds cross zero, so they tell us nothing
10429	 *    about the result
10430	 * If the value in dst_reg is known nonnegative, then again the
10431	 * unsigned bounds capture the signed bounds.
10432	 * Thus, in all cases it suffices to blow away our signed bounds
10433	 * and rely on inferring new ones from the unsigned bounds and
10434	 * var_off of the result.
10435	 */
10436	dst_reg->s32_min_value = S32_MIN;
10437	dst_reg->s32_max_value = S32_MAX;
10438
10439	dst_reg->var_off = tnum_rshift(subreg, umin_val);
10440	dst_reg->u32_min_value >>= umax_val;
10441	dst_reg->u32_max_value >>= umin_val;
10442
10443	__mark_reg64_unbounded(dst_reg);
10444	__update_reg32_bounds(dst_reg);
10445}
10446
10447static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
10448			       struct bpf_reg_state *src_reg)
10449{
10450	u64 umax_val = src_reg->umax_value;
10451	u64 umin_val = src_reg->umin_value;
10452
10453	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
10454	 * be negative, then either:
10455	 * 1) src_reg might be zero, so the sign bit of the result is
10456	 *    unknown, so we lose our signed bounds
10457	 * 2) it's known negative, thus the unsigned bounds capture the
10458	 *    signed bounds
10459	 * 3) the signed bounds cross zero, so they tell us nothing
10460	 *    about the result
10461	 * If the value in dst_reg is known nonnegative, then again the
10462	 * unsigned bounds capture the signed bounds.
10463	 * Thus, in all cases it suffices to blow away our signed bounds
10464	 * and rely on inferring new ones from the unsigned bounds and
10465	 * var_off of the result.
10466	 */
10467	dst_reg->smin_value = S64_MIN;
10468	dst_reg->smax_value = S64_MAX;
10469	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
10470	dst_reg->umin_value >>= umax_val;
10471	dst_reg->umax_value >>= umin_val;
10472
10473	/* It's not easy to operate on alu32 bounds here because it depends
10474	 * on bits being shifted in. Take easy way out and mark unbounded
10475	 * so we can recalculate later from tnum.
10476	 */
10477	__mark_reg32_unbounded(dst_reg);
10478	__update_reg_bounds(dst_reg);
10479}
10480
10481static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
10482				  struct bpf_reg_state *src_reg)
10483{
10484	u64 umin_val = src_reg->u32_min_value;
10485
10486	/* Upon reaching here, src_known is true and
10487	 * umax_val is equal to umin_val.
10488	 */
10489	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
10490	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
10491
10492	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
10493
10494	/* blow away the dst_reg umin_value/umax_value and rely on
10495	 * dst_reg var_off to refine the result.
10496	 */
10497	dst_reg->u32_min_value = 0;
10498	dst_reg->u32_max_value = U32_MAX;
10499
10500	__mark_reg64_unbounded(dst_reg);
10501	__update_reg32_bounds(dst_reg);
10502}
10503
10504static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
10505				struct bpf_reg_state *src_reg)
10506{
10507	u64 umin_val = src_reg->umin_value;
10508
10509	/* Upon reaching here, src_known is true and umax_val is equal
10510	 * to umin_val.
10511	 */
10512	dst_reg->smin_value >>= umin_val;
10513	dst_reg->smax_value >>= umin_val;
10514
10515	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
10516
10517	/* blow away the dst_reg umin_value/umax_value and rely on
10518	 * dst_reg var_off to refine the result.
10519	 */
10520	dst_reg->umin_value = 0;
10521	dst_reg->umax_value = U64_MAX;
10522
10523	/* It's not easy to operate on alu32 bounds here because it depends
10524	 * on bits being shifted in from upper 32-bits. Take easy way out
10525	 * and mark unbounded so we can recalculate later from tnum.
10526	 */
10527	__mark_reg32_unbounded(dst_reg);
10528	__update_reg_bounds(dst_reg);
10529}
10530
10531/* WARNING: This function does calculations on 64-bit values, but the actual
10532 * execution may occur on 32-bit values. Therefore, things like bitshifts
10533 * need extra checks in the 32-bit case.
10534 */
10535static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
10536				      struct bpf_insn *insn,
10537				      struct bpf_reg_state *dst_reg,
10538				      struct bpf_reg_state src_reg)
10539{
10540	struct bpf_reg_state *regs = cur_regs(env);
10541	u8 opcode = BPF_OP(insn->code);
10542	bool src_known;
10543	s64 smin_val, smax_val;
10544	u64 umin_val, umax_val;
10545	s32 s32_min_val, s32_max_val;
10546	u32 u32_min_val, u32_max_val;
10547	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
10548	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
10549	int ret;
10550
10551	smin_val = src_reg.smin_value;
10552	smax_val = src_reg.smax_value;
10553	umin_val = src_reg.umin_value;
10554	umax_val = src_reg.umax_value;
10555
10556	s32_min_val = src_reg.s32_min_value;
10557	s32_max_val = src_reg.s32_max_value;
10558	u32_min_val = src_reg.u32_min_value;
10559	u32_max_val = src_reg.u32_max_value;
10560
10561	if (alu32) {
10562		src_known = tnum_subreg_is_const(src_reg.var_off);
10563		if ((src_known &&
10564		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
10565		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
10566			/* Taint dst register if offset had invalid bounds
10567			 * derived from e.g. dead branches.
10568			 */
10569			__mark_reg_unknown(env, dst_reg);
10570			return 0;
10571		}
10572	} else {
10573		src_known = tnum_is_const(src_reg.var_off);
10574		if ((src_known &&
10575		     (smin_val != smax_val || umin_val != umax_val)) ||
10576		    smin_val > smax_val || umin_val > umax_val) {
10577			/* Taint dst register if offset had invalid bounds
10578			 * derived from e.g. dead branches.
10579			 */
10580			__mark_reg_unknown(env, dst_reg);
10581			return 0;
10582		}
10583	}
10584
10585	if (!src_known &&
10586	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
10587		__mark_reg_unknown(env, dst_reg);
10588		return 0;
10589	}
10590
10591	if (sanitize_needed(opcode)) {
10592		ret = sanitize_val_alu(env, insn);
10593		if (ret < 0)
10594			return sanitize_err(env, insn, ret, NULL, NULL);
10595	}
10596
10597	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
10598	 * There are two classes of instructions: for the first class we track both
10599	 * alu32 and alu64 sign/unsigned bounds independently; this provides the
10600	 * greatest amount of precision when alu operations are mixed with jmp32
10601	 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
10602	 * BPF_OR and BPF_XOR. This is possible because these ops have fairly easy to
10603	 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
10604	 * See alu32 verifier tests for examples. The second class of
10605	 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
10606	 * with regards to tracking sign/unsigned bounds because the bits may
10607	 * cross subreg boundaries in the alu64 case. When this happens we mark
10608	 * the reg unbounded in the subreg bound space and use the resulting
10609	 * tnum to calculate an approximation of the sign/unsigned bounds.
10610	 */
10611	switch (opcode) {
10612	case BPF_ADD:
10613		scalar32_min_max_add(dst_reg, &src_reg);
10614		scalar_min_max_add(dst_reg, &src_reg);
10615		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
10616		break;
10617	case BPF_SUB:
10618		scalar32_min_max_sub(dst_reg, &src_reg);
10619		scalar_min_max_sub(dst_reg, &src_reg);
10620		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
10621		break;
10622	case BPF_MUL:
10623		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
10624		scalar32_min_max_mul(dst_reg, &src_reg);
10625		scalar_min_max_mul(dst_reg, &src_reg);
10626		break;
10627	case BPF_AND:
10628		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
10629		scalar32_min_max_and(dst_reg, &src_reg);
10630		scalar_min_max_and(dst_reg, &src_reg);
10631		break;
10632	case BPF_OR:
10633		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
10634		scalar32_min_max_or(dst_reg, &src_reg);
10635		scalar_min_max_or(dst_reg, &src_reg);
10636		break;
10637	case BPF_XOR:
10638		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
10639		scalar32_min_max_xor(dst_reg, &src_reg);
10640		scalar_min_max_xor(dst_reg, &src_reg);
10641		break;
10642	case BPF_LSH:
10643		if (umax_val >= insn_bitness) {
10644			/* Shifts greater than 31 or 63 are undefined.
10645			 * This includes shifts by a negative number.
10646			 */
10647			mark_reg_unknown(env, regs, insn->dst_reg);
10648			break;
10649		}
10650		if (alu32)
10651			scalar32_min_max_lsh(dst_reg, &src_reg);
10652		else
10653			scalar_min_max_lsh(dst_reg, &src_reg);
10654		break;
10655	case BPF_RSH:
10656		if (umax_val >= insn_bitness) {
10657			/* Shifts greater than 31 or 63 are undefined.
10658			 * This includes shifts by a negative number.
10659			 */
10660			mark_reg_unknown(env, regs, insn->dst_reg);
10661			break;
10662		}
10663		if (alu32)
10664			scalar32_min_max_rsh(dst_reg, &src_reg);
10665		else
10666			scalar_min_max_rsh(dst_reg, &src_reg);
10667		break;
10668	case BPF_ARSH:
10669		if (umax_val >= insn_bitness) {
10670			/* Shifts greater than 31 or 63 are undefined.
10671			 * This includes shifts by a negative number.
10672			 */
10673			mark_reg_unknown(env, regs, insn->dst_reg);
10674			break;
10675		}
10676		if (alu32)
10677			scalar32_min_max_arsh(dst_reg, &src_reg);
10678		else
10679			scalar_min_max_arsh(dst_reg, &src_reg);
10680		break;
10681	default:
10682		mark_reg_unknown(env, regs, insn->dst_reg);
10683		break;
10684	}
10685
10686	/* ALU32 ops are zero extended into 64bit register */
10687	if (alu32)
10688		zext_32_to_64(dst_reg);
10689	reg_bounds_sync(dst_reg);
10690	return 0;
10691}
10692
10693/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
10694 * and var_off.
10695 */
10696static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
10697				   struct bpf_insn *insn)
10698{
10699	struct bpf_verifier_state *vstate = env->cur_state;
10700	struct bpf_func_state *state = vstate->frame[vstate->curframe];
10701	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
10702	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
10703	u8 opcode = BPF_OP(insn->code);
10704	int err;
10705
10706	dst_reg = &regs[insn->dst_reg];
10707	src_reg = NULL;
10708	if (dst_reg->type != SCALAR_VALUE)
10709		ptr_reg = dst_reg;
10710	else
10711		/* Make sure ID is cleared otherwise dst_reg min/max could be
10712		 * incorrectly propagated into other registers by find_equal_scalars()
10713		 */
10714		dst_reg->id = 0;
10715	if (BPF_SRC(insn->code) == BPF_X) {
10716		src_reg = &regs[insn->src_reg];
10717		if (src_reg->type != SCALAR_VALUE) {
10718			if (dst_reg->type != SCALAR_VALUE) {
10719				/* Combining two pointers by any ALU op yields
10720				 * an arbitrary scalar. Disallow all math except
10721				 * pointer subtraction
10722				 */
10723				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
10724					mark_reg_unknown(env, regs, insn->dst_reg);
10725					return 0;
10726				}
10727				verbose(env, "R%d pointer %s pointer prohibited\n",
10728					insn->dst_reg,
10729					bpf_alu_string[opcode >> 4]);
10730				return -EACCES;
10731			} else {
10732				/* scalar += pointer
10733				 * This is legal, but we have to reverse our
10734				 * src/dest handling in computing the range
10735				 */
10736				err = mark_chain_precision(env, insn->dst_reg);
10737				if (err)
10738					return err;
10739				return adjust_ptr_min_max_vals(env, insn,
10740							       src_reg, dst_reg);
10741			}
10742		} else if (ptr_reg) {
10743			/* pointer += scalar */
10744			err = mark_chain_precision(env, insn->src_reg);
10745			if (err)
10746				return err;
10747			return adjust_ptr_min_max_vals(env, insn,
10748						       dst_reg, src_reg);
10749		} else if (dst_reg->precise) {
10750			/* if dst_reg is precise, src_reg should be precise as well */
10751			err = mark_chain_precision(env, insn->src_reg);
10752			if (err)
10753				return err;
10754		}
10755	} else {
10756		/* Pretend the src is a reg with a known value, since we only
10757		 * need to be able to read from this state.
10758		 */
10759		off_reg.type = SCALAR_VALUE;
10760		__mark_reg_known(&off_reg, insn->imm);
10761		src_reg = &off_reg;
10762		if (ptr_reg) /* pointer += K */
10763			return adjust_ptr_min_max_vals(env, insn,
10764						       ptr_reg, src_reg);
10765	}
10766
10767	/* Got here implies adding two SCALAR_VALUEs */
10768	if (WARN_ON_ONCE(ptr_reg)) {
10769		print_verifier_state(env, state, true);
10770		verbose(env, "verifier internal error: unexpected ptr_reg\n");
10771		return -EINVAL;
10772	}
10773	if (WARN_ON(!src_reg)) {
10774		print_verifier_state(env, state, true);
10775		verbose(env, "verifier internal error: no src_reg\n");
10776		return -EINVAL;
10777	}
10778	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
10779}
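
/* Illustrative example: for "r1 += r2" where r1 is a scalar and r2 is
 * PTR_TO_MAP_VALUE, the "scalar += pointer" branch above swaps the operands
 * and calls adjust_ptr_min_max_vals() with r2 as ptr_reg and r1 as off_reg,
 * so the destination r1 ends up as a map value pointer with the scalar folded
 * into its offset/bounds.
 */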
10780
10781/* check validity of 32-bit and 64-bit arithmetic operations */
10782static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
10783{
10784	struct bpf_reg_state *regs = cur_regs(env);
10785	u8 opcode = BPF_OP(insn->code);
10786	int err;
10787
10788	if (opcode == BPF_END || opcode == BPF_NEG) {
10789		if (opcode == BPF_NEG) {
10790			if (BPF_SRC(insn->code) != BPF_K ||
10791			    insn->src_reg != BPF_REG_0 ||
10792			    insn->off != 0 || insn->imm != 0) {
10793				verbose(env, "BPF_NEG uses reserved fields\n");
10794				return -EINVAL;
10795			}
10796		} else {
10797			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
10798			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
10799			    BPF_CLASS(insn->code) == BPF_ALU64) {
10800				verbose(env, "BPF_END uses reserved fields\n");
10801				return -EINVAL;
10802			}
10803		}
10804
10805		/* check src operand */
10806		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
10807		if (err)
10808			return err;
10809
10810		if (is_pointer_value(env, insn->dst_reg)) {
10811			verbose(env, "R%d pointer arithmetic prohibited\n",
10812				insn->dst_reg);
10813			return -EACCES;
10814		}
10815
10816		/* check dest operand */
10817		err = check_reg_arg(env, insn->dst_reg, DST_OP);
10818		if (err)
10819			return err;
10820
10821	} else if (opcode == BPF_MOV) {
10822
10823		if (BPF_SRC(insn->code) == BPF_X) {
10824			if (insn->imm != 0 || insn->off != 0) {
10825				verbose(env, "BPF_MOV uses reserved fields\n");
10826				return -EINVAL;
10827			}
10828
10829			/* check src operand */
10830			err = check_reg_arg(env, insn->src_reg, SRC_OP);
10831			if (err)
10832				return err;
10833		} else {
10834			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
10835				verbose(env, "BPF_MOV uses reserved fields\n");
10836				return -EINVAL;
10837			}
10838		}
10839
10840		/* check dest operand, mark as required later */
10841		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
10842		if (err)
10843			return err;
10844
10845		if (BPF_SRC(insn->code) == BPF_X) {
10846			struct bpf_reg_state *src_reg = regs + insn->src_reg;
10847			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
10848
10849			if (BPF_CLASS(insn->code) == BPF_ALU64) {
10850				/* case: R1 = R2
10851				 * copy register state to dest reg
10852				 */
10853				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
10854					/* Assign src and dst registers the same ID
10855					 * that will be used by find_equal_scalars()
10856					 * to propagate min/max range.
10857					 */
10858					src_reg->id = ++env->id_gen;
10859				copy_register_state(dst_reg, src_reg);
10860				dst_reg->live |= REG_LIVE_WRITTEN;
10861				dst_reg->subreg_def = DEF_NOT_SUBREG;
10862			} else {
10863				/* R1 = (u32) R2 */
10864				if (is_pointer_value(env, insn->src_reg)) {
10865					verbose(env,
10866						"R%d partial copy of pointer\n",
10867						insn->src_reg);
10868					return -EACCES;
10869				} else if (src_reg->type == SCALAR_VALUE) {
10870					copy_register_state(dst_reg, src_reg);
10871					/* Make sure ID is cleared otherwise
10872					 * dst_reg min/max could be incorrectly
10873					 * propagated into src_reg by find_equal_scalars()
10874					 */
10875					dst_reg->id = 0;
10876					dst_reg->live |= REG_LIVE_WRITTEN;
10877					dst_reg->subreg_def = env->insn_idx + 1;
10878				} else {
10879					mark_reg_unknown(env, regs,
10880							 insn->dst_reg);
10881				}
10882				zext_32_to_64(dst_reg);
10883				reg_bounds_sync(dst_reg);
10884			}
10885		} else {
10886			/* case: R = imm
10887			 * remember the value we stored into this reg
10888			 */
10889			/* clear any state __mark_reg_known doesn't set */
10890			mark_reg_unknown(env, regs, insn->dst_reg);
10891			regs[insn->dst_reg].type = SCALAR_VALUE;
10892			if (BPF_CLASS(insn->code) == BPF_ALU64) {
10893				__mark_reg_known(regs + insn->dst_reg,
10894						 insn->imm);
10895			} else {
10896				__mark_reg_known(regs + insn->dst_reg,
10897						 (u32)insn->imm);
10898			}
10899		}
10900
10901	} else if (opcode > BPF_END) {
10902		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
10903		return -EINVAL;
10904
10905	} else {	/* all other ALU ops: and, sub, xor, add, ... */
10906
10907		if (BPF_SRC(insn->code) == BPF_X) {
10908			if (insn->imm != 0 || insn->off != 0) {
10909				verbose(env, "BPF_ALU uses reserved fields\n");
10910				return -EINVAL;
10911			}
10912			/* check src1 operand */
10913			err = check_reg_arg(env, insn->src_reg, SRC_OP);
10914			if (err)
10915				return err;
10916		} else {
10917			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
10918				verbose(env, "BPF_ALU uses reserved fields\n");
10919				return -EINVAL;
10920			}
10921		}
10922
10923		/* check src2 operand */
10924		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
10925		if (err)
10926			return err;
10927
10928		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
10929		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
10930			verbose(env, "div by zero\n");
10931			return -EINVAL;
10932		}
10933
10934		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
10935		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
10936			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
10937
10938			if (insn->imm < 0 || insn->imm >= size) {
10939				verbose(env, "invalid shift %d\n", insn->imm);
10940				return -EINVAL;
10941			}
10942		}
10943
10944		/* check dest operand */
10945		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
10946		if (err)
10947			return err;
10948
10949		return adjust_reg_min_max_vals(env, insn);
10950	}
10951
10952	return 0;
10953}
10954
10955static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
10956				   struct bpf_reg_state *dst_reg,
10957				   enum bpf_reg_type type,
10958				   bool range_right_open)
10959{
10960	struct bpf_func_state *state;
10961	struct bpf_reg_state *reg;
10962	int new_range;
10963
10964	if (dst_reg->off < 0 ||
10965	    (dst_reg->off == 0 && range_right_open))
10966		/* This doesn't give us any range */
10967		return;
10968
10969	if (dst_reg->umax_value > MAX_PACKET_OFF ||
10970	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
10971		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
10972		 * than pkt_end, but that's because it's also less than pkt.
10973		 */
10974		return;
10975
10976	new_range = dst_reg->off;
10977	if (range_right_open)
10978		new_range++;
10979
10980	/* Examples for register markings:
10981	 *
10982	 * pkt_data in dst register:
10983	 *
10984	 *   r2 = r3;
10985	 *   r2 += 8;
10986	 *   if (r2 > pkt_end) goto <handle exception>
10987	 *   <access okay>
10988	 *
10989	 *   r2 = r3;
10990	 *   r2 += 8;
10991	 *   if (r2 < pkt_end) goto <access okay>
10992	 *   <handle exception>
10993	 *
10994	 *   Where:
10995	 *     r2 == dst_reg, pkt_end == src_reg
10996	 *     r2=pkt(id=n,off=8,r=0)
10997	 *     r3=pkt(id=n,off=0,r=0)
10998	 *
10999	 * pkt_data in src register:
11000	 *
11001	 *   r2 = r3;
11002	 *   r2 += 8;
11003	 *   if (pkt_end >= r2) goto <access okay>
11004	 *   <handle exception>
11005	 *
11006	 *   r2 = r3;
11007	 *   r2 += 8;
11008	 *   if (pkt_end <= r2) goto <handle exception>
11009	 *   <access okay>
11010	 *
11011	 *   Where:
11012	 *     pkt_end == dst_reg, r2 == src_reg
11013	 *     r2=pkt(id=n,off=8,r=0)
11014	 *     r3=pkt(id=n,off=0,r=0)
11015	 *
11016	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
11017	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
11018	 * and [r3, r3 + 8-1) respectively is safe to access depending on
11019	 * the check.
11020	 */
11021
11022	/* If our ids match, then we must have the same max_value.  And we
11023	 * don't care about the other reg's fixed offset, since if it's too big
11024	 * the range won't allow anything.
11025	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
11026	 */
11027	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
11028		if (reg->type == type && reg->id == dst_reg->id)
11029			/* keep the maximum range already checked */
11030			reg->range = max(reg->range, new_range);
11031	}));
11032}
11033
11034static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
11035{
11036	struct tnum subreg = tnum_subreg(reg->var_off);
11037	s32 sval = (s32)val;
11038
11039	switch (opcode) {
11040	case BPF_JEQ:
11041		if (tnum_is_const(subreg))
11042			return !!tnum_equals_const(subreg, val);
11043		break;
11044	case BPF_JNE:
11045		if (tnum_is_const(subreg))
11046			return !tnum_equals_const(subreg, val);
11047		break;
11048	case BPF_JSET:
11049		if ((~subreg.mask & subreg.value) & val)
11050			return 1;
11051		if (!((subreg.mask | subreg.value) & val))
11052			return 0;
11053		break;
11054	case BPF_JGT:
11055		if (reg->u32_min_value > val)
11056			return 1;
11057		else if (reg->u32_max_value <= val)
11058			return 0;
11059		break;
11060	case BPF_JSGT:
11061		if (reg->s32_min_value > sval)
11062			return 1;
11063		else if (reg->s32_max_value <= sval)
11064			return 0;
11065		break;
11066	case BPF_JLT:
11067		if (reg->u32_max_value < val)
11068			return 1;
11069		else if (reg->u32_min_value >= val)
11070			return 0;
11071		break;
11072	case BPF_JSLT:
11073		if (reg->s32_max_value < sval)
11074			return 1;
11075		else if (reg->s32_min_value >= sval)
11076			return 0;
11077		break;
11078	case BPF_JGE:
11079		if (reg->u32_min_value >= val)
11080			return 1;
11081		else if (reg->u32_max_value < val)
11082			return 0;
11083		break;
11084	case BPF_JSGE:
11085		if (reg->s32_min_value >= sval)
11086			return 1;
11087		else if (reg->s32_max_value < sval)
11088			return 0;
11089		break;
11090	case BPF_JLE:
11091		if (reg->u32_max_value <= val)
11092			return 1;
11093		else if (reg->u32_min_value > val)
11094			return 0;
11095		break;
11096	case BPF_JSLE:
11097		if (reg->s32_max_value <= sval)
11098			return 1;
11099		else if (reg->s32_min_value > sval)
11100			return 0;
11101		break;
11102	}
11103
11104	return -1;
11105}
11106
11107
11108static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
11109{
11110	s64 sval = (s64)val;
11111
11112	switch (opcode) {
11113	case BPF_JEQ:
11114		if (tnum_is_const(reg->var_off))
11115			return !!tnum_equals_const(reg->var_off, val);
11116		break;
11117	case BPF_JNE:
11118		if (tnum_is_const(reg->var_off))
11119			return !tnum_equals_const(reg->var_off, val);
11120		break;
11121	case BPF_JSET:
11122		if ((~reg->var_off.mask & reg->var_off.value) & val)
11123			return 1;
11124		if (!((reg->var_off.mask | reg->var_off.value) & val))
11125			return 0;
11126		break;
11127	case BPF_JGT:
11128		if (reg->umin_value > val)
11129			return 1;
11130		else if (reg->umax_value <= val)
11131			return 0;
11132		break;
11133	case BPF_JSGT:
11134		if (reg->smin_value > sval)
11135			return 1;
11136		else if (reg->smax_value <= sval)
11137			return 0;
11138		break;
11139	case BPF_JLT:
11140		if (reg->umax_value < val)
11141			return 1;
11142		else if (reg->umin_value >= val)
11143			return 0;
11144		break;
11145	case BPF_JSLT:
11146		if (reg->smax_value < sval)
11147			return 1;
11148		else if (reg->smin_value >= sval)
11149			return 0;
11150		break;
11151	case BPF_JGE:
11152		if (reg->umin_value >= val)
11153			return 1;
11154		else if (reg->umax_value < val)
11155			return 0;
11156		break;
11157	case BPF_JSGE:
11158		if (reg->smin_value >= sval)
11159			return 1;
11160		else if (reg->smax_value < sval)
11161			return 0;
11162		break;
11163	case BPF_JLE:
11164		if (reg->umax_value <= val)
11165			return 1;
11166		else if (reg->umin_value > val)
11167			return 0;
11168		break;
11169	case BPF_JSLE:
11170		if (reg->smax_value <= sval)
11171			return 1;
11172		else if (reg->smin_value > sval)
11173			return 0;
11174		break;
11175	}
11176
11177	return -1;
11178}
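
/* Worked example (illustrative): for "if r1 > 7" with r1 known to be in
 * [10, 20], umin_value (10) > 7, so the branch is always taken and 1 is
 * returned; with r1 in [0, 5] the branch is never taken and 0 is returned;
 * with r1 in [0, 10] neither bound decides the comparison, -1 is returned
 * and both paths are explored.
 */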
11179
11180/* compute branch direction of the expression "if (reg opcode val) goto target;"
11181 * and return:
11182 *  1 - branch will be taken and "goto target" will be executed
11183 *  0 - branch will not be taken and fall-through to next insn
11184 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
11185 *      value range is [0,10]
11186 */
11187static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
11188			   bool is_jmp32)
11189{
11190	if (__is_pointer_value(false, reg)) {
11191		if (!reg_type_not_null(reg->type))
11192			return -1;
11193
11194		/* If the pointer is valid, tests against zero will fail, so we
11195		 * can use this to decide the branch direction.
11196		 */
11197		if (val != 0)
11198			return -1;
11199
11200		switch (opcode) {
11201		case BPF_JEQ:
11202			return 0;
11203		case BPF_JNE:
11204			return 1;
11205		default:
11206			return -1;
11207		}
11208	}
11209
11210	if (is_jmp32)
11211		return is_branch32_taken(reg, val, opcode);
11212	return is_branch64_taken(reg, val, opcode);
11213}
11214
11215static int flip_opcode(u32 opcode)
11216{
11217	/* How can we transform "a <op> b" into "b <op> a"? */
11218	static const u8 opcode_flip[16] = {
11219		/* these stay the same */
11220		[BPF_JEQ  >> 4] = BPF_JEQ,
11221		[BPF_JNE  >> 4] = BPF_JNE,
11222		[BPF_JSET >> 4] = BPF_JSET,
11223		/* these swap "lesser" and "greater" (L and G in the opcodes) */
11224		[BPF_JGE  >> 4] = BPF_JLE,
11225		[BPF_JGT  >> 4] = BPF_JLT,
11226		[BPF_JLE  >> 4] = BPF_JGE,
11227		[BPF_JLT  >> 4] = BPF_JGT,
11228		[BPF_JSGE >> 4] = BPF_JSLE,
11229		[BPF_JSGT >> 4] = BPF_JSLT,
11230		[BPF_JSLE >> 4] = BPF_JSGE,
11231		[BPF_JSLT >> 4] = BPF_JSGT
11232	};
11233	return opcode_flip[opcode >> 4];
11234}
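
/* Illustrative example: to evaluate "r1 < r2" with the operands swapped, the
 * condition is rewritten as "r2 > r1", i.e. BPF_JLT maps to BPF_JGT (and
 * BPF_JSLT to BPF_JSGT), while BPF_JEQ, BPF_JNE and BPF_JSET map to
 * themselves.
 */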
11235
11236static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
11237				   struct bpf_reg_state *src_reg,
11238				   u8 opcode)
11239{
11240	struct bpf_reg_state *pkt;
11241
11242	if (src_reg->type == PTR_TO_PACKET_END) {
11243		pkt = dst_reg;
11244	} else if (dst_reg->type == PTR_TO_PACKET_END) {
11245		pkt = src_reg;
11246		opcode = flip_opcode(opcode);
11247	} else {
11248		return -1;
11249	}
11250
11251	if (pkt->range >= 0)
11252		return -1;
11253
11254	switch (opcode) {
11255	case BPF_JLE:
11256		/* pkt <= pkt_end */
11257		fallthrough;
11258	case BPF_JGT:
11259		/* pkt > pkt_end */
11260		if (pkt->range == BEYOND_PKT_END)
11261			/* pkt has at least one extra byte beyond pkt_end */
11262			return opcode == BPF_JGT;
11263		break;
11264	case BPF_JLT:
11265		/* pkt < pkt_end */
11266		fallthrough;
11267	case BPF_JGE:
11268		/* pkt >= pkt_end */
11269		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
11270			return opcode == BPF_JGE;
11271		break;
11272	}
11273	return -1;
11274}
11275
11276/* Adjusts the register min/max values in the case that the dst_reg is the
11277 * variable register that we are working on, and src_reg is a constant or we're
11278 * simply doing a BPF_K check.
11279 * In JEQ/JNE cases we also adjust the var_off values.
11280 */
11281static void reg_set_min_max(struct bpf_reg_state *true_reg,
11282			    struct bpf_reg_state *false_reg,
11283			    u64 val, u32 val32,
11284			    u8 opcode, bool is_jmp32)
11285{
11286	struct tnum false_32off = tnum_subreg(false_reg->var_off);
11287	struct tnum false_64off = false_reg->var_off;
11288	struct tnum true_32off = tnum_subreg(true_reg->var_off);
11289	struct tnum true_64off = true_reg->var_off;
11290	s64 sval = (s64)val;
11291	s32 sval32 = (s32)val32;
11292
11293	/* If the dst_reg is a pointer, we can't learn anything about its
11294	 * variable offset from the compare (unless src_reg were a pointer into
11295	 * the same object, but we don't bother with that).
11296	 * Since false_reg and true_reg have the same type by construction, we
11297	 * only need to check one of them for pointerness.
11298	 */
11299	if (__is_pointer_value(false, false_reg))
11300		return;
11301
11302	switch (opcode) {
11303	/* JEQ/JNE comparison doesn't change the register equivalence.
11304	 *
11305	 * r1 = r2;
11306	 * if (r1 == 42) goto label;
11307	 * ...
11308	 * label: // here both r1 and r2 are known to be 42.
11309	 *
11310	 * Hence, when marking a register as known, preserve its ID.
11311	 */
11312	case BPF_JEQ:
11313		if (is_jmp32) {
11314			__mark_reg32_known(true_reg, val32);
11315			true_32off = tnum_subreg(true_reg->var_off);
11316		} else {
11317			___mark_reg_known(true_reg, val);
11318			true_64off = true_reg->var_off;
11319		}
11320		break;
11321	case BPF_JNE:
11322		if (is_jmp32) {
11323			__mark_reg32_known(false_reg, val32);
11324			false_32off = tnum_subreg(false_reg->var_off);
11325		} else {
11326			___mark_reg_known(false_reg, val);
11327			false_64off = false_reg->var_off;
11328		}
11329		break;
11330	case BPF_JSET:
11331		if (is_jmp32) {
11332			false_32off = tnum_and(false_32off, tnum_const(~val32));
11333			if (is_power_of_2(val32))
11334				true_32off = tnum_or(true_32off,
11335						     tnum_const(val32));
11336		} else {
11337			false_64off = tnum_and(false_64off, tnum_const(~val));
11338			if (is_power_of_2(val))
11339				true_64off = tnum_or(true_64off,
11340						     tnum_const(val));
11341		}
11342		break;
11343	case BPF_JGE:
11344	case BPF_JGT:
11345	{
11346		if (is_jmp32) {
11347			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
11348			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
11349
11350			false_reg->u32_max_value = min(false_reg->u32_max_value,
11351						       false_umax);
11352			true_reg->u32_min_value = max(true_reg->u32_min_value,
11353						      true_umin);
11354		} else {
11355			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
11356			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
11357
11358			false_reg->umax_value = min(false_reg->umax_value, false_umax);
11359			true_reg->umin_value = max(true_reg->umin_value, true_umin);
11360		}
11361		break;
11362	}
11363	case BPF_JSGE:
11364	case BPF_JSGT:
11365	{
11366		if (is_jmp32) {
11367			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
11368			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
11369
11370			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
11371			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
11372		} else {
11373			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
11374			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
11375
11376			false_reg->smax_value = min(false_reg->smax_value, false_smax);
11377			true_reg->smin_value = max(true_reg->smin_value, true_smin);
11378		}
11379		break;
11380	}
11381	case BPF_JLE:
11382	case BPF_JLT:
11383	{
11384		if (is_jmp32) {
11385			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
11386			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
11387
11388			false_reg->u32_min_value = max(false_reg->u32_min_value,
11389						       false_umin);
11390			true_reg->u32_max_value = min(true_reg->u32_max_value,
11391						      true_umax);
11392		} else {
11393			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
11394			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
11395
11396			false_reg->umin_value = max(false_reg->umin_value, false_umin);
11397			true_reg->umax_value = min(true_reg->umax_value, true_umax);
11398		}
11399		break;
11400	}
11401	case BPF_JSLE:
11402	case BPF_JSLT:
11403	{
11404		if (is_jmp32) {
11405			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
11406			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
11407
11408			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
11409			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
11410		} else {
11411			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
11412			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
11413
11414			false_reg->smin_value = max(false_reg->smin_value, false_smin);
11415			true_reg->smax_value = min(true_reg->smax_value, true_smax);
11416		}
11417		break;
11418	}
11419	default:
11420		return;
11421	}
11422
11423	if (is_jmp32) {
11424		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
11425					     tnum_subreg(false_32off));
11426		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
11427					    tnum_subreg(true_32off));
11428		__reg_combine_32_into_64(false_reg);
11429		__reg_combine_32_into_64(true_reg);
11430	} else {
11431		false_reg->var_off = false_64off;
11432		true_reg->var_off = true_64off;
11433		__reg_combine_64_into_32(false_reg);
11434		__reg_combine_64_into_32(true_reg);
11435	}
11436}
11437
11438/* Same as above, but for the case that dst_reg holds a constant and src_reg is
11439 * the variable reg.
11440 */
11441static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
11442				struct bpf_reg_state *false_reg,
11443				u64 val, u32 val32,
11444				u8 opcode, bool is_jmp32)
11445{
11446	opcode = flip_opcode(opcode);
11447	/* This uses zero as "not present in table"; luckily the zero opcode,
11448	 * BPF_JA, can't get here.
11449	 */
11450	if (opcode)
11451		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
11452}
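/* For example, if r2 is known to be 10, then for "if r2 < r1 goto ..." the
 * constant sits in dst_reg and r1 in src_reg; flip_opcode() turns BPF_JLT
 * into BPF_JGT so that reg_set_min_max() can refine r1's bounds exactly as
 * if the test had been "if r1 > 10 goto ...".
 */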
11453
11454/* Regs are known to be equal, so intersect their min/max/var_off */
11455static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
11456				  struct bpf_reg_state *dst_reg)
11457{
11458	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
11459							dst_reg->umin_value);
11460	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
11461							dst_reg->umax_value);
11462	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
11463							dst_reg->smin_value);
11464	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
11465							dst_reg->smax_value);
11466	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
11467							     dst_reg->var_off);
11468	reg_bounds_sync(src_reg);
11469	reg_bounds_sync(dst_reg);
11470}
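/* For example, if one of the two equal regs is known to be in [0, 100] and
 * the other in [50, 200], both end up with the range [50, 100], and
 * tnum_intersect() merges their known bits in the same way.
 */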
11471
11472static void reg_combine_min_max(struct bpf_reg_state *true_src,
11473				struct bpf_reg_state *true_dst,
11474				struct bpf_reg_state *false_src,
11475				struct bpf_reg_state *false_dst,
11476				u8 opcode)
11477{
11478	switch (opcode) {
11479	case BPF_JEQ:
11480		__reg_combine_min_max(true_src, true_dst);
11481		break;
11482	case BPF_JNE:
11483		__reg_combine_min_max(false_src, false_dst);
11484		break;
11485	}
11486}
11487
11488static void mark_ptr_or_null_reg(struct bpf_func_state *state,
11489				 struct bpf_reg_state *reg, u32 id,
11490				 bool is_null)
11491{
11492	if (type_may_be_null(reg->type) && reg->id == id &&
11493	    (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
11494		/* Old offset (both fixed and variable parts) should have been
11495		 * known-zero, because we don't allow pointer arithmetic on
11496		 * pointers that might be NULL. If we see this happening, don't
11497		 * convert the register.
11498		 *
11499		 * But some helpers that return local kptrs advance the
11500		 * offset of the returned pointer. In those cases, it is
11501		 * fine to see a nonzero reg->off.
11502		 */
11503		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
11504			return;
11505		if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL) && WARN_ON_ONCE(reg->off))
11506			return;
11507		if (is_null) {
11508			reg->type = SCALAR_VALUE;
11509			/* We don't need id and ref_obj_id from this point
11510			 * onwards anymore, so reset them to give state
11511			 * pruning a chance to take effect.
11512			 */
11513			reg->id = 0;
11514			reg->ref_obj_id = 0;
11515
11516			return;
11517		}
11518
11519		mark_ptr_not_null_reg(reg);
11520
11521		if (!reg_may_point_to_spin_lock(reg)) {
11522			/* For not-NULL ptr, reg->ref_obj_id will be reset
11523			 * in release_reference().
11524			 *
11525			 * reg->id is still used by spin_lock ptr. Other
11526			 * than spin_lock ptr type, reg->id can be reset.
11527			 */
11528			reg->id = 0;
11529		}
11530	}
11531}
11532
11533/* The logic is similar to find_good_pkt_pointers(), both could eventually
11534 * be folded together at some point.
11535 */
11536static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
11537				  bool is_null)
11538{
11539	struct bpf_func_state *state = vstate->frame[vstate->curframe];
11540	struct bpf_reg_state *regs = state->regs, *reg;
11541	u32 ref_obj_id = regs[regno].ref_obj_id;
11542	u32 id = regs[regno].id;
11543
11544	if (ref_obj_id && ref_obj_id == id && is_null)
11545		/* regs[regno] is in the " == NULL" branch.
11546		 * No one could have freed the reference state before
11547		 * doing the NULL check.
11548		 */
11549		WARN_ON_ONCE(release_reference_state(state, id));
11550
11551	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
11552		mark_ptr_or_null_reg(state, reg, id, is_null);
11553	}));
11554}
11555
11556static bool try_match_pkt_pointers(const struct bpf_insn *insn,
11557				   struct bpf_reg_state *dst_reg,
11558				   struct bpf_reg_state *src_reg,
11559				   struct bpf_verifier_state *this_branch,
11560				   struct bpf_verifier_state *other_branch)
11561{
11562	if (BPF_SRC(insn->code) != BPF_X)
11563		return false;
11564
11565	/* Pointers are always 64-bit. */
11566	if (BPF_CLASS(insn->code) == BPF_JMP32)
11567		return false;
11568
11569	switch (BPF_OP(insn->code)) {
11570	case BPF_JGT:
11571		if ((dst_reg->type == PTR_TO_PACKET &&
11572		     src_reg->type == PTR_TO_PACKET_END) ||
11573		    (dst_reg->type == PTR_TO_PACKET_META &&
11574		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11575			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
11576			find_good_pkt_pointers(this_branch, dst_reg,
11577					       dst_reg->type, false);
11578			mark_pkt_end(other_branch, insn->dst_reg, true);
11579		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
11580			    src_reg->type == PTR_TO_PACKET) ||
11581			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11582			    src_reg->type == PTR_TO_PACKET_META)) {
11583			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
11584			find_good_pkt_pointers(other_branch, src_reg,
11585					       src_reg->type, true);
11586			mark_pkt_end(this_branch, insn->src_reg, false);
11587		} else {
11588			return false;
11589		}
11590		break;
11591	case BPF_JLT:
11592		if ((dst_reg->type == PTR_TO_PACKET &&
11593		     src_reg->type == PTR_TO_PACKET_END) ||
11594		    (dst_reg->type == PTR_TO_PACKET_META &&
11595		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11596			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
11597			find_good_pkt_pointers(other_branch, dst_reg,
11598					       dst_reg->type, true);
11599			mark_pkt_end(this_branch, insn->dst_reg, false);
11600		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
11601			    src_reg->type == PTR_TO_PACKET) ||
11602			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11603			    src_reg->type == PTR_TO_PACKET_META)) {
11604			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
11605			find_good_pkt_pointers(this_branch, src_reg,
11606					       src_reg->type, false);
11607			mark_pkt_end(other_branch, insn->src_reg, true);
11608		} else {
11609			return false;
11610		}
11611		break;
11612	case BPF_JGE:
11613		if ((dst_reg->type == PTR_TO_PACKET &&
11614		     src_reg->type == PTR_TO_PACKET_END) ||
11615		    (dst_reg->type == PTR_TO_PACKET_META &&
11616		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11617			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
11618			find_good_pkt_pointers(this_branch, dst_reg,
11619					       dst_reg->type, true);
11620			mark_pkt_end(other_branch, insn->dst_reg, false);
11621		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
11622			    src_reg->type == PTR_TO_PACKET) ||
11623			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11624			    src_reg->type == PTR_TO_PACKET_META)) {
11625			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
11626			find_good_pkt_pointers(other_branch, src_reg,
11627					       src_reg->type, false);
11628			mark_pkt_end(this_branch, insn->src_reg, true);
11629		} else {
11630			return false;
11631		}
11632		break;
11633	case BPF_JLE:
11634		if ((dst_reg->type == PTR_TO_PACKET &&
11635		     src_reg->type == PTR_TO_PACKET_END) ||
11636		    (dst_reg->type == PTR_TO_PACKET_META &&
11637		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
11638			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
11639			find_good_pkt_pointers(other_branch, dst_reg,
11640					       dst_reg->type, false);
11641			mark_pkt_end(this_branch, insn->dst_reg, true);
11642		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
11643			    src_reg->type == PTR_TO_PACKET) ||
11644			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
11645			    src_reg->type == PTR_TO_PACKET_META)) {
11646			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
11647			find_good_pkt_pointers(this_branch, src_reg,
11648					       src_reg->type, true);
11649			mark_pkt_end(other_branch, insn->src_reg, false);
11650		} else {
11651			return false;
11652		}
11653		break;
11654	default:
11655		return false;
11656	}
11657
11658	return true;
11659}
11660
11661static void find_equal_scalars(struct bpf_verifier_state *vstate,
11662			       struct bpf_reg_state *known_reg)
11663{
11664	struct bpf_func_state *state;
11665	struct bpf_reg_state *reg;
11666
11667	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
11668		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
11669			copy_register_state(reg, known_reg);
11670	}));
11671}
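/* For example, after
 *   r2 = r1;
 *   if r1 > 10 goto l;
 * r1 and r2 carry the same id, so the bounds learned for r1 in each branch
 * are copied to r2 as well.
 */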
11672
11673static int check_cond_jmp_op(struct bpf_verifier_env *env,
11674			     struct bpf_insn *insn, int *insn_idx)
11675{
11676	struct bpf_verifier_state *this_branch = env->cur_state;
11677	struct bpf_verifier_state *other_branch;
11678	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
11679	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
11680	struct bpf_reg_state *eq_branch_regs;
11681	u8 opcode = BPF_OP(insn->code);
11682	bool is_jmp32;
11683	int pred = -1;
11684	int err;
11685
11686	/* Only conditional jumps are expected to reach here. */
11687	if (opcode == BPF_JA || opcode > BPF_JSLE) {
11688		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
11689		return -EINVAL;
11690	}
11691
11692	if (BPF_SRC(insn->code) == BPF_X) {
11693		if (insn->imm != 0) {
11694			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
11695			return -EINVAL;
11696		}
11697
11698		/* check src1 operand */
11699		err = check_reg_arg(env, insn->src_reg, SRC_OP);
11700		if (err)
11701			return err;
11702
11703		if (is_pointer_value(env, insn->src_reg)) {
11704			verbose(env, "R%d pointer comparison prohibited\n",
11705				insn->src_reg);
11706			return -EACCES;
11707		}
11708		src_reg = &regs[insn->src_reg];
11709	} else {
11710		if (insn->src_reg != BPF_REG_0) {
11711			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
11712			return -EINVAL;
11713		}
11714	}
11715
11716	/* check src2 operand */
11717	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
11718	if (err)
11719		return err;
11720
11721	dst_reg = &regs[insn->dst_reg];
11722	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
11723
11724	if (BPF_SRC(insn->code) == BPF_K) {
11725		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
11726	} else if (src_reg->type == SCALAR_VALUE &&
11727		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
11728		pred = is_branch_taken(dst_reg,
11729				       tnum_subreg(src_reg->var_off).value,
11730				       opcode,
11731				       is_jmp32);
11732	} else if (src_reg->type == SCALAR_VALUE &&
11733		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
11734		pred = is_branch_taken(dst_reg,
11735				       src_reg->var_off.value,
11736				       opcode,
11737				       is_jmp32);
11738	} else if (reg_is_pkt_pointer_any(dst_reg) &&
11739		   reg_is_pkt_pointer_any(src_reg) &&
11740		   !is_jmp32) {
11741		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
11742	}
11743
11744	if (pred >= 0) {
11745		/* If we get here with a dst_reg pointer type it is because
11746		 * above is_branch_taken() special cased the 0 comparison.
11747		 */
11748		if (!__is_pointer_value(false, dst_reg))
11749			err = mark_chain_precision(env, insn->dst_reg);
11750		if (BPF_SRC(insn->code) == BPF_X && !err &&
11751		    !__is_pointer_value(false, src_reg))
11752			err = mark_chain_precision(env, insn->src_reg);
11753		if (err)
11754			return err;
11755	}
11756
11757	if (pred == 1) {
11758		/* Only follow the goto, ignore fall-through. If needed, push
11759		 * the fall-through branch for simulation under speculative
11760		 * execution.
11761		 */
11762		if (!env->bypass_spec_v1 &&
11763		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
11764					       *insn_idx))
11765			return -EFAULT;
11766		*insn_idx += insn->off;
11767		return 0;
11768	} else if (pred == 0) {
11769		/* Only follow the fall-through branch, since that's where the
11770		 * program will go. If needed, push the goto branch for
11771		 * simulation under speculative execution.
11772		 */
11773		if (!env->bypass_spec_v1 &&
11774		    !sanitize_speculative_path(env, insn,
11775					       *insn_idx + insn->off + 1,
11776					       *insn_idx))
11777			return -EFAULT;
11778		return 0;
11779	}
11780
11781	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
11782				  false);
11783	if (!other_branch)
11784		return -EFAULT;
11785	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
11786
11787	/* detect if we are comparing against a constant value so we can adjust
11788	 * our min/max values for our dst register.
11789	 * this is only legit if both are scalars (or pointers to the same
11790	 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
11791	 * because otherwise the different base pointers mean the offsets aren't
11792	 * comparable.
11793	 */
11794	if (BPF_SRC(insn->code) == BPF_X) {
11795		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
11796
11797		if (dst_reg->type == SCALAR_VALUE &&
11798		    src_reg->type == SCALAR_VALUE) {
11799			if (tnum_is_const(src_reg->var_off) ||
11800			    (is_jmp32 &&
11801			     tnum_is_const(tnum_subreg(src_reg->var_off))))
11802				reg_set_min_max(&other_branch_regs[insn->dst_reg],
11803						dst_reg,
11804						src_reg->var_off.value,
11805						tnum_subreg(src_reg->var_off).value,
11806						opcode, is_jmp32);
11807			else if (tnum_is_const(dst_reg->var_off) ||
11808				 (is_jmp32 &&
11809				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
11810				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
11811						    src_reg,
11812						    dst_reg->var_off.value,
11813						    tnum_subreg(dst_reg->var_off).value,
11814						    opcode, is_jmp32);
11815			else if (!is_jmp32 &&
11816				 (opcode == BPF_JEQ || opcode == BPF_JNE))
11817				/* Comparing for equality, we can combine knowledge */
11818				reg_combine_min_max(&other_branch_regs[insn->src_reg],
11819						    &other_branch_regs[insn->dst_reg],
11820						    src_reg, dst_reg, opcode);
11821			if (src_reg->id &&
11822			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
11823				find_equal_scalars(this_branch, src_reg);
11824				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
11825			}
11826
11827		}
11828	} else if (dst_reg->type == SCALAR_VALUE) {
11829		reg_set_min_max(&other_branch_regs[insn->dst_reg],
11830					dst_reg, insn->imm, (u32)insn->imm,
11831					opcode, is_jmp32);
11832	}
11833
11834	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
11835	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
11836		find_equal_scalars(this_branch, dst_reg);
11837		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
11838	}
11839
11840	/* if one pointer register is compared to another pointer
11841	 * register check if PTR_MAYBE_NULL could be lifted.
11842	 * E.g. register A - maybe null
11843	 *      register B - not null
11844	 * for JNE A, B, ... - A is not null in the false branch;
11845	 * for JEQ A, B, ... - A is not null in the true branch.
11846	 *
11847	 * Since PTR_TO_BTF_ID points to a kernel struct that does
11848	 * not need to be null checked by the BPF program, i.e. it
11849	 * could be null even without the PTR_MAYBE_NULL marking,
11850	 * only propagate nullness when neither reg is of that type.
11851	 */
11852	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
11853	    __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
11854	    type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
11855	    base_type(src_reg->type) != PTR_TO_BTF_ID &&
11856	    base_type(dst_reg->type) != PTR_TO_BTF_ID) {
11857		eq_branch_regs = NULL;
11858		switch (opcode) {
11859		case BPF_JEQ:
11860			eq_branch_regs = other_branch_regs;
11861			break;
11862		case BPF_JNE:
11863			eq_branch_regs = regs;
11864			break;
11865		default:
11866			/* do nothing */
11867			break;
11868		}
11869		if (eq_branch_regs) {
11870			if (type_may_be_null(src_reg->type))
11871				mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
11872			else
11873				mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
11874		}
11875	}
11876
11877	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
11878	 * NOTE: the optimizations below are related to pointer comparisons,
11879	 *       which will never be JMP32.
11880	 */
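	/* The canonical pattern recognized here is:
	 *   r0 = bpf_map_lookup_elem(...);
	 *   if (r0 == 0) goto out;
	 * in the fall-through branch r0 becomes a non-NULL map value
	 * pointer, while in the 'out' branch it is treated as NULL and
	 * can no longer be dereferenced.
	 */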
11881	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
11882	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
11883	    type_may_be_null(dst_reg->type)) {
11884		/* Mark all identical registers in each branch as either
11885		 * safe or unknown depending on the R == 0 or R != 0 conditional.
11886		 */
11887		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
11888				      opcode == BPF_JNE);
11889		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
11890				      opcode == BPF_JEQ);
11891	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
11892					   this_branch, other_branch) &&
11893		   is_pointer_value(env, insn->dst_reg)) {
11894		verbose(env, "R%d pointer comparison prohibited\n",
11895			insn->dst_reg);
11896		return -EACCES;
11897	}
11898	if (env->log.level & BPF_LOG_LEVEL)
11899		print_insn_state(env, this_branch->frame[this_branch->curframe]);
11900	return 0;
11901}
11902
11903/* verify BPF_LD_IMM64 instruction */
11904static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
11905{
11906	struct bpf_insn_aux_data *aux = cur_aux(env);
11907	struct bpf_reg_state *regs = cur_regs(env);
11908	struct bpf_reg_state *dst_reg;
11909	struct bpf_map *map;
11910	int err;
11911
11912	if (BPF_SIZE(insn->code) != BPF_DW) {
11913		verbose(env, "invalid BPF_LD_IMM insn\n");
11914		return -EINVAL;
11915	}
11916	if (insn->off != 0) {
11917		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
11918		return -EINVAL;
11919	}
11920
11921	err = check_reg_arg(env, insn->dst_reg, DST_OP);
11922	if (err)
11923		return err;
11924
11925	dst_reg = &regs[insn->dst_reg];
11926	if (insn->src_reg == 0) {
11927		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
11928
11929		dst_reg->type = SCALAR_VALUE;
11930		__mark_reg_known(&regs[insn->dst_reg], imm);
11931		return 0;
11932	}
11933
11934	/* All special src_reg cases are listed below. From this point onwards
11935	 * we either succeed and assign a corresponding dst_reg->type after
11936	 * zeroing the offset, or fail and reject the program.
11937	 */
11938	mark_reg_known_zero(env, regs, insn->dst_reg);
11939
11940	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
11941		dst_reg->type = aux->btf_var.reg_type;
11942		switch (base_type(dst_reg->type)) {
11943		case PTR_TO_MEM:
11944			dst_reg->mem_size = aux->btf_var.mem_size;
11945			break;
11946		case PTR_TO_BTF_ID:
11947			dst_reg->btf = aux->btf_var.btf;
11948			dst_reg->btf_id = aux->btf_var.btf_id;
11949			break;
11950		default:
11951			verbose(env, "bpf verifier is misconfigured\n");
11952			return -EFAULT;
11953		}
11954		return 0;
11955	}
11956
11957	if (insn->src_reg == BPF_PSEUDO_FUNC) {
11958		struct bpf_prog_aux *aux = env->prog->aux;
11959		u32 subprogno = find_subprog(env,
11960					     env->insn_idx + insn->imm + 1);
11961
11962		if (!aux->func_info) {
11963			verbose(env, "missing btf func_info\n");
11964			return -EINVAL;
11965		}
11966		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
11967			verbose(env, "callback function not static\n");
11968			return -EINVAL;
11969		}
11970
11971		dst_reg->type = PTR_TO_FUNC;
11972		dst_reg->subprogno = subprogno;
11973		return 0;
11974	}
11975
11976	map = env->used_maps[aux->map_index];
11977	dst_reg->map_ptr = map;
11978
11979	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
11980	    insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
11981		dst_reg->type = PTR_TO_MAP_VALUE;
11982		dst_reg->off = aux->map_off;
11983		WARN_ON_ONCE(map->max_entries != 1);
11984		/* We want reg->id to remain the same (0) since the map_value is not distinct */
11985	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
11986		   insn->src_reg == BPF_PSEUDO_MAP_IDX) {
11987		dst_reg->type = CONST_PTR_TO_MAP;
11988	} else {
11989		verbose(env, "bpf verifier is misconfigured\n");
11990		return -EINVAL;
11991	}
11992
11993	return 0;
11994}
11995
11996static bool may_access_skb(enum bpf_prog_type type)
11997{
11998	switch (type) {
11999	case BPF_PROG_TYPE_SOCKET_FILTER:
12000	case BPF_PROG_TYPE_SCHED_CLS:
12001	case BPF_PROG_TYPE_SCHED_ACT:
12002		return true;
12003	default:
12004		return false;
12005	}
12006}
12007
12008/* verify safety of LD_ABS|LD_IND instructions:
12009 * - they can only appear in the programs where ctx == skb
12010 * - since they are wrappers of function calls, they scratch R1-R5 registers,
12011 *   preserve R6-R9, and store return value into R0
12012 *
12013 * Implicit input:
12014 *   ctx == skb == R6 == CTX
12015 *
12016 * Explicit input:
12017 *   SRC == any register
12018 *   IMM == 32-bit immediate
12019 *
12020 * Output:
12021 *   R0 - 8/16/32-bit skb data converted to cpu endianness
12022 */
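/* For example, BPF_LD_ABS(BPF_H, 12) loads the 16-bit value at byte offset
 * 12 of the skb data into R0, converted to host byte order, and clobbers
 * R1-R5 just like a helper call would.
 */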
12023static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
12024{
12025	struct bpf_reg_state *regs = cur_regs(env);
12026	static const int ctx_reg = BPF_REG_6;
12027	u8 mode = BPF_MODE(insn->code);
12028	int i, err;
12029
12030	if (!may_access_skb(resolve_prog_type(env->prog))) {
12031		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
12032		return -EINVAL;
12033	}
12034
12035	if (!env->ops->gen_ld_abs) {
12036		verbose(env, "bpf verifier is misconfigured\n");
12037		return -EINVAL;
12038	}
12039
12040	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
12041	    BPF_SIZE(insn->code) == BPF_DW ||
12042	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
12043		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
12044		return -EINVAL;
12045	}
12046
12047	/* check whether implicit source operand (register R6) is readable */
12048	err = check_reg_arg(env, ctx_reg, SRC_OP);
12049	if (err)
12050		return err;
12051
12052	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
12053	 * gen_ld_abs() may terminate the program at runtime, leading to
12054	 * reference leak.
12055	 */
12056	err = check_reference_leak(env);
12057	if (err) {
12058		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
12059		return err;
12060	}
12061
12062	if (env->cur_state->active_lock.ptr) {
12063		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
12064		return -EINVAL;
12065	}
12066
12067	if (env->cur_state->active_rcu_lock) {
12068		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
12069		return -EINVAL;
12070	}
12071
12072	if (regs[ctx_reg].type != PTR_TO_CTX) {
12073		verbose(env,
12074			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
12075		return -EINVAL;
12076	}
12077
12078	if (mode == BPF_IND) {
12079		/* check explicit source operand */
12080		err = check_reg_arg(env, insn->src_reg, SRC_OP);
12081		if (err)
12082			return err;
12083	}
12084
12085	err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
12086	if (err < 0)
12087		return err;
12088
12089	/* reset caller saved regs to unreadable */
12090	for (i = 0; i < CALLER_SAVED_REGS; i++) {
12091		mark_reg_not_init(env, regs, caller_saved[i]);
12092		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
12093	}
12094
12095	/* mark destination R0 register as readable, since it contains
12096	 * the value fetched from the packet.
12097	 * Already marked as written above.
12098	 */
12099	mark_reg_unknown(env, regs, BPF_REG_0);
12100	/* ld_abs loads up to 32 bits of skb data. */
12101	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
12102	return 0;
12103}
12104
12105static int check_return_code(struct bpf_verifier_env *env)
12106{
12107	struct tnum enforce_attach_type_range = tnum_unknown;
12108	const struct bpf_prog *prog = env->prog;
12109	struct bpf_reg_state *reg;
12110	struct tnum range = tnum_range(0, 1);
12111	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
12112	int err;
12113	struct bpf_func_state *frame = env->cur_state->frame[0];
12114	const bool is_subprog = frame->subprogno;
12115
12116	/* LSM and struct_ops func-ptr's return type could be "void" */
12117	if (!is_subprog) {
12118		switch (prog_type) {
12119		case BPF_PROG_TYPE_LSM:
12120			if (prog->expected_attach_type == BPF_LSM_CGROUP)
12121				/* See below, can be 0 or 0-1 depending on hook. */
12122				break;
12123			fallthrough;
12124		case BPF_PROG_TYPE_STRUCT_OPS:
12125			if (!prog->aux->attach_func_proto->type)
12126				return 0;
12127			break;
12128		default:
12129			break;
12130		}
12131	}
12132
12133	/* The eBPF calling convention is such that R0 is used
12134	 * to return the value from the eBPF program.
12135	 * Make sure that it is readable at this point, i.e. at
12136	 * bpf_exit, which means that the program wrote
12137	 * something into it earlier.
12138	 */
12139	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
12140	if (err)
12141		return err;
12142
12143	if (is_pointer_value(env, BPF_REG_0)) {
12144		verbose(env, "R0 leaks addr as return value\n");
12145		return -EACCES;
12146	}
12147
12148	reg = cur_regs(env) + BPF_REG_0;
12149
12150	if (frame->in_async_callback_fn) {
12151		/* enforce return zero from async callbacks like timer */
12152		if (reg->type != SCALAR_VALUE) {
12153			verbose(env, "In async callback the register R0 is not a known value (%s)\n",
12154				reg_type_str(env, reg->type));
12155			return -EINVAL;
12156		}
12157
12158		if (!tnum_in(tnum_const(0), reg->var_off)) {
12159			verbose_invalid_scalar(env, reg, &range, "async callback", "R0");
12160			return -EINVAL;
12161		}
12162		return 0;
12163	}
12164
12165	if (is_subprog) {
12166		if (reg->type != SCALAR_VALUE) {
12167			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
12168				reg_type_str(env, reg->type));
12169			return -EINVAL;
12170		}
12171		return 0;
12172	}
12173
12174	switch (prog_type) {
12175	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
12176		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
12177		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
12178		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
12179		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
12180		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
12181		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
12182			range = tnum_range(1, 1);
12183		if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
12184		    env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
12185			range = tnum_range(0, 3);
12186		break;
12187	case BPF_PROG_TYPE_CGROUP_SKB:
12188		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
12189			range = tnum_range(0, 3);
12190			enforce_attach_type_range = tnum_range(2, 3);
12191		}
12192		break;
12193	case BPF_PROG_TYPE_CGROUP_SOCK:
12194	case BPF_PROG_TYPE_SOCK_OPS:
12195	case BPF_PROG_TYPE_CGROUP_DEVICE:
12196	case BPF_PROG_TYPE_CGROUP_SYSCTL:
12197	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
12198		break;
12199	case BPF_PROG_TYPE_RAW_TRACEPOINT:
12200		if (!env->prog->aux->attach_btf_id)
12201			return 0;
12202		range = tnum_const(0);
12203		break;
12204	case BPF_PROG_TYPE_TRACING:
12205		switch (env->prog->expected_attach_type) {
12206		case BPF_TRACE_FENTRY:
12207		case BPF_TRACE_FEXIT:
12208			range = tnum_const(0);
12209			break;
12210		case BPF_TRACE_RAW_TP:
12211		case BPF_MODIFY_RETURN:
12212			return 0;
12213		case BPF_TRACE_ITER:
12214			break;
12215		default:
12216			return -ENOTSUPP;
12217		}
12218		break;
12219	case BPF_PROG_TYPE_SK_LOOKUP:
12220		range = tnum_range(SK_DROP, SK_PASS);
12221		break;
12222
12223	case BPF_PROG_TYPE_LSM:
12224		if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
12225			/* Regular BPF_PROG_TYPE_LSM programs can return
12226			 * any value.
12227			 */
12228			return 0;
12229		}
12230		if (!env->prog->aux->attach_func_proto->type) {
12231			/* Make sure programs that attach to void
12232			 * hooks don't try to modify return value.
12233			 */
12234			range = tnum_range(1, 1);
12235		}
12236		break;
12237
12238	case BPF_PROG_TYPE_EXT:
12239		/* freplace program can return anything as its return value
12240		 * depends on the to-be-replaced kernel func or bpf program.
12241		 */
12242	default:
12243		return 0;
12244	}
12245
12246	if (reg->type != SCALAR_VALUE) {
12247		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
12248			reg_type_str(env, reg->type));
12249		return -EINVAL;
12250	}
12251
12252	if (!tnum_in(range, reg->var_off)) {
12253		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
12254		if (prog->expected_attach_type == BPF_LSM_CGROUP &&
12255		    prog_type == BPF_PROG_TYPE_LSM &&
12256		    !prog->aux->attach_func_proto->type)
12257			verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
12258		return -EINVAL;
12259	}
12260
12261	if (!tnum_is_unknown(enforce_attach_type_range) &&
12262	    tnum_in(enforce_attach_type_range, reg->var_off))
12263		env->prog->enforce_expected_attach_type = 1;
12264	return 0;
12265}
12266
12267/* non-recursive DFS pseudo code
12268 * 1  procedure DFS-iterative(G,v):
12269 * 2      label v as discovered
12270 * 3      let S be a stack
12271 * 4      S.push(v)
12272 * 5      while S is not empty
12273 * 6            t <- S.peek()
12274 * 7            if t is what we're looking for:
12275 * 8                return t
12276 * 9            for all edges e in G.adjacentEdges(t) do
12277 * 10               if edge e is already labelled
12278 * 11                   continue with the next edge
12279 * 12               w <- G.adjacentVertex(t,e)
12280 * 13               if vertex w is not discovered and not explored
12281 * 14                   label e as tree-edge
12282 * 15                   label w as discovered
12283 * 16                   S.push(w)
12284 * 17                   continue at 5
12285 * 18               else if vertex w is discovered
12286 * 19                   label e as back-edge
12287 * 20               else
12288 * 21                   // vertex w is explored
12289 * 22                   label e as forward- or cross-edge
12290 * 23           label t as explored
12291 * 24           S.pop()
12292 *
12293 * convention:
12294 * 0x10 - discovered
12295 * 0x11 - discovered and fall-through edge labelled
12296 * 0x12 - discovered and fall-through and branch edges labelled
12297 * 0x20 - explored
12298 */
12299
12300enum {
12301	DISCOVERED = 0x10,
12302	EXPLORED = 0x20,
12303	FALLTHROUGH = 1,
12304	BRANCH = 2,
12305};
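/* For example, once the fall-through edge of an insn has been followed its
 * state becomes DISCOVERED | FALLTHROUGH == 0x11, and once the branch edge
 * is labelled as well it becomes 0x12, matching the convention above.
 */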
12306
12307static u32 state_htab_size(struct bpf_verifier_env *env)
12308{
12309	return env->prog->len;
12310}
12311
12312static struct bpf_verifier_state_list **explored_state(
12313					struct bpf_verifier_env *env,
12314					int idx)
12315{
12316	struct bpf_verifier_state *cur = env->cur_state;
12317	struct bpf_func_state *state = cur->frame[cur->curframe];
12318
12319	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
12320}
12321
12322static void mark_prune_point(struct bpf_verifier_env *env, int idx)
12323{
12324	env->insn_aux_data[idx].prune_point = true;
12325}
12326
12327static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
12328{
12329	return env->insn_aux_data[insn_idx].prune_point;
12330}
12331
12332enum {
12333	DONE_EXPLORING = 0,
12334	KEEP_EXPLORING = 1,
12335};
12336
12337/* t, w, e - match pseudo-code above:
12338 * t - index of current instruction
12339 * w - next instruction
12340 * e - edge
12341 */
12342static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
12343		     bool loop_ok)
12344{
12345	int *insn_stack = env->cfg.insn_stack;
12346	int *insn_state = env->cfg.insn_state;
12347
12348	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
12349		return DONE_EXPLORING;
12350
12351	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
12352		return DONE_EXPLORING;
12353
12354	if (w < 0 || w >= env->prog->len) {
12355		verbose_linfo(env, t, "%d: ", t);
12356		verbose(env, "jump out of range from insn %d to %d\n", t, w);
12357		return -EINVAL;
12358	}
12359
12360	if (e == BRANCH) {
12361		/* mark branch target for state pruning */
12362		mark_prune_point(env, w);
12363		mark_jmp_point(env, w);
12364	}
12365
12366	if (insn_state[w] == 0) {
12367		/* tree-edge */
12368		insn_state[t] = DISCOVERED | e;
12369		insn_state[w] = DISCOVERED;
12370		if (env->cfg.cur_stack >= env->prog->len)
12371			return -E2BIG;
12372		insn_stack[env->cfg.cur_stack++] = w;
12373		return KEEP_EXPLORING;
12374	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
12375		if (loop_ok && env->bpf_capable)
12376			return DONE_EXPLORING;
12377		verbose_linfo(env, t, "%d: ", t);
12378		verbose_linfo(env, w, "%d: ", w);
12379		verbose(env, "back-edge from insn %d to %d\n", t, w);
12380		return -EINVAL;
12381	} else if (insn_state[w] == EXPLORED) {
12382		/* forward- or cross-edge */
12383		insn_state[t] = DISCOVERED | e;
12384	} else {
12385		verbose(env, "insn state internal bug\n");
12386		return -EFAULT;
12387	}
12388	return DONE_EXPLORING;
12389}
12390
12391static int visit_func_call_insn(int t, struct bpf_insn *insns,
12392				struct bpf_verifier_env *env,
12393				bool visit_callee)
12394{
12395	int ret;
12396
12397	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
12398	if (ret)
12399		return ret;
12400
12401	mark_prune_point(env, t + 1);
12402	/* when we exit from subprog, we need to record non-linear history */
12403	mark_jmp_point(env, t + 1);
12404
12405	if (visit_callee) {
12406		mark_prune_point(env, t);
12407		ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env,
12408				/* It's ok to allow recursion from CFG point of
12409				 * view. __check_func_call() will do the actual
12410				 * check.
12411				 */
12412				bpf_pseudo_func(insns + t));
12413	}
12414	return ret;
12415}
12416
12417/* Visits the instruction at index t and returns one of the following:
12418 *  < 0 - an error occurred
12419 *  DONE_EXPLORING - the instruction was fully explored
12420 *  KEEP_EXPLORING - there is still work to be done before it is fully explored
12421 */
12422static int visit_insn(int t, struct bpf_verifier_env *env)
12423{
12424	struct bpf_insn *insns = env->prog->insnsi;
12425	int ret;
12426
12427	if (bpf_pseudo_func(insns + t))
12428		return visit_func_call_insn(t, insns, env, true);
12429
12430	/* All non-branch instructions have a single fall-through edge. */
12431	if (BPF_CLASS(insns[t].code) != BPF_JMP &&
12432	    BPF_CLASS(insns[t].code) != BPF_JMP32)
12433		return push_insn(t, t + 1, FALLTHROUGH, env, false);
12434
12435	switch (BPF_OP(insns[t].code)) {
12436	case BPF_EXIT:
12437		return DONE_EXPLORING;
12438
12439	case BPF_CALL:
12440		if (insns[t].imm == BPF_FUNC_timer_set_callback)
12441			/* Mark this call insn as a prune point to trigger
12442			 * is_state_visited() check before call itself is
12443			 * processed by __check_func_call(). Otherwise new
12444			 * async state will be pushed for further exploration.
12445			 */
12446			mark_prune_point(env, t);
12447		return visit_func_call_insn(t, insns, env,
12448					    insns[t].src_reg == BPF_PSEUDO_CALL);
12449
12450	case BPF_JA:
12451		if (BPF_SRC(insns[t].code) != BPF_K)
12452			return -EINVAL;
12453
12454		/* unconditional jump with single edge */
12455		ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env,
12456				true);
12457		if (ret)
12458			return ret;
12459
12460		mark_prune_point(env, t + insns[t].off + 1);
12461		mark_jmp_point(env, t + insns[t].off + 1);
12462
12463		return ret;
12464
12465	default:
12466		/* conditional jump with two edges */
12467		mark_prune_point(env, t);
12468
12469		ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
12470		if (ret)
12471			return ret;
12472
12473		return push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
12474	}
12475}
12476
12477/* non-recursive depth-first-search to detect loops in BPF program
12478 * loop == back-edge in directed graph
12479 */
12480static int check_cfg(struct bpf_verifier_env *env)
12481{
12482	int insn_cnt = env->prog->len;
12483	int *insn_stack, *insn_state;
12484	int ret = 0;
12485	int i;
12486
12487	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
12488	if (!insn_state)
12489		return -ENOMEM;
12490
12491	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
12492	if (!insn_stack) {
12493		kvfree(insn_state);
12494		return -ENOMEM;
12495	}
12496
12497	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
12498	insn_stack[0] = 0; /* 0 is the first instruction */
12499	env->cfg.cur_stack = 1;
12500
12501	while (env->cfg.cur_stack > 0) {
12502		int t = insn_stack[env->cfg.cur_stack - 1];
12503
12504		ret = visit_insn(t, env);
12505		switch (ret) {
12506		case DONE_EXPLORING:
12507			insn_state[t] = EXPLORED;
12508			env->cfg.cur_stack--;
12509			break;
12510		case KEEP_EXPLORING:
12511			break;
12512		default:
12513			if (ret > 0) {
12514				verbose(env, "visit_insn internal bug\n");
12515				ret = -EFAULT;
12516			}
12517			goto err_free;
12518		}
12519	}
12520
12521	if (env->cfg.cur_stack < 0) {
12522		verbose(env, "pop stack internal bug\n");
12523		ret = -EFAULT;
12524		goto err_free;
12525	}
12526
12527	for (i = 0; i < insn_cnt; i++) {
12528		if (insn_state[i] != EXPLORED) {
12529			verbose(env, "unreachable insn %d\n", i);
12530			ret = -EINVAL;
12531			goto err_free;
12532		}
12533	}
12534	ret = 0; /* cfg looks good */
12535
12536err_free:
12537	kvfree(insn_state);
12538	kvfree(insn_stack);
12539	env->cfg.insn_state = env->cfg.insn_stack = NULL;
12540	return ret;
12541}
12542
12543static int check_abnormal_return(struct bpf_verifier_env *env)
12544{
12545	int i;
12546
12547	for (i = 1; i < env->subprog_cnt; i++) {
12548		if (env->subprog_info[i].has_ld_abs) {
12549			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
12550			return -EINVAL;
12551		}
12552		if (env->subprog_info[i].has_tail_call) {
12553			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
12554			return -EINVAL;
12555		}
12556	}
12557	return 0;
12558}
12559
12560/* The minimum supported BTF func info size */
12561#define MIN_BPF_FUNCINFO_SIZE	8
12562#define MAX_FUNCINFO_REC_SIZE	252
12563
12564static int check_btf_func(struct bpf_verifier_env *env,
12565			  const union bpf_attr *attr,
12566			  bpfptr_t uattr)
12567{
12568	const struct btf_type *type, *func_proto, *ret_type;
12569	u32 i, nfuncs, urec_size, min_size;
12570	u32 krec_size = sizeof(struct bpf_func_info);
12571	struct bpf_func_info *krecord;
12572	struct bpf_func_info_aux *info_aux = NULL;
12573	struct bpf_prog *prog;
12574	const struct btf *btf;
12575	bpfptr_t urecord;
12576	u32 prev_offset = 0;
12577	bool scalar_return;
12578	int ret = -ENOMEM;
12579
12580	nfuncs = attr->func_info_cnt;
12581	if (!nfuncs) {
12582		if (check_abnormal_return(env))
12583			return -EINVAL;
12584		return 0;
12585	}
12586
12587	if (nfuncs != env->subprog_cnt) {
12588		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
12589		return -EINVAL;
12590	}
12591
12592	urec_size = attr->func_info_rec_size;
12593	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
12594	    urec_size > MAX_FUNCINFO_REC_SIZE ||
12595	    urec_size % sizeof(u32)) {
12596		verbose(env, "invalid func info rec size %u\n", urec_size);
12597		return -EINVAL;
12598	}
12599
12600	prog = env->prog;
12601	btf = prog->aux->btf;
12602
12603	urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
12604	min_size = min_t(u32, krec_size, urec_size);
12605
12606	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
12607	if (!krecord)
12608		return -ENOMEM;
12609	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
12610	if (!info_aux)
12611		goto err_free;
12612
12613	for (i = 0; i < nfuncs; i++) {
12614		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
12615		if (ret) {
12616			if (ret == -E2BIG) {
12617				verbose(env, "nonzero trailing record in func info");
12618				/* set the size kernel expects so loader can zero
12619				 * out the rest of the record.
12620				 */
12621				if (copy_to_bpfptr_offset(uattr,
12622							  offsetof(union bpf_attr, func_info_rec_size),
12623							  &min_size, sizeof(min_size)))
12624					ret = -EFAULT;
12625			}
12626			goto err_free;
12627		}
12628
12629		if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
12630			ret = -EFAULT;
12631			goto err_free;
12632		}
12633
12634		/* check insn_off */
12635		ret = -EINVAL;
12636		if (i == 0) {
12637			if (krecord[i].insn_off) {
12638				verbose(env,
12639					"nonzero insn_off %u for the first func info record",
12640					krecord[i].insn_off);
12641				goto err_free;
12642			}
12643		} else if (krecord[i].insn_off <= prev_offset) {
12644			verbose(env,
12645				"same or smaller insn offset (%u) than previous func info record (%u)",
12646				krecord[i].insn_off, prev_offset);
12647			goto err_free;
12648		}
12649
12650		if (env->subprog_info[i].start != krecord[i].insn_off) {
12651			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
12652			goto err_free;
12653		}
12654
12655		/* check type_id */
12656		type = btf_type_by_id(btf, krecord[i].type_id);
12657		if (!type || !btf_type_is_func(type)) {
12658			verbose(env, "invalid type id %d in func info",
12659				krecord[i].type_id);
12660			goto err_free;
12661		}
12662		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
12663
12664		func_proto = btf_type_by_id(btf, type->type);
12665		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
12666			/* btf_func_check() already verified it during BTF load */
12667			goto err_free;
12668		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
12669		scalar_return =
12670			btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
12671		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
12672			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
12673			goto err_free;
12674		}
12675		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
12676			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
12677			goto err_free;
12678		}
12679
12680		prev_offset = krecord[i].insn_off;
12681		bpfptr_add(&urecord, urec_size);
12682	}
12683
12684	prog->aux->func_info = krecord;
12685	prog->aux->func_info_cnt = nfuncs;
12686	prog->aux->func_info_aux = info_aux;
12687	return 0;
12688
12689err_free:
12690	kvfree(krecord);
12691	kfree(info_aux);
12692	return ret;
12693}
12694
12695static void adjust_btf_func(struct bpf_verifier_env *env)
12696{
12697	struct bpf_prog_aux *aux = env->prog->aux;
12698	int i;
12699
12700	if (!aux->func_info)
12701		return;
12702
12703	for (i = 0; i < env->subprog_cnt; i++)
12704		aux->func_info[i].insn_off = env->subprog_info[i].start;
12705}
12706
12707#define MIN_BPF_LINEINFO_SIZE	offsetofend(struct bpf_line_info, line_col)
12708#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
12709
12710static int check_btf_line(struct bpf_verifier_env *env,
12711			  const union bpf_attr *attr,
12712			  bpfptr_t uattr)
12713{
12714	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
12715	struct bpf_subprog_info *sub;
12716	struct bpf_line_info *linfo;
12717	struct bpf_prog *prog;
12718	const struct btf *btf;
12719	bpfptr_t ulinfo;
12720	int err;
12721
12722	nr_linfo = attr->line_info_cnt;
12723	if (!nr_linfo)
12724		return 0;
12725	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
12726		return -EINVAL;
12727
12728	rec_size = attr->line_info_rec_size;
12729	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
12730	    rec_size > MAX_LINEINFO_REC_SIZE ||
12731	    rec_size & (sizeof(u32) - 1))
12732		return -EINVAL;
12733
12734	/* Need to zero it in case userspace passes in a smaller
12735	 * bpf_line_info object.
12736	 */
12737	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
12738			 GFP_KERNEL | __GFP_NOWARN);
12739	if (!linfo)
12740		return -ENOMEM;
12741
12742	prog = env->prog;
12743	btf = prog->aux->btf;
12744
12745	s = 0;
12746	sub = env->subprog_info;
12747	ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
12748	expected_size = sizeof(struct bpf_line_info);
12749	ncopy = min_t(u32, expected_size, rec_size);
12750	for (i = 0; i < nr_linfo; i++) {
12751		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
12752		if (err) {
12753			if (err == -E2BIG) {
12754				verbose(env, "nonzero trailing record in line_info");
12755				if (copy_to_bpfptr_offset(uattr,
12756							  offsetof(union bpf_attr, line_info_rec_size),
12757							  &expected_size, sizeof(expected_size)))
12758					err = -EFAULT;
12759			}
12760			goto err_free;
12761		}
12762
12763		if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
12764			err = -EFAULT;
12765			goto err_free;
12766		}
12767
12768		/*
12769		 * Check insn_off to ensure
12770		 * 1) strictly increasing AND
12771		 * 2) bounded by prog->len
12772		 *
12773		 * The linfo[0].insn_off == 0 check logically falls into
12774		 * the later "missing bpf_line_info for func..." case
12775		 * because the first linfo[0].insn_off must belong to the
12776		 * first subprog as well, and the first subprog must have
12777		 * subprog_info[0].start == 0.
12778		 */
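		/* E.g. insn_off values 0, 4, 17 are acceptable for a
		 * three-record line_info on a program with more than 17
		 * insns, while 0, 4, 4 or an insn_off >= prog->len is
		 * rejected below.
		 */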
12779		if ((i && linfo[i].insn_off <= prev_offset) ||
12780		    linfo[i].insn_off >= prog->len) {
12781			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
12782				i, linfo[i].insn_off, prev_offset,
12783				prog->len);
12784			err = -EINVAL;
12785			goto err_free;
12786		}
12787
12788		if (!prog->insnsi[linfo[i].insn_off].code) {
12789			verbose(env,
12790				"Invalid insn code at line_info[%u].insn_off\n",
12791				i);
12792			err = -EINVAL;
12793			goto err_free;
12794		}
12795
12796		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
12797		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
12798			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
12799			err = -EINVAL;
12800			goto err_free;
12801		}
12802
12803		if (s != env->subprog_cnt) {
12804			if (linfo[i].insn_off == sub[s].start) {
12805				sub[s].linfo_idx = i;
12806				s++;
12807			} else if (sub[s].start < linfo[i].insn_off) {
12808				verbose(env, "missing bpf_line_info for func#%u\n", s);
12809				err = -EINVAL;
12810				goto err_free;
12811			}
12812		}
12813
12814		prev_offset = linfo[i].insn_off;
12815		bpfptr_add(&ulinfo, rec_size);
12816	}
12817
12818	if (s != env->subprog_cnt) {
12819		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
12820			env->subprog_cnt - s, s);
12821		err = -EINVAL;
12822		goto err_free;
12823	}
12824
12825	prog->aux->linfo = linfo;
12826	prog->aux->nr_linfo = nr_linfo;
12827
12828	return 0;
12829
12830err_free:
12831	kvfree(linfo);
12832	return err;
12833}
12834
12835#define MIN_CORE_RELO_SIZE	sizeof(struct bpf_core_relo)
12836#define MAX_CORE_RELO_SIZE	MAX_FUNCINFO_REC_SIZE
12837
12838static int check_core_relo(struct bpf_verifier_env *env,
12839			   const union bpf_attr *attr,
12840			   bpfptr_t uattr)
12841{
12842	u32 i, nr_core_relo, ncopy, expected_size, rec_size;
12843	struct bpf_core_relo core_relo = {};
12844	struct bpf_prog *prog = env->prog;
12845	const struct btf *btf = prog->aux->btf;
12846	struct bpf_core_ctx ctx = {
12847		.log = &env->log,
12848		.btf = btf,
12849	};
12850	bpfptr_t u_core_relo;
12851	int err;
12852
12853	nr_core_relo = attr->core_relo_cnt;
12854	if (!nr_core_relo)
12855		return 0;
12856	if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
12857		return -EINVAL;
12858
12859	rec_size = attr->core_relo_rec_size;
12860	if (rec_size < MIN_CORE_RELO_SIZE ||
12861	    rec_size > MAX_CORE_RELO_SIZE ||
12862	    rec_size % sizeof(u32))
12863		return -EINVAL;
12864
12865	u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
12866	expected_size = sizeof(struct bpf_core_relo);
12867	ncopy = min_t(u32, expected_size, rec_size);
12868
12869	/* Unlike func_info and line_info, copy and apply each CO-RE
12870	 * relocation record one at a time.
12871	 */
12872	for (i = 0; i < nr_core_relo; i++) {
12873		/* future proofing when sizeof(bpf_core_relo) changes */
12874		err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
12875		if (err) {
12876			if (err == -E2BIG) {
12877				verbose(env, "nonzero trailing record in core_relo");
12878				if (copy_to_bpfptr_offset(uattr,
12879							  offsetof(union bpf_attr, core_relo_rec_size),
12880							  &expected_size, sizeof(expected_size)))
12881					err = -EFAULT;
12882			}
12883			break;
12884		}
12885
12886		if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
12887			err = -EFAULT;
12888			break;
12889		}
12890
12891		if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
12892			verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
12893				i, core_relo.insn_off, prog->len);
12894			err = -EINVAL;
12895			break;
12896		}
12897
12898		err = bpf_core_apply(&ctx, &core_relo, i,
12899				     &prog->insnsi[core_relo.insn_off / 8]);
12900		if (err)
12901			break;
12902		bpfptr_add(&u_core_relo, rec_size);
12903	}
12904	return err;
12905}
12906
12907static int check_btf_info(struct bpf_verifier_env *env,
12908			  const union bpf_attr *attr,
12909			  bpfptr_t uattr)
12910{
12911	struct btf *btf;
12912	int err;
12913
12914	if (!attr->func_info_cnt && !attr->line_info_cnt) {
12915		if (check_abnormal_return(env))
12916			return -EINVAL;
12917		return 0;
12918	}
12919
12920	btf = btf_get_by_fd(attr->prog_btf_fd);
12921	if (IS_ERR(btf))
12922		return PTR_ERR(btf);
12923	if (btf_is_kernel(btf)) {
12924		btf_put(btf);
12925		return -EACCES;
12926	}
12927	env->prog->aux->btf = btf;
12928
12929	err = check_btf_func(env, attr, uattr);
12930	if (err)
12931		return err;
12932
12933	err = check_btf_line(env, attr, uattr);
12934	if (err)
12935		return err;
12936
12937	err = check_core_relo(env, attr, uattr);
12938	if (err)
12939		return err;
12940
12941	return 0;
12942}
12943
12944/* check %cur's range satisfies %old's */
12945static bool range_within(struct bpf_reg_state *old,
12946			 struct bpf_reg_state *cur)
12947{
12948	return old->umin_value <= cur->umin_value &&
12949	       old->umax_value >= cur->umax_value &&
12950	       old->smin_value <= cur->smin_value &&
12951	       old->smax_value >= cur->smax_value &&
12952	       old->u32_min_value <= cur->u32_min_value &&
12953	       old->u32_max_value >= cur->u32_max_value &&
12954	       old->s32_min_value <= cur->s32_min_value &&
12955	       old->s32_max_value >= cur->s32_max_value;
12956}
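/* E.g. an old reg known to be in [0, 100] is satisfied by a new reg known to
 * be in [10, 50], since every value the new reg can take was already
 * accounted for, but not by one in [10, 200].
 */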
12957
12958/* If in the old state two registers had the same id, then they need to have
12959 * the same id in the new state as well.  But that id could be different from
12960 * the old state, so we need to track the mapping from old to new ids.
12961 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
12962 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
12963 * regs with a different old id could still have new id 9, we don't care about
12964 * that.
12965 * So we look through our idmap to see if this old id has been seen before.  If
12966 * so, we require the new id to match; otherwise, we add the id pair to the map.
12967 */
12968static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
12969{
12970	unsigned int i;
12971
12972	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
12973		if (!idmap[i].old) {
12974			/* Reached an empty slot; haven't seen this id before */
12975			idmap[i].old = old_id;
12976			idmap[i].cur = cur_id;
12977			return true;
12978		}
12979		if (idmap[i].old == old_id)
12980			return idmap[i].cur == cur_id;
12981	}
12982	/* We ran out of idmap slots, which should be impossible */
12983	WARN_ON_ONCE(1);
12984	return false;
12985}
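/* E.g. if the old state had r1 and r2 sharing id 5 and the walk of the new
 * state first sees r1 with id 9, the pair (5, 9) is recorded; r2 must then
 * also have id 9 in the new state for the comparison to succeed.
 */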
12986
12987static void clean_func_state(struct bpf_verifier_env *env,
12988			     struct bpf_func_state *st)
12989{
12990	enum bpf_reg_liveness live;
12991	int i, j;
12992
12993	for (i = 0; i < BPF_REG_FP; i++) {
12994		live = st->regs[i].live;
12995		/* liveness must not touch this register anymore */
12996		st->regs[i].live |= REG_LIVE_DONE;
12997		if (!(live & REG_LIVE_READ))
12998			/* since the register is unused, clear its state
12999			 * to make further comparison simpler
13000			 */
13001			__mark_reg_not_init(env, &st->regs[i]);
13002	}
13003
13004	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
13005		live = st->stack[i].spilled_ptr.live;
13006		/* liveness must not touch this stack slot anymore */
13007		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
13008		if (!(live & REG_LIVE_READ)) {
13009			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
13010			for (j = 0; j < BPF_REG_SIZE; j++)
13011				st->stack[i].slot_type[j] = STACK_INVALID;
13012		}
13013	}
13014}
13015
13016static void clean_verifier_state(struct bpf_verifier_env *env,
13017				 struct bpf_verifier_state *st)
13018{
13019	int i;
13020
13021	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
13022		/* all regs in this state in all frames were already marked */
13023		return;
13024
13025	for (i = 0; i <= st->curframe; i++)
13026		clean_func_state(env, st->frame[i]);
13027}
13028
13029/* the parentage chains form a tree.
13030 * the verifier states are added to state lists at given insn and
13031 * pushed into state stack for future exploration.
13032 * when the verifier reaches bpf_exit insn some of the verifer states
13033 * stored in the state lists have their final liveness state already,
13034 * but a lot of states will get revised from liveness point of view when
13035 * the verifier explores other branches.
13036 * Example:
13037 * 1: r0 = 1
13038 * 2: if r1 == 100 goto pc+1
13039 * 3: r0 = 2
13040 * 4: exit
13041 * when the verifier reaches exit insn the register r0 in the state list of
13042 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
13043 * of insn 2 and goes exploring further. At the insn 4 it will walk the
13044 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
13045 *
13046 * Since the verifier pushes the branch states as it sees them while exploring
13047 * the program, walking the branch instruction for the second
13048 * time means that all states below this branch were already explored and
13049 * their final liveness marks are already propagated.
13050 * Hence when the verifier completes the search of state list in is_state_visited()
13051 * we can call this clean_live_states() function to mark all liveness states
13052 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
13053 * will not be used.
13054 * This function also clears the registers and stack slots that were not
13055 * read (!REG_LIVE_READ) to simplify state merging.
13056 *
13057 * An important note here is that walking the same branch instruction in the
13058 * callee doesn't mean that the states are DONE. The verifier has to compare
13059 * the callsites as well.
13060 */
13061static void clean_live_states(struct bpf_verifier_env *env, int insn,
13062			      struct bpf_verifier_state *cur)
13063{
13064	struct bpf_verifier_state_list *sl;
13065	int i;
13066
13067	sl = *explored_state(env, insn);
13068	while (sl) {
13069		if (sl->state.branches)
13070			goto next;
13071		if (sl->state.insn_idx != insn ||
13072		    sl->state.curframe != cur->curframe)
13073			goto next;
13074		for (i = 0; i <= cur->curframe; i++)
13075			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
13076				goto next;
13077		clean_verifier_state(env, &sl->state);
13078next:
13079		sl = sl->next;
13080	}
13081}
13082
13083/* Returns true if (rold safe implies rcur safe) */
13084static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
13085		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
13086{
13087	bool equal;
13088
13089	if (!(rold->live & REG_LIVE_READ))
13090		/* explored state didn't use this */
13091		return true;
13092
13093	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
13094
13095	if (rold->type == NOT_INIT)
13096		/* explored state can't have used this */
13097		return true;
13098	if (rcur->type == NOT_INIT)
13099		return false;
13100	switch (base_type(rold->type)) {
13101	case SCALAR_VALUE:
13102		if (equal)
13103			return true;
13104		if (env->explore_alu_limits)
13105			return false;
13106		if (rcur->type == SCALAR_VALUE) {
13107			if (!rold->precise)
13108				return true;
13109			/* new val must satisfy old val knowledge */
13110			return range_within(rold, rcur) &&
13111			       tnum_in(rold->var_off, rcur->var_off);
13112		} else {
13113			/* We're trying to use a pointer in place of a scalar.
13114			 * Even if the scalar was unbounded, this could lead to
13115			 * pointer leaks because scalars are allowed to leak
13116			 * while pointers are not. We could make this safe in
13117			 * special cases if root is calling us, but it's
13118			 * probably not worth the hassle.
13119			 */
13120			return false;
13121		}
13122	case PTR_TO_MAP_KEY:
13123	case PTR_TO_MAP_VALUE:
13124		/* a PTR_TO_MAP_VALUE could be safe to use as a
13125		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
13126		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
13127		 * checked, doing so could have affected others with the same
13128		 * id, and we can't check for that because we lost the id when
13129		 * we converted to a PTR_TO_MAP_VALUE.
13130		 */
13131		if (type_may_be_null(rold->type)) {
13132			if (!type_may_be_null(rcur->type))
13133				return false;
13134			if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
13135				return false;
13136			/* Check our ids match any regs they're supposed to */
13137			return check_ids(rold->id, rcur->id, idmap);
13138		}
13139
13140		/* If the new min/max/var_off satisfy the old ones and
13141		 * everything else matches, we are OK.
13142		 * 'id' is not compared, since it's only used for maps with
13143		 * bpf_spin_lock inside map element and in such cases if
13144		 * the rest of the prog is valid for one map element then
13145		 * it's valid for all map elements regardless of the key
13146		 * used in bpf_map_lookup()
13147		 */
13148		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
13149		       range_within(rold, rcur) &&
13150		       tnum_in(rold->var_off, rcur->var_off) &&
13151		       check_ids(rold->id, rcur->id, idmap);
13152	case PTR_TO_PACKET_META:
13153	case PTR_TO_PACKET:
13154		if (rcur->type != rold->type)
13155			return false;
13156		/* We must have at least as much range as the old ptr
13157		 * did, so that any accesses which were safe before are
13158		 * still safe.  This is true even if old range < old off,
13159		 * since someone could have accessed through (ptr - k), or
13160		 * even done ptr -= k in a register, to get a safe access.
13161		 */
13162		if (rold->range > rcur->range)
13163			return false;
13164		/* If the offsets don't match, we can't trust our alignment;
13165		 * nor can we be sure that we won't fall out of range.
13166		 */
13167		if (rold->off != rcur->off)
13168			return false;
13169		/* id relations must be preserved */
13170		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
13171			return false;
13172		/* new val must satisfy old val knowledge */
13173		return range_within(rold, rcur) &&
13174		       tnum_in(rold->var_off, rcur->var_off);
13175	case PTR_TO_STACK:
13176		/* two stack pointers are equal only if they're pointing to
13177		 * the same stack frame, since fp-8 in foo != fp-8 in bar
13178		 */
13179		return equal && rold->frameno == rcur->frameno;
13180	default:
13181		/* Only valid matches are exact, which is what memcmp() checked above */
13182		return equal;
13183	}
13184
13185	/* Shouldn't get here; if we do, say it's not safe */
13186	WARN_ON_ONCE(1);
13187	return false;
13188}
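
/* Editorial illustration (not part of the original source): the scalar case
 * above accepts rcur when its value range is contained in rold's.  E.g. if
 * the explored state had a precise rold with umin=0, umax=255,
 * var_off=(0x0; 0xff) and the current state has rcur with umin=16, umax=31,
 * var_off=(0x10; 0xf), then range_within() and tnum_in() both hold and the
 * state is prunable; the reverse direction (rcur wider than rold) would be
 * rejected.
 */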
13189
13190static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
13191		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
13192{
13193	int i, spi;
13194
13195	/* walk slots of the explored stack and ignore any additional
13196	 * slots in the current stack, since explored(safe) state
13197	 * didn't use them
13198	 */
13199	for (i = 0; i < old->allocated_stack; i++) {
13200		spi = i / BPF_REG_SIZE;
13201
13202		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
13203			i += BPF_REG_SIZE - 1;
13204			/* explored state didn't use this */
13205			continue;
13206		}
13207
13208		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
13209			continue;
13210
13211		/* explored stack has more populated slots than current stack
13212		 * and these slots were used
13213		 */
13214		if (i >= cur->allocated_stack)
13215			return false;
13216
13217		/* if old state was safe with misc data in the stack
13218		 * it will be safe with zero-initialized stack.
13219		 * The opposite is not true
13220		 */
13221		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
13222		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
13223			continue;
13224		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
13225		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
13226			/* Ex: old explored (safe) state has STACK_SPILL in
13227			 * this stack slot, but current has STACK_MISC ->
13228			 * these verifier states are not equivalent,
13229			 * return false to continue verification of this path
13230			 */
13231			return false;
13232		if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
13233			continue;
13234		if (!is_spilled_reg(&old->stack[spi]))
13235			continue;
13236		if (!regsafe(env, &old->stack[spi].spilled_ptr,
13237			     &cur->stack[spi].spilled_ptr, idmap))
13238			/* when explored and current stack slot are both storing
13239			 * spilled registers, check that the types of the stored
13240			 * pointers are the same as well.
13241			 * Ex: explored safe path could have stored
13242			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
13243			 * but current path has stored:
13244			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
13245			 * such verifier states are not equivalent.
13246			 * return false to continue verification of this path
13247			 */
13248			return false;
13249	}
13250	return true;
13251}
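
/* Editorial illustration (not from the original source): for a single stack
 * slot the comparison above behaves roughly as follows:
 *   old STACK_MISC,  cur STACK_ZERO  -> compatible (zeroed is stricter)
 *   old STACK_ZERO,  cur STACK_MISC  -> not equivalent, keep verifying
 *   old STACK_SPILL, cur STACK_SPILL -> also run regsafe() on the two
 *                                       spilled registers
 * Slots the old state never read are skipped entirely.
 */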
13252
13253static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
13254{
13255	if (old->acquired_refs != cur->acquired_refs)
13256		return false;
13257	return !memcmp(old->refs, cur->refs,
13258		       sizeof(*old->refs) * old->acquired_refs);
13259}
13260
13261/* compare two verifier states
13262 *
13263 * all states stored in state_list are known to be valid, since
13264 * verifier reached 'bpf_exit' instruction through them
13265 *
13266 * this function is called when the verifier explores different branches of
13267 * execution popped from the state stack. If it sees an old state that has
13268 * a more strict register state and a more strict stack state, then this execution
13269 * branch doesn't need to be explored further, since the verifier already
13270 * concluded that the more strict state leads to a valid finish.
13271 *
13272 * Therefore two states are equivalent if register state is more conservative
13273 * and explored stack state is more conservative than the current one.
13274 * Example:
13275 *       explored                   current
13276 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
13277 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
13278 *
13279 * In other words, if the current stack state (the one being explored) has more
13280 * valid slots than the old one that already passed validation, it means
13281 * the verifier can stop exploring and conclude that the current state is valid too
13282 *
13283 * Similarly with registers. If the explored state has a register type as invalid
13284 * whereas the register type in the current state is meaningful, it means that
13285 * the current state will reach the 'bpf_exit' instruction safely
13286 */
13287static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
13288			      struct bpf_func_state *cur)
13289{
13290	int i;
13291
13292	for (i = 0; i < MAX_BPF_REG; i++)
13293		if (!regsafe(env, &old->regs[i], &cur->regs[i],
13294			     env->idmap_scratch))
13295			return false;
13296
13297	if (!stacksafe(env, old, cur, env->idmap_scratch))
13298		return false;
13299
13300	if (!refsafe(old, cur))
13301		return false;
13302
13303	return true;
13304}
13305
13306static bool states_equal(struct bpf_verifier_env *env,
13307			 struct bpf_verifier_state *old,
13308			 struct bpf_verifier_state *cur)
13309{
13310	int i;
13311
13312	if (old->curframe != cur->curframe)
13313		return false;
13314
13315	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
13316
13317	/* Verification state from speculative execution simulation
13318	 * must never prune a non-speculative execution one.
13319	 */
13320	if (old->speculative && !cur->speculative)
13321		return false;
13322
13323	if (old->active_lock.ptr != cur->active_lock.ptr)
13324		return false;
13325
13326	/* Old and cur active_lock ids have to be either both present
13327	 * or both absent.
13328	 */
13329	if (!!old->active_lock.id != !!cur->active_lock.id)
13330		return false;
13331
13332	if (old->active_lock.id &&
13333	    !check_ids(old->active_lock.id, cur->active_lock.id, env->idmap_scratch))
13334		return false;
13335
13336	if (old->active_rcu_lock != cur->active_rcu_lock)
13337		return false;
13338
13339	/* for states to be equal callsites have to be the same
13340	 * and all frame states need to be equivalent
13341	 */
13342	for (i = 0; i <= old->curframe; i++) {
13343		if (old->frame[i]->callsite != cur->frame[i]->callsite)
13344			return false;
13345		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
13346			return false;
13347	}
13348	return true;
13349}
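
/* Editorial note (illustration, not from the original source): the callsite
 * check above matters when the same callee insn is reached from two different
 * call sites.  E.g. if both insn 5 and insn 15 call the same subprog, a state
 * recorded inside that subprog with frame[1]->callsite == 5 must not prune a
 * path that entered via insn 15, since after bpf_exit the two paths continue
 * at different instructions in the caller.
 */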
13350
13351/* Return 0 if no propagation happened. Return negative error code if error
13352 * happened. Otherwise, return the propagated bit.
13353 */
13354static int propagate_liveness_reg(struct bpf_verifier_env *env,
13355				  struct bpf_reg_state *reg,
13356				  struct bpf_reg_state *parent_reg)
13357{
13358	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
13359	u8 flag = reg->live & REG_LIVE_READ;
13360	int err;
13361
13362	/* When we get here, the read flags of PARENT_REG or REG could be any of
13363	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
13364	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
13365	 */
13366	if (parent_flag == REG_LIVE_READ64 ||
13367	    /* Or if there is no read flag from REG. */
13368	    !flag ||
13369	    /* Or if the read flag from REG is the same as PARENT_REG. */
13370	    parent_flag == flag)
13371		return 0;
13372
13373	err = mark_reg_read(env, reg, parent_reg, flag);
13374	if (err)
13375		return err;
13376
13377	return flag;
13378}
13379
13380/* A write screens off any subsequent reads; but write marks come from the
13381 * straight-line code between a state and its parent.  When we arrive at an
13382 * equivalent state (jump target or such) we didn't arrive by the straight-line
13383 * code, so read marks in the state must propagate to the parent regardless
13384 * of the state's write marks. That's what 'parent == state->parent' comparison
13385 * in mark_reg_read() is for.
13386 */
13387static int propagate_liveness(struct bpf_verifier_env *env,
13388			      const struct bpf_verifier_state *vstate,
13389			      struct bpf_verifier_state *vparent)
13390{
13391	struct bpf_reg_state *state_reg, *parent_reg;
13392	struct bpf_func_state *state, *parent;
13393	int i, frame, err = 0;
13394
13395	if (vparent->curframe != vstate->curframe) {
13396		WARN(1, "propagate_live: parent frame %d current frame %d\n",
13397		     vparent->curframe, vstate->curframe);
13398		return -EFAULT;
13399	}
13400	/* Propagate read liveness of registers... */
13401	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
13402	for (frame = 0; frame <= vstate->curframe; frame++) {
13403		parent = vparent->frame[frame];
13404		state = vstate->frame[frame];
13405		parent_reg = parent->regs;
13406		state_reg = state->regs;
13407		/* We don't need to worry about FP liveness, it's read-only */
13408		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
13409			err = propagate_liveness_reg(env, &state_reg[i],
13410						     &parent_reg[i]);
13411			if (err < 0)
13412				return err;
13413			if (err == REG_LIVE_READ64)
13414				mark_insn_zext(env, &parent_reg[i]);
13415		}
13416
13417		/* Propagate stack slots. */
13418		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
13419			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
13420			parent_reg = &parent->stack[i].spilled_ptr;
13421			state_reg = &state->stack[i].spilled_ptr;
13422			err = propagate_liveness_reg(env, state_reg,
13423						     parent_reg);
13424			if (err < 0)
13425				return err;
13426		}
13427	}
13428	return 0;
13429}
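
/* Editorial illustration (not from the original source): suppose the search
 * is pruned at insn N because an equivalent explored state exists there, and
 * the explored continuation after N reads r6.  The pruned path never executes
 * those reads itself, so without this propagation the branch that led to N
 * would think r6 is dead and a later, less strict state could wrongly prune
 * it.  Copying the REG_LIVE_READ marks from the explored state into the
 * current state, and on up its parentage chain, keeps r6 alive along the
 * pruned path as well.
 */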
13430
13431/* find precise scalars in the previous equivalent state and
13432 * propagate them into the current state
13433 */
13434static int propagate_precision(struct bpf_verifier_env *env,
13435			       const struct bpf_verifier_state *old)
13436{
13437	struct bpf_reg_state *state_reg;
13438	struct bpf_func_state *state;
13439	int i, err = 0, fr;
13440
13441	for (fr = old->curframe; fr >= 0; fr--) {
13442		state = old->frame[fr];
13443		state_reg = state->regs;
13444		for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
13445			if (state_reg->type != SCALAR_VALUE ||
13446			    !state_reg->precise)
13447				continue;
13448			if (env->log.level & BPF_LOG_LEVEL2)
13449				verbose(env, "frame %d: propagating r%d\n", i, fr);
13450			err = mark_chain_precision_frame(env, fr, i);
13451			if (err < 0)
13452				return err;
13453		}
13454
13455		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
13456			if (!is_spilled_reg(&state->stack[i]))
13457				continue;
13458			state_reg = &state->stack[i].spilled_ptr;
13459			if (state_reg->type != SCALAR_VALUE ||
13460			    !state_reg->precise)
13461				continue;
13462			if (env->log.level & BPF_LOG_LEVEL2)
13463				verbose(env, "frame %d: propagating fp%d\n",
13464					(-i - 1) * BPF_REG_SIZE, fr);
13465			err = mark_chain_precision_stack_frame(env, fr, i);
13466			if (err < 0)
13467				return err;
13468		}
13469	}
13470	return 0;
13471}
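
/* Editorial note (illustration, not from the original source): if the
 * explored equivalent state reached bpf_exit with, say, r2 marked precise in
 * frame 1, the pruned current state must mark r2 precise as well, which is
 * what the mark_chain_precision_frame() call above does by backtracking the
 * current state's own instruction history.
 */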
13472
13473static bool states_maybe_looping(struct bpf_verifier_state *old,
13474				 struct bpf_verifier_state *cur)
13475{
13476	struct bpf_func_state *fold, *fcur;
13477	int i, fr = cur->curframe;
13478
13479	if (old->curframe != fr)
13480		return false;
13481
13482	fold = old->frame[fr];
13483	fcur = cur->frame[fr];
13484	for (i = 0; i < MAX_BPF_REG; i++)
13485		if (memcmp(&fold->regs[i], &fcur->regs[i],
13486			   offsetof(struct bpf_reg_state, parent)))
13487			return false;
13488	return true;
13489}
13490
13492static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
13493{
13494	struct bpf_verifier_state_list *new_sl;
13495	struct bpf_verifier_state_list *sl, **pprev;
13496	struct bpf_verifier_state *cur = env->cur_state, *new;
13497	int i, j, err, states_cnt = 0;
13498	bool add_new_state = env->test_state_freq ? true : false;
13499
13500	/* bpf progs typically have a pruning point every 4 instructions
13501	 * http://vger.kernel.org/bpfconf2019.html#session-1
13502	 * Do not add new state for future pruning if the verifier hasn't seen
13503	 * at least 2 jumps and at least 8 instructions.
13504	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
13505	 * In tests that amounts to a reduction of up to 50% in total verifier
13506	 * memory consumption and a 20% verifier time speedup.
13507	 */
13508	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
13509	    env->insn_processed - env->prev_insn_processed >= 8)
13510		add_new_state = true;
13511
13512	pprev = explored_state(env, insn_idx);
13513	sl = *pprev;
13514
13515	clean_live_states(env, insn_idx, cur);
13516
13517	while (sl) {
13518		states_cnt++;
13519		if (sl->state.insn_idx != insn_idx)
13520			goto next;
13521
13522		if (sl->state.branches) {
13523			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
13524
13525			if (frame->in_async_callback_fn &&
13526			    frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
13527				/* Different async_entry_cnt means that the verifier is
13528				 * processing another entry into async callback.
13529				 * Seeing the same state is not an indication of infinite
13530				 * loop or infinite recursion.
13531				 * But finding the same state doesn't mean that it's safe
13532				 * to stop processing the current state. The previous state
13533				 * hasn't yet reached bpf_exit, since state.branches > 0.
13534				 * Checking in_async_callback_fn alone is not enough either,
13535				 * since the verifier still needs to catch infinite loops
13536				 * inside async callbacks.
13537				 */
13538			} else if (states_maybe_looping(&sl->state, cur) &&
13539				   states_equal(env, &sl->state, cur)) {
13540				verbose_linfo(env, insn_idx, "; ");
13541				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
13542				return -EINVAL;
13543			}
13544			/* if the verifier is processing a loop, avoid adding new state
13545			 * too often, since different loop iterations have distinct
13546			 * states and may not help future pruning.
13547			 * This threshold shouldn't be too low to make sure that
13548			 * a loop with large bound will be rejected quickly.
13549			 * The most abusive loop will be:
13550			 * r1 += 1
13551			 * if r1 < 1000000 goto pc-2
13552			 * 1M insn_processed limit / 100 == 10k peak states.
13553			 * This threshold shouldn't be too high either, since states
13554			 * at the end of the loop are likely to be useful in pruning.
13555			 */
13556			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
13557			    env->insn_processed - env->prev_insn_processed < 100)
13558				add_new_state = false;
13559			goto miss;
13560		}
13561		if (states_equal(env, &sl->state, cur)) {
13562			sl->hit_cnt++;
13563			/* reached equivalent register/stack state,
13564			 * prune the search.
13565			 * Registers read by the continuation are read by us.
13566			 * If we have any write marks in env->cur_state, they
13567			 * will prevent corresponding reads in the continuation
13568			 * from reaching our parent (an explored_state).  Our
13569			 * own state will get the read marks recorded, but
13570			 * they'll be immediately forgotten as we're pruning
13571			 * this state and will pop a new one.
13572			 */
13573			err = propagate_liveness(env, &sl->state, cur);
13574
13575			/* if previous state reached the exit with precision and
13576			 * current state is equivalent to it (except precision marks)
13577			 * the precision needs to be propagated back in
13578			 * the current state.
13579			 */
13580			err = err ? : push_jmp_history(env, cur);
13581			err = err ? : propagate_precision(env, &sl->state);
13582			if (err)
13583				return err;
13584			return 1;
13585		}
13586miss:
13587		/* when a new state is not going to be added, do not increase the miss count.
13588		 * Otherwise several loop iterations will remove the state
13589		 * recorded earlier. The goal of these heuristics is to have
13590		 * states from some iterations of the loop (some in the beginning
13591		 * and some at the end) to help pruning.
13592		 */
13593		if (add_new_state)
13594			sl->miss_cnt++;
13595		/* heuristic to determine whether this state is beneficial
13596		 * to keep checking from the state equivalence point of view.
13597		 * Higher numbers increase max_states_per_insn and verification time,
13598		 * but do not meaningfully decrease insn_processed.
13599		 */
13600		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
13601			/* the state is unlikely to be useful. Remove it to
13602			 * speed up verification
13603			 */
13604			*pprev = sl->next;
13605			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
13606				u32 br = sl->state.branches;
13607
13608				WARN_ONCE(br,
13609					  "BUG live_done but branches_to_explore %d\n",
13610					  br);
13611				free_verifier_state(&sl->state, false);
13612				kfree(sl);
13613				env->peak_states--;
13614			} else {
13615				/* cannot free this state, since the parentage chain may
13616				 * walk it later. Add it to the free_list instead, to
13617				 * be freed at the end of verification
13618				 */
13619				sl->next = env->free_list;
13620				env->free_list = sl;
13621			}
13622			sl = *pprev;
13623			continue;
13624		}
13625next:
13626		pprev = &sl->next;
13627		sl = *pprev;
13628	}
13629
13630	if (env->max_states_per_insn < states_cnt)
13631		env->max_states_per_insn = states_cnt;
13632
13633	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
13634		return 0;
13635
13636	if (!add_new_state)
13637		return 0;
13638
13639	/* There were no equivalent states, remember the current one.
13640	 * Technically the current state is not proven to be safe yet,
13641	 * but it will either reach the outermost bpf_exit (which means it's safe)
13642	 * or it will be rejected. When there are no loops the verifier won't be
13643	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
13644	 * again on the way to bpf_exit.
13645	 * When looping the sl->state.branches will be > 0 and this state
13646	 * will not be considered for equivalence until branches == 0.
13647	 */
13648	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
13649	if (!new_sl)
13650		return -ENOMEM;
13651	env->total_states++;
13652	env->peak_states++;
13653	env->prev_jmps_processed = env->jmps_processed;
13654	env->prev_insn_processed = env->insn_processed;
13655
13656	/* forget precise markings we inherited, see __mark_chain_precision */
13657	if (env->bpf_capable)
13658		mark_all_scalars_imprecise(env, cur);
13659
13660	/* add new state to the head of linked list */
13661	new = &new_sl->state;
13662	err = copy_verifier_state(new, cur);
13663	if (err) {
13664		free_verifier_state(new, false);
13665		kfree(new_sl);
13666		return err;
13667	}
13668	new->insn_idx = insn_idx;
13669	WARN_ONCE(new->branches != 1,
13670		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
13671
13672	cur->parent = new;
13673	cur->first_insn_idx = insn_idx;
13674	clear_jmp_history(cur);
13675	new_sl->next = *explored_state(env, insn_idx);
13676	*explored_state(env, insn_idx) = new_sl;
13677	/* connect new state to parentage chain. Current frame needs all
13678	 * registers connected. Only r6 - r9 of the callers are alive (pushed
13679	 * to the stack implicitly by JITs) so in callers' frames connect just
13680	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
13681	 * the state of the call instruction (with WRITTEN set), and r0 comes
13682	 * from callee with its full parentage chain, anyway.
13683	 */
13684	/* clear write marks in current state: the writes we did are not writes
13685	 * our child did, so they don't screen off its reads from us.
13686	 * (There are no read marks in current state, because reads always mark
13687	 * their parent and current state never has children yet.  Only
13688	 * explored_states can get read marks.)
13689	 */
13690	for (j = 0; j <= cur->curframe; j++) {
13691		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
13692			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
13693		for (i = 0; i < BPF_REG_FP; i++)
13694			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
13695	}
13696
13697	/* all stack frames are accessible from callee, clear them all */
13698	for (j = 0; j <= cur->curframe; j++) {
13699		struct bpf_func_state *frame = cur->frame[j];
13700		struct bpf_func_state *newframe = new->frame[j];
13701
13702		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
13703			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
13704			frame->stack[i].spilled_ptr.parent =
13705						&newframe->stack[i].spilled_ptr;
13706		}
13707	}
13708	return 0;
13709}
13710
13711/* Return true if it's OK to have the same insn return a different type. */
13712static bool reg_type_mismatch_ok(enum bpf_reg_type type)
13713{
13714	switch (base_type(type)) {
13715	case PTR_TO_CTX:
13716	case PTR_TO_SOCKET:
13717	case PTR_TO_SOCK_COMMON:
13718	case PTR_TO_TCP_SOCK:
13719	case PTR_TO_XDP_SOCK:
13720	case PTR_TO_BTF_ID:
13721		return false;
13722	default:
13723		return true;
13724	}
13725}
13726
13727/* If an instruction was previously used with particular pointer types, then we
13728 * need to be careful to avoid cases such as the below, where it may be ok
13729 * for one branch accessing the pointer, but not ok for the other branch:
13730 *
13731 * R1 = sock_ptr
13732 * goto X;
13733 * ...
13734 * R1 = some_other_valid_ptr;
13735 * goto X;
13736 * ...
13737 * R2 = *(u32 *)(R1 + 0);
13738 */
13739static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
13740{
13741	return src != prev && (!reg_type_mismatch_ok(src) ||
13742			       !reg_type_mismatch_ok(prev));
13743}
13744
13745static int do_check(struct bpf_verifier_env *env)
13746{
13747	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
13748	struct bpf_verifier_state *state = env->cur_state;
13749	struct bpf_insn *insns = env->prog->insnsi;
13750	struct bpf_reg_state *regs;
13751	int insn_cnt = env->prog->len;
13752	bool do_print_state = false;
13753	int prev_insn_idx = -1;
13754
13755	for (;;) {
13756		struct bpf_insn *insn;
13757		u8 class;
13758		int err;
13759
13760		env->prev_insn_idx = prev_insn_idx;
13761		if (env->insn_idx >= insn_cnt) {
13762			verbose(env, "invalid insn idx %d insn_cnt %d\n",
13763				env->insn_idx, insn_cnt);
13764			return -EFAULT;
13765		}
13766
13767		insn = &insns[env->insn_idx];
13768		class = BPF_CLASS(insn->code);
13769
13770		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
13771			verbose(env,
13772				"BPF program is too large. Processed %d insn\n",
13773				env->insn_processed);
13774			return -E2BIG;
13775		}
13776
13777		state->last_insn_idx = env->prev_insn_idx;
13778
13779		if (is_prune_point(env, env->insn_idx)) {
13780			err = is_state_visited(env, env->insn_idx);
13781			if (err < 0)
13782				return err;
13783			if (err == 1) {
13784				/* found equivalent state, can prune the search */
13785				if (env->log.level & BPF_LOG_LEVEL) {
13786					if (do_print_state)
13787						verbose(env, "\nfrom %d to %d%s: safe\n",
13788							env->prev_insn_idx, env->insn_idx,
13789							env->cur_state->speculative ?
13790							" (speculative execution)" : "");
13791					else
13792						verbose(env, "%d: safe\n", env->insn_idx);
13793				}
13794				goto process_bpf_exit;
13795			}
13796		}
13797
13798		if (is_jmp_point(env, env->insn_idx)) {
13799			err = push_jmp_history(env, state);
13800			if (err)
13801				return err;
13802		}
13803
13804		if (signal_pending(current))
13805			return -EAGAIN;
13806
13807		if (need_resched())
13808			cond_resched();
13809
13810		if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
13811			verbose(env, "\nfrom %d to %d%s:",
13812				env->prev_insn_idx, env->insn_idx,
13813				env->cur_state->speculative ?
13814				" (speculative execution)" : "");
13815			print_verifier_state(env, state->frame[state->curframe], true);
13816			do_print_state = false;
13817		}
13818
13819		if (env->log.level & BPF_LOG_LEVEL) {
13820			const struct bpf_insn_cbs cbs = {
13821				.cb_call	= disasm_kfunc_name,
13822				.cb_print	= verbose,
13823				.private_data	= env,
13824			};
13825
13826			if (verifier_state_scratched(env))
13827				print_insn_state(env, state->frame[state->curframe]);
13828
13829			verbose_linfo(env, env->insn_idx, "; ");
13830			env->prev_log_len = env->log.len_used;
13831			verbose(env, "%d: ", env->insn_idx);
13832			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
13833			env->prev_insn_print_len = env->log.len_used - env->prev_log_len;
13834			env->prev_log_len = env->log.len_used;
13835		}
13836
13837		if (bpf_prog_is_dev_bound(env->prog->aux)) {
13838			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
13839							   env->prev_insn_idx);
13840			if (err)
13841				return err;
13842		}
13843
13844		regs = cur_regs(env);
13845		sanitize_mark_insn_seen(env);
13846		prev_insn_idx = env->insn_idx;
13847
13848		if (class == BPF_ALU || class == BPF_ALU64) {
13849			err = check_alu_op(env, insn);
13850			if (err)
13851				return err;
13852
13853		} else if (class == BPF_LDX) {
13854			enum bpf_reg_type *prev_src_type, src_reg_type;
13855
13856			/* check for reserved fields is already done */
13857
13858			/* check src operand */
13859			err = check_reg_arg(env, insn->src_reg, SRC_OP);
13860			if (err)
13861				return err;
13862
13863			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
13864			if (err)
13865				return err;
13866
13867			src_reg_type = regs[insn->src_reg].type;
13868
13869			/* check that memory (src_reg + off) is readable,
13870			 * the state of dst_reg will be updated by this func
13871			 */
13872			err = check_mem_access(env, env->insn_idx, insn->src_reg,
13873					       insn->off, BPF_SIZE(insn->code),
13874					       BPF_READ, insn->dst_reg, false);
13875			if (err)
13876				return err;
13877
13878			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
13879
13880			if (*prev_src_type == NOT_INIT) {
13881				/* saw a valid insn
13882				 * dst_reg = *(u32 *)(src_reg + off)
13883				 * save type to validate intersecting paths
13884				 */
13885				*prev_src_type = src_reg_type;
13886
13887			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
13888				/* An abuser program is trying to use the same insn
13889				 * dst_reg = *(u32*) (src_reg + off)
13890				 * with different pointer types:
13891				 * src_reg == ctx in one branch and
13892				 * src_reg == stack|map in some other branch.
13893				 * Reject it.
13894				 */
13895				verbose(env, "same insn cannot be used with different pointers\n");
13896				return -EINVAL;
13897			}
13898
13899		} else if (class == BPF_STX) {
13900			enum bpf_reg_type *prev_dst_type, dst_reg_type;
13901
13902			if (BPF_MODE(insn->code) == BPF_ATOMIC) {
13903				err = check_atomic(env, env->insn_idx, insn);
13904				if (err)
13905					return err;
13906				env->insn_idx++;
13907				continue;
13908			}
13909
13910			if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
13911				verbose(env, "BPF_STX uses reserved fields\n");
13912				return -EINVAL;
13913			}
13914
13915			/* check src1 operand */
13916			err = check_reg_arg(env, insn->src_reg, SRC_OP);
13917			if (err)
13918				return err;
13919			/* check src2 operand */
13920			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13921			if (err)
13922				return err;
13923
13924			dst_reg_type = regs[insn->dst_reg].type;
13925
13926			/* check that memory (dst_reg + off) is writeable */
13927			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
13928					       insn->off, BPF_SIZE(insn->code),
13929					       BPF_WRITE, insn->src_reg, false);
13930			if (err)
13931				return err;
13932
13933			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
13934
13935			if (*prev_dst_type == NOT_INIT) {
13936				*prev_dst_type = dst_reg_type;
13937			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
13938				verbose(env, "same insn cannot be used with different pointers\n");
13939				return -EINVAL;
13940			}
13941
13942		} else if (class == BPF_ST) {
13943			if (BPF_MODE(insn->code) != BPF_MEM ||
13944			    insn->src_reg != BPF_REG_0) {
13945				verbose(env, "BPF_ST uses reserved fields\n");
13946				return -EINVAL;
13947			}
13948			/* check src operand */
13949			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13950			if (err)
13951				return err;
13952
13953			if (is_ctx_reg(env, insn->dst_reg)) {
13954				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
13955					insn->dst_reg,
13956					reg_type_str(env, reg_state(env, insn->dst_reg)->type));
13957				return -EACCES;
13958			}
13959
13960			/* check that memory (dst_reg + off) is writeable */
13961			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
13962					       insn->off, BPF_SIZE(insn->code),
13963					       BPF_WRITE, -1, false);
13964			if (err)
13965				return err;
13966
13967		} else if (class == BPF_JMP || class == BPF_JMP32) {
13968			u8 opcode = BPF_OP(insn->code);
13969
13970			env->jmps_processed++;
13971			if (opcode == BPF_CALL) {
13972				if (BPF_SRC(insn->code) != BPF_K ||
13973				    (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
13974				     && insn->off != 0) ||
13975				    (insn->src_reg != BPF_REG_0 &&
13976				     insn->src_reg != BPF_PSEUDO_CALL &&
13977				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
13978				    insn->dst_reg != BPF_REG_0 ||
13979				    class == BPF_JMP32) {
13980					verbose(env, "BPF_CALL uses reserved fields\n");
13981					return -EINVAL;
13982				}
13983
13984				if (env->cur_state->active_lock.ptr) {
13985					if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
13986					    (insn->src_reg == BPF_PSEUDO_CALL) ||
13987					    (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
13988					     (insn->off != 0 || !is_bpf_list_api_kfunc(insn->imm)))) {
13989						verbose(env, "function calls are not allowed while holding a lock\n");
13990						return -EINVAL;
13991					}
13992				}
13993				if (insn->src_reg == BPF_PSEUDO_CALL)
13994					err = check_func_call(env, insn, &env->insn_idx);
13995				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
13996					err = check_kfunc_call(env, insn, &env->insn_idx);
13997				else
13998					err = check_helper_call(env, insn, &env->insn_idx);
13999				if (err)
14000					return err;
14001			} else if (opcode == BPF_JA) {
14002				if (BPF_SRC(insn->code) != BPF_K ||
14003				    insn->imm != 0 ||
14004				    insn->src_reg != BPF_REG_0 ||
14005				    insn->dst_reg != BPF_REG_0 ||
14006				    class == BPF_JMP32) {
14007					verbose(env, "BPF_JA uses reserved fields\n");
14008					return -EINVAL;
14009				}
14010
14011				env->insn_idx += insn->off + 1;
14012				continue;
14013
14014			} else if (opcode == BPF_EXIT) {
14015				if (BPF_SRC(insn->code) != BPF_K ||
14016				    insn->imm != 0 ||
14017				    insn->src_reg != BPF_REG_0 ||
14018				    insn->dst_reg != BPF_REG_0 ||
14019				    class == BPF_JMP32) {
14020					verbose(env, "BPF_EXIT uses reserved fields\n");
14021					return -EINVAL;
14022				}
14023
14024				if (env->cur_state->active_lock.ptr) {
14025					verbose(env, "bpf_spin_unlock is missing\n");
14026					return -EINVAL;
14027				}
14028
14029				if (env->cur_state->active_rcu_lock) {
14030					verbose(env, "bpf_rcu_read_unlock is missing\n");
14031					return -EINVAL;
14032				}
14033
14034				/* We must do check_reference_leak here before
14035				 * prepare_func_exit to handle the case when
14036				 * state->curframe > 0: it may be a callback
14037				 * function, whose reference_state must
14038				 * match the caller's reference state when it exits.
14039				 */
14040				err = check_reference_leak(env);
14041				if (err)
14042					return err;
14043
14044				if (state->curframe) {
14045					/* exit from nested function */
14046					err = prepare_func_exit(env, &env->insn_idx);
14047					if (err)
14048						return err;
14049					do_print_state = true;
14050					continue;
14051				}
14052
14053				err = check_return_code(env);
14054				if (err)
14055					return err;
14056process_bpf_exit:
14057				mark_verifier_state_scratched(env);
14058				update_branch_counts(env, env->cur_state);
14059				err = pop_stack(env, &prev_insn_idx,
14060						&env->insn_idx, pop_log);
14061				if (err < 0) {
14062					if (err != -ENOENT)
14063						return err;
14064					break;
14065				} else {
14066					do_print_state = true;
14067					continue;
14068				}
14069			} else {
14070				err = check_cond_jmp_op(env, insn, &env->insn_idx);
14071				if (err)
14072					return err;
14073			}
14074		} else if (class == BPF_LD) {
14075			u8 mode = BPF_MODE(insn->code);
14076
14077			if (mode == BPF_ABS || mode == BPF_IND) {
14078				err = check_ld_abs(env, insn);
14079				if (err)
14080					return err;
14081
14082			} else if (mode == BPF_IMM) {
14083				err = check_ld_imm(env, insn);
14084				if (err)
14085					return err;
14086
14087				env->insn_idx++;
14088				sanitize_mark_insn_seen(env);
14089			} else {
14090				verbose(env, "invalid BPF_LD mode\n");
14091				return -EINVAL;
14092			}
14093		} else {
14094			verbose(env, "unknown insn class %d\n", class);
14095			return -EINVAL;
14096		}
14097
14098		env->insn_idx++;
14099	}
14100
14101	return 0;
14102}
14103
14104static int find_btf_percpu_datasec(struct btf *btf)
14105{
14106	const struct btf_type *t;
14107	const char *tname;
14108	int i, n;
14109
14110	/*
14111	 * vmlinux and each module have their own ".data..percpu"
14112	 * DATASECs in BTF. So for the module case, we need to skip the vmlinux BTF
14113	 * types and look only at the module's own BTF types.
14114	 */
14115	n = btf_nr_types(btf);
14116	if (btf_is_module(btf))
14117		i = btf_nr_types(btf_vmlinux);
14118	else
14119		i = 1;
14120
14121	for (; i < n; i++) {
14122		t = btf_type_by_id(btf, i);
14123		if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
14124			continue;
14125
14126		tname = btf_name_by_offset(btf, t->name_off);
14127		if (!strcmp(tname, ".data..percpu"))
14128			return i;
14129	}
14130
14131	return -ENOENT;
14132}
14133
14134/* replace pseudo btf_id with kernel symbol address */
14135static int check_pseudo_btf_id(struct bpf_verifier_env *env,
14136			       struct bpf_insn *insn,
14137			       struct bpf_insn_aux_data *aux)
14138{
14139	const struct btf_var_secinfo *vsi;
14140	const struct btf_type *datasec;
14141	struct btf_mod_pair *btf_mod;
14142	const struct btf_type *t;
14143	const char *sym_name;
14144	bool percpu = false;
14145	u32 type, id = insn->imm;
14146	struct btf *btf;
14147	s32 datasec_id;
14148	u64 addr;
14149	int i, btf_fd, err;
14150
14151	btf_fd = insn[1].imm;
14152	if (btf_fd) {
14153		btf = btf_get_by_fd(btf_fd);
14154		if (IS_ERR(btf)) {
14155			verbose(env, "invalid module BTF object FD specified.\n");
14156			return -EINVAL;
14157		}
14158	} else {
14159		if (!btf_vmlinux) {
14160			verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
14161			return -EINVAL;
14162		}
14163		btf = btf_vmlinux;
14164		btf_get(btf);
14165	}
14166
14167	t = btf_type_by_id(btf, id);
14168	if (!t) {
14169		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
14170		err = -ENOENT;
14171		goto err_put;
14172	}
14173
14174	if (!btf_type_is_var(t)) {
14175		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
14176		err = -EINVAL;
14177		goto err_put;
14178	}
14179
14180	sym_name = btf_name_by_offset(btf, t->name_off);
14181	addr = kallsyms_lookup_name(sym_name);
14182	if (!addr) {
14183		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
14184			sym_name);
14185		err = -ENOENT;
14186		goto err_put;
14187	}
14188
14189	datasec_id = find_btf_percpu_datasec(btf);
14190	if (datasec_id > 0) {
14191		datasec = btf_type_by_id(btf, datasec_id);
14192		for_each_vsi(i, datasec, vsi) {
14193			if (vsi->type == id) {
14194				percpu = true;
14195				break;
14196			}
14197		}
14198	}
14199
14200	insn[0].imm = (u32)addr;
14201	insn[1].imm = addr >> 32;
14202
14203	type = t->type;
14204	t = btf_type_skip_modifiers(btf, type, NULL);
14205	if (percpu) {
14206		aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
14207		aux->btf_var.btf = btf;
14208		aux->btf_var.btf_id = type;
14209	} else if (!btf_type_is_struct(t)) {
14210		const struct btf_type *ret;
14211		const char *tname;
14212		u32 tsize;
14213
14214		/* resolve the type size of ksym. */
14215		ret = btf_resolve_size(btf, t, &tsize);
14216		if (IS_ERR(ret)) {
14217			tname = btf_name_by_offset(btf, t->name_off);
14218			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
14219				tname, PTR_ERR(ret));
14220			err = -EINVAL;
14221			goto err_put;
14222		}
14223		aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
14224		aux->btf_var.mem_size = tsize;
14225	} else {
14226		aux->btf_var.reg_type = PTR_TO_BTF_ID;
14227		aux->btf_var.btf = btf;
14228		aux->btf_var.btf_id = type;
14229	}
14230
14231	/* check whether we recorded this BTF (and maybe module) already */
14232	for (i = 0; i < env->used_btf_cnt; i++) {
14233		if (env->used_btfs[i].btf == btf) {
14234			btf_put(btf);
14235			return 0;
14236		}
14237	}
14238
14239	if (env->used_btf_cnt >= MAX_USED_BTFS) {
14240		err = -E2BIG;
14241		goto err_put;
14242	}
14243
14244	btf_mod = &env->used_btfs[env->used_btf_cnt];
14245	btf_mod->btf = btf;
14246	btf_mod->module = NULL;
14247
14248	/* if we reference variables from kernel module, bump its refcount */
14249	if (btf_is_module(btf)) {
14250		btf_mod->module = btf_try_get_module(btf);
14251		if (!btf_mod->module) {
14252			err = -ENXIO;
14253			goto err_put;
14254		}
14255	}
14256
14257	env->used_btf_cnt++;
14258
14259	return 0;
14260err_put:
14261	btf_put(btf);
14262	return err;
14263}
14264
14265static bool is_tracing_prog_type(enum bpf_prog_type type)
14266{
14267	switch (type) {
14268	case BPF_PROG_TYPE_KPROBE:
14269	case BPF_PROG_TYPE_TRACEPOINT:
14270	case BPF_PROG_TYPE_PERF_EVENT:
14271	case BPF_PROG_TYPE_RAW_TRACEPOINT:
14272	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
14273		return true;
14274	default:
14275		return false;
14276	}
14277}
14278
14279static int check_map_prog_compatibility(struct bpf_verifier_env *env,
14280					struct bpf_map *map,
14281					struct bpf_prog *prog)
14282
14283{
14284	enum bpf_prog_type prog_type = resolve_prog_type(prog);
14285
14286	if (btf_record_has_field(map->record, BPF_LIST_HEAD)) {
14287		if (is_tracing_prog_type(prog_type)) {
14288			verbose(env, "tracing progs cannot use bpf_list_head yet\n");
14289			return -EINVAL;
14290		}
14291	}
14292
14293	if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
14294		if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
14295			verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
14296			return -EINVAL;
14297		}
14298
14299		if (is_tracing_prog_type(prog_type)) {
14300			verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
14301			return -EINVAL;
14302		}
14303
14304		if (prog->aux->sleepable) {
14305			verbose(env, "sleepable progs cannot use bpf_spin_lock yet\n");
14306			return -EINVAL;
14307		}
14308	}
14309
14310	if (btf_record_has_field(map->record, BPF_TIMER)) {
14311		if (is_tracing_prog_type(prog_type)) {
14312			verbose(env, "tracing progs cannot use bpf_timer yet\n");
14313			return -EINVAL;
14314		}
14315	}
14316
14317	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
14318	    !bpf_offload_prog_map_match(prog, map)) {
14319		verbose(env, "offload device mismatch between prog and map\n");
14320		return -EINVAL;
14321	}
14322
14323	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
14324		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
14325		return -EINVAL;
14326	}
14327
14328	if (prog->aux->sleepable)
14329		switch (map->map_type) {
14330		case BPF_MAP_TYPE_HASH:
14331		case BPF_MAP_TYPE_LRU_HASH:
14332		case BPF_MAP_TYPE_ARRAY:
14333		case BPF_MAP_TYPE_PERCPU_HASH:
14334		case BPF_MAP_TYPE_PERCPU_ARRAY:
14335		case BPF_MAP_TYPE_LRU_PERCPU_HASH:
14336		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
14337		case BPF_MAP_TYPE_HASH_OF_MAPS:
14338		case BPF_MAP_TYPE_RINGBUF:
14339		case BPF_MAP_TYPE_USER_RINGBUF:
14340		case BPF_MAP_TYPE_INODE_STORAGE:
14341		case BPF_MAP_TYPE_SK_STORAGE:
14342		case BPF_MAP_TYPE_TASK_STORAGE:
14343		case BPF_MAP_TYPE_CGRP_STORAGE:
14344			break;
14345		default:
14346			verbose(env,
14347				"Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
14348			return -EINVAL;
14349		}
14350
14351	return 0;
14352}
14353
14354static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
14355{
14356	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
14357		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
14358}
14359
14360/* find and rewrite pseudo imm in ld_imm64 instructions:
14361 *
14362 * 1. if it accesses map FD, replace it with actual map pointer.
14363 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
14364 *
14365 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
14366 */
14367static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
14368{
14369	struct bpf_insn *insn = env->prog->insnsi;
14370	int insn_cnt = env->prog->len;
14371	int i, j, err;
14372
14373	err = bpf_prog_calc_tag(env->prog);
14374	if (err)
14375		return err;
14376
14377	for (i = 0; i < insn_cnt; i++, insn++) {
14378		if (BPF_CLASS(insn->code) == BPF_LDX &&
14379		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
14380			verbose(env, "BPF_LDX uses reserved fields\n");
14381			return -EINVAL;
14382		}
14383
14384		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
14385			struct bpf_insn_aux_data *aux;
14386			struct bpf_map *map;
14387			struct fd f;
14388			u64 addr;
14389			u32 fd;
14390
14391			if (i == insn_cnt - 1 || insn[1].code != 0 ||
14392			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
14393			    insn[1].off != 0) {
14394				verbose(env, "invalid bpf_ld_imm64 insn\n");
14395				return -EINVAL;
14396			}
14397
14398			if (insn[0].src_reg == 0)
14399				/* valid generic load 64-bit imm */
14400				goto next_insn;
14401
14402			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
14403				aux = &env->insn_aux_data[i];
14404				err = check_pseudo_btf_id(env, insn, aux);
14405				if (err)
14406					return err;
14407				goto next_insn;
14408			}
14409
14410			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
14411				aux = &env->insn_aux_data[i];
14412				aux->ptr_type = PTR_TO_FUNC;
14413				goto next_insn;
14414			}
14415
14416			/* In final convert_pseudo_ld_imm64() step, this is
14417			 * converted into regular 64-bit imm load insn.
14418			 */
14419			switch (insn[0].src_reg) {
14420			case BPF_PSEUDO_MAP_VALUE:
14421			case BPF_PSEUDO_MAP_IDX_VALUE:
14422				break;
14423			case BPF_PSEUDO_MAP_FD:
14424			case BPF_PSEUDO_MAP_IDX:
14425				if (insn[1].imm == 0)
14426					break;
14427				fallthrough;
14428			default:
14429				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
14430				return -EINVAL;
14431			}
14432
14433			switch (insn[0].src_reg) {
14434			case BPF_PSEUDO_MAP_IDX_VALUE:
14435			case BPF_PSEUDO_MAP_IDX:
14436				if (bpfptr_is_null(env->fd_array)) {
14437					verbose(env, "fd_idx without fd_array is invalid\n");
14438					return -EPROTO;
14439				}
14440				if (copy_from_bpfptr_offset(&fd, env->fd_array,
14441							    insn[0].imm * sizeof(fd),
14442							    sizeof(fd)))
14443					return -EFAULT;
14444				break;
14445			default:
14446				fd = insn[0].imm;
14447				break;
14448			}
14449
14450			f = fdget(fd);
14451			map = __bpf_map_get(f);
14452			if (IS_ERR(map)) {
14453				verbose(env, "fd %d is not pointing to valid bpf_map\n",
14454					insn[0].imm);
14455				return PTR_ERR(map);
14456			}
14457
14458			err = check_map_prog_compatibility(env, map, env->prog);
14459			if (err) {
14460				fdput(f);
14461				return err;
14462			}
14463
14464			aux = &env->insn_aux_data[i];
14465			if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
14466			    insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
14467				addr = (unsigned long)map;
14468			} else {
14469				u32 off = insn[1].imm;
14470
14471				if (off >= BPF_MAX_VAR_OFF) {
14472					verbose(env, "direct value offset of %u is not allowed\n", off);
14473					fdput(f);
14474					return -EINVAL;
14475				}
14476
14477				if (!map->ops->map_direct_value_addr) {
14478					verbose(env, "no direct value access support for this map type\n");
14479					fdput(f);
14480					return -EINVAL;
14481				}
14482
14483				err = map->ops->map_direct_value_addr(map, &addr, off);
14484				if (err) {
14485					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
14486						map->value_size, off);
14487					fdput(f);
14488					return err;
14489				}
14490
14491				aux->map_off = off;
14492				addr += off;
14493			}
14494
14495			insn[0].imm = (u32)addr;
14496			insn[1].imm = addr >> 32;
14497
14498			/* check whether we recorded this map already */
14499			for (j = 0; j < env->used_map_cnt; j++) {
14500				if (env->used_maps[j] == map) {
14501					aux->map_index = j;
14502					fdput(f);
14503					goto next_insn;
14504				}
14505			}
14506
14507			if (env->used_map_cnt >= MAX_USED_MAPS) {
14508				fdput(f);
14509				return -E2BIG;
14510			}
14511
14512			/* hold the map. If the program is rejected by verifier,
14513			 * the map will be released by release_maps() or it
14514			 * will be used by the valid program until it's unloaded
14515			 * and all maps are released in free_used_maps()
14516			 */
14517			bpf_map_inc(map);
14518
14519			aux->map_index = env->used_map_cnt;
14520			env->used_maps[env->used_map_cnt++] = map;
14521
14522			if (bpf_map_is_cgroup_storage(map) &&
14523			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
14524				verbose(env, "only one cgroup storage of each type is allowed\n");
14525				fdput(f);
14526				return -EBUSY;
14527			}
14528
14529			fdput(f);
14530next_insn:
14531			insn++;
14532			i++;
14533			continue;
14534		}
14535
14536		/* Basic sanity check before we invest more work here. */
14537		if (!bpf_opcode_in_insntable(insn->code)) {
14538			verbose(env, "unknown opcode %02x\n", insn->code);
14539			return -EINVAL;
14540		}
14541	}
14542
14543	/* now all pseudo BPF_LD_IMM64 instructions load valid
14544	 * 'struct bpf_map *' into a register instead of user map_fd.
14545	 * These pointers will be used later by verifier to validate map access.
14546	 */
14547	return 0;
14548}
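
/* Editorial illustration (not from the original source): a map reference such
 * as BPF_LD_MAP_FD(BPF_REG_1, map_fd) arrives as a two-insn ldimm64 with
 * src_reg == BPF_PSEUDO_MAP_FD and insn[0].imm == map_fd.  After this pass
 * insn[0].imm / insn[1].imm hold the low / high 32 bits of the in-kernel
 * address (the 'struct bpf_map *' itself, or the map's direct value plus
 * offset for BPF_PSEUDO_MAP_VALUE), and the map has been added to
 * env->used_maps with its refcount bumped.
 */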
14549
14550/* drop refcnt of maps used by the rejected program */
14551static void release_maps(struct bpf_verifier_env *env)
14552{
14553	__bpf_free_used_maps(env->prog->aux, env->used_maps,
14554			     env->used_map_cnt);
14555}
14556
14557/* drop refcnt of btfs used by the rejected program */
14558static void release_btfs(struct bpf_verifier_env *env)
14559{
14560	__bpf_free_used_btfs(env->prog->aux, env->used_btfs,
14561			     env->used_btf_cnt);
14562}
14563
14564/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
14565static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
14566{
14567	struct bpf_insn *insn = env->prog->insnsi;
14568	int insn_cnt = env->prog->len;
14569	int i;
14570
14571	for (i = 0; i < insn_cnt; i++, insn++) {
14572		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
14573			continue;
14574		if (insn->src_reg == BPF_PSEUDO_FUNC)
14575			continue;
14576		insn->src_reg = 0;
14577	}
14578}
14579
14580/* single env->prog->insnsi[off] instruction was replaced with the range
14581 * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
14582 * [0, off) and [off, end) to new locations, so the patched range stays zero
14583 */
14584static void adjust_insn_aux_data(struct bpf_verifier_env *env,
14585				 struct bpf_insn_aux_data *new_data,
14586				 struct bpf_prog *new_prog, u32 off, u32 cnt)
14587{
14588	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
14589	struct bpf_insn *insn = new_prog->insnsi;
14590	u32 old_seen = old_data[off].seen;
14591	u32 prog_len;
14592	int i;
14593
14594	/* aux info at OFF always needs adjustment, no matter whether the fast path
14595	 * (cnt == 1) is taken or not. There is no guarantee the INSN at OFF is the
14596	 * original insn of the old prog.
14597	 */
14598	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
14599
14600	if (cnt == 1)
14601		return;
14602	prog_len = new_prog->len;
14603
14604	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
14605	memcpy(new_data + off + cnt - 1, old_data + off,
14606	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
14607	for (i = off; i < off + cnt - 1; i++) {
14608		/* Expand insni[off]'s seen count to the patched range. */
14609		new_data[i].seen = old_seen;
14610		new_data[i].zext_dst = insn_has_def32(env, insn + i);
14611	}
14612	env->insn_aux_data = new_data;
14613	vfree(old_data);
14614}
14615
14616static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
14617{
14618	int i;
14619
14620	if (len == 1)
14621		return;
14622	/* NOTE: fake 'exit' subprog should be updated as well. */
14623	for (i = 0; i <= env->subprog_cnt; i++) {
14624		if (env->subprog_info[i].start <= off)
14625			continue;
14626		env->subprog_info[i].start += len - 1;
14627	}
14628}
14629
14630static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
14631{
14632	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
14633	int i, sz = prog->aux->size_poke_tab;
14634	struct bpf_jit_poke_descriptor *desc;
14635
14636	for (i = 0; i < sz; i++) {
14637		desc = &tab[i];
14638		if (desc->insn_idx <= off)
14639			continue;
14640		desc->insn_idx += len - 1;
14641	}
14642}
14643
14644static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
14645					    const struct bpf_insn *patch, u32 len)
14646{
14647	struct bpf_prog *new_prog;
14648	struct bpf_insn_aux_data *new_data = NULL;
14649
14650	if (len > 1) {
14651		new_data = vzalloc(array_size(env->prog->len + len - 1,
14652					      sizeof(struct bpf_insn_aux_data)));
14653		if (!new_data)
14654			return NULL;
14655	}
14656
14657	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
14658	if (IS_ERR(new_prog)) {
14659		if (PTR_ERR(new_prog) == -ERANGE)
14660			verbose(env,
14661				"insn %d cannot be patched due to 16-bit range\n",
14662				env->insn_aux_data[off].orig_idx);
14663		vfree(new_data);
14664		return NULL;
14665	}
14666	adjust_insn_aux_data(env, new_data, new_prog, off, len);
14667	adjust_subprog_starts(env, off, len);
14668	adjust_poke_descs(new_prog, off, len);
14669	return new_prog;
14670}
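
/* Editorial illustration (not from the original source): when a single insn
 * at 'off' is replaced by a 3-insn patch, the program grows by len - 1 == 2,
 * so every subprog start and poke descriptor index that lies after 'off' is
 * shifted by 2, and the aux data for the patched range inherits the 'seen'
 * count of the original insn at 'off'.
 */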
14671
14672static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
14673					      u32 off, u32 cnt)
14674{
14675	int i, j;
14676
14677	/* find first prog starting at or after off (first to remove) */
14678	for (i = 0; i < env->subprog_cnt; i++)
14679		if (env->subprog_info[i].start >= off)
14680			break;
14681	/* find first prog starting at or after off + cnt (first to stay) */
14682	for (j = i; j < env->subprog_cnt; j++)
14683		if (env->subprog_info[j].start >= off + cnt)
14684			break;
14685	/* if j doesn't start exactly at off + cnt, we are just removing
14686	 * the front of the previous prog
14687	 */
14688	if (env->subprog_info[j].start != off + cnt)
14689		j--;
14690
14691	if (j > i) {
14692		struct bpf_prog_aux *aux = env->prog->aux;
14693		int move;
14694
14695		/* move fake 'exit' subprog as well */
14696		move = env->subprog_cnt + 1 - j;
14697
14698		memmove(env->subprog_info + i,
14699			env->subprog_info + j,
14700			sizeof(*env->subprog_info) * move);
14701		env->subprog_cnt -= j - i;
14702
14703		/* remove func_info */
14704		if (aux->func_info) {
14705			move = aux->func_info_cnt - j;
14706
14707			memmove(aux->func_info + i,
14708				aux->func_info + j,
14709				sizeof(*aux->func_info) * move);
14710			aux->func_info_cnt -= j - i;
14711			/* func_info->insn_off is set after all code rewrites,
14712			 * in adjust_btf_func() - no need to adjust
14713			 */
14714		}
14715	} else {
14716		/* convert i from "first prog to remove" to "first to adjust" */
14717		if (env->subprog_info[i].start == off)
14718			i++;
14719	}
14720
14721	/* update fake 'exit' subprog as well */
14722	for (; i <= env->subprog_cnt; i++)
14723		env->subprog_info[i].start -= cnt;
14724
14725	return 0;
14726}
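
/* Editorial illustration (not from the original source): with subprogs
 * starting at {0, 10, 20} plus the fake 'exit' subprog, removing insns
 * [10, 20) (off == 10, cnt == 10) drops the middle subprog entirely, and the
 * final loop above shifts the remaining starts down by cnt, so the former
 * third subprog now starts at insn 10.
 */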
14727
14728static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
14729				      u32 cnt)
14730{
14731	struct bpf_prog *prog = env->prog;
14732	u32 i, l_off, l_cnt, nr_linfo;
14733	struct bpf_line_info *linfo;
14734
14735	nr_linfo = prog->aux->nr_linfo;
14736	if (!nr_linfo)
14737		return 0;
14738
14739	linfo = prog->aux->linfo;
14740
14741	/* find first line info to remove, count lines to be removed */
14742	for (i = 0; i < nr_linfo; i++)
14743		if (linfo[i].insn_off >= off)
14744			break;
14745
14746	l_off = i;
14747	l_cnt = 0;
14748	for (; i < nr_linfo; i++)
14749		if (linfo[i].insn_off < off + cnt)
14750			l_cnt++;
14751		else
14752			break;
14753
14754	/* If the first live insn doesn't match the first live linfo, it needs to
14755	 * "inherit" the last removed linfo.  prog is already modified here, so
14756	 * prog->len == off means no live instructions remain (the tail was removed).
14757	 */
14758	if (prog->len != off && l_cnt &&
14759	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
14760		l_cnt--;
14761		linfo[--i].insn_off = off + cnt;
14762	}
14763
14764	/* remove the line info entries which refer to the removed instructions */
14765	if (l_cnt) {
14766		memmove(linfo + l_off, linfo + i,
14767			sizeof(*linfo) * (nr_linfo - i));
14768
14769		prog->aux->nr_linfo -= l_cnt;
14770		nr_linfo = prog->aux->nr_linfo;
14771	}
14772
14773	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
14774	for (i = l_off; i < nr_linfo; i++)
14775		linfo[i].insn_off -= cnt;
14776
14777	/* fix up all subprogs (incl. 'exit') which start >= off */
14778	for (i = 0; i <= env->subprog_cnt; i++)
14779		if (env->subprog_info[i].linfo_idx > l_off) {
14780			/* program may have started in the removed region but
14781			 * may not be fully removed
14782			 */
14783			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
14784				env->subprog_info[i].linfo_idx -= l_cnt;
14785			else
14786				env->subprog_info[i].linfo_idx = l_off;
14787		}
14788
14789	return 0;
14790}
14791
14792static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
14793{
14794	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14795	unsigned int orig_prog_len = env->prog->len;
14796	int err;
14797
14798	if (bpf_prog_is_dev_bound(env->prog->aux))
14799		bpf_prog_offload_remove_insns(env, off, cnt);
14800
14801	err = bpf_remove_insns(env->prog, off, cnt);
14802	if (err)
14803		return err;
14804
14805	err = adjust_subprog_starts_after_remove(env, off, cnt);
14806	if (err)
14807		return err;
14808
14809	err = bpf_adj_linfo_after_remove(env, off, cnt);
14810	if (err)
14811		return err;
14812
14813	memmove(aux_data + off,	aux_data + off + cnt,
14814		sizeof(*aux_data) * (orig_prog_len - off - cnt));
14815
14816	return 0;
14817}
14818
14819/* The verifier does more data flow analysis than llvm and will not
14820 * explore branches that are dead at run time. Malicious programs can
14821 * have dead code too. Therefore replace all dead at-run-time code
14822 * with 'ja -1'.
14823 *
14824 * Plain nops would not be optimal: if they sat at the end of the
14825 * program and, through another bug, we managed to jump there, we would
14826 * execute beyond program memory. Returning an exception code also
14827 * wouldn't work, since we can have subprogs where the dead code could
14828 * be located.
14829 */
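/* Illustrative sketch only (not taken from a real program): an insn that the
 * verifier never marked as seen, e.g.
 *
 *     5: r0 = 2
 *
 * is overwritten below with
 *
 *     5: goto -1
 *
 * i.e. BPF_JMP_IMM(BPF_JA, 0, 0, -1), a jump back onto itself, so that a
 * stray jump into the dead region spins on the trap instead of running past
 * the end of the program image.
 */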
14830static void sanitize_dead_code(struct bpf_verifier_env *env)
14831{
14832	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14833	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
14834	struct bpf_insn *insn = env->prog->insnsi;
14835	const int insn_cnt = env->prog->len;
14836	int i;
14837
14838	for (i = 0; i < insn_cnt; i++) {
14839		if (aux_data[i].seen)
14840			continue;
14841		memcpy(insn + i, &trap, sizeof(trap));
14842		aux_data[i].zext_dst = false;
14843	}
14844}
14845
14846static bool insn_is_cond_jump(u8 code)
14847{
14848	u8 op;
14849
14850	if (BPF_CLASS(code) == BPF_JMP32)
14851		return true;
14852
14853	if (BPF_CLASS(code) != BPF_JMP)
14854		return false;
14855
14856	op = BPF_OP(code);
14857	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
14858}
14859
14860static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
14861{
14862	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14863	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
14864	struct bpf_insn *insn = env->prog->insnsi;
14865	const int insn_cnt = env->prog->len;
14866	int i;
14867
14868	for (i = 0; i < insn_cnt; i++, insn++) {
14869		if (!insn_is_cond_jump(insn->code))
14870			continue;
14871
14872		if (!aux_data[i + 1].seen)
14873			ja.off = insn->off;
14874		else if (!aux_data[i + 1 + insn->off].seen)
14875			ja.off = 0;
14876		else
14877			continue;
14878
14879		if (bpf_prog_is_dev_bound(env->prog->aux))
14880			bpf_prog_offload_replace_insn(env, i, &ja);
14881
14882		memcpy(insn, &ja, sizeof(ja));
14883	}
14884}
14885
14886static int opt_remove_dead_code(struct bpf_verifier_env *env)
14887{
14888	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
14889	int insn_cnt = env->prog->len;
14890	int i, err;
14891
14892	for (i = 0; i < insn_cnt; i++) {
14893		int j;
14894
14895		j = 0;
14896		while (i + j < insn_cnt && !aux_data[i + j].seen)
14897			j++;
14898		if (!j)
14899			continue;
14900
14901		err = verifier_remove_insns(env, i, j);
14902		if (err)
14903			return err;
14904		insn_cnt = env->prog->len;
14905	}
14906
14907	return 0;
14908}
14909
14910static int opt_remove_nops(struct bpf_verifier_env *env)
14911{
14912	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
14913	struct bpf_insn *insn = env->prog->insnsi;
14914	int insn_cnt = env->prog->len;
14915	int i, err;
14916
14917	for (i = 0; i < insn_cnt; i++) {
14918		if (memcmp(&insn[i], &ja, sizeof(ja)))
14919			continue;
14920
14921		err = verifier_remove_insns(env, i, 1);
14922		if (err)
14923			return err;
14924		insn_cnt--;
14925		i--;
14926	}
14927
14928	return 0;
14929}
14930
14931static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
14932					 const union bpf_attr *attr)
14933{
14934	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
14935	struct bpf_insn_aux_data *aux = env->insn_aux_data;
14936	int i, patch_len, delta = 0, len = env->prog->len;
14937	struct bpf_insn *insns = env->prog->insnsi;
14938	struct bpf_prog *new_prog;
14939	bool rnd_hi32;
14940
14941	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
14942	zext_patch[1] = BPF_ZEXT_REG(0);
14943	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
14944	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
14945	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
14946	for (i = 0; i < len; i++) {
14947		int adj_idx = i + delta;
14948		struct bpf_insn insn;
14949		int load_reg;
14950
14951		insn = insns[adj_idx];
14952		load_reg = insn_def_regno(&insn);
14953		if (!aux[adj_idx].zext_dst) {
14954			u8 code, class;
14955			u32 imm_rnd;
14956
14957			if (!rnd_hi32)
14958				continue;
14959
14960			code = insn.code;
14961			class = BPF_CLASS(code);
14962			if (load_reg == -1)
14963				continue;
14964
14965			/* NOTE: arg "reg" (the fourth one) is only used for
14966			 *       BPF_STX + SRC_OP, so it is safe to pass NULL
14967			 *       here.
14968			 */
14969			if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
14970				if (class == BPF_LD &&
14971				    BPF_MODE(code) == BPF_IMM)
14972					i++;
14973				continue;
14974			}
14975
14976			/* ctx load could be transformed into wider load. */
14977			if (class == BPF_LDX &&
14978			    aux[adj_idx].ptr_type == PTR_TO_CTX)
14979				continue;
14980
14981			imm_rnd = get_random_u32();
14982			rnd_hi32_patch[0] = insn;
14983			rnd_hi32_patch[1].imm = imm_rnd;
14984			rnd_hi32_patch[3].dst_reg = load_reg;
14985			patch = rnd_hi32_patch;
14986			patch_len = 4;
14987			goto apply_patch_buffer;
14988		}
14989
14990		/* Add in a zero-extend instruction if a) the JIT has requested
14991		 * it or b) it's a CMPXCHG.
14992		 *
14993		 * The latter is because BPF_CMPXCHG always loads a value into
14994		 * R0, and therefore always zero-extends. However some archs'
14995		 * equivalent instruction only does this load when the
14996		 * comparison is successful. This detail of CMPXCHG is
14997		 * orthogonal to the general zero-extension behaviour of the
14998		 * CPU, so it's treated independently of bpf_jit_needs_zext.
14999		 */
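		/* Rough sketch of the zext patch applied below, assuming r3 is
		 * the defined register (illustrative only): the original insn
		 * is kept and followed by BPF_ZEXT_REG(r3), a 32-bit mov of
		 * the register onto itself that tells the JIT to clear the
		 * upper 32 bits:
		 *
		 *     r3 = *(u16 *)(r1 + 0)
		 *     w3 = w3		// BPF_ZEXT_REG(BPF_REG_3)
		 */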
15000		if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
15001			continue;
15002
15003		/* Zero-extension is done by the caller. */
15004		if (bpf_pseudo_kfunc_call(&insn))
15005			continue;
15006
15007		if (WARN_ON(load_reg == -1)) {
15008			verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
15009			return -EFAULT;
15010		}
15011
15012		zext_patch[0] = insn;
15013		zext_patch[1].dst_reg = load_reg;
15014		zext_patch[1].src_reg = load_reg;
15015		patch = zext_patch;
15016		patch_len = 2;
15017apply_patch_buffer:
15018		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
15019		if (!new_prog)
15020			return -ENOMEM;
15021		env->prog = new_prog;
15022		insns = new_prog->insnsi;
15023		aux = env->insn_aux_data;
15024		delta += patch_len - 1;
15025	}
15026
15027	return 0;
15028}
15029
15030/* convert load instructions that access fields of a context type into a
15031 * sequence of instructions that access fields of the underlying structure:
15032 *     struct __sk_buff    -> struct sk_buff
15033 *     struct bpf_sock_ops -> struct sock
15034 */
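/* A rough example (the offsets are made up; the real rewrite is produced by
 * the program type's convert_ctx_access callback): a program-visible load
 * such as
 *
 *     r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * may be rewritten into a load from the underlying kernel object, e.g.
 *
 *     r0 = *(u32 *)(r1 + offsetof(struct sk_buff, len))
 *
 * possibly via an intermediate pointer load when the field lives in a
 * sub-object reachable from sk_buff.
 */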
15035static int convert_ctx_accesses(struct bpf_verifier_env *env)
15036{
15037	const struct bpf_verifier_ops *ops = env->ops;
15038	int i, cnt, size, ctx_field_size, delta = 0;
15039	const int insn_cnt = env->prog->len;
15040	struct bpf_insn insn_buf[16], *insn;
15041	u32 target_size, size_default, off;
15042	struct bpf_prog *new_prog;
15043	enum bpf_access_type type;
15044	bool is_narrower_load;
15045
15046	if (ops->gen_prologue || env->seen_direct_write) {
15047		if (!ops->gen_prologue) {
15048			verbose(env, "bpf verifier is misconfigured\n");
15049			return -EINVAL;
15050		}
15051		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
15052					env->prog);
15053		if (cnt >= ARRAY_SIZE(insn_buf)) {
15054			verbose(env, "bpf verifier is misconfigured\n");
15055			return -EINVAL;
15056		} else if (cnt) {
15057			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
15058			if (!new_prog)
15059				return -ENOMEM;
15060
15061			env->prog = new_prog;
15062			delta += cnt - 1;
15063		}
15064	}
15065
15066	if (bpf_prog_is_dev_bound(env->prog->aux))
15067		return 0;
15068
15069	insn = env->prog->insnsi + delta;
15070
15071	for (i = 0; i < insn_cnt; i++, insn++) {
15072		bpf_convert_ctx_access_t convert_ctx_access;
15073		bool ctx_access;
15074
15075		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
15076		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
15077		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
15078		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
15079			type = BPF_READ;
15080			ctx_access = true;
15081		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
15082			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
15083			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
15084			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
15085			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
15086			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
15087			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
15088			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
15089			type = BPF_WRITE;
15090			ctx_access = BPF_CLASS(insn->code) == BPF_STX;
15091		} else {
15092			continue;
15093		}
15094
15095		if (type == BPF_WRITE &&
15096		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
15097			struct bpf_insn patch[] = {
15098				*insn,
15099				BPF_ST_NOSPEC(),
15100			};
15101
15102			cnt = ARRAY_SIZE(patch);
15103			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
15104			if (!new_prog)
15105				return -ENOMEM;
15106
15107			delta    += cnt - 1;
15108			env->prog = new_prog;
15109			insn      = new_prog->insnsi + i + delta;
15110			continue;
15111		}
15112
15113		if (!ctx_access)
15114			continue;
15115
15116		switch ((int)env->insn_aux_data[i + delta].ptr_type) {
15117		case PTR_TO_CTX:
15118			if (!ops->convert_ctx_access)
15119				continue;
15120			convert_ctx_access = ops->convert_ctx_access;
15121			break;
15122		case PTR_TO_SOCKET:
15123		case PTR_TO_SOCK_COMMON:
15124			convert_ctx_access = bpf_sock_convert_ctx_access;
15125			break;
15126		case PTR_TO_TCP_SOCK:
15127			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
15128			break;
15129		case PTR_TO_XDP_SOCK:
15130			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
15131			break;
15132		case PTR_TO_BTF_ID:
15133		case PTR_TO_BTF_ID | PTR_UNTRUSTED:
15134		/* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
15135		 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
15136		 * be said once it is marked PTR_UNTRUSTED, hence we must handle
15137		 * any faults for loads into such types. BPF_WRITE is disallowed
15138		 * for this case.
15139		 */
15140		case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
15141			if (type == BPF_READ) {
15142				insn->code = BPF_LDX | BPF_PROBE_MEM |
15143					BPF_SIZE((insn)->code);
15144				env->prog->aux->num_exentries++;
15145			}
15146			continue;
15147		default:
15148			continue;
15149		}
15150
15151		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
15152		size = BPF_LDST_BYTES(insn);
15153
15154		/* If the read access is a narrower load of the field,
15155		 * convert it to a 4/8-byte load, to minimize the program type
15156		 * specific convert_ctx_access changes. If the conversion is
15157		 * successful, we will apply the proper mask to the result.
15158		 */
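		/* Illustrative sketch (field and offset are hypothetical): a
		 * 1-byte read of a 4-byte context field,
		 *
		 *     r2 = *(u8 *)(r1 + 0x10)
		 *
		 * is widened here to
		 *
		 *     r2 = *(u32 *)(r1 + 0x10)
		 *
		 * and, once convert_ctx_access has rewritten the wide load,
		 * the wanted byte is extracted below with a shift (when
		 * needed) followed by an AND mask:
		 *
		 *     w2 >>= <shift>
		 *     w2 &= 0xff
		 */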
15159		is_narrower_load = size < ctx_field_size;
15160		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
15161		off = insn->off;
15162		if (is_narrower_load) {
15163			u8 size_code;
15164
15165			if (type == BPF_WRITE) {
15166				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
15167				return -EINVAL;
15168			}
15169
15170			size_code = BPF_H;
15171			if (ctx_field_size == 4)
15172				size_code = BPF_W;
15173			else if (ctx_field_size == 8)
15174				size_code = BPF_DW;
15175
15176			insn->off = off & ~(size_default - 1);
15177			insn->code = BPF_LDX | BPF_MEM | size_code;
15178		}
15179
15180		target_size = 0;
15181		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
15182					 &target_size);
15183		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
15184		    (ctx_field_size && !target_size)) {
15185			verbose(env, "bpf verifier is misconfigured\n");
15186			return -EINVAL;
15187		}
15188
15189		if (is_narrower_load && size < target_size) {
15190			u8 shift = bpf_ctx_narrow_access_offset(
15191				off, size, size_default) * 8;
15192			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
15193				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
15194				return -EINVAL;
15195			}
15196			if (ctx_field_size <= 4) {
15197				if (shift)
15198					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
15199									insn->dst_reg,
15200									shift);
15201				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
15202								(1 << size * 8) - 1);
15203			} else {
15204				if (shift)
15205					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
15206									insn->dst_reg,
15207									shift);
15208				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
15209								(1ULL << size * 8) - 1);
15210			}
15211		}
15212
15213		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15214		if (!new_prog)
15215			return -ENOMEM;
15216
15217		delta += cnt - 1;
15218
15219		/* keep walking new program and skip insns we just inserted */
15220		env->prog = new_prog;
15221		insn      = new_prog->insnsi + i + delta;
15222	}
15223
15224	return 0;
15225}
15226
15227static int jit_subprogs(struct bpf_verifier_env *env)
15228{
15229	struct bpf_prog *prog = env->prog, **func, *tmp;
15230	int i, j, subprog_start, subprog_end = 0, len, subprog;
15231	struct bpf_map *map_ptr;
15232	struct bpf_insn *insn;
15233	void *old_bpf_func;
15234	int err, num_exentries;
15235
15236	if (env->subprog_cnt <= 1)
15237		return 0;
15238
15239	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
15240		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
15241			continue;
15242
15243		/* Upon error here we cannot fall back to interpreter but
15244		 * need a hard reject of the program. Thus -EFAULT is
15245		 * propagated in any case.
15246		 */
15247		subprog = find_subprog(env, i + insn->imm + 1);
15248		if (subprog < 0) {
15249			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
15250				  i + insn->imm + 1);
15251			return -EFAULT;
15252		}
15253		/* temporarily remember subprog id inside insn instead of
15254		 * aux_data, since next loop will split up all insns into funcs
15255		 */
15256		insn->off = subprog;
15257		/* remember original imm in case JIT fails and fallback
15258		 * to interpreter will be needed
15259		 */
15260		env->insn_aux_data[i].call_imm = insn->imm;
15261		/* point imm to __bpf_call_base+1 from the JIT's point of view */
15262		insn->imm = 1;
15263		if (bpf_pseudo_func(insn))
15264			/* jit (e.g. x86_64) may emit fewer instructions
15265			 * if it learns a u32 imm is the same as a u64 imm.
15266			 * Force a non-zero value here.
15267			 */
15268			insn[1].imm = 1;
15269	}
15270
15271	err = bpf_prog_alloc_jited_linfo(prog);
15272	if (err)
15273		goto out_undo_insn;
15274
15275	err = -ENOMEM;
15276	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
15277	if (!func)
15278		goto out_undo_insn;
15279
15280	for (i = 0; i < env->subprog_cnt; i++) {
15281		subprog_start = subprog_end;
15282		subprog_end = env->subprog_info[i + 1].start;
15283
15284		len = subprog_end - subprog_start;
15285		/* bpf_prog_run() doesn't call subprogs directly,
15286		 * hence main prog stats include the runtime of subprogs.
15287		 * subprogs don't have IDs and are not reachable via prog_get_next_id,
15288		 * so func[i]->stats will never be accessed and stays NULL.
15289		 */
15290		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
15291		if (!func[i])
15292			goto out_free;
15293		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
15294		       len * sizeof(struct bpf_insn));
15295		func[i]->type = prog->type;
15296		func[i]->len = len;
15297		if (bpf_prog_calc_tag(func[i]))
15298			goto out_free;
15299		func[i]->is_func = 1;
15300		func[i]->aux->func_idx = i;
15301		/* Below members will be freed only at prog->aux */
15302		func[i]->aux->btf = prog->aux->btf;
15303		func[i]->aux->func_info = prog->aux->func_info;
15304		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
15305		func[i]->aux->poke_tab = prog->aux->poke_tab;
15306		func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
15307
15308		for (j = 0; j < prog->aux->size_poke_tab; j++) {
15309			struct bpf_jit_poke_descriptor *poke;
15310
15311			poke = &prog->aux->poke_tab[j];
15312			if (poke->insn_idx < subprog_end &&
15313			    poke->insn_idx >= subprog_start)
15314				poke->aux = func[i]->aux;
15315		}
15316
15317		func[i]->aux->name[0] = 'F';
15318		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
15319		func[i]->jit_requested = 1;
15320		func[i]->blinding_requested = prog->blinding_requested;
15321		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
15322		func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
15323		func[i]->aux->linfo = prog->aux->linfo;
15324		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
15325		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
15326		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
15327		num_exentries = 0;
15328		insn = func[i]->insnsi;
15329		for (j = 0; j < func[i]->len; j++, insn++) {
15330			if (BPF_CLASS(insn->code) == BPF_LDX &&
15331			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
15332				num_exentries++;
15333		}
15334		func[i]->aux->num_exentries = num_exentries;
15335		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
15336		func[i] = bpf_int_jit_compile(func[i]);
15337		if (!func[i]->jited) {
15338			err = -ENOTSUPP;
15339			goto out_free;
15340		}
15341		cond_resched();
15342	}
15343
15344	/* at this point all bpf functions were successfully JITed
15345	 * now populate all bpf_calls with correct addresses and
15346	 * run last pass of JIT
15347	 */
15348	for (i = 0; i < env->subprog_cnt; i++) {
15349		insn = func[i]->insnsi;
15350		for (j = 0; j < func[i]->len; j++, insn++) {
15351			if (bpf_pseudo_func(insn)) {
15352				subprog = insn->off;
15353				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
15354				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
15355				continue;
15356			}
15357			if (!bpf_pseudo_call(insn))
15358				continue;
15359			subprog = insn->off;
15360			insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
15361		}
15362
15363		/* we use the aux data to keep a list of the start addresses
15364		 * of the JITed images for each function in the program
15365		 *
15366		 * for some architectures, such as powerpc64, the imm field
15367		 * might not be large enough to hold the offset of the start
15368		 * address of the callee's JITed image from __bpf_call_base
15369		 *
15370		 * in such cases, we can lookup the start address of a callee
15371		 * by using its subprog id, available from the off field of
15372		 * the call instruction, as an index for this list
15373		 */
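		/* Sketch of how an arch JIT may use this list (illustrative
		 * only, not lifted from a real JIT): when the imm offset from
		 * __bpf_call_base does not fit, the callee can be recovered
		 * via
		 *
		 *     addr = prog->aux->func[insn->off]->bpf_func;
		 *
		 * and an absolute call to addr emitted instead.
		 */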
15374		func[i]->aux->func = func;
15375		func[i]->aux->func_cnt = env->subprog_cnt;
15376	}
15377	for (i = 0; i < env->subprog_cnt; i++) {
15378		old_bpf_func = func[i]->bpf_func;
15379		tmp = bpf_int_jit_compile(func[i]);
15380		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
15381			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
15382			err = -ENOTSUPP;
15383			goto out_free;
15384		}
15385		cond_resched();
15386	}
15387
15388	/* finally lock prog and jit images for all functions and
15389	 * populate kallsyms
15390	 */
15391	for (i = 0; i < env->subprog_cnt; i++) {
15392		bpf_prog_lock_ro(func[i]);
15393		bpf_prog_kallsyms_add(func[i]);
15394	}
15395
15396	/* Last step: make the now unused interpreter insns from the main
15397	 * prog consistent for later dump requests, so that they look the
15398	 * same as if they had only ever been interpreted.
15399	 */
15400	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
15401		if (bpf_pseudo_func(insn)) {
15402			insn[0].imm = env->insn_aux_data[i].call_imm;
15403			insn[1].imm = insn->off;
15404			insn->off = 0;
15405			continue;
15406		}
15407		if (!bpf_pseudo_call(insn))
15408			continue;
15409		insn->off = env->insn_aux_data[i].call_imm;
15410		subprog = find_subprog(env, i + insn->off + 1);
15411		insn->imm = subprog;
15412	}
15413
15414	prog->jited = 1;
15415	prog->bpf_func = func[0]->bpf_func;
15416	prog->jited_len = func[0]->jited_len;
15417	prog->aux->func = func;
15418	prog->aux->func_cnt = env->subprog_cnt;
15419	bpf_prog_jit_attempt_done(prog);
15420	return 0;
15421out_free:
15422	/* We failed JITing, so at this point we need to unregister poke
15423	 * descriptors from subprogs, so that the kernel does not attempt to
15424	 * patch them anymore while we're freeing the subprog JIT memory.
15425	 */
15426	for (i = 0; i < prog->aux->size_poke_tab; i++) {
15427		map_ptr = prog->aux->poke_tab[i].tail_call.map;
15428		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
15429	}
15430	/* At this point we're guaranteed that the poke descriptors are not
15431	 * live anymore. We can just unlink each subprog's descriptor table,
15432	 * as it is released together with the main prog.
15433	 */
15434	for (i = 0; i < env->subprog_cnt; i++) {
15435		if (!func[i])
15436			continue;
15437		func[i]->aux->poke_tab = NULL;
15438		bpf_jit_free(func[i]);
15439	}
15440	kfree(func);
15441out_undo_insn:
15442	/* cleanup main prog to be interpreted */
15443	prog->jit_requested = 0;
15444	prog->blinding_requested = 0;
15445	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
15446		if (!bpf_pseudo_call(insn))
15447			continue;
15448		insn->off = 0;
15449		insn->imm = env->insn_aux_data[i].call_imm;
15450	}
15451	bpf_prog_jit_attempt_done(prog);
15452	return err;
15453}
15454
15455static int fixup_call_args(struct bpf_verifier_env *env)
15456{
15457#ifndef CONFIG_BPF_JIT_ALWAYS_ON
15458	struct bpf_prog *prog = env->prog;
15459	struct bpf_insn *insn = prog->insnsi;
15460	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
15461	int i, depth;
15462#endif
15463	int err = 0;
15464
15465	if (env->prog->jit_requested &&
15466	    !bpf_prog_is_dev_bound(env->prog->aux)) {
15467		err = jit_subprogs(env);
15468		if (err == 0)
15469			return 0;
15470		if (err == -EFAULT)
15471			return err;
15472	}
15473#ifndef CONFIG_BPF_JIT_ALWAYS_ON
15474	if (has_kfunc_call) {
15475		verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
15476		return -EINVAL;
15477	}
15478	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
15479		/* When the JIT fails, progs with bpf2bpf calls and tail_calls
15480		 * have to be rejected, since the interpreter doesn't support them yet.
15481		 */
15482		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
15483		return -EINVAL;
15484	}
15485	for (i = 0; i < prog->len; i++, insn++) {
15486		if (bpf_pseudo_func(insn)) {
15487			/* When the JIT fails, progs with callback calls
15488			 * have to be rejected, since the interpreter doesn't support them yet.
15489			 */
15490			verbose(env, "callbacks are not allowed in non-JITed programs\n");
15491			return -EINVAL;
15492		}
15493
15494		if (!bpf_pseudo_call(insn))
15495			continue;
15496		depth = get_callee_stack_depth(env, insn, i);
15497		if (depth < 0)
15498			return depth;
15499		bpf_patch_call_args(insn, depth);
15500	}
15501	err = 0;
15502#endif
15503	return err;
15504}
15505
15506static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
15507			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
15508{
15509	const struct bpf_kfunc_desc *desc;
15510
15511	if (!insn->imm) {
15512		verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
15513		return -EINVAL;
15514	}
15515
15516	/* insn->imm has the btf func_id. Replace it with
15517	 * an address (relative to __bpf_call_base).
15518	 */
15519	desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
15520	if (!desc) {
15521		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
15522			insn->imm);
15523		return -EFAULT;
15524	}
15525
15526	*cnt = 0;
15527	insn->imm = desc->imm;
15528	if (insn->off)
15529		return 0;
15530	if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
15531		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
15532		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
15533		u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
15534
15535		insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
15536		insn_buf[1] = addr[0];
15537		insn_buf[2] = addr[1];
15538		insn_buf[3] = *insn;
15539		*cnt = 4;
15540	} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
15541		struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
15542		struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
15543
15544		insn_buf[0] = addr[0];
15545		insn_buf[1] = addr[1];
15546		insn_buf[2] = *insn;
15547		*cnt = 3;
15548	} else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
15549		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
15550		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
15551		*cnt = 1;
15552	}
15553	return 0;
15554}
15555
15556/* Do various post-verification rewrites in a single program pass.
15557 * These rewrites simplify JIT and interpreter implementations.
15558 */
15559static int do_misc_fixups(struct bpf_verifier_env *env)
15560{
15561	struct bpf_prog *prog = env->prog;
15562	enum bpf_attach_type eatype = prog->expected_attach_type;
15563	enum bpf_prog_type prog_type = resolve_prog_type(prog);
15564	struct bpf_insn *insn = prog->insnsi;
15565	const struct bpf_func_proto *fn;
15566	const int insn_cnt = prog->len;
15567	const struct bpf_map_ops *ops;
15568	struct bpf_insn_aux_data *aux;
15569	struct bpf_insn insn_buf[16];
15570	struct bpf_prog *new_prog;
15571	struct bpf_map *map_ptr;
15572	int i, ret, cnt, delta = 0;
15573
15574	for (i = 0; i < insn_cnt; i++, insn++) {
15575		/* Make divide-by-zero exceptions impossible. */
15576		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
15577		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
15578		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
15579		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
15580			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
15581			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
15582			struct bpf_insn *patchlet;
15583			struct bpf_insn chk_and_div[] = {
15584				/* [R,W]x div 0 -> 0 */
15585				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
15586					     BPF_JNE | BPF_K, insn->src_reg,
15587					     0, 2, 0),
15588				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
15589				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
15590				*insn,
15591			};
15592			struct bpf_insn chk_and_mod[] = {
15593				/* [R,W]x mod 0 -> [R,W]x */
15594				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
15595					     BPF_JEQ | BPF_K, insn->src_reg,
15596					     0, 1 + (is64 ? 0 : 1), 0),
15597				*insn,
15598				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
15599				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
15600			};
15601
15602			patchlet = isdiv ? chk_and_div : chk_and_mod;
15603			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
15604				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
15605
15606			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
15607			if (!new_prog)
15608				return -ENOMEM;
15609
15610			delta    += cnt - 1;
15611			env->prog = prog = new_prog;
15612			insn      = new_prog->insnsi + i + delta;
15613			continue;
15614		}
15615
15616		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
15617		if (BPF_CLASS(insn->code) == BPF_LD &&
15618		    (BPF_MODE(insn->code) == BPF_ABS ||
15619		     BPF_MODE(insn->code) == BPF_IND)) {
15620			cnt = env->ops->gen_ld_abs(insn, insn_buf);
15621			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
15622				verbose(env, "bpf verifier is misconfigured\n");
15623				return -EINVAL;
15624			}
15625
15626			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15627			if (!new_prog)
15628				return -ENOMEM;
15629
15630			delta    += cnt - 1;
15631			env->prog = prog = new_prog;
15632			insn      = new_prog->insnsi + i + delta;
15633			continue;
15634		}
15635
15636		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
15637		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
15638		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
15639			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
15640			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
15641			struct bpf_insn *patch = &insn_buf[0];
15642			bool issrc, isneg, isimm;
15643			u32 off_reg;
15644
15645			aux = &env->insn_aux_data[i + delta];
15646			if (!aux->alu_state ||
15647			    aux->alu_state == BPF_ALU_NON_POINTER)
15648				continue;
15649
15650			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
15651			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
15652				BPF_ALU_SANITIZE_SRC;
15653			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
15654
15655			off_reg = issrc ? insn->src_reg : insn->dst_reg;
15656			if (isimm) {
15657				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
15658			} else {
15659				if (isneg)
15660					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
15661				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
15662				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
15663				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
15664				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
15665				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
15666				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
15667			}
15668			if (!issrc)
15669				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
15670			insn->src_reg = BPF_REG_AX;
15671			if (isneg)
15672				insn->code = insn->code == code_add ?
15673					     code_sub : code_add;
15674			*patch++ = *insn;
15675			if (issrc && isneg && !isimm)
15676				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
15677			cnt = patch - insn_buf;
15678
15679			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15680			if (!new_prog)
15681				return -ENOMEM;
15682
15683			delta    += cnt - 1;
15684			env->prog = prog = new_prog;
15685			insn      = new_prog->insnsi + i + delta;
15686			continue;
15687		}
15688
15689		if (insn->code != (BPF_JMP | BPF_CALL))
15690			continue;
15691		if (insn->src_reg == BPF_PSEUDO_CALL)
15692			continue;
15693		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
15694			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
15695			if (ret)
15696				return ret;
15697			if (cnt == 0)
15698				continue;
15699
15700			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15701			if (!new_prog)
15702				return -ENOMEM;
15703
15704			delta	 += cnt - 1;
15705			env->prog = prog = new_prog;
15706			insn	  = new_prog->insnsi + i + delta;
15707			continue;
15708		}
15709
15710		if (insn->imm == BPF_FUNC_get_route_realm)
15711			prog->dst_needed = 1;
15712		if (insn->imm == BPF_FUNC_get_prandom_u32)
15713			bpf_user_rnd_init_once();
15714		if (insn->imm == BPF_FUNC_override_return)
15715			prog->kprobe_override = 1;
15716		if (insn->imm == BPF_FUNC_tail_call) {
15717			/* If we tail call into other programs, we
15718			 * cannot make any assumptions since they can
15719			 * be replaced dynamically during runtime in
15720			 * the program array.
15721			 */
15722			prog->cb_access = 1;
15723			if (!allow_tail_call_in_subprogs(env))
15724				prog->aux->stack_depth = MAX_BPF_STACK;
15725			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
15726
15727			/* mark bpf_tail_call as a different opcode to avoid a
15728			 * conditional branch in the interpreter for every normal
15729			 * call, and to prevent accidental JITing by a JIT compiler
15730			 * that doesn't support bpf_tail_call yet
15731			 */
15732			insn->imm = 0;
15733			insn->code = BPF_JMP | BPF_TAIL_CALL;
15734
15735			aux = &env->insn_aux_data[i + delta];
15736			if (env->bpf_capable && !prog->blinding_requested &&
15737			    prog->jit_requested &&
15738			    !bpf_map_key_poisoned(aux) &&
15739			    !bpf_map_ptr_poisoned(aux) &&
15740			    !bpf_map_ptr_unpriv(aux)) {
15741				struct bpf_jit_poke_descriptor desc = {
15742					.reason = BPF_POKE_REASON_TAIL_CALL,
15743					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
15744					.tail_call.key = bpf_map_key_immediate(aux),
15745					.insn_idx = i + delta,
15746				};
15747
15748				ret = bpf_jit_add_poke_descriptor(prog, &desc);
15749				if (ret < 0) {
15750					verbose(env, "adding tail call poke descriptor failed\n");
15751					return ret;
15752				}
15753
15754				insn->imm = ret + 1;
15755				continue;
15756			}
15757
15758			if (!bpf_map_ptr_unpriv(aux))
15759				continue;
15760
15761			/* instead of changing every JIT dealing with tail_call
15762			 * emit two extra insns:
15763			 * if (index >= max_entries) goto out;
15764			 * index &= array->index_mask;
15765			 * to avoid out-of-bounds cpu speculation
15766			 */
15767			if (bpf_map_ptr_poisoned(aux)) {
15768				verbose(env, "tail_call abusing map_ptr\n");
15769				return -EINVAL;
15770			}
15771
15772			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
15773			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
15774						  map_ptr->max_entries, 2);
15775			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
15776						    container_of(map_ptr,
15777								 struct bpf_array,
15778								 map)->index_mask);
15779			insn_buf[2] = *insn;
15780			cnt = 3;
15781			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15782			if (!new_prog)
15783				return -ENOMEM;
15784
15785			delta    += cnt - 1;
15786			env->prog = prog = new_prog;
15787			insn      = new_prog->insnsi + i + delta;
15788			continue;
15789		}
15790
15791		if (insn->imm == BPF_FUNC_timer_set_callback) {
15792			/* The verifier will process callback_fn as many times as necessary
15793			 * with different maps and the register states prepared by
15794			 * set_timer_callback_state will be accurate.
15795			 *
15796			 * The following use case is valid:
15797			 *   map1 is shared by prog1, prog2, prog3.
15798			 *   prog1 calls bpf_timer_init for some map1 elements
15799			 *   prog2 calls bpf_timer_set_callback for some map1 elements.
15800			 *     Those that were not bpf_timer_init-ed will return -EINVAL.
15801			 *   prog3 calls bpf_timer_start for some map1 elements.
15802			 *     Those that were not both bpf_timer_init-ed and
15803			 *     bpf_timer_set_callback-ed will return -EINVAL.
15804			 */
15805			struct bpf_insn ld_addrs[2] = {
15806				BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
15807			};
15808
15809			insn_buf[0] = ld_addrs[0];
15810			insn_buf[1] = ld_addrs[1];
15811			insn_buf[2] = *insn;
15812			cnt = 3;
15813
15814			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15815			if (!new_prog)
15816				return -ENOMEM;
15817
15818			delta    += cnt - 1;
15819			env->prog = prog = new_prog;
15820			insn      = new_prog->insnsi + i + delta;
15821			goto patch_call_imm;
15822		}
15823
15824		if (is_storage_get_function(insn->imm)) {
15825			if (!env->prog->aux->sleepable ||
15826			    env->insn_aux_data[i + delta].storage_get_func_atomic)
15827				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
15828			else
15829				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
15830			insn_buf[1] = *insn;
15831			cnt = 2;
15832
15833			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15834			if (!new_prog)
15835				return -ENOMEM;
15836
15837			delta += cnt - 1;
15838			env->prog = prog = new_prog;
15839			insn = new_prog->insnsi + i + delta;
15840			goto patch_call_imm;
15841		}
15842
15843		/* The BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
15844		 * and other inlining handlers below are currently limited to
15845		 * 64-bit hosts only.
15846		 */
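		/* Rough sketch of the inlining done below (the exact sequence
		 * is map specific and produced by ops->map_gen_lookup): for an
		 * array map, a
		 *
		 *     call bpf_map_lookup_elem
		 *
		 * may be replaced by a short inline sequence that, roughly,
		 * bounds checks the key and computes the element address
		 * directly:
		 *
		 *     r0 = 0
		 *     if (index >= max_entries) goto out
		 *     r0 = &array->value[index * round_up(value_size, 8)]
		 * out:
		 *
		 * so no helper call is made at run time.
		 */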
15847		if (prog->jit_requested && BITS_PER_LONG == 64 &&
15848		    (insn->imm == BPF_FUNC_map_lookup_elem ||
15849		     insn->imm == BPF_FUNC_map_update_elem ||
15850		     insn->imm == BPF_FUNC_map_delete_elem ||
15851		     insn->imm == BPF_FUNC_map_push_elem   ||
15852		     insn->imm == BPF_FUNC_map_pop_elem    ||
15853		     insn->imm == BPF_FUNC_map_peek_elem   ||
15854		     insn->imm == BPF_FUNC_redirect_map    ||
15855		     insn->imm == BPF_FUNC_for_each_map_elem ||
15856		     insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
15857			aux = &env->insn_aux_data[i + delta];
15858			if (bpf_map_ptr_poisoned(aux))
15859				goto patch_call_imm;
15860
15861			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
15862			ops = map_ptr->ops;
15863			if (insn->imm == BPF_FUNC_map_lookup_elem &&
15864			    ops->map_gen_lookup) {
15865				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
15866				if (cnt == -EOPNOTSUPP)
15867					goto patch_map_ops_generic;
15868				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
15869					verbose(env, "bpf verifier is misconfigured\n");
15870					return -EINVAL;
15871				}
15872
15873				new_prog = bpf_patch_insn_data(env, i + delta,
15874							       insn_buf, cnt);
15875				if (!new_prog)
15876					return -ENOMEM;
15877
15878				delta    += cnt - 1;
15879				env->prog = prog = new_prog;
15880				insn      = new_prog->insnsi + i + delta;
15881				continue;
15882			}
15883
15884			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
15885				     (void *(*)(struct bpf_map *map, void *key))NULL));
15886			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
15887				     (int (*)(struct bpf_map *map, void *key))NULL));
15888			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
15889				     (int (*)(struct bpf_map *map, void *key, void *value,
15890					      u64 flags))NULL));
15891			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
15892				     (int (*)(struct bpf_map *map, void *value,
15893					      u64 flags))NULL));
15894			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
15895				     (int (*)(struct bpf_map *map, void *value))NULL));
15896			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
15897				     (int (*)(struct bpf_map *map, void *value))NULL));
15898			BUILD_BUG_ON(!__same_type(ops->map_redirect,
15899				     (int (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
15900			BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
15901				     (int (*)(struct bpf_map *map,
15902					      bpf_callback_t callback_fn,
15903					      void *callback_ctx,
15904					      u64 flags))NULL));
15905			BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
15906				     (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
15907
15908patch_map_ops_generic:
15909			switch (insn->imm) {
15910			case BPF_FUNC_map_lookup_elem:
15911				insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
15912				continue;
15913			case BPF_FUNC_map_update_elem:
15914				insn->imm = BPF_CALL_IMM(ops->map_update_elem);
15915				continue;
15916			case BPF_FUNC_map_delete_elem:
15917				insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
15918				continue;
15919			case BPF_FUNC_map_push_elem:
15920				insn->imm = BPF_CALL_IMM(ops->map_push_elem);
15921				continue;
15922			case BPF_FUNC_map_pop_elem:
15923				insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
15924				continue;
15925			case BPF_FUNC_map_peek_elem:
15926				insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
15927				continue;
15928			case BPF_FUNC_redirect_map:
15929				insn->imm = BPF_CALL_IMM(ops->map_redirect);
15930				continue;
15931			case BPF_FUNC_for_each_map_elem:
15932				insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
15933				continue;
15934			case BPF_FUNC_map_lookup_percpu_elem:
15935				insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
15936				continue;
15937			}
15938
15939			goto patch_call_imm;
15940		}
15941
15942		/* Implement bpf_jiffies64 inline. */
15943		if (prog->jit_requested && BITS_PER_LONG == 64 &&
15944		    insn->imm == BPF_FUNC_jiffies64) {
15945			struct bpf_insn ld_jiffies_addr[2] = {
15946				BPF_LD_IMM64(BPF_REG_0,
15947					     (unsigned long)&jiffies),
15948			};
15949
15950			insn_buf[0] = ld_jiffies_addr[0];
15951			insn_buf[1] = ld_jiffies_addr[1];
15952			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
15953						  BPF_REG_0, 0);
15954			cnt = 3;
15955
15956			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
15957						       cnt);
15958			if (!new_prog)
15959				return -ENOMEM;
15960
15961			delta    += cnt - 1;
15962			env->prog = prog = new_prog;
15963			insn      = new_prog->insnsi + i + delta;
15964			continue;
15965		}
15966
15967		/* Implement bpf_get_func_arg inline. */
15968		if (prog_type == BPF_PROG_TYPE_TRACING &&
15969		    insn->imm == BPF_FUNC_get_func_arg) {
15970			/* Load nr_args from ctx - 8 */
15971			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
15972			insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
15973			insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
15974			insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
15975			insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
15976			insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
15977			insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
15978			insn_buf[7] = BPF_JMP_A(1);
15979			insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
15980			cnt = 9;
15981
15982			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
15983			if (!new_prog)
15984				return -ENOMEM;
15985
15986			delta    += cnt - 1;
15987			env->prog = prog = new_prog;
15988			insn      = new_prog->insnsi + i + delta;
15989			continue;
15990		}
15991
15992		/* Implement bpf_get_func_ret inline. */
15993		if (prog_type == BPF_PROG_TYPE_TRACING &&
15994		    insn->imm == BPF_FUNC_get_func_ret) {
15995			if (eatype == BPF_TRACE_FEXIT ||
15996			    eatype == BPF_MODIFY_RETURN) {
15997				/* Load nr_args from ctx - 8 */
15998				insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
15999				insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
16000				insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
16001				insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
16002				insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
16003				insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
16004				cnt = 6;
16005			} else {
16006				insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
16007				cnt = 1;
16008			}
16009
16010			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
16011			if (!new_prog)
16012				return -ENOMEM;
16013
16014			delta    += cnt - 1;
16015			env->prog = prog = new_prog;
16016			insn      = new_prog->insnsi + i + delta;
16017			continue;
16018		}
16019
16020		/* Implement get_func_arg_cnt inline. */
16021		if (prog_type == BPF_PROG_TYPE_TRACING &&
16022		    insn->imm == BPF_FUNC_get_func_arg_cnt) {
16023			/* Load nr_args from ctx - 8 */
16024			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
16025
16026			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
16027			if (!new_prog)
16028				return -ENOMEM;
16029
16030			env->prog = prog = new_prog;
16031			insn      = new_prog->insnsi + i + delta;
16032			continue;
16033		}
16034
16035		/* Implement bpf_get_func_ip inline. */
16036		if (prog_type == BPF_PROG_TYPE_TRACING &&
16037		    insn->imm == BPF_FUNC_get_func_ip) {
16038			/* Load IP address from ctx - 16 */
16039			insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
16040
16041			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
16042			if (!new_prog)
16043				return -ENOMEM;
16044
16045			env->prog = prog = new_prog;
16046			insn      = new_prog->insnsi + i + delta;
16047			continue;
16048		}
16049
16050patch_call_imm:
16051		fn = env->ops->get_func_proto(insn->imm, env->prog);
16052		/* all functions that have prototype and verifier allowed
16053		 * programs to call them, must be real in-kernel functions
16054		 */
16055		if (!fn->func) {
16056			verbose(env,
16057				"kernel subsystem misconfigured func %s#%d\n",
16058				func_id_name(insn->imm), insn->imm);
16059			return -EFAULT;
16060		}
16061		insn->imm = fn->func - __bpf_call_base;
16062	}
16063
16064	/* Since poke tab is now finalized, publish aux to tracker. */
16065	for (i = 0; i < prog->aux->size_poke_tab; i++) {
16066		map_ptr = prog->aux->poke_tab[i].tail_call.map;
16067		if (!map_ptr->ops->map_poke_track ||
16068		    !map_ptr->ops->map_poke_untrack ||
16069		    !map_ptr->ops->map_poke_run) {
16070			verbose(env, "bpf verifier is misconfigured\n");
16071			return -EINVAL;
16072		}
16073
16074		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
16075		if (ret < 0) {
16076			verbose(env, "tracking tail call prog failed\n");
16077			return ret;
16078		}
16079	}
16080
16081	sort_kfunc_descs_by_imm(env->prog);
16082
16083	return 0;
16084}
16085
16086static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
16087					int position,
16088					s32 stack_base,
16089					u32 callback_subprogno,
16090					u32 *cnt)
16091{
16092	s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
16093	s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
16094	s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
16095	int reg_loop_max = BPF_REG_6;
16096	int reg_loop_cnt = BPF_REG_7;
16097	int reg_loop_ctx = BPF_REG_8;
16098
16099	struct bpf_prog *new_prog;
16100	u32 callback_start;
16101	u32 call_insn_offset;
16102	s32 callback_offset;
16103
16104	/* This represents an inlined version of bpf_iter.c:bpf_loop;
16105	 * be careful to keep this code in sync with it.
16106	 */
16107	struct bpf_insn insn_buf[] = {
16108		/* Return error and jump to the end of the patch if
16109		 * expected number of iterations is too big.
16110		 */
16111		BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
16112		BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
16113		BPF_JMP_IMM(BPF_JA, 0, 0, 16),
16114		/* spill R6, R7, R8 to use these as loop vars */
16115		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
16116		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
16117		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
16118		/* initialize loop vars */
16119		BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
16120		BPF_MOV32_IMM(reg_loop_cnt, 0),
16121		BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
16122		/* loop header,
16123		 * if reg_loop_cnt >= reg_loop_max skip the loop body
16124		 */
16125		BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
16126		/* callback call,
16127		 * correct callback offset would be set after patching
16128		 */
16129		BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
16130		BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
16131		BPF_CALL_REL(0),
16132		/* increment loop counter */
16133		BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
16134		/* jump to loop header if callback returned 0 */
16135		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
16136		/* return value of bpf_loop,
16137		 * set R0 to the number of iterations
16138		 */
16139		BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
16140		/* restore original values of R6, R7, R8 */
16141		BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
16142		BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
16143		BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
16144	};
16145
16146	*cnt = ARRAY_SIZE(insn_buf);
16147	new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
16148	if (!new_prog)
16149		return new_prog;
16150
16151	/* callback start is known only after patching */
16152	callback_start = env->subprog_info[callback_subprogno].start;
16153	/* Note: insn_buf[12] is the offset of the BPF_CALL_REL instruction */
16154	call_insn_offset = position + 12;
16155	callback_offset = callback_start - call_insn_offset - 1;
16156	new_prog->insnsi[call_insn_offset].imm = callback_offset;
16157
16158	return new_prog;
16159}
16160
16161static bool is_bpf_loop_call(struct bpf_insn *insn)
16162{
16163	return insn->code == (BPF_JMP | BPF_CALL) &&
16164		insn->src_reg == 0 &&
16165		insn->imm == BPF_FUNC_loop;
16166}
16167
16168/* For all sub-programs in the program (including main) check
16169 * insn_aux_data to see if there are bpf_loop calls that require
16170 * inlining. If such calls are found, they are replaced with the
16171 * sequence of instructions produced by the `inline_bpf_loop` function
16172 * and the subprog's stack_depth is increased by the size of 3
16173 * registers. This stack space is used to spill the values of R6, R7
16174 * and R8, which are used to store the loop bound, counter and
16175 * context variables.
16176 */
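/* Illustrative sketch of a call site that is fit for inlining (the real
 * decision is recorded in insn_aux_data during the main verification pass):
 *
 *     r1 = <nr_loops>      // checked against BPF_MAX_LOOPS at run time
 *     r2 = <callback>      // callback subprog known at verification time
 *     r3 = <callback_ctx>
 *     r4 = 0               // flags must be known to be zero
 *     call bpf_loop
 *
 * The call is replaced by the insn sequence built in inline_bpf_loop(), and
 * this subprog's stack_depth grows by 3 * BPF_REG_SIZE (plus rounding) to
 * hold the spilled R6, R7 and R8.
 */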
16177static int optimize_bpf_loop(struct bpf_verifier_env *env)
16178{
16179	struct bpf_subprog_info *subprogs = env->subprog_info;
16180	int i, cur_subprog = 0, cnt, delta = 0;
16181	struct bpf_insn *insn = env->prog->insnsi;
16182	int insn_cnt = env->prog->len;
16183	u16 stack_depth = subprogs[cur_subprog].stack_depth;
16184	u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
16185	u16 stack_depth_extra = 0;
16186
16187	for (i = 0; i < insn_cnt; i++, insn++) {
16188		struct bpf_loop_inline_state *inline_state =
16189			&env->insn_aux_data[i + delta].loop_inline_state;
16190
16191		if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
16192			struct bpf_prog *new_prog;
16193
16194			stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
16195			new_prog = inline_bpf_loop(env,
16196						   i + delta,
16197						   -(stack_depth + stack_depth_extra),
16198						   inline_state->callback_subprogno,
16199						   &cnt);
16200			if (!new_prog)
16201				return -ENOMEM;
16202
16203			delta     += cnt - 1;
16204			env->prog  = new_prog;
16205			insn       = new_prog->insnsi + i + delta;
16206		}
16207
16208		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
16209			subprogs[cur_subprog].stack_depth += stack_depth_extra;
16210			cur_subprog++;
16211			stack_depth = subprogs[cur_subprog].stack_depth;
16212			stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
16213			stack_depth_extra = 0;
16214		}
16215	}
16216
16217	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
16218
16219	return 0;
16220}
16221
16222static void free_states(struct bpf_verifier_env *env)
16223{
16224	struct bpf_verifier_state_list *sl, *sln;
16225	int i;
16226
16227	sl = env->free_list;
16228	while (sl) {
16229		sln = sl->next;
16230		free_verifier_state(&sl->state, false);
16231		kfree(sl);
16232		sl = sln;
16233	}
16234	env->free_list = NULL;
16235
16236	if (!env->explored_states)
16237		return;
16238
16239	for (i = 0; i < state_htab_size(env); i++) {
16240		sl = env->explored_states[i];
16241
16242		while (sl) {
16243			sln = sl->next;
16244			free_verifier_state(&sl->state, false);
16245			kfree(sl);
16246			sl = sln;
16247		}
16248		env->explored_states[i] = NULL;
16249	}
16250}
16251
16252static int do_check_common(struct bpf_verifier_env *env, int subprog)
16253{
16254	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
16255	struct bpf_verifier_state *state;
16256	struct bpf_reg_state *regs;
16257	int ret, i;
16258
16259	env->prev_linfo = NULL;
16260	env->pass_cnt++;
16261
16262	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
16263	if (!state)
16264		return -ENOMEM;
16265	state->curframe = 0;
16266	state->speculative = false;
16267	state->branches = 1;
16268	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
16269	if (!state->frame[0]) {
16270		kfree(state);
16271		return -ENOMEM;
16272	}
16273	env->cur_state = state;
16274	init_func_state(env, state->frame[0],
16275			BPF_MAIN_FUNC /* callsite */,
16276			0 /* frameno */,
16277			subprog);
16278	state->first_insn_idx = env->subprog_info[subprog].start;
16279	state->last_insn_idx = -1;
16280
16281	regs = state->frame[state->curframe]->regs;
16282	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
16283		ret = btf_prepare_func_args(env, subprog, regs);
16284		if (ret)
16285			goto out;
16286		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
16287			if (regs[i].type == PTR_TO_CTX)
16288				mark_reg_known_zero(env, regs, i);
16289			else if (regs[i].type == SCALAR_VALUE)
16290				mark_reg_unknown(env, regs, i);
16291			else if (base_type(regs[i].type) == PTR_TO_MEM) {
16292				const u32 mem_size = regs[i].mem_size;
16293
16294				mark_reg_known_zero(env, regs, i);
16295				regs[i].mem_size = mem_size;
16296				regs[i].id = ++env->id_gen;
16297			}
16298		}
16299	} else {
16300		/* 1st arg to a function */
16301		regs[BPF_REG_1].type = PTR_TO_CTX;
16302		mark_reg_known_zero(env, regs, BPF_REG_1);
16303		ret = btf_check_subprog_arg_match(env, subprog, regs);
16304		if (ret == -EFAULT)
16305			/* unlikely verifier bug. abort.
16306			 * ret == 0 and ret < 0 are sadly acceptable for
16307			 * the main() function due to backward compatibility.
16308			 * For example, a socket filter program may be written as:
16309			 * int bpf_prog(struct pt_regs *ctx)
16310			 * and never dereference that ctx in the program.
16311			 * 'struct pt_regs' is a type mismatch for socket
16312			 * filter that should be using 'struct __sk_buff'.
16313			 */
16314			goto out;
16315	}
16316
16317	ret = do_check(env);
16318out:
16319	/* check for NULL is necessary, since cur_state can be freed inside
16320	 * do_check() under memory pressure.
16321	 */
16322	if (env->cur_state) {
16323		free_verifier_state(env->cur_state, true);
16324		env->cur_state = NULL;
16325	}
16326	while (!pop_stack(env, NULL, NULL, false));
16327	if (!ret && pop_log)
16328		bpf_vlog_reset(&env->log, 0);
16329	free_states(env);
16330	return ret;
16331}
16332
16333/* Verify all global functions in a BPF program one by one based on their BTF.
16334 * All global functions must pass verification. Otherwise the whole program is rejected.
16335 * Consider:
16336 * int bar(int);
16337 * int foo(int f)
16338 * {
16339 *    return bar(f);
16340 * }
16341 * int bar(int b)
16342 * {
16343 *    ...
16344 * }
16345 * foo() will be verified first for R1=any_scalar_value. During verification it
16346 * will be assumed that bar() already verified successfully and call to bar()
16347 * from foo() will be checked for type match only. Later bar() will be verified
16348 * independently to check that it's safe for R1=any_scalar_value.
16349 */
16350static int do_check_subprogs(struct bpf_verifier_env *env)
16351{
16352	struct bpf_prog_aux *aux = env->prog->aux;
16353	int i, ret;
16354
16355	if (!aux->func_info)
16356		return 0;
16357
16358	for (i = 1; i < env->subprog_cnt; i++) {
16359		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
16360			continue;
16361		env->insn_idx = env->subprog_info[i].start;
16362		WARN_ON_ONCE(env->insn_idx == 0);
16363		ret = do_check_common(env, i);
16364		if (ret) {
16365			return ret;
16366		} else if (env->log.level & BPF_LOG_LEVEL) {
16367			verbose(env,
16368				"Func#%d is safe for any args that match its prototype\n",
16369				i);
16370		}
16371	}
16372	return 0;
16373}
16374
16375static int do_check_main(struct bpf_verifier_env *env)
16376{
16377	int ret;
16378
16379	env->insn_idx = 0;
16380	ret = do_check_common(env, 0);
16381	if (!ret)
16382		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
16383	return ret;
16384}
16385
16386
16387static void print_verification_stats(struct bpf_verifier_env *env)
16388{
16389	int i;
16390
16391	if (env->log.level & BPF_LOG_STATS) {
16392		verbose(env, "verification time %lld usec\n",
16393			div_u64(env->verification_time, 1000));
16394		verbose(env, "stack depth ");
16395		for (i = 0; i < env->subprog_cnt; i++) {
16396			u32 depth = env->subprog_info[i].stack_depth;
16397
16398			verbose(env, "%d", depth);
16399			if (i + 1 < env->subprog_cnt)
16400				verbose(env, "+");
16401		}
16402		verbose(env, "\n");
16403	}
16404	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
16405		"total_states %d peak_states %d mark_read %d\n",
16406		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
16407		env->max_states_per_insn, env->total_states,
16408		env->peak_states, env->longest_mark_read_walk);
16409}
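
/* Illustrative sketch: with BPF_LOG_STATS set in the log level, the verifier
 * log ends with lines shaped by the format strings above, e.g. (numbers made
 * up; 1000000 stands for BPF_COMPLEXITY_LIMIT_INSNS):
 *
 *   verification time 1042 usec
 *   stack depth 64+0+32
 *   processed 8000 insns (limit 1000000) max_states_per_insn 4 total_states 120 peak_states 120 mark_read 3
 */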
16410
16411static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
16412{
16413	const struct btf_type *t, *func_proto;
16414	const struct bpf_struct_ops *st_ops;
16415	const struct btf_member *member;
16416	struct bpf_prog *prog = env->prog;
16417	u32 btf_id, member_idx;
16418	const char *mname;
16419
16420	if (!prog->gpl_compatible) {
16421		verbose(env, "struct ops programs must have a GPL compatible license\n");
16422		return -EINVAL;
16423	}
16424
16425	btf_id = prog->aux->attach_btf_id;
16426	st_ops = bpf_struct_ops_find(btf_id);
16427	if (!st_ops) {
16428		verbose(env, "attach_btf_id %u is not a supported struct\n",
16429			btf_id);
16430		return -ENOTSUPP;
16431	}
16432
16433	t = st_ops->type;
16434	member_idx = prog->expected_attach_type;
16435	if (member_idx >= btf_type_vlen(t)) {
16436		verbose(env, "attach to invalid member idx %u of struct %s\n",
16437			member_idx, st_ops->name);
16438		return -EINVAL;
16439	}
16440
16441	member = &btf_type_member(t)[member_idx];
16442	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
16443	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
16444					       NULL);
16445	if (!func_proto) {
16446		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
16447			mname, member_idx, st_ops->name);
16448		return -EINVAL;
16449	}
16450
16451	if (st_ops->check_member) {
16452		int err = st_ops->check_member(t, member);
16453
16454		if (err) {
16455			verbose(env, "attach to unsupported member %s of struct %s\n",
16456				mname, st_ops->name);
16457			return err;
16458		}
16459	}
16460
16461	prog->aux->attach_func_proto = func_proto;
16462	prog->aux->attach_func_name = mname;
16463	env->ops = st_ops->verifier_ops;
16464
16465	return 0;
16466}
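
/* Illustrative sketch (assumes libbpf conventions and the kernel's
 * tcp_congestion_ops as the struct_ops type; names are hypothetical): the
 * expected_attach_type of a struct_ops program selects the member index
 * checked above, and the program must carry a GPL compatible license:
 *
 *   char _license[] SEC("license") = "GPL";
 *
 *   SEC("struct_ops/my_ssthresh")
 *   __u32 BPF_PROG(my_ssthresh, struct sock *sk)
 *   {
 *           return 2;               // placeholder body
 *   }
 *
 *   SEC(".struct_ops")
 *   struct tcp_congestion_ops my_cc = {
 *           .ssthresh = (void *)my_ssthresh,
 *           .name     = "my_cc",
 *   };
 */
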
16467#define SECURITY_PREFIX "security_"
16468
16469static int check_attach_modify_return(unsigned long addr, const char *func_name)
16470{
16471	if (within_error_injection_list(addr) ||
16472	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
16473		return 0;
16474
16475	return -EINVAL;
16476}
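
/* Illustrative sketch (hypothetical program; assumes libbpf's SEC() and
 * BPF_PROG() macros): per the check above, fmod_ret can target functions on
 * the error injection list or security_*() hooks:
 *
 *   SEC("fmod_ret/security_file_open")
 *   int BPF_PROG(deny_open, struct file *file, int ret)
 *   {
 *           // a non-zero return skips the hook and is used as its result
 *           return ret ? ret : -EPERM;
 *   }
 */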
16477
16478/* list of non-sleepable functions that are otherwise on the
16479 * ALLOW_ERROR_INJECTION list
16480 */
16481BTF_SET_START(btf_non_sleepable_error_inject)
16482/* The three functions below can be called from both sleepable and non-sleepable
16483 * context. Assume non-sleepable from the BPF safety point of view.
16484 */
16485BTF_ID(func, __filemap_add_folio)
16486BTF_ID(func, should_fail_alloc_page)
16487BTF_ID(func, should_failslab)
16488BTF_SET_END(btf_non_sleepable_error_inject)
16489
16490static int check_non_sleepable_error_inject(u32 btf_id)
16491{
16492	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
16493}
16494
16495int bpf_check_attach_target(struct bpf_verifier_log *log,
16496			    const struct bpf_prog *prog,
16497			    const struct bpf_prog *tgt_prog,
16498			    u32 btf_id,
16499			    struct bpf_attach_target_info *tgt_info)
16500{
16501	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
16502	const char prefix[] = "btf_trace_";
16503	int ret = 0, subprog = -1, i;
16504	const struct btf_type *t;
16505	bool conservative = true;
16506	const char *tname;
16507	struct btf *btf;
16508	long addr = 0;
16509
16510	if (!btf_id) {
16511		bpf_log(log, "Tracing programs must provide btf_id\n");
16512		return -EINVAL;
16513	}
16514	btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
16515	if (!btf) {
16516		bpf_log(log,
16517			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
16518		return -EINVAL;
16519	}
16520	t = btf_type_by_id(btf, btf_id);
16521	if (!t) {
16522		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
16523		return -EINVAL;
16524	}
16525	tname = btf_name_by_offset(btf, t->name_off);
16526	if (!tname) {
16527		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
16528		return -EINVAL;
16529	}
16530	if (tgt_prog) {
16531		struct bpf_prog_aux *aux = tgt_prog->aux;
16532
16533		for (i = 0; i < aux->func_info_cnt; i++)
16534			if (aux->func_info[i].type_id == btf_id) {
16535				subprog = i;
16536				break;
16537			}
16538		if (subprog == -1) {
16539			bpf_log(log, "Subprog %s doesn't exist\n", tname);
16540			return -EINVAL;
16541		}
16542		conservative = aux->func_info_aux[subprog].unreliable;
16543		if (prog_extension) {
16544			if (conservative) {
16545				bpf_log(log,
16546					"Cannot replace static functions\n");
16547				return -EINVAL;
16548			}
16549			if (!prog->jit_requested) {
16550				bpf_log(log,
16551					"Extension programs should be JITed\n");
16552				return -EINVAL;
16553			}
16554		}
16555		if (!tgt_prog->jited) {
16556			bpf_log(log, "Can attach to only JITed progs\n");
16557			return -EINVAL;
16558		}
16559		if (tgt_prog->type == prog->type) {
16560			/* Cannot fentry/fexit another fentry/fexit program.
16561			 * Cannot attach program extension to another extension.
16562			 * It's ok to attach fentry/fexit to extension program.
16563			 */
16564			bpf_log(log, "Cannot recursively attach\n");
16565			return -EINVAL;
16566		}
16567		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
16568		    prog_extension &&
16569		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
16570		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
16571			/* Program extensions can extend all program types
16572			 * except fentry/fexit, for the following reason.
16573			 * Fentry/fexit programs are used for performance
16574			 * analysis and stats, and can be attached to any
16575			 * program type except themselves. When an extension
16576			 * program replaces, say, an XDP function, it must
16577			 * still be possible to do performance analysis of
16578			 * both the original XDP program and its extension.
16579			 * Hence attaching fentry/fexit to BPF_PROG_TYPE_EXT
16580			 * is allowed. If extending fentry/fexit were allowed,
16581			 * it would be possible to create a long call chain
16582			 * fentry->extension->fentry->extension beyond any
16583			 * reasonable stack size. Hence extending fentry/fexit
16584			 * is not allowed.
16585			 */
16586			bpf_log(log, "Cannot extend fentry/fexit\n");
16587			return -EINVAL;
16588		}
16589	} else {
16590		if (prog_extension) {
16591			bpf_log(log, "Cannot replace kernel functions\n");
16592			return -EINVAL;
16593		}
16594	}
16595
16596	switch (prog->expected_attach_type) {
16597	case BPF_TRACE_RAW_TP:
16598		if (tgt_prog) {
16599			bpf_log(log,
16600				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
16601			return -EINVAL;
16602		}
16603		if (!btf_type_is_typedef(t)) {
16604			bpf_log(log, "attach_btf_id %u is not a typedef\n",
16605				btf_id);
16606			return -EINVAL;
16607		}
16608		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
16609			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
16610				btf_id, tname);
16611			return -EINVAL;
16612		}
16613		tname += sizeof(prefix) - 1;
16614		t = btf_type_by_id(btf, t->type);
16615		if (!btf_type_is_ptr(t))
16616			/* should never happen in valid vmlinux build */
16617			return -EINVAL;
16618		t = btf_type_by_id(btf, t->type);
16619		if (!btf_type_is_func_proto(t))
16620			/* should never happen in valid vmlinux build */
16621			return -EINVAL;
16622
16623		break;
16624	case BPF_TRACE_ITER:
16625		if (!btf_type_is_func(t)) {
16626			bpf_log(log, "attach_btf_id %u is not a function\n",
16627				btf_id);
16628			return -EINVAL;
16629		}
16630		t = btf_type_by_id(btf, t->type);
16631		if (!btf_type_is_func_proto(t))
16632			return -EINVAL;
16633		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
16634		if (ret)
16635			return ret;
16636		break;
16637	default:
16638		if (!prog_extension)
16639			return -EINVAL;
16640		fallthrough;
16641	case BPF_MODIFY_RETURN:
16642	case BPF_LSM_MAC:
16643	case BPF_LSM_CGROUP:
16644	case BPF_TRACE_FENTRY:
16645	case BPF_TRACE_FEXIT:
16646		if (!btf_type_is_func(t)) {
16647			bpf_log(log, "attach_btf_id %u is not a function\n",
16648				btf_id);
16649			return -EINVAL;
16650		}
16651		if (prog_extension &&
16652		    btf_check_type_match(log, prog, btf, t))
16653			return -EINVAL;
16654		t = btf_type_by_id(btf, t->type);
16655		if (!btf_type_is_func_proto(t))
16656			return -EINVAL;
16657
16658		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
16659		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
16660		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
16661			return -EINVAL;
16662
16663		if (tgt_prog && conservative)
16664			t = NULL;
16665
16666		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
16667		if (ret < 0)
16668			return ret;
16669
16670		if (tgt_prog) {
16671			if (subprog == 0)
16672				addr = (long) tgt_prog->bpf_func;
16673			else
16674				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
16675		} else {
16676			addr = kallsyms_lookup_name(tname);
16677			if (!addr) {
16678				bpf_log(log,
16679					"The address of function %s cannot be found\n",
16680					tname);
16681				return -ENOENT;
16682			}
16683		}
16684
16685		if (prog->aux->sleepable) {
16686			ret = -EINVAL;
16687			switch (prog->type) {
16688			case BPF_PROG_TYPE_TRACING:
16689
16690				/* fentry/fexit/fmod_ret progs can be sleepable if they are
16691				 * attached to ALLOW_ERROR_INJECTION targets and are not in the denylist.
16692				 */
16693				if (!check_non_sleepable_error_inject(btf_id) &&
16694				    within_error_injection_list(addr))
16695					ret = 0;
16696				/* fentry/fexit/fmod_ret progs can also be sleepable if they are
16697				 * in the fmodret id set with the KF_SLEEPABLE flag.
16698				 */
16699				else {
16700					u32 *flags = btf_kfunc_is_modify_return(btf, btf_id);
16701
16702					if (flags && (*flags & KF_SLEEPABLE))
16703						ret = 0;
16704				}
16705				break;
16706			case BPF_PROG_TYPE_LSM:
16707				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
16708				 * Only some of them are sleepable.
16709				 */
16710				if (bpf_lsm_is_sleepable_hook(btf_id))
16711					ret = 0;
16712				break;
16713			default:
16714				break;
16715			}
16716			if (ret) {
16717				bpf_log(log, "%s is not sleepable\n", tname);
16718				return ret;
16719			}
16720		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
16721			if (tgt_prog) {
16722				bpf_log(log, "can't modify return codes of BPF programs\n");
16723				return -EINVAL;
16724			}
16725			ret = -EINVAL;
16726			if (btf_kfunc_is_modify_return(btf, btf_id) ||
16727			    !check_attach_modify_return(addr, tname))
16728				ret = 0;
16729			if (ret) {
16730				bpf_log(log, "%s() is not modifiable\n", tname);
16731				return ret;
16732			}
16733		}
16734
16735		break;
16736	}
16737	tgt_info->tgt_addr = addr;
16738	tgt_info->tgt_name = tname;
16739	tgt_info->tgt_type = t;
16740	return 0;
16741}
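
/* Illustrative sketch (assumes libbpf): for a plain fentry program the target
 * resolved above is a kernel function found via BTF and kallsyms_lookup_name();
 * the SEC() name supplies the attach_btf_id:
 *
 *   SEC("fentry/tcp_v4_connect")
 *   int BPF_PROG(trace_connect, struct sock *sk)
 *   {
 *           bpf_printk("connect on sk %p", sk);
 *           return 0;
 *   }
 */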
16742
16743BTF_SET_START(btf_id_deny)
16744BTF_ID_UNUSED
16745#ifdef CONFIG_SMP
16746BTF_ID(func, migrate_disable)
16747BTF_ID(func, migrate_enable)
16748#endif
16749#if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
16750BTF_ID(func, rcu_read_unlock_strict)
16751#endif
16752BTF_SET_END(btf_id_deny)
16753
16754static int check_attach_btf_id(struct bpf_verifier_env *env)
16755{
16756	struct bpf_prog *prog = env->prog;
16757	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
16758	struct bpf_attach_target_info tgt_info = {};
16759	u32 btf_id = prog->aux->attach_btf_id;
16760	struct bpf_trampoline *tr;
16761	int ret;
16762	u64 key;
16763
16764	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
16765		if (prog->aux->sleepable)
16766			/* attach_btf_id checked to be zero already */
16767			return 0;
16768		verbose(env, "Syscall programs can only be sleepable\n");
16769		return -EINVAL;
16770	}
16771
16772	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
16773	    prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
16774		verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
16775		return -EINVAL;
16776	}
16777
16778	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
16779		return check_struct_ops_btf_id(env);
16780
16781	if (prog->type != BPF_PROG_TYPE_TRACING &&
16782	    prog->type != BPF_PROG_TYPE_LSM &&
16783	    prog->type != BPF_PROG_TYPE_EXT)
16784		return 0;
16785
16786	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
16787	if (ret)
16788		return ret;
16789
16790	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
16791		/* to make freplace progs equivalent to their targets, they need to
16792		 * inherit env->ops and expected_attach_type for the rest of the
16793		 * verification
16794		 */
16795		env->ops = bpf_verifier_ops[tgt_prog->type];
16796		prog->expected_attach_type = tgt_prog->expected_attach_type;
16797	}
16798
16799	/* store info about the attachment target that will be used later */
16800	prog->aux->attach_func_proto = tgt_info.tgt_type;
16801	prog->aux->attach_func_name = tgt_info.tgt_name;
16802
16803	if (tgt_prog) {
16804		prog->aux->saved_dst_prog_type = tgt_prog->type;
16805		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
16806	}
16807
16808	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
16809		prog->aux->attach_btf_trace = true;
16810		return 0;
16811	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
16812		if (!bpf_iter_prog_supported(prog))
16813			return -EINVAL;
16814		return 0;
16815	}
16816
16817	if (prog->type == BPF_PROG_TYPE_LSM) {
16818		ret = bpf_lsm_verify_prog(&env->log, prog);
16819		if (ret < 0)
16820			return ret;
16821	} else if (prog->type == BPF_PROG_TYPE_TRACING &&
16822		   btf_id_set_contains(&btf_id_deny, btf_id)) {
16823		return -EINVAL;
16824	}
16825
16826	key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
16827	tr = bpf_trampoline_get(key, &tgt_info);
16828	if (!tr)
16829		return -ENOMEM;
16830
16831	prog->aux->dst_trampoline = tr;
16832	return 0;
16833}
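
/* Illustrative sketch (assumes libbpf; names are hypothetical): an extension
 * program (BPF_PROG_TYPE_EXT) names the global function it replaces in its
 * target, and per the code above inherits the target's verifier_ops and
 * expected_attach_type for the rest of verification:
 *
 *   SEC("freplace/xdp_subprog")
 *   int new_xdp_subprog(struct xdp_md *ctx)
 *   {
 *           return XDP_PASS;
 *   }
 */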
16834
16835struct btf *bpf_get_btf_vmlinux(void)
16836{
16837	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
16838		mutex_lock(&bpf_verifier_lock);
16839		if (!btf_vmlinux)
16840			btf_vmlinux = btf_parse_vmlinux();
16841		mutex_unlock(&bpf_verifier_lock);
16842	}
16843	return btf_vmlinux;
16844}
16845
16846int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
16847{
16848	u64 start_time = ktime_get_ns();
16849	struct bpf_verifier_env *env;
16850	struct bpf_verifier_log *log;
16851	int i, len, ret = -EINVAL;
16852	bool is_priv;
16853
16854	/* no program is valid */
16855	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
16856		return -EINVAL;
16857
16858	/* 'struct bpf_verifier_env' can be global, but since it's not small,
16859	 * allocate/free it every time bpf_check() is called
16860	 */
16861	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
16862	if (!env)
16863		return -ENOMEM;
16864	log = &env->log;
16865
16866	len = (*prog)->len;
16867	env->insn_aux_data =
16868		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
16869	ret = -ENOMEM;
16870	if (!env->insn_aux_data)
16871		goto err_free_env;
16872	for (i = 0; i < len; i++)
16873		env->insn_aux_data[i].orig_idx = i;
16874	env->prog = *prog;
16875	env->ops = bpf_verifier_ops[env->prog->type];
16876	env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
16877	is_priv = bpf_capable();
16878
16879	bpf_get_btf_vmlinux();
16880
16881	/* grab the mutex to protect the few globals used by the verifier */
16882	if (!is_priv)
16883		mutex_lock(&bpf_verifier_lock);
16884
16885	if (attr->log_level || attr->log_buf || attr->log_size) {
16886		/* user requested verbose verifier output
16887		 * and supplied buffer to store the verification trace
16888		 */
16889		log->level = attr->log_level;
16890		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
16891		log->len_total = attr->log_size;
16892
16893		/* log attributes have to be sane */
16894		if (!bpf_verifier_log_attr_valid(log)) {
16895			ret = -EINVAL;
16896			goto err_unlock;
16897		}
16898	}
16899
16900	mark_verifier_state_clean(env);
16901
16902	if (IS_ERR(btf_vmlinux)) {
16903		/* Either gcc, pahole or the kernel is broken. */
16904		verbose(env, "in-kernel BTF is malformed\n");
16905		ret = PTR_ERR(btf_vmlinux);
16906		goto skip_full_check;
16907	}
16908
16909	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
16910	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
16911		env->strict_alignment = true;
16912	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
16913		env->strict_alignment = false;
16914
16915	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
16916	env->allow_uninit_stack = bpf_allow_uninit_stack();
16917	env->bypass_spec_v1 = bpf_bypass_spec_v1();
16918	env->bypass_spec_v4 = bpf_bypass_spec_v4();
16919	env->bpf_capable = bpf_capable();
16920	env->rcu_tag_supported = btf_vmlinux &&
16921		btf_find_by_name_kind(btf_vmlinux, "rcu", BTF_KIND_TYPE_TAG) > 0;
16922
16923	if (is_priv)
16924		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
16925
16926	env->explored_states = kvcalloc(state_htab_size(env),
16927				       sizeof(struct bpf_verifier_state_list *),
16928				       GFP_USER);
16929	ret = -ENOMEM;
16930	if (!env->explored_states)
16931		goto skip_full_check;
16932
16933	ret = add_subprog_and_kfunc(env);
16934	if (ret < 0)
16935		goto skip_full_check;
16936
16937	ret = check_subprogs(env);
16938	if (ret < 0)
16939		goto skip_full_check;
16940
16941	ret = check_btf_info(env, attr, uattr);
16942	if (ret < 0)
16943		goto skip_full_check;
16944
16945	ret = check_attach_btf_id(env);
16946	if (ret)
16947		goto skip_full_check;
16948
16949	ret = resolve_pseudo_ldimm64(env);
16950	if (ret < 0)
16951		goto skip_full_check;
16952
16953	if (bpf_prog_is_dev_bound(env->prog->aux)) {
16954		ret = bpf_prog_offload_verifier_prep(env->prog);
16955		if (ret)
16956			goto skip_full_check;
16957	}
16958
16959	ret = check_cfg(env);
16960	if (ret < 0)
16961		goto skip_full_check;
16962
16963	ret = do_check_subprogs(env);
16964	ret = ret ?: do_check_main(env);
16965
16966	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
16967		ret = bpf_prog_offload_finalize(env);
16968
16969skip_full_check:
16970	kvfree(env->explored_states);
16971
16972	if (ret == 0)
16973		ret = check_max_stack_depth(env);
16974
16975	/* instruction rewrites happen after this point */
16976	if (ret == 0)
16977		ret = optimize_bpf_loop(env);
16978
16979	if (is_priv) {
16980		if (ret == 0)
16981			opt_hard_wire_dead_code_branches(env);
16982		if (ret == 0)
16983			ret = opt_remove_dead_code(env);
16984		if (ret == 0)
16985			ret = opt_remove_nops(env);
16986	} else {
16987		if (ret == 0)
16988			sanitize_dead_code(env);
16989	}
16990
16991	if (ret == 0)
16992		/* program is valid, convert *(u32*)(ctx + off) accesses */
16993		ret = convert_ctx_accesses(env);
16994
16995	if (ret == 0)
16996		ret = do_misc_fixups(env);
16997
16998	/* do 32-bit optimization after insn patching is done so those patched
16999	 * insns can be handled correctly.
17000	 */
17001	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
17002		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
17003		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
17004								     : false;
17005	}
17006
17007	if (ret == 0)
17008		ret = fixup_call_args(env);
17009
17010	env->verification_time = ktime_get_ns() - start_time;
17011	print_verification_stats(env);
17012	env->prog->aux->verified_insns = env->insn_processed;
17013
17014	if (log->level && bpf_verifier_log_full(log))
17015		ret = -ENOSPC;
17016	if (log->level && !log->ubuf) {
17017		ret = -EFAULT;
17018		goto err_release_maps;
17019	}
17020
17021	if (ret)
17022		goto err_release_maps;
17023
17024	if (env->used_map_cnt) {
17025		/* if program passed verifier, update used_maps in bpf_prog_info */
17026		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
17027							  sizeof(env->used_maps[0]),
17028							  GFP_KERNEL);
17029
17030		if (!env->prog->aux->used_maps) {
17031			ret = -ENOMEM;
17032			goto err_release_maps;
17033		}
17034
17035		memcpy(env->prog->aux->used_maps, env->used_maps,
17036		       sizeof(env->used_maps[0]) * env->used_map_cnt);
17037		env->prog->aux->used_map_cnt = env->used_map_cnt;
17038	}
17039	if (env->used_btf_cnt) {
17040		/* if program passed verifier, update used_btfs in bpf_prog_aux */
17041		env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
17042							  sizeof(env->used_btfs[0]),
17043							  GFP_KERNEL);
17044		if (!env->prog->aux->used_btfs) {
17045			ret = -ENOMEM;
17046			goto err_release_maps;
17047		}
17048
17049		memcpy(env->prog->aux->used_btfs, env->used_btfs,
17050		       sizeof(env->used_btfs[0]) * env->used_btf_cnt);
17051		env->prog->aux->used_btf_cnt = env->used_btf_cnt;
17052	}
17053	if (env->used_map_cnt || env->used_btf_cnt) {
17054		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
17055		 * bpf_ld_imm64 instructions
17056		 */
17057		convert_pseudo_ld_imm64(env);
17058	}
17059
17060	adjust_btf_func(env);
17061
17062err_release_maps:
17063	if (!env->prog->aux->used_maps)
17064		/* if we didn't copy map pointers into bpf_prog_info, release
17065		 * them now. Otherwise free_used_maps() will release them.
17066		 */
17067		release_maps(env);
17068	if (!env->prog->aux->used_btfs)
17069		release_btfs(env);
17070
17071	/* extension progs temporarily inherit the attach_type of their targets
17072	 * for verification purposes, so set it back to zero before returning
17073	 */
17074	if (env->prog->type == BPF_PROG_TYPE_EXT)
17075		env->prog->expected_attach_type = 0;
17076
17077	*prog = env->prog;
17078err_unlock:
17079	if (!is_priv)
17080		mutex_unlock(&bpf_verifier_lock);
17081	vfree(env->insn_aux_data);
17082err_free_env:
17083	kfree(env);
17084	return ret;
17085}
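
/* Illustrative sketch (userspace side; assumes libbpf's bpf_prog_load() and
 * LIBBPF_OPTS(); raw opcodes per the eBPF ISA): bpf_check() runs as part of
 * the BPF_PROG_LOAD command, and the attr->log_level/log_buf/log_size fields
 * handled above come from options like these:
 *
 *   struct bpf_insn insns[] = {
 *           { .code = 0xb7, .dst_reg = 0, .imm = 0 },   // r0 = 0
 *           { .code = 0x95 },                           // exit
 *   };
 *   char buf[64 * 1024];
 *   LIBBPF_OPTS(bpf_prog_load_opts, opts,
 *               .log_level = 1,
 *               .log_buf = buf,
 *               .log_size = sizeof(buf));
 *   int fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "min_prog", "GPL",
 *                          insns, 2, &opts);
 */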