    1// SPDX-License-Identifier: GPL-2.0-only
    2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
    3 * Copyright (c) 2016 Facebook
    4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
    5 */
    6#include <uapi/linux/btf.h>
    7#include <linux/kernel.h>
    8#include <linux/types.h>
    9#include <linux/slab.h>
   10#include <linux/bpf.h>
   11#include <linux/btf.h>
   12#include <linux/bpf_verifier.h>
   13#include <linux/filter.h>
   14#include <net/netlink.h>
   15#include <linux/file.h>
   16#include <linux/vmalloc.h>
   17#include <linux/stringify.h>
   18#include <linux/bsearch.h>
   19#include <linux/sort.h>
   20#include <linux/perf_event.h>
   21#include <linux/ctype.h>
   22#include <linux/error-injection.h>
   23#include <linux/bpf_lsm.h>
   24
   25#include "disasm.h"
   26
   27static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
   28#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
   29	[_id] = & _name ## _verifier_ops,
   30#define BPF_MAP_TYPE(_id, _ops)
   31#define BPF_LINK_TYPE(_id, _name)
   32#include <linux/bpf_types.h>
   33#undef BPF_PROG_TYPE
   34#undef BPF_MAP_TYPE
   35#undef BPF_LINK_TYPE
   36};
   37
   38/* bpf_check() is a static code analyzer that walks eBPF program
   39 * instruction by instruction and updates register/stack state.
   40 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
   41 *
   42 * The first pass is depth-first-search to check that the program is a DAG.
   43 * It rejects the following programs:
   44 * - larger than BPF_MAXINSNS insns
   45 * - if loop is present (detected via back-edge)
   46 * - unreachable insns exist (shouldn't be a forest. program = one function)
   47 * - out of bounds or malformed jumps
   48 * The second pass is all possible path descent from the 1st insn.
   49 * Since it's analyzing all paths through the program, the length of the
   50 * analysis is limited to 64k insn, which may be hit even if total number of
   51 * insn is less than 4K, but there are too many branches that change stack/regs.
   52 * Number of 'branches to be analyzed' is limited to 1k
   53 *
   54 * On entry to each instruction, each register has a type, and the instruction
   55 * changes the types of the registers depending on instruction semantics.
   56 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
   57 * copied to R1.
   58 *
   59 * All registers are 64-bit.
   60 * R0 - return register
   61 * R1-R5 argument passing registers
   62 * R6-R9 callee saved registers
   63 * R10 - frame pointer read-only
   64 *
   65 * At the start of BPF program the register R1 contains a pointer to bpf_context
   66 * and has type PTR_TO_CTX.
   67 *
   68 * Verifier tracks arithmetic operations on pointers in case:
   69 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
   70 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
   71 * 1st insn copies R10 (which has FRAME_PTR) type into R1
   72 * and 2nd arithmetic instruction is pattern matched to recognize
   73 * that it wants to construct a pointer to some element within stack.
   74 * So after 2nd insn, the register R1 has type PTR_TO_STACK
   75 * (and -20 constant is saved for further stack bounds checking).
   76 * Meaning that this reg is a pointer to stack plus known immediate constant.
   77 *
   78 * Most of the time the registers have SCALAR_VALUE type, which
   79 * means the register has some value, but it's not a valid pointer.
   80 * (like pointer plus pointer becomes SCALAR_VALUE type)
   81 *
   82 * When verifier sees load or store instructions the type of base register
   83 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
   84 * four pointer types recognized by the check_mem_access() function.
   85 *
   86 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
   87 * and the range of [ptr, ptr + map's value_size) is accessible.
   88 *
   89 * registers used to pass values to function calls are checked against
   90 * function argument constraints.
   91 *
   92 * ARG_PTR_TO_MAP_KEY is one such argument constraint.
   93 * It means that the register type passed to this function must be
   94 * PTR_TO_STACK and it will be used inside the function as
   95 * 'pointer to map element key'
   96 *
   97 * For example the argument constraints for bpf_map_lookup_elem():
   98 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
   99 *   .arg1_type = ARG_CONST_MAP_PTR,
  100 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
  101 *
  102 * ret_type says that this function returns 'pointer to map elem value or null'
  103 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
  104 * 2nd argument should be a pointer to stack, which will be used inside
  105 * the helper function as a pointer to map element key.
  106 *
  107 * On the kernel side the helper function looks like:
  108 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
  109 * {
  110 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
  111 *    void *key = (void *) (unsigned long) r2;
  112 *    void *value;
  113 *
  114 *    here kernel can access 'key' and 'map' pointers safely, knowing that
  115 *    [key, key + map->key_size) bytes are valid and were initialized on
  116 *    the stack of eBPF program.
  117 * }
  118 *
  119 * Corresponding eBPF program may look like:
  120 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
  121 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
  122 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
  123 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
  124 * here verifier looks at prototype of map_lookup_elem() and sees:
  125 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
  126 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
  127 *
  128 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
  129 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
  130 * and were initialized prior to this call.
  131 * If it's ok, then verifier allows this BPF_CALL insn and looks at
  132 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
  133 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
  134 * returns either a pointer to map value or NULL.
  135 *
  136 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
  137 * insn, the register holding that pointer in the true branch changes state to
  138 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
  139 * branch. See check_cond_jmp_op().
  140 *
  141 * After the call R0 is set to return type of the function and registers R1-R5
  142 * are set to NOT_INIT to indicate that they are no longer readable.
  143 *
  144 * The following reference types represent a potential reference to a kernel
  145 * resource which, after first being allocated, must be checked and freed by
  146 * the BPF program:
  147 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
  148 *
  149 * When the verifier sees a helper call return a reference type, it allocates a
  150 * pointer id for the reference and stores it in the current function state.
  151 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
  152 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
  153 * passes through a NULL-check conditional. For the branch wherein the state is
  154 * changed to CONST_IMM, the verifier releases the reference.
  155 *
  156 * For each helper function that allocates a reference, such as
  157 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
  158 * bpf_sk_release(). When a reference type passes into the release function,
  159 * the verifier also releases the reference. If any unchecked or unreleased
  160 * reference remains at the end of the program, the verifier rejects it.
  161 */
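
/* An illustrative sketch (not taken from this file): putting the pieces above
 * together, a minimal lookup-and-update sequence that the verifier accepts.
 * 'map_fd' is a placeholder file descriptor; the map is assumed to have a
 * 4-byte key and at least a 4-byte value.
 *
 *    BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),     // init key at fp-4
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),     // R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),    // R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),         // R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),    // R0 is PTR_TO_MAP_VALUE_OR_NULL
 *    BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 1),       // false branch: R0 is PTR_TO_MAP_VALUE
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 */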
  162
  163/* verifier_state + insn_idx are pushed to stack when branch is encountered */
  164struct bpf_verifier_stack_elem {
  165	/* verifier state is 'st'
  166	 * before processing instruction 'insn_idx'
  167	 * and after processing instruction 'prev_insn_idx'
  168	 */
  169	struct bpf_verifier_state st;
  170	int insn_idx;
  171	int prev_insn_idx;
  172	struct bpf_verifier_stack_elem *next;
  173	/* length of verifier log at the time this state was pushed on stack */
  174	u32 log_pos;
  175};
  176
  177#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
  178#define BPF_COMPLEXITY_LIMIT_STATES	64
  179
  180#define BPF_MAP_KEY_POISON	(1ULL << 63)
  181#define BPF_MAP_KEY_SEEN	(1ULL << 62)
  182
  183#define BPF_MAP_PTR_UNPRIV	1UL
  184#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
  185					  POISON_POINTER_DELTA))
  186#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
  187
  188static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
  189{
  190	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
  191}
  192
  193static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
  194{
  195	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
  196}
  197
  198static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
  199			      const struct bpf_map *map, bool unpriv)
  200{
  201	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
  202	unpriv |= bpf_map_ptr_unpriv(aux);
  203	aux->map_ptr_state = (unsigned long)map |
  204			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
  205}
  206
  207static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
  208{
  209	return aux->map_key_state & BPF_MAP_KEY_POISON;
  210}
  211
  212static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
  213{
  214	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
  215}
  216
  217static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
  218{
  219	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
  220}
  221
  222static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
  223{
  224	bool poisoned = bpf_map_key_poisoned(aux);
  225
  226	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
  227			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
  228}
  229
  230struct bpf_call_arg_meta {
  231	struct bpf_map *map_ptr;
  232	bool raw_mode;
  233	bool pkt_access;
  234	int regno;
  235	int access_size;
  236	int mem_size;
  237	u64 msize_max_value;
  238	int ref_obj_id;
  239	int func_id;
  240	u32 btf_id;
  241};
  242
  243struct btf *btf_vmlinux;
  244
  245static DEFINE_MUTEX(bpf_verifier_lock);
  246
  247static const struct bpf_line_info *
  248find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
  249{
  250	const struct bpf_line_info *linfo;
  251	const struct bpf_prog *prog;
  252	u32 i, nr_linfo;
  253
  254	prog = env->prog;
  255	nr_linfo = prog->aux->nr_linfo;
  256
  257	if (!nr_linfo || insn_off >= prog->len)
  258		return NULL;
  259
  260	linfo = prog->aux->linfo;
  261	for (i = 1; i < nr_linfo; i++)
  262		if (insn_off < linfo[i].insn_off)
  263			break;
  264
  265	return &linfo[i - 1];
  266}
  267
  268void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
  269		       va_list args)
  270{
  271	unsigned int n;
  272
  273	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
  274
  275	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
  276		  "verifier log line truncated - local buffer too short\n");
  277
  278	n = min(log->len_total - log->len_used - 1, n);
  279	log->kbuf[n] = '\0';
  280
  281	if (log->level == BPF_LOG_KERNEL) {
  282		pr_err("BPF:%s\n", log->kbuf);
  283		return;
  284	}
  285	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
  286		log->len_used += n;
  287	else
  288		log->ubuf = NULL;
  289}
  290
  291static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
  292{
  293	char zero = 0;
  294
  295	if (!bpf_verifier_log_needed(log))
  296		return;
  297
  298	log->len_used = new_pos;
  299	if (put_user(zero, log->ubuf + new_pos))
  300		log->ubuf = NULL;
  301}
  302
  303/* log_level controls verbosity level of eBPF verifier.
  304 * bpf_verifier_log_write() is used to dump the verification trace to the log,
  305 * so the user can figure out what's wrong with the program
  306 */
  307__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
  308					   const char *fmt, ...)
  309{
  310	va_list args;
  311
  312	if (!bpf_verifier_log_needed(&env->log))
  313		return;
  314
  315	va_start(args, fmt);
  316	bpf_verifier_vlog(&env->log, fmt, args);
  317	va_end(args);
  318}
  319EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
  320
  321__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
  322{
  323	struct bpf_verifier_env *env = private_data;
  324	va_list args;
  325
  326	if (!bpf_verifier_log_needed(&env->log))
  327		return;
  328
  329	va_start(args, fmt);
  330	bpf_verifier_vlog(&env->log, fmt, args);
  331	va_end(args);
  332}
  333
  334__printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
  335			    const char *fmt, ...)
  336{
  337	va_list args;
  338
  339	if (!bpf_verifier_log_needed(log))
  340		return;
  341
  342	va_start(args, fmt);
  343	bpf_verifier_vlog(log, fmt, args);
  344	va_end(args);
  345}
  346
  347static const char *ltrim(const char *s)
  348{
  349	while (isspace(*s))
  350		s++;
  351
  352	return s;
  353}
  354
  355__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
  356					 u32 insn_off,
  357					 const char *prefix_fmt, ...)
  358{
  359	const struct bpf_line_info *linfo;
  360
  361	if (!bpf_verifier_log_needed(&env->log))
  362		return;
  363
  364	linfo = find_linfo(env, insn_off);
  365	if (!linfo || linfo == env->prev_linfo)
  366		return;
  367
  368	if (prefix_fmt) {
  369		va_list args;
  370
  371		va_start(args, prefix_fmt);
  372		bpf_verifier_vlog(&env->log, prefix_fmt, args);
  373		va_end(args);
  374	}
  375
  376	verbose(env, "%s\n",
  377		ltrim(btf_name_by_offset(env->prog->aux->btf,
  378					 linfo->line_off)));
  379
  380	env->prev_linfo = linfo;
  381}
  382
  383static bool type_is_pkt_pointer(enum bpf_reg_type type)
  384{
  385	return type == PTR_TO_PACKET ||
  386	       type == PTR_TO_PACKET_META;
  387}
  388
  389static bool type_is_sk_pointer(enum bpf_reg_type type)
  390{
  391	return type == PTR_TO_SOCKET ||
  392		type == PTR_TO_SOCK_COMMON ||
  393		type == PTR_TO_TCP_SOCK ||
  394		type == PTR_TO_XDP_SOCK;
  395}
  396
  397static bool reg_type_not_null(enum bpf_reg_type type)
  398{
  399	return type == PTR_TO_SOCKET ||
  400		type == PTR_TO_TCP_SOCK ||
  401		type == PTR_TO_MAP_VALUE ||
  402		type == PTR_TO_SOCK_COMMON;
  403}
  404
  405static bool reg_type_may_be_null(enum bpf_reg_type type)
  406{
  407	return type == PTR_TO_MAP_VALUE_OR_NULL ||
  408	       type == PTR_TO_SOCKET_OR_NULL ||
  409	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
  410	       type == PTR_TO_TCP_SOCK_OR_NULL ||
  411	       type == PTR_TO_BTF_ID_OR_NULL ||
  412	       type == PTR_TO_MEM_OR_NULL ||
  413	       type == PTR_TO_RDONLY_BUF_OR_NULL ||
  414	       type == PTR_TO_RDWR_BUF_OR_NULL;
  415}
  416
  417static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
  418{
  419	return reg->type == PTR_TO_MAP_VALUE &&
  420		map_value_has_spin_lock(reg->map_ptr);
  421}
  422
  423static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
  424{
  425	return type == PTR_TO_SOCKET ||
  426		type == PTR_TO_SOCKET_OR_NULL ||
  427		type == PTR_TO_TCP_SOCK ||
  428		type == PTR_TO_TCP_SOCK_OR_NULL ||
  429		type == PTR_TO_MEM ||
  430		type == PTR_TO_MEM_OR_NULL;
  431}
  432
  433static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
  434{
  435	return type == ARG_PTR_TO_SOCK_COMMON;
  436}
  437
  438/* Determine whether the function releases some resources allocated by another
  439 * function call. The first reference type argument will be assumed to be
  440 * released by release_reference().
  441 */
  442static bool is_release_function(enum bpf_func_id func_id)
  443{
  444	return func_id == BPF_FUNC_sk_release ||
  445	       func_id == BPF_FUNC_ringbuf_submit ||
  446	       func_id == BPF_FUNC_ringbuf_discard;
  447}
  448
  449static bool may_be_acquire_function(enum bpf_func_id func_id)
  450{
  451	return func_id == BPF_FUNC_sk_lookup_tcp ||
  452		func_id == BPF_FUNC_sk_lookup_udp ||
  453		func_id == BPF_FUNC_skc_lookup_tcp ||
  454		func_id == BPF_FUNC_map_lookup_elem ||
  455		func_id == BPF_FUNC_ringbuf_reserve;
  456}
  457
  458static bool is_acquire_function(enum bpf_func_id func_id,
  459				const struct bpf_map *map)
  460{
  461	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
  462
  463	if (func_id == BPF_FUNC_sk_lookup_tcp ||
  464	    func_id == BPF_FUNC_sk_lookup_udp ||
  465	    func_id == BPF_FUNC_skc_lookup_tcp ||
  466	    func_id == BPF_FUNC_ringbuf_reserve)
  467		return true;
  468
  469	if (func_id == BPF_FUNC_map_lookup_elem &&
  470	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
  471	     map_type == BPF_MAP_TYPE_SOCKHASH))
  472		return true;
  473
  474	return false;
  475}
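
/* An illustrative sketch (assumption, not from this file): the acquire/release
 * pairing these helpers imply, as it would look in BPF C. 'ctx' and 'tuple'
 * (a struct bpf_sock_tuple) are placeholders.
 *
 *    struct bpf_sock *sk;
 *
 *    sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
 *                           BPF_F_CURRENT_NETNS, 0);
 *    if (sk)                      // PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET
 *            bpf_sk_release(sk);  // reference released before program exit
 */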
  476
  477static bool is_ptr_cast_function(enum bpf_func_id func_id)
  478{
  479	return func_id == BPF_FUNC_tcp_sock ||
  480		func_id == BPF_FUNC_sk_fullsock;
  481}
  482
  483/* string representation of 'enum bpf_reg_type' */
  484static const char * const reg_type_str[] = {
  485	[NOT_INIT]		= "?",
  486	[SCALAR_VALUE]		= "inv",
  487	[PTR_TO_CTX]		= "ctx",
  488	[CONST_PTR_TO_MAP]	= "map_ptr",
  489	[PTR_TO_MAP_VALUE]	= "map_value",
  490	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
  491	[PTR_TO_STACK]		= "fp",
  492	[PTR_TO_PACKET]		= "pkt",
  493	[PTR_TO_PACKET_META]	= "pkt_meta",
  494	[PTR_TO_PACKET_END]	= "pkt_end",
  495	[PTR_TO_FLOW_KEYS]	= "flow_keys",
  496	[PTR_TO_SOCKET]		= "sock",
  497	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
  498	[PTR_TO_SOCK_COMMON]	= "sock_common",
  499	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
  500	[PTR_TO_TCP_SOCK]	= "tcp_sock",
  501	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
  502	[PTR_TO_TP_BUFFER]	= "tp_buffer",
  503	[PTR_TO_XDP_SOCK]	= "xdp_sock",
  504	[PTR_TO_BTF_ID]		= "ptr_",
  505	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
  506	[PTR_TO_MEM]		= "mem",
  507	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
  508	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
  509	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
  510	[PTR_TO_RDWR_BUF]	= "rdwr_buf",
  511	[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
  512};
  513
  514static char slot_type_char[] = {
  515	[STACK_INVALID]	= '?',
  516	[STACK_SPILL]	= 'r',
  517	[STACK_MISC]	= 'm',
  518	[STACK_ZERO]	= '0',
  519};
  520
  521static void print_liveness(struct bpf_verifier_env *env,
  522			   enum bpf_reg_liveness live)
  523{
  524	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
  525		verbose(env, "_");
  526	if (live & REG_LIVE_READ)
  527		verbose(env, "r");
  528	if (live & REG_LIVE_WRITTEN)
  529		verbose(env, "w");
  530	if (live & REG_LIVE_DONE)
  531		verbose(env, "D");
  532}
  533
  534static struct bpf_func_state *func(struct bpf_verifier_env *env,
  535				   const struct bpf_reg_state *reg)
  536{
  537	struct bpf_verifier_state *cur = env->cur_state;
  538
  539	return cur->frame[reg->frameno];
  540}
  541
  542const char *kernel_type_name(u32 id)
  543{
  544	return btf_name_by_offset(btf_vmlinux,
  545				  btf_type_by_id(btf_vmlinux, id)->name_off);
  546}
  547
  548static void print_verifier_state(struct bpf_verifier_env *env,
  549				 const struct bpf_func_state *state)
  550{
  551	const struct bpf_reg_state *reg;
  552	enum bpf_reg_type t;
  553	int i;
  554
  555	if (state->frameno)
  556		verbose(env, " frame%d:", state->frameno);
  557	for (i = 0; i < MAX_BPF_REG; i++) {
  558		reg = &state->regs[i];
  559		t = reg->type;
  560		if (t == NOT_INIT)
  561			continue;
  562		verbose(env, " R%d", i);
  563		print_liveness(env, reg->live);
  564		verbose(env, "=%s", reg_type_str[t]);
  565		if (t == SCALAR_VALUE && reg->precise)
  566			verbose(env, "P");
  567		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
  568		    tnum_is_const(reg->var_off)) {
  569			/* reg->off should be 0 for SCALAR_VALUE */
  570			verbose(env, "%lld", reg->var_off.value + reg->off);
  571		} else {
  572			if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
  573				verbose(env, "%s", kernel_type_name(reg->btf_id));
  574			verbose(env, "(id=%d", reg->id);
  575			if (reg_type_may_be_refcounted_or_null(t))
  576				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
  577			if (t != SCALAR_VALUE)
  578				verbose(env, ",off=%d", reg->off);
  579			if (type_is_pkt_pointer(t))
  580				verbose(env, ",r=%d", reg->range);
  581			else if (t == CONST_PTR_TO_MAP ||
  582				 t == PTR_TO_MAP_VALUE ||
  583				 t == PTR_TO_MAP_VALUE_OR_NULL)
  584				verbose(env, ",ks=%d,vs=%d",
  585					reg->map_ptr->key_size,
  586					reg->map_ptr->value_size);
  587			if (tnum_is_const(reg->var_off)) {
  588				/* Typically an immediate SCALAR_VALUE, but
  589				 * could be a pointer whose offset is too big
  590				 * for reg->off
  591				 */
  592				verbose(env, ",imm=%llx", reg->var_off.value);
  593			} else {
  594				if (reg->smin_value != reg->umin_value &&
  595				    reg->smin_value != S64_MIN)
  596					verbose(env, ",smin_value=%lld",
  597						(long long)reg->smin_value);
  598				if (reg->smax_value != reg->umax_value &&
  599				    reg->smax_value != S64_MAX)
  600					verbose(env, ",smax_value=%lld",
  601						(long long)reg->smax_value);
  602				if (reg->umin_value != 0)
  603					verbose(env, ",umin_value=%llu",
  604						(unsigned long long)reg->umin_value);
  605				if (reg->umax_value != U64_MAX)
  606					verbose(env, ",umax_value=%llu",
  607						(unsigned long long)reg->umax_value);
  608				if (!tnum_is_unknown(reg->var_off)) {
  609					char tn_buf[48];
  610
  611					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  612					verbose(env, ",var_off=%s", tn_buf);
  613				}
  614				if (reg->s32_min_value != reg->smin_value &&
  615				    reg->s32_min_value != S32_MIN)
  616					verbose(env, ",s32_min_value=%d",
  617						(int)(reg->s32_min_value));
  618				if (reg->s32_max_value != reg->smax_value &&
  619				    reg->s32_max_value != S32_MAX)
  620					verbose(env, ",s32_max_value=%d",
  621						(int)(reg->s32_max_value));
  622				if (reg->u32_min_value != reg->umin_value &&
  623				    reg->u32_min_value != U32_MIN)
  624					verbose(env, ",u32_min_value=%d",
  625						(int)(reg->u32_min_value));
  626				if (reg->u32_max_value != reg->umax_value &&
  627				    reg->u32_max_value != U32_MAX)
  628					verbose(env, ",u32_max_value=%d",
  629						(int)(reg->u32_max_value));
  630			}
  631			verbose(env, ")");
  632		}
  633	}
  634	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
  635		char types_buf[BPF_REG_SIZE + 1];
  636		bool valid = false;
  637		int j;
  638
  639		for (j = 0; j < BPF_REG_SIZE; j++) {
  640			if (state->stack[i].slot_type[j] != STACK_INVALID)
  641				valid = true;
  642			types_buf[j] = slot_type_char[
  643					state->stack[i].slot_type[j]];
  644		}
  645		types_buf[BPF_REG_SIZE] = 0;
  646		if (!valid)
  647			continue;
  648		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
  649		print_liveness(env, state->stack[i].spilled_ptr.live);
  650		if (state->stack[i].slot_type[0] == STACK_SPILL) {
  651			reg = &state->stack[i].spilled_ptr;
  652			t = reg->type;
  653			verbose(env, "=%s", reg_type_str[t]);
  654			if (t == SCALAR_VALUE && reg->precise)
  655				verbose(env, "P");
  656			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
  657				verbose(env, "%lld", reg->var_off.value + reg->off);
  658		} else {
  659			verbose(env, "=%s", types_buf);
  660		}
  661	}
  662	if (state->acquired_refs && state->refs[0].id) {
  663		verbose(env, " refs=%d", state->refs[0].id);
  664		for (i = 1; i < state->acquired_refs; i++)
  665			if (state->refs[i].id)
  666				verbose(env, ",%d", state->refs[i].id);
  667	}
  668	verbose(env, "\n");
  669}
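
/* Sample of the state line the function above emits (a typical first line of
 * a level-2 verifier log, shown here for illustration):
 *
 *     R1=ctx(id=0,off=0,imm=0) R10=fp0
 *
 * R1 is PTR_TO_CTX with a constant-zero var_off, hence the "(id=0,off=0,imm=0)"
 * suffix; R10 is PTR_TO_STACK, printed as "fp" plus its constant offset 0.
 */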
  670
  671#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
  672static int copy_##NAME##_state(struct bpf_func_state *dst,		\
  673			       const struct bpf_func_state *src)	\
  674{									\
  675	if (!src->FIELD)						\
  676		return 0;						\
  677	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
  678		/* internal bug, make state invalid to reject the program */ \
  679		memset(dst, 0, sizeof(*dst));				\
  680		return -EFAULT;						\
  681	}								\
  682	memcpy(dst->FIELD, src->FIELD,					\
  683	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
  684	return 0;							\
  685}
  686/* copy_reference_state() */
  687COPY_STATE_FN(reference, acquired_refs, refs, 1)
  688/* copy_stack_state() */
  689COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
  690#undef COPY_STATE_FN
  691
  692#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
  693static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
  694				  bool copy_old)			\
  695{									\
  696	u32 old_size = state->COUNT;					\
  697	struct bpf_##NAME##_state *new_##FIELD;				\
  698	int slot = size / SIZE;						\
  699									\
  700	if (size <= old_size || !size) {				\
  701		if (copy_old)						\
  702			return 0;					\
  703		state->COUNT = slot * SIZE;				\
  704		if (!size && old_size) {				\
  705			kfree(state->FIELD);				\
  706			state->FIELD = NULL;				\
  707		}							\
  708		return 0;						\
  709	}								\
  710	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
  711				    GFP_KERNEL);			\
  712	if (!new_##FIELD)						\
  713		return -ENOMEM;						\
  714	if (copy_old) {							\
  715		if (state->FIELD)					\
  716			memcpy(new_##FIELD, state->FIELD,		\
  717			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
  718		memset(new_##FIELD + old_size / SIZE, 0,		\
  719		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
  720	}								\
  721	state->COUNT = slot * SIZE;					\
  722	kfree(state->FIELD);						\
  723	state->FIELD = new_##FIELD;					\
  724	return 0;							\
  725}
  726/* realloc_reference_state() */
  727REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
  728/* realloc_stack_state() */
  729REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
  730#undef REALLOC_STATE_FN
  731
  732/* do_check() starts with a zero-sized stack in struct bpf_verifier_state to
  733 * make it consume a minimal amount of memory. Stack writes from the program
  734 * reach check_stack_write(), which calls realloc_func_state() to grow the stack.
  735 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
  736 * which realloc_stack_state() copies over. It points to previous
  737 * bpf_verifier_state which is never reallocated.
  738 */
  739static int realloc_func_state(struct bpf_func_state *state, int stack_size,
  740			      int refs_size, bool copy_old)
  741{
  742	int err = realloc_reference_state(state, refs_size, copy_old);
  743	if (err)
  744		return err;
  745	return realloc_stack_state(state, stack_size, copy_old);
  746}
  747
  748/* Acquire a pointer id from the env and update the state->refs to include
  749 * this new pointer reference.
  750 * On success, returns a valid pointer id to associate with the register.
  751 * On failure, returns a negative errno.
  752 */
  753static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
  754{
  755	struct bpf_func_state *state = cur_func(env);
  756	int new_ofs = state->acquired_refs;
  757	int id, err;
  758
  759	err = realloc_reference_state(state, state->acquired_refs + 1, true);
  760	if (err)
  761		return err;
  762	id = ++env->id_gen;
  763	state->refs[new_ofs].id = id;
  764	state->refs[new_ofs].insn_idx = insn_idx;
  765
  766	return id;
  767}
  768
  769/* release function corresponding to acquire_reference_state(). Idempotent. */
  770static int release_reference_state(struct bpf_func_state *state, int ptr_id)
  771{
  772	int i, last_idx;
  773
  774	last_idx = state->acquired_refs - 1;
  775	for (i = 0; i < state->acquired_refs; i++) {
  776		if (state->refs[i].id == ptr_id) {
  777			if (last_idx && i != last_idx)
  778				memcpy(&state->refs[i], &state->refs[last_idx],
  779				       sizeof(*state->refs));
  780			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
  781			state->acquired_refs--;
  782			return 0;
  783		}
  784	}
  785	return -EINVAL;
  786}
  787
  788static int transfer_reference_state(struct bpf_func_state *dst,
  789				    struct bpf_func_state *src)
  790{
  791	int err = realloc_reference_state(dst, src->acquired_refs, false);
  792	if (err)
  793		return err;
  794	err = copy_reference_state(dst, src);
  795	if (err)
  796		return err;
  797	return 0;
  798}
  799
  800static void free_func_state(struct bpf_func_state *state)
  801{
  802	if (!state)
  803		return;
  804	kfree(state->refs);
  805	kfree(state->stack);
  806	kfree(state);
  807}
  808
  809static void clear_jmp_history(struct bpf_verifier_state *state)
  810{
  811	kfree(state->jmp_history);
  812	state->jmp_history = NULL;
  813	state->jmp_history_cnt = 0;
  814}
  815
  816static void free_verifier_state(struct bpf_verifier_state *state,
  817				bool free_self)
  818{
  819	int i;
  820
  821	for (i = 0; i <= state->curframe; i++) {
  822		free_func_state(state->frame[i]);
  823		state->frame[i] = NULL;
  824	}
  825	clear_jmp_history(state);
  826	if (free_self)
  827		kfree(state);
  828}
  829
  830/* copy verifier state from src to dst growing dst stack space
  831 * when necessary to accommodate larger src stack
  832 */
  833static int copy_func_state(struct bpf_func_state *dst,
  834			   const struct bpf_func_state *src)
  835{
  836	int err;
  837
  838	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
  839				 false);
  840	if (err)
  841		return err;
  842	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
  843	err = copy_reference_state(dst, src);
  844	if (err)
  845		return err;
  846	return copy_stack_state(dst, src);
  847}
  848
  849static int copy_verifier_state(struct bpf_verifier_state *dst_state,
  850			       const struct bpf_verifier_state *src)
  851{
  852	struct bpf_func_state *dst;
  853	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
  854	int i, err;
  855
  856	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
  857		kfree(dst_state->jmp_history);
  858		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
  859		if (!dst_state->jmp_history)
  860			return -ENOMEM;
  861	}
  862	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
  863	dst_state->jmp_history_cnt = src->jmp_history_cnt;
  864
  865	/* if dst has more stack frames than src, free them */
  866	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
  867		free_func_state(dst_state->frame[i]);
  868		dst_state->frame[i] = NULL;
  869	}
  870	dst_state->speculative = src->speculative;
  871	dst_state->curframe = src->curframe;
  872	dst_state->active_spin_lock = src->active_spin_lock;
  873	dst_state->branches = src->branches;
  874	dst_state->parent = src->parent;
  875	dst_state->first_insn_idx = src->first_insn_idx;
  876	dst_state->last_insn_idx = src->last_insn_idx;
  877	for (i = 0; i <= src->curframe; i++) {
  878		dst = dst_state->frame[i];
  879		if (!dst) {
  880			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
  881			if (!dst)
  882				return -ENOMEM;
  883			dst_state->frame[i] = dst;
  884		}
  885		err = copy_func_state(dst, src->frame[i]);
  886		if (err)
  887			return err;
  888	}
  889	return 0;
  890}
  891
  892static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
  893{
  894	while (st) {
  895		u32 br = --st->branches;
  896
  897		/* WARN_ON(br > 1) technically makes sense here,
  898		 * but see comment in push_stack(), hence:
  899		 */
  900		WARN_ONCE((int)br < 0,
  901			  "BUG update_branch_counts:branches_to_explore=%d\n",
  902			  br);
  903		if (br)
  904			break;
  905		st = st->parent;
  906	}
  907}
  908
  909static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
  910		     int *insn_idx, bool pop_log)
  911{
  912	struct bpf_verifier_state *cur = env->cur_state;
  913	struct bpf_verifier_stack_elem *elem, *head = env->head;
  914	int err;
  915
  916	if (env->head == NULL)
  917		return -ENOENT;
  918
  919	if (cur) {
  920		err = copy_verifier_state(cur, &head->st);
  921		if (err)
  922			return err;
  923	}
  924	if (pop_log)
  925		bpf_vlog_reset(&env->log, head->log_pos);
  926	if (insn_idx)
  927		*insn_idx = head->insn_idx;
  928	if (prev_insn_idx)
  929		*prev_insn_idx = head->prev_insn_idx;
  930	elem = head->next;
  931	free_verifier_state(&head->st, false);
  932	kfree(head);
  933	env->head = elem;
  934	env->stack_size--;
  935	return 0;
  936}
  937
  938static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
  939					     int insn_idx, int prev_insn_idx,
  940					     bool speculative)
  941{
  942	struct bpf_verifier_state *cur = env->cur_state;
  943	struct bpf_verifier_stack_elem *elem;
  944	int err;
  945
  946	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
  947	if (!elem)
  948		goto err;
  949
  950	elem->insn_idx = insn_idx;
  951	elem->prev_insn_idx = prev_insn_idx;
  952	elem->next = env->head;
  953	elem->log_pos = env->log.len_used;
  954	env->head = elem;
  955	env->stack_size++;
  956	err = copy_verifier_state(&elem->st, cur);
  957	if (err)
  958		goto err;
  959	elem->st.speculative |= speculative;
  960	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
  961		verbose(env, "The sequence of %d jumps is too complex.\n",
  962			env->stack_size);
  963		goto err;
  964	}
  965	if (elem->st.parent) {
  966		++elem->st.parent->branches;
  967		/* WARN_ON(branches > 2) technically makes sense here,
  968		 * but
  969		 * 1. speculative states will bump 'branches' for non-branch
  970		 * instructions
  971		 * 2. is_state_visited() heuristics may decide not to create
  972		 * a new state for a sequence of branches and all such current
  973		 * and cloned states will be pointing to a single parent state
  974		 * which might have large 'branches' count.
  975		 */
  976	}
  977	return &elem->st;
  978err:
  979	free_verifier_state(env->cur_state, true);
  980	env->cur_state = NULL;
  981	/* pop all elements and return */
  982	while (!pop_stack(env, NULL, NULL, false));
  983	return NULL;
  984}
  985
  986#define CALLER_SAVED_REGS 6
  987static const int caller_saved[CALLER_SAVED_REGS] = {
  988	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
  989};
  990
  991static void __mark_reg_not_init(const struct bpf_verifier_env *env,
  992				struct bpf_reg_state *reg);
  993
  994/* Mark the unknown part of a register (variable offset or scalar value) as
  995 * known to have the value @imm.
  996 */
  997static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
  998{
  999	/* Clear id, off, and union(map_ptr, range) */
 1000	memset(((u8 *)reg) + sizeof(reg->type), 0,
 1001	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
 1002	reg->var_off = tnum_const(imm);
 1003	reg->smin_value = (s64)imm;
 1004	reg->smax_value = (s64)imm;
 1005	reg->umin_value = imm;
 1006	reg->umax_value = imm;
 1007
 1008	reg->s32_min_value = (s32)imm;
 1009	reg->s32_max_value = (s32)imm;
 1010	reg->u32_min_value = (u32)imm;
 1011	reg->u32_max_value = (u32)imm;
 1012}
 1013
 1014static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
 1015{
 1016	reg->var_off = tnum_const_subreg(reg->var_off, imm);
 1017	reg->s32_min_value = (s32)imm;
 1018	reg->s32_max_value = (s32)imm;
 1019	reg->u32_min_value = (u32)imm;
 1020	reg->u32_max_value = (u32)imm;
 1021}
 1022
 1023/* Mark the 'variable offset' part of a register as zero.  This should be
 1024 * used only on registers holding a pointer type.
 1025 */
 1026static void __mark_reg_known_zero(struct bpf_reg_state *reg)
 1027{
 1028	__mark_reg_known(reg, 0);
 1029}
 1030
 1031static void __mark_reg_const_zero(struct bpf_reg_state *reg)
 1032{
 1033	__mark_reg_known(reg, 0);
 1034	reg->type = SCALAR_VALUE;
 1035}
 1036
 1037static void mark_reg_known_zero(struct bpf_verifier_env *env,
 1038				struct bpf_reg_state *regs, u32 regno)
 1039{
 1040	if (WARN_ON(regno >= MAX_BPF_REG)) {
 1041		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
 1042		/* Something bad happened, let's kill all regs */
 1043		for (regno = 0; regno < MAX_BPF_REG; regno++)
 1044			__mark_reg_not_init(env, regs + regno);
 1045		return;
 1046	}
 1047	__mark_reg_known_zero(regs + regno);
 1048}
 1049
 1050static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
 1051{
 1052	return type_is_pkt_pointer(reg->type);
 1053}
 1054
 1055static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
 1056{
 1057	return reg_is_pkt_pointer(reg) ||
 1058	       reg->type == PTR_TO_PACKET_END;
 1059}
 1060
 1061/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
 1062static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 1063				    enum bpf_reg_type which)
 1064{
 1065	/* The register can already have a range from prior markings.
 1066	 * This is fine as long as it hasn't been advanced from its
 1067	 * origin.
 1068	 */
 1069	return reg->type == which &&
 1070	       reg->id == 0 &&
 1071	       reg->off == 0 &&
 1072	       tnum_equals_const(reg->var_off, 0);
 1073}
 1074
 1075/* Reset the min/max bounds of a register */
 1076static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 1077{
 1078	reg->smin_value = S64_MIN;
 1079	reg->smax_value = S64_MAX;
 1080	reg->umin_value = 0;
 1081	reg->umax_value = U64_MAX;
 1082
 1083	reg->s32_min_value = S32_MIN;
 1084	reg->s32_max_value = S32_MAX;
 1085	reg->u32_min_value = 0;
 1086	reg->u32_max_value = U32_MAX;
 1087}
 1088
 1089static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
 1090{
 1091	reg->smin_value = S64_MIN;
 1092	reg->smax_value = S64_MAX;
 1093	reg->umin_value = 0;
 1094	reg->umax_value = U64_MAX;
 1095}
 1096
 1097static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
 1098{
 1099	reg->s32_min_value = S32_MIN;
 1100	reg->s32_max_value = S32_MAX;
 1101	reg->u32_min_value = 0;
 1102	reg->u32_max_value = U32_MAX;
 1103}
 1104
 1105static void __update_reg32_bounds(struct bpf_reg_state *reg)
 1106{
 1107	struct tnum var32_off = tnum_subreg(reg->var_off);
 1108
 1109	/* min signed is max(sign bit) | min(other bits) */
 1110	reg->s32_min_value = max_t(s32, reg->s32_min_value,
 1111			var32_off.value | (var32_off.mask & S32_MIN));
 1112	/* max signed is min(sign bit) | max(other bits) */
 1113	reg->s32_max_value = min_t(s32, reg->s32_max_value,
 1114			var32_off.value | (var32_off.mask & S32_MAX));
 1115	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
 1116	reg->u32_max_value = min(reg->u32_max_value,
 1117				 (u32)(var32_off.value | var32_off.mask));
 1118}
 1119
 1120static void __update_reg64_bounds(struct bpf_reg_state *reg)
 1121{
 1122	/* min signed is max(sign bit) | min(other bits) */
 1123	reg->smin_value = max_t(s64, reg->smin_value,
 1124				reg->var_off.value | (reg->var_off.mask & S64_MIN));
 1125	/* max signed is min(sign bit) | max(other bits) */
 1126	reg->smax_value = min_t(s64, reg->smax_value,
 1127				reg->var_off.value | (reg->var_off.mask & S64_MAX));
 1128	reg->umin_value = max(reg->umin_value, reg->var_off.value);
 1129	reg->umax_value = min(reg->umax_value,
 1130			      reg->var_off.value | reg->var_off.mask);
 1131}
 1132
 1133static void __update_reg_bounds(struct bpf_reg_state *reg)
 1134{
 1135	__update_reg32_bounds(reg);
 1136	__update_reg64_bounds(reg);
 1137}
 1138
 1139/* Uses signed min/max values to inform unsigned, and vice-versa */
 1140static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
 1141{
 1142	/* Learn sign from signed bounds.
 1143	 * If we cannot cross the sign boundary, then signed and unsigned bounds
 1144	 * are the same, so combine.  This works even in the negative case, e.g.
 1145	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 1146	 */
 1147	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
 1148		reg->s32_min_value = reg->u32_min_value =
 1149			max_t(u32, reg->s32_min_value, reg->u32_min_value);
 1150		reg->s32_max_value = reg->u32_max_value =
 1151			min_t(u32, reg->s32_max_value, reg->u32_max_value);
 1152		return;
 1153	}
 1154	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
 1155	 * boundary, so we must be careful.
 1156	 */
 1157	if ((s32)reg->u32_max_value >= 0) {
 1158		/* Positive.  We can't learn anything from the smin, but smax
 1159		 * is positive, hence safe.
 1160		 */
 1161		reg->s32_min_value = reg->u32_min_value;
 1162		reg->s32_max_value = reg->u32_max_value =
 1163			min_t(u32, reg->s32_max_value, reg->u32_max_value);
 1164	} else if ((s32)reg->u32_min_value < 0) {
 1165		/* Negative.  We can't learn anything from the smax, but smin
 1166		 * is negative, hence safe.
 1167		 */
 1168		reg->s32_min_value = reg->u32_min_value =
 1169			max_t(u32, reg->s32_min_value, reg->u32_min_value);
 1170		reg->s32_max_value = reg->u32_max_value;
 1171	}
 1172}
 1173
 1174static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
 1175{
 1176	/* Learn sign from signed bounds.
 1177	 * If we cannot cross the sign boundary, then signed and unsigned bounds
 1178	 * are the same, so combine.  This works even in the negative case, e.g.
 1179	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 1180	 */
 1181	if (reg->smin_value >= 0 || reg->smax_value < 0) {
 1182		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 1183							  reg->umin_value);
 1184		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 1185							  reg->umax_value);
 1186		return;
 1187	}
 1188	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
 1189	 * boundary, so we must be careful.
 1190	 */
 1191	if ((s64)reg->umax_value >= 0) {
 1192		/* Positive.  We can't learn anything from the smin, but smax
 1193		 * is positive, hence safe.
 1194		 */
 1195		reg->smin_value = reg->umin_value;
 1196		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 1197							  reg->umax_value);
 1198	} else if ((s64)reg->umin_value < 0) {
 1199		/* Negative.  We can't learn anything from the smax, but smin
 1200		 * is negative, hence safe.
 1201		 */
 1202		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 1203							  reg->umin_value);
 1204		reg->smax_value = reg->umax_value;
 1205	}
 1206}
 1207
 1208static void __reg_deduce_bounds(struct bpf_reg_state *reg)
 1209{
 1210	__reg32_deduce_bounds(reg);
 1211	__reg64_deduce_bounds(reg);
 1212}
 1213
 1214/* Attempts to improve var_off based on unsigned min/max information */
 1215static void __reg_bound_offset(struct bpf_reg_state *reg)
 1216{
 1217	struct tnum var64_off = tnum_intersect(reg->var_off,
 1218					       tnum_range(reg->umin_value,
 1219							  reg->umax_value));
 1220	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
 1221						tnum_range(reg->u32_min_value,
 1222							   reg->u32_max_value));
 1223
 1224	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
 1225}
 1226
 1227static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
 1228{
 1229	reg->umin_value = reg->u32_min_value;
 1230	reg->umax_value = reg->u32_max_value;
 1231	/* Attempt to pull 32-bit signed bounds into 64-bit bounds,
 1232	 * but they must be positive; otherwise set worst-case bounds
 1233	 * and refine later from tnum.
 1234	 */
 1235	if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
 1236		reg->smax_value = reg->s32_max_value;
 1237	else
 1238		reg->smax_value = U32_MAX;
 1239	if (reg->s32_min_value >= 0)
 1240		reg->smin_value = reg->s32_min_value;
 1241	else
 1242		reg->smin_value = 0;
 1243}
 1244
 1245static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
 1246{
 1247	/* special case when the 64-bit register has its upper 32 bits
 1248	 * zeroed. Typically happens after a zext or <<32, >>32 sequence,
 1249	 * allowing us to use the 32-bit bounds directly.
 1250	 */
 1251	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
 1252		__reg_assign_32_into_64(reg);
 1253	} else {
 1254		/* Otherwise the best we can do is push lower 32bit known and
 1255		 * unknown bits into register (var_off set from jmp logic)
 1256		 * then learn as much as possible from the 64-bit tnum
 1257		 * known and unknown bits. The previous smin/smax bounds are
 1258		 * invalid here because of jmp32 compare so mark them unknown
 1259		 * so they do not impact tnum bounds calculation.
 1260		 */
 1261		__mark_reg64_unbounded(reg);
 1262		__update_reg_bounds(reg);
 1263	}
 1264
 1265	/* Intersecting with the old var_off might have improved our bounds
 1266	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
 1267	 * then new var_off is (0; 0x7f...fc) which improves our umax.
 1268	 */
 1269	__reg_deduce_bounds(reg);
 1270	__reg_bound_offset(reg);
 1271	__update_reg_bounds(reg);
 1272}
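
/* An illustrative example (assumption, not from this file) of the zeroed
 * upper half case handled above:
 *
 *    BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
 *    BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
 *
 * leaves the upper 32 bits of R1's var_off known zero, so the 32-bit bounds
 * can be adopted as the 64-bit bounds via __reg_assign_32_into_64().
 */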
 1273
 1274static bool __reg64_bound_s32(s64 a)
 1275{
 1276	if (a > S32_MIN && a < S32_MAX)
 1277		return true;
 1278	return false;
 1279}
 1280
 1281static bool __reg64_bound_u32(u64 a)
 1282{
 1283	if (a > U32_MIN && a < U32_MAX)
 1284		return true;
 1285	return false;
 1286}
 1287
 1288static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
 1289{
 1290	__mark_reg32_unbounded(reg);
 1291
 1292	if (__reg64_bound_s32(reg->smin_value))
 1293		reg->s32_min_value = (s32)reg->smin_value;
 1294	if (__reg64_bound_s32(reg->smax_value))
 1295		reg->s32_max_value = (s32)reg->smax_value;
 1296	if (__reg64_bound_u32(reg->umin_value))
 1297		reg->u32_min_value = (u32)reg->umin_value;
 1298	if (__reg64_bound_u32(reg->umax_value))
 1299		reg->u32_max_value = (u32)reg->umax_value;
 1300
 1301	/* Intersecting with the old var_off might have improved our bounds
 1302	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
 1303	 * then new var_off is (0; 0x7f...fc) which improves our umax.
 1304	 */
 1305	__reg_deduce_bounds(reg);
 1306	__reg_bound_offset(reg);
 1307	__update_reg_bounds(reg);
 1308}
 1309
 1310/* Mark a register as having a completely unknown (scalar) value. */
 1311static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 1312			       struct bpf_reg_state *reg)
 1313{
 1314	/*
 1315	 * Clear type, id, off, and union(map_ptr, range) and
 1316	 * padding between 'type' and union
 1317	 */
 1318	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
 1319	reg->type = SCALAR_VALUE;
 1320	reg->var_off = tnum_unknown;
 1321	reg->frameno = 0;
 1322	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
 1323	__mark_reg_unbounded(reg);
 1324}
 1325
 1326static void mark_reg_unknown(struct bpf_verifier_env *env,
 1327			     struct bpf_reg_state *regs, u32 regno)
 1328{
 1329	if (WARN_ON(regno >= MAX_BPF_REG)) {
 1330		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
 1331		/* Something bad happened, let's kill all regs except FP */
 1332		for (regno = 0; regno < BPF_REG_FP; regno++)
 1333			__mark_reg_not_init(env, regs + regno);
 1334		return;
 1335	}
 1336	__mark_reg_unknown(env, regs + regno);
 1337}
 1338
 1339static void __mark_reg_not_init(const struct bpf_verifier_env *env,
 1340				struct bpf_reg_state *reg)
 1341{
 1342	__mark_reg_unknown(env, reg);
 1343	reg->type = NOT_INIT;
 1344}
 1345
 1346static void mark_reg_not_init(struct bpf_verifier_env *env,
 1347			      struct bpf_reg_state *regs, u32 regno)
 1348{
 1349	if (WARN_ON(regno >= MAX_BPF_REG)) {
 1350		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
 1351		/* Something bad happened, let's kill all regs except FP */
 1352		for (regno = 0; regno < BPF_REG_FP; regno++)
 1353			__mark_reg_not_init(env, regs + regno);
 1354		return;
 1355	}
 1356	__mark_reg_not_init(env, regs + regno);
 1357}
 1358
 1359static void mark_btf_ld_reg(struct bpf_verifier_env *env,
 1360			    struct bpf_reg_state *regs, u32 regno,
 1361			    enum bpf_reg_type reg_type, u32 btf_id)
 1362{
 1363	if (reg_type == SCALAR_VALUE) {
 1364		mark_reg_unknown(env, regs, regno);
 1365		return;
 1366	}
 1367	mark_reg_known_zero(env, regs, regno);
 1368	regs[regno].type = PTR_TO_BTF_ID;
 1369	regs[regno].btf_id = btf_id;
 1370}
 1371
 1372#define DEF_NOT_SUBREG	(0)
 1373static void init_reg_state(struct bpf_verifier_env *env,
 1374			   struct bpf_func_state *state)
 1375{
 1376	struct bpf_reg_state *regs = state->regs;
 1377	int i;
 1378
 1379	for (i = 0; i < MAX_BPF_REG; i++) {
 1380		mark_reg_not_init(env, regs, i);
 1381		regs[i].live = REG_LIVE_NONE;
 1382		regs[i].parent = NULL;
 1383		regs[i].subreg_def = DEF_NOT_SUBREG;
 1384	}
 1385
 1386	/* frame pointer */
 1387	regs[BPF_REG_FP].type = PTR_TO_STACK;
 1388	mark_reg_known_zero(env, regs, BPF_REG_FP);
 1389	regs[BPF_REG_FP].frameno = state->frameno;
 1390}
 1391
 1392#define BPF_MAIN_FUNC (-1)
 1393static void init_func_state(struct bpf_verifier_env *env,
 1394			    struct bpf_func_state *state,
 1395			    int callsite, int frameno, int subprogno)
 1396{
 1397	state->callsite = callsite;
 1398	state->frameno = frameno;
 1399	state->subprogno = subprogno;
 1400	init_reg_state(env, state);
 1401}
 1402
 1403enum reg_arg_type {
 1404	SRC_OP,		/* register is used as source operand */
 1405	DST_OP,		/* register is used as destination operand */
 1406	DST_OP_NO_MARK	/* same as above, check only, don't mark */
 1407};
 1408
 1409static int cmp_subprogs(const void *a, const void *b)
 1410{
 1411	return ((struct bpf_subprog_info *)a)->start -
 1412	       ((struct bpf_subprog_info *)b)->start;
 1413}
 1414
 1415static int find_subprog(struct bpf_verifier_env *env, int off)
 1416{
 1417	struct bpf_subprog_info *p;
 1418
 1419	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
 1420		    sizeof(env->subprog_info[0]), cmp_subprogs);
 1421	if (!p)
 1422		return -ENOENT;
 1423	return p - env->subprog_info;
 1424
 1425}
 1426
 1427static int add_subprog(struct bpf_verifier_env *env, int off)
 1428{
 1429	int insn_cnt = env->prog->len;
 1430	int ret;
 1431
 1432	if (off >= insn_cnt || off < 0) {
 1433		verbose(env, "call to invalid destination\n");
 1434		return -EINVAL;
 1435	}
 1436	ret = find_subprog(env, off);
 1437	if (ret >= 0)
 1438		return 0;
 1439	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
 1440		verbose(env, "too many subprograms\n");
 1441		return -E2BIG;
 1442	}
 1443	env->subprog_info[env->subprog_cnt++].start = off;
 1444	sort(env->subprog_info, env->subprog_cnt,
 1445	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
 1446	return 0;
 1447}
 1448
 1449static int check_subprogs(struct bpf_verifier_env *env)
 1450{
 1451	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
 1452	struct bpf_subprog_info *subprog = env->subprog_info;
 1453	struct bpf_insn *insn = env->prog->insnsi;
 1454	int insn_cnt = env->prog->len;
 1455
 1456	/* Add entry function. */
 1457	ret = add_subprog(env, 0);
 1458	if (ret < 0)
 1459		return ret;
 1460
 1461	/* determine subprog starts. The end is one before the next starts */
 1462	for (i = 0; i < insn_cnt; i++) {
 1463		if (insn[i].code != (BPF_JMP | BPF_CALL))
 1464			continue;
 1465		if (insn[i].src_reg != BPF_PSEUDO_CALL)
 1466			continue;
 1467		if (!env->bpf_capable) {
 1468			verbose(env,
 1469				"function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
 1470			return -EPERM;
 1471		}
 1472		ret = add_subprog(env, i + insn[i].imm + 1);
 1473		if (ret < 0)
 1474			return ret;
 1475	}
 1476
 1477	/* Add a fake 'exit' subprog which could simplify subprog iteration
 1478	 * logic. 'subprog_cnt' should not be increased.
 1479	 */
 1480	subprog[env->subprog_cnt].start = insn_cnt;
 1481
 1482	if (env->log.level & BPF_LOG_LEVEL2)
 1483		for (i = 0; i < env->subprog_cnt; i++)
 1484			verbose(env, "func#%d @%d\n", i, subprog[i].start);
 1485
 1486	/* now check that all jumps are within the same subprog */
 1487	subprog_start = subprog[cur_subprog].start;
 1488	subprog_end = subprog[cur_subprog + 1].start;
 1489	for (i = 0; i < insn_cnt; i++) {
 1490		u8 code = insn[i].code;
 1491
 1492		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
 1493			goto next;
 1494		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
 1495			goto next;
 1496		off = i + insn[i].off + 1;
 1497		if (off < subprog_start || off >= subprog_end) {
 1498			verbose(env, "jump out of range from insn %d to %d\n", i, off);
 1499			return -EINVAL;
 1500		}
 1501next:
 1502		if (i == subprog_end - 1) {
 1503			/* to avoid fall-through from one subprog into another
 1504			 * the last insn of the subprog should be either exit
 1505			 * or unconditional jump back
 1506			 */
 1507			if (code != (BPF_JMP | BPF_EXIT) &&
 1508			    code != (BPF_JMP | BPF_JA)) {
 1509				verbose(env, "last insn is not an exit or jmp\n");
 1510				return -EINVAL;
 1511			}
 1512			subprog_start = subprog_end;
 1513			cur_subprog++;
 1514			if (cur_subprog < env->subprog_cnt)
 1515				subprog_end = subprog[cur_subprog + 1].start;
 1516		}
 1517	}
 1518	return 0;
 1519}
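
/* An illustrative sketch (assumption, not from this file): a two-subprog
 * layout that satisfies the checks above. The pseudo call at insn 0 targets
 * insn 0 + imm + 1 = 2, so subprog 0 is insns 0-1 and subprog 1 is insns 2-3;
 * each subprog ends in an exit and no jump crosses a subprog boundary.
 *
 *    0: BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 1),
 *    1: BPF_EXIT_INSN(),
 *    2: BPF_MOV64_IMM(BPF_REG_0, 0),
 *    3: BPF_EXIT_INSN(),
 */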
 1520
 1521/* Parentage chain of this register (or stack slot) should take care of all
 1522 * issues like callee-saved registers, stack slot allocation time, etc.
 1523 */
 1524static int mark_reg_read(struct bpf_verifier_env *env,
 1525			 const struct bpf_reg_state *state,
 1526			 struct bpf_reg_state *parent, u8 flag)
 1527{
 1528	bool writes = parent == state->parent; /* Observe write marks */
 1529	int cnt = 0;
 1530
 1531	while (parent) {
 1532		/* if read wasn't screened by an earlier write ... */
 1533		if (writes && state->live & REG_LIVE_WRITTEN)
 1534			break;
 1535		if (parent->live & REG_LIVE_DONE) {
 1536			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
 1537				reg_type_str[parent->type],
 1538				parent->var_off.value, parent->off);
 1539			return -EFAULT;
 1540		}
 1541		/* The first condition is more likely to be true than the
 1542		 * second, so check it first.
 1543		 */
 1544		if ((parent->live & REG_LIVE_READ) == flag ||
 1545		    parent->live & REG_LIVE_READ64)
 1546			/* The parentage chain never changes and
 1547			 * this parent was already marked as LIVE_READ.
 1548			 * There is no need to keep walking the chain again and
 1549			 * keep re-marking all parents as LIVE_READ.
 1550			 * This case happens when the same register is read
 1551			 * multiple times without writes into it in-between.
 1552			 * Also, if parent has the stronger REG_LIVE_READ64 set,
 1553			 * then no need to set the weak REG_LIVE_READ32.
 1554			 */
 1555			break;
 1556		/* ... then we depend on parent's value */
 1557		parent->live |= flag;
 1558		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
 1559		if (flag == REG_LIVE_READ64)
 1560			parent->live &= ~REG_LIVE_READ32;
 1561		state = parent;
 1562		parent = state->parent;
 1563		writes = true;
 1564		cnt++;
 1565	}
 1566
 1567	if (env->longest_mark_read_walk < cnt)
 1568		env->longest_mark_read_walk = cnt;
 1569	return 0;
 1570}
 1571
 1572/* This function is supposed to be used by the following 32-bit optimization
 1573 * code only. It returns TRUE if the source or destination register operates
 1574 * on 64-bit, otherwise return FALSE.
 1575 */
 1576static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
 1577		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
 1578{
 1579	u8 code, class, op;
 1580
 1581	code = insn->code;
 1582	class = BPF_CLASS(code);
 1583	op = BPF_OP(code);
 1584	if (class == BPF_JMP) {
 1585		/* BPF_EXIT for "main" will reach here. Return TRUE
 1586		 * conservatively.
 1587		 */
 1588		if (op == BPF_EXIT)
 1589			return true;
 1590		if (op == BPF_CALL) {
 1591			/* BPF to BPF call will reach here because of marking
 1592			 * caller-saved clobbers with DST_OP_NO_MARK, for which we
 1593			 * don't care about the register def because they are
 1594			 * already marked as NOT_INIT anyway.
 1595			 */
 1596			if (insn->src_reg == BPF_PSEUDO_CALL)
 1597				return false;
 1598			/* Helper call will reach here because of arg type
 1599			 * check, conservatively return TRUE.
 1600			 */
 1601			if (t == SRC_OP)
 1602				return true;
 1603
 1604			return false;
 1605		}
 1606	}
 1607
 1608	if (class == BPF_ALU64 || class == BPF_JMP ||
  1609	    /* BPF_END always uses the BPF_ALU class. */
 1610	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
 1611		return true;
 1612
 1613	if (class == BPF_ALU || class == BPF_JMP32)
 1614		return false;
 1615
 1616	if (class == BPF_LDX) {
 1617		if (t != SRC_OP)
 1618			return BPF_SIZE(code) == BPF_DW;
 1619		/* LDX source must be ptr. */
 1620		return true;
 1621	}
 1622
 1623	if (class == BPF_STX) {
 1624		if (reg->type != SCALAR_VALUE)
 1625			return true;
 1626		return BPF_SIZE(code) == BPF_DW;
 1627	}
 1628
 1629	if (class == BPF_LD) {
 1630		u8 mode = BPF_MODE(code);
 1631
 1632		/* LD_IMM64 */
 1633		if (mode == BPF_IMM)
 1634			return true;
 1635
 1636		/* Both LD_IND and LD_ABS return 32-bit data. */
 1637		if (t != SRC_OP)
  1638			return false;
 1639
 1640		/* Implicit ctx ptr. */
 1641		if (regno == BPF_REG_6)
 1642			return true;
 1643
 1644		/* Explicit source could be any width. */
 1645		return true;
 1646	}
 1647
 1648	if (class == BPF_ST)
 1649		/* The only source register for BPF_ST is a ptr. */
 1650		return true;
 1651
 1652	/* Conservatively return true at default. */
 1653	return true;
 1654}
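
/* A few illustrative classifications (hypothetical insns, not exhaustive):
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1)         -> 64-bit (BPF_ALU64 class)
 *   BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1)         -> 32-bit (BPF_ALU class)
 *   BPF_LDX_MEM(BPF_W,  BPF_REG_2, BPF_REG_1, 0) -> 32-bit def of r2
 *   BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0) -> 64-bit def of r2
 * These follow directly from the class and size checks above.
 */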
 1655
  1656/* Return TRUE if INSN doesn't explicitly define a value. */
 1657static bool insn_no_def(struct bpf_insn *insn)
 1658{
 1659	u8 class = BPF_CLASS(insn->code);
 1660
 1661	return (class == BPF_JMP || class == BPF_JMP32 ||
 1662		class == BPF_STX || class == BPF_ST);
 1663}
 1664
 1665/* Return TRUE if INSN has defined any 32-bit value explicitly. */
 1666static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
 1667{
 1668	if (insn_no_def(insn))
 1669		return false;
 1670
 1671	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
 1672}
 1673
 1674static void mark_insn_zext(struct bpf_verifier_env *env,
 1675			   struct bpf_reg_state *reg)
 1676{
 1677	s32 def_idx = reg->subreg_def;
 1678
 1679	if (def_idx == DEF_NOT_SUBREG)
 1680		return;
 1681
 1682	env->insn_aux_data[def_idx - 1].zext_dst = true;
 1683	/* The dst will be zero extended, so won't be sub-register anymore. */
 1684	reg->subreg_def = DEF_NOT_SUBREG;
 1685}
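
/* Example (illustrative only): with the sequence
 *   w3 = w2          // 32-bit def, r3->subreg_def points past this insn
 *   r4 = r3          // 64-bit (SRC_OP) read of r3
 * the 64-bit read calls mark_insn_zext(), which flags the 'w3 = w2' insn
 * via zext_dst so that a later verifier pass can insert an explicit
 * zero-extension for JITs that request verifier-inserted zero-extension.
 */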
 1686
 1687static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
 1688			 enum reg_arg_type t)
 1689{
 1690	struct bpf_verifier_state *vstate = env->cur_state;
 1691	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 1692	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
 1693	struct bpf_reg_state *reg, *regs = state->regs;
 1694	bool rw64;
 1695
 1696	if (regno >= MAX_BPF_REG) {
 1697		verbose(env, "R%d is invalid\n", regno);
 1698		return -EINVAL;
 1699	}
 1700
 1701	reg = &regs[regno];
 1702	rw64 = is_reg64(env, insn, regno, reg, t);
 1703	if (t == SRC_OP) {
 1704		/* check whether register used as source operand can be read */
 1705		if (reg->type == NOT_INIT) {
 1706			verbose(env, "R%d !read_ok\n", regno);
 1707			return -EACCES;
 1708		}
 1709		/* We don't need to worry about FP liveness because it's read-only */
 1710		if (regno == BPF_REG_FP)
 1711			return 0;
 1712
 1713		if (rw64)
 1714			mark_insn_zext(env, reg);
 1715
 1716		return mark_reg_read(env, reg, reg->parent,
 1717				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
 1718	} else {
 1719		/* check whether register used as dest operand can be written to */
 1720		if (regno == BPF_REG_FP) {
 1721			verbose(env, "frame pointer is read only\n");
 1722			return -EACCES;
 1723		}
 1724		reg->live |= REG_LIVE_WRITTEN;
 1725		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
 1726		if (t == DST_OP)
 1727			mark_reg_unknown(env, regs, regno);
 1728	}
 1729	return 0;
 1730}
 1731
 1732/* for any branch, call, exit record the history of jmps in the given state */
 1733static int push_jmp_history(struct bpf_verifier_env *env,
 1734			    struct bpf_verifier_state *cur)
 1735{
 1736	u32 cnt = cur->jmp_history_cnt;
 1737	struct bpf_idx_pair *p;
 1738
 1739	cnt++;
 1740	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
 1741	if (!p)
 1742		return -ENOMEM;
 1743	p[cnt - 1].idx = env->insn_idx;
 1744	p[cnt - 1].prev_idx = env->prev_insn_idx;
 1745	cur->jmp_history = p;
 1746	cur->jmp_history_cnt = cnt;
 1747	return 0;
 1748}
 1749
 1750/* Backtrack one insn at a time. If idx is not at the top of recorded
  1751 * history, then the previous instruction came from straight-line execution.
 1752 */
 1753static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
 1754			     u32 *history)
 1755{
 1756	u32 cnt = *history;
 1757
 1758	if (cnt && st->jmp_history[cnt - 1].idx == i) {
 1759		i = st->jmp_history[cnt - 1].prev_idx;
 1760		(*history)--;
 1761	} else {
 1762		i--;
 1763	}
 1764	return i;
 1765}
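
/* Illustrative walk (made-up instruction indexes): if the state recorded
 * the jumps {idx=7, prev_idx=3} and {idx=12, prev_idx=9}, then backtracking
 * from insn 12 visits 12 -> 9 (via history) -> 8 -> 7 -> 3 (via history)
 * -> 2 -> 1 -> 0, i.e. plain decrements except where the history says a
 * jump was taken into that instruction.
 */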
 1766
 1767/* For given verifier state backtrack_insn() is called from the last insn to
 1768 * the first insn. Its purpose is to compute a bitmask of registers and
  1769 * stack slots that need precision in the parent verifier state.
 1770 */
 1771static int backtrack_insn(struct bpf_verifier_env *env, int idx,
 1772			  u32 *reg_mask, u64 *stack_mask)
 1773{
 1774	const struct bpf_insn_cbs cbs = {
 1775		.cb_print	= verbose,
 1776		.private_data	= env,
 1777	};
 1778	struct bpf_insn *insn = env->prog->insnsi + idx;
 1779	u8 class = BPF_CLASS(insn->code);
 1780	u8 opcode = BPF_OP(insn->code);
 1781	u8 mode = BPF_MODE(insn->code);
 1782	u32 dreg = 1u << insn->dst_reg;
 1783	u32 sreg = 1u << insn->src_reg;
 1784	u32 spi;
 1785
 1786	if (insn->code == 0)
 1787		return 0;
 1788	if (env->log.level & BPF_LOG_LEVEL) {
 1789		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
 1790		verbose(env, "%d: ", idx);
 1791		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
 1792	}
 1793
 1794	if (class == BPF_ALU || class == BPF_ALU64) {
 1795		if (!(*reg_mask & dreg))
 1796			return 0;
 1797		if (opcode == BPF_MOV) {
 1798			if (BPF_SRC(insn->code) == BPF_X) {
 1799				/* dreg = sreg
 1800				 * dreg needs precision after this insn
 1801				 * sreg needs precision before this insn
 1802				 */
 1803				*reg_mask &= ~dreg;
 1804				*reg_mask |= sreg;
 1805			} else {
 1806				/* dreg = K
 1807				 * dreg needs precision after this insn.
 1808				 * Corresponding register is already marked
 1809				 * as precise=true in this verifier state.
 1810				 * No further markings in parent are necessary
 1811				 */
 1812				*reg_mask &= ~dreg;
 1813			}
 1814		} else {
 1815			if (BPF_SRC(insn->code) == BPF_X) {
 1816				/* dreg += sreg
 1817				 * both dreg and sreg need precision
 1818				 * before this insn
 1819				 */
 1820				*reg_mask |= sreg;
 1821			} /* else dreg += K
 1822			   * dreg still needs precision before this insn
 1823			   */
 1824		}
 1825	} else if (class == BPF_LDX) {
 1826		if (!(*reg_mask & dreg))
 1827			return 0;
 1828		*reg_mask &= ~dreg;
 1829
 1830		/* scalars can only be spilled into stack w/o losing precision.
 1831		 * Load from any other memory can be zero extended.
 1832		 * The desire to keep that precision is already indicated
 1833		 * by 'precise' mark in corresponding register of this state.
 1834		 * No further tracking necessary.
 1835		 */
 1836		if (insn->src_reg != BPF_REG_FP)
 1837			return 0;
 1838		if (BPF_SIZE(insn->code) != BPF_DW)
 1839			return 0;
 1840
 1841		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
 1842		 * that [fp - off] slot contains scalar that needs to be
 1843		 * tracked with precision
 1844		 */
 1845		spi = (-insn->off - 1) / BPF_REG_SIZE;
 1846		if (spi >= 64) {
 1847			verbose(env, "BUG spi %d\n", spi);
 1848			WARN_ONCE(1, "verifier backtracking bug");
 1849			return -EFAULT;
 1850		}
 1851		*stack_mask |= 1ull << spi;
 1852	} else if (class == BPF_STX || class == BPF_ST) {
 1853		if (*reg_mask & dreg)
 1854			/* stx & st shouldn't be using _scalar_ dst_reg
 1855			 * to access memory. It means backtracking
 1856			 * encountered a case of pointer subtraction.
 1857			 */
 1858			return -ENOTSUPP;
 1859		/* scalars can only be spilled into stack */
 1860		if (insn->dst_reg != BPF_REG_FP)
 1861			return 0;
 1862		if (BPF_SIZE(insn->code) != BPF_DW)
 1863			return 0;
 1864		spi = (-insn->off - 1) / BPF_REG_SIZE;
 1865		if (spi >= 64) {
 1866			verbose(env, "BUG spi %d\n", spi);
 1867			WARN_ONCE(1, "verifier backtracking bug");
 1868			return -EFAULT;
 1869		}
 1870		if (!(*stack_mask & (1ull << spi)))
 1871			return 0;
 1872		*stack_mask &= ~(1ull << spi);
 1873		if (class == BPF_STX)
 1874			*reg_mask |= sreg;
 1875	} else if (class == BPF_JMP || class == BPF_JMP32) {
 1876		if (opcode == BPF_CALL) {
 1877			if (insn->src_reg == BPF_PSEUDO_CALL)
 1878				return -ENOTSUPP;
 1879			/* regular helper call sets R0 */
 1880			*reg_mask &= ~1;
 1881			if (*reg_mask & 0x3f) {
 1882				/* if backtracing was looking for registers R1-R5
 1883				 * they should have been found already.
 1884				 */
 1885				verbose(env, "BUG regs %x\n", *reg_mask);
 1886				WARN_ONCE(1, "verifier backtracking bug");
 1887				return -EFAULT;
 1888			}
 1889		} else if (opcode == BPF_EXIT) {
 1890			return -ENOTSUPP;
 1891		}
 1892	} else if (class == BPF_LD) {
 1893		if (!(*reg_mask & dreg))
 1894			return 0;
 1895		*reg_mask &= ~dreg;
 1896		/* It's ld_imm64 or ld_abs or ld_ind.
 1897		 * For ld_imm64 no further tracking of precision
 1898		 * into parent is necessary
 1899		 */
 1900		if (mode == BPF_IND || mode == BPF_ABS)
 1901			/* to be analyzed */
 1902			return -ENOTSUPP;
 1903	}
 1904	return 0;
 1905}
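
/* Example of individual backtracking steps (made-up snippet):
 *   10: r6 = r7
 *   11: *(u64 *)(r10 - 8) = r6
 * If reg_mask currently tracks r6, processing insn 10 clears bit 6 and sets
 * bit 7, since it is r7's value before that insn which must stay precise.
 * If stack_mask tracks slot fp-8, processing insn 11 clears that slot's bit
 * and sets bit 6 instead, because the spilled value came from r6.
 */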
 1906
 1907/* the scalar precision tracking algorithm:
 1908 * . at the start all registers have precise=false.
 1909 * . scalar ranges are tracked as normal through alu and jmp insns.
 1910 * . once precise value of the scalar register is used in:
 1911 *   .  ptr + scalar alu
 1912 *   . if (scalar cond K|scalar)
 1913 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
  1914 *   backtrack through the verifier states and mark as precise all
  1915 *   registers and stack slots with spilled constants that contributed
  1916 *   to the value of these scalar registers.
 1917 * . during state pruning two registers (or spilled stack slots)
 1918 *   are equivalent if both are not precise.
 1919 *
 1920 * Note the verifier cannot simply walk register parentage chain,
 1921 * since many different registers and stack slots could have been
  1922 * used to compute a single precise scalar.
 1923 *
 1924 * The approach of starting with precise=true for all registers and then
 1925 * backtrack to mark a register as not precise when the verifier detects
  1926 * that the program doesn't care about the specific value (e.g., when a
  1927 * helper takes a register as an ARG_ANYTHING parameter) is not safe.
 1928 *
  1929 * It's ok to walk a single parentage chain of the verifier states.
  1930 * It's possible that this backtracking will go all the way to the 1st insn.
 1931 * All other branches will be explored for needing precision later.
 1932 *
 1933 * The backtracking needs to deal with cases like:
 1934 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
 1935 * r9 -= r8
 1936 * r5 = r9
 1937 * if r5 > 0x79f goto pc+7
 1938 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
 1939 * r5 += 1
 1940 * ...
 1941 * call bpf_perf_event_output#25
 1942 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
 1943 *
 1944 * and this case:
 1945 * r6 = 1
 1946 * call foo // uses callee's r6 inside to compute r0
 1947 * r0 += r6
 1948 * if r0 == 0 goto
 1949 *
 1950 * to track above reg_mask/stack_mask needs to be independent for each frame.
 1951 *
 1952 * Also if parent's curframe > frame where backtracking started,
  1953 * the verifier needs to mark registers in both frames, otherwise callees
 1954 * may incorrectly prune callers. This is similar to
 1955 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
 1956 *
 1957 * For now backtracking falls back into conservative marking.
 1958 */
 1959static void mark_all_scalars_precise(struct bpf_verifier_env *env,
 1960				     struct bpf_verifier_state *st)
 1961{
 1962	struct bpf_func_state *func;
 1963	struct bpf_reg_state *reg;
 1964	int i, j;
 1965
 1966	/* big hammer: mark all scalars precise in this path.
 1967	 * pop_stack may still get !precise scalars.
 1968	 */
 1969	for (; st; st = st->parent)
 1970		for (i = 0; i <= st->curframe; i++) {
 1971			func = st->frame[i];
 1972			for (j = 0; j < BPF_REG_FP; j++) {
 1973				reg = &func->regs[j];
 1974				if (reg->type != SCALAR_VALUE)
 1975					continue;
 1976				reg->precise = true;
 1977			}
 1978			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
 1979				if (func->stack[j].slot_type[0] != STACK_SPILL)
 1980					continue;
 1981				reg = &func->stack[j].spilled_ptr;
 1982				if (reg->type != SCALAR_VALUE)
 1983					continue;
 1984				reg->precise = true;
 1985			}
 1986		}
 1987}
 1988
 1989static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
 1990				  int spi)
 1991{
 1992	struct bpf_verifier_state *st = env->cur_state;
 1993	int first_idx = st->first_insn_idx;
 1994	int last_idx = env->insn_idx;
 1995	struct bpf_func_state *func;
 1996	struct bpf_reg_state *reg;
 1997	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
 1998	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
 1999	bool skip_first = true;
 2000	bool new_marks = false;
 2001	int i, err;
 2002
 2003	if (!env->bpf_capable)
 2004		return 0;
 2005
 2006	func = st->frame[st->curframe];
 2007	if (regno >= 0) {
 2008		reg = &func->regs[regno];
 2009		if (reg->type != SCALAR_VALUE) {
 2010			WARN_ONCE(1, "backtracing misuse");
 2011			return -EFAULT;
 2012		}
 2013		if (!reg->precise)
 2014			new_marks = true;
 2015		else
 2016			reg_mask = 0;
 2017		reg->precise = true;
 2018	}
 2019
 2020	while (spi >= 0) {
 2021		if (func->stack[spi].slot_type[0] != STACK_SPILL) {
 2022			stack_mask = 0;
 2023			break;
 2024		}
 2025		reg = &func->stack[spi].spilled_ptr;
 2026		if (reg->type != SCALAR_VALUE) {
 2027			stack_mask = 0;
 2028			break;
 2029		}
 2030		if (!reg->precise)
 2031			new_marks = true;
 2032		else
 2033			stack_mask = 0;
 2034		reg->precise = true;
 2035		break;
 2036	}
 2037
 2038	if (!new_marks)
 2039		return 0;
 2040	if (!reg_mask && !stack_mask)
 2041		return 0;
 2042	for (;;) {
 2043		DECLARE_BITMAP(mask, 64);
 2044		u32 history = st->jmp_history_cnt;
 2045
 2046		if (env->log.level & BPF_LOG_LEVEL)
 2047			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
 2048		for (i = last_idx;;) {
 2049			if (skip_first) {
 2050				err = 0;
 2051				skip_first = false;
 2052			} else {
 2053				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
 2054			}
 2055			if (err == -ENOTSUPP) {
 2056				mark_all_scalars_precise(env, st);
 2057				return 0;
 2058			} else if (err) {
 2059				return err;
 2060			}
 2061			if (!reg_mask && !stack_mask)
 2062				/* Found assignment(s) into tracked register in this state.
 2063				 * Since this state is already marked, just return.
 2064				 * Nothing to be tracked further in the parent state.
 2065				 */
 2066				return 0;
 2067			if (i == first_idx)
 2068				break;
 2069			i = get_prev_insn_idx(st, i, &history);
 2070			if (i >= env->prog->len) {
 2071				/* This can happen if backtracking reached insn 0
 2072				 * and there are still reg_mask or stack_mask
 2073				 * to backtrack.
 2074				 * It means the backtracking missed the spot where
 2075				 * particular register was initialized with a constant.
 2076				 */
 2077				verbose(env, "BUG backtracking idx %d\n", i);
 2078				WARN_ONCE(1, "verifier backtracking bug");
 2079				return -EFAULT;
 2080			}
 2081		}
 2082		st = st->parent;
 2083		if (!st)
 2084			break;
 2085
 2086		new_marks = false;
 2087		func = st->frame[st->curframe];
 2088		bitmap_from_u64(mask, reg_mask);
 2089		for_each_set_bit(i, mask, 32) {
 2090			reg = &func->regs[i];
 2091			if (reg->type != SCALAR_VALUE) {
 2092				reg_mask &= ~(1u << i);
 2093				continue;
 2094			}
 2095			if (!reg->precise)
 2096				new_marks = true;
 2097			reg->precise = true;
 2098		}
 2099
 2100		bitmap_from_u64(mask, stack_mask);
 2101		for_each_set_bit(i, mask, 64) {
 2102			if (i >= func->allocated_stack / BPF_REG_SIZE) {
 2103				/* the sequence of instructions:
 2104				 * 2: (bf) r3 = r10
 2105				 * 3: (7b) *(u64 *)(r3 -8) = r0
 2106				 * 4: (79) r4 = *(u64 *)(r10 -8)
 2107				 * doesn't contain jmps. It's backtracked
 2108				 * as a single block.
 2109				 * During backtracking insn 3 is not recognized as
 2110				 * stack access, so at the end of backtracking
 2111				 * stack slot fp-8 is still marked in stack_mask.
 2112				 * However the parent state may not have accessed
 2113				 * fp-8 and it's "unallocated" stack space.
  2114				 * In such a case, fall back to conservative marking.
 2115				 */
 2116				mark_all_scalars_precise(env, st);
 2117				return 0;
 2118			}
 2119
 2120			if (func->stack[i].slot_type[0] != STACK_SPILL) {
 2121				stack_mask &= ~(1ull << i);
 2122				continue;
 2123			}
 2124			reg = &func->stack[i].spilled_ptr;
 2125			if (reg->type != SCALAR_VALUE) {
 2126				stack_mask &= ~(1ull << i);
 2127				continue;
 2128			}
 2129			if (!reg->precise)
 2130				new_marks = true;
 2131			reg->precise = true;
 2132		}
 2133		if (env->log.level & BPF_LOG_LEVEL) {
 2134			print_verifier_state(env, func);
 2135			verbose(env, "parent %s regs=%x stack=%llx marks\n",
 2136				new_marks ? "didn't have" : "already had",
 2137				reg_mask, stack_mask);
 2138		}
 2139
 2140		if (!reg_mask && !stack_mask)
 2141			break;
 2142		if (!new_marks)
 2143			break;
 2144
 2145		last_idx = st->last_insn_idx;
 2146		first_idx = st->first_insn_idx;
 2147	}
 2148	return 0;
 2149}
 2150
 2151static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
 2152{
 2153	return __mark_chain_precision(env, regno, -1);
 2154}
 2155
 2156static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
 2157{
 2158	return __mark_chain_precision(env, -1, spi);
 2159}
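
/* Typical trigger (illustration only): for a helper whose size argument
 * must be a constant, e.g.
 *   r2 = 16
 *   ...
 *   call bpf_probe_read_kernel   // size argument in r2 is ARG_CONST_SIZE_OR_ZERO
 * the argument check calls mark_chain_precision(env, BPF_REG_2), and the
 * backtracking above marks every register and stack slot that contributed
 * to r2's value as precise before state pruning may treat it as don't-care.
 */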
 2160
 2161static bool is_spillable_regtype(enum bpf_reg_type type)
 2162{
 2163	switch (type) {
 2164	case PTR_TO_MAP_VALUE:
 2165	case PTR_TO_MAP_VALUE_OR_NULL:
 2166	case PTR_TO_STACK:
 2167	case PTR_TO_CTX:
 2168	case PTR_TO_PACKET:
 2169	case PTR_TO_PACKET_META:
 2170	case PTR_TO_PACKET_END:
 2171	case PTR_TO_FLOW_KEYS:
 2172	case CONST_PTR_TO_MAP:
 2173	case PTR_TO_SOCKET:
 2174	case PTR_TO_SOCKET_OR_NULL:
 2175	case PTR_TO_SOCK_COMMON:
 2176	case PTR_TO_SOCK_COMMON_OR_NULL:
 2177	case PTR_TO_TCP_SOCK:
 2178	case PTR_TO_TCP_SOCK_OR_NULL:
 2179	case PTR_TO_XDP_SOCK:
 2180	case PTR_TO_BTF_ID:
 2181	case PTR_TO_BTF_ID_OR_NULL:
 2182	case PTR_TO_RDONLY_BUF:
 2183	case PTR_TO_RDONLY_BUF_OR_NULL:
 2184	case PTR_TO_RDWR_BUF:
 2185	case PTR_TO_RDWR_BUF_OR_NULL:
 2186		return true;
 2187	default:
 2188		return false;
 2189	}
 2190}
 2191
 2192/* Does this register contain a constant zero? */
 2193static bool register_is_null(struct bpf_reg_state *reg)
 2194{
 2195	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
 2196}
 2197
 2198static bool register_is_const(struct bpf_reg_state *reg)
 2199{
 2200	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
 2201}
 2202
 2203static bool __is_pointer_value(bool allow_ptr_leaks,
 2204			       const struct bpf_reg_state *reg)
 2205{
 2206	if (allow_ptr_leaks)
 2207		return false;
 2208
 2209	return reg->type != SCALAR_VALUE;
 2210}
 2211
 2212static void save_register_state(struct bpf_func_state *state,
 2213				int spi, struct bpf_reg_state *reg)
 2214{
 2215	int i;
 2216
 2217	state->stack[spi].spilled_ptr = *reg;
 2218	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 2219
 2220	for (i = 0; i < BPF_REG_SIZE; i++)
 2221		state->stack[spi].slot_type[i] = STACK_SPILL;
 2222}
 2223
 2224/* check_stack_read/write functions track spill/fill of registers,
 2225 * stack boundary and alignment are checked in check_mem_access()
 2226 */
 2227static int check_stack_write(struct bpf_verifier_env *env,
 2228			     struct bpf_func_state *state, /* func where register points to */
 2229			     int off, int size, int value_regno, int insn_idx)
 2230{
 2231	struct bpf_func_state *cur; /* state of the current function */
 2232	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
 2233	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
 2234	struct bpf_reg_state *reg = NULL;
 2235
 2236	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
 2237				 state->acquired_refs, true);
 2238	if (err)
 2239		return err;
 2240	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
 2241	 * so it's aligned access and [off, off + size) are within stack limits
 2242	 */
 2243	if (!env->allow_ptr_leaks &&
 2244	    state->stack[spi].slot_type[0] == STACK_SPILL &&
 2245	    size != BPF_REG_SIZE) {
 2246		verbose(env, "attempt to corrupt spilled pointer on stack\n");
 2247		return -EACCES;
 2248	}
 2249
 2250	cur = env->cur_state->frame[env->cur_state->curframe];
 2251	if (value_regno >= 0)
 2252		reg = &cur->regs[value_regno];
 2253
 2254	if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
 2255	    !register_is_null(reg) && env->bpf_capable) {
 2256		if (dst_reg != BPF_REG_FP) {
 2257			/* The backtracking logic can only recognize explicit
  2258			 * stack slot addresses like [fp - 8]. Spills of scalars
  2259			 * via any other register have to be handled conservatively.
 2260			 * Backtrack from here and mark all registers as precise
 2261			 * that contributed into 'reg' being a constant.
 2262			 */
 2263			err = mark_chain_precision(env, value_regno);
 2264			if (err)
 2265				return err;
 2266		}
 2267		save_register_state(state, spi, reg);
 2268	} else if (reg && is_spillable_regtype(reg->type)) {
 2269		/* register containing pointer is being spilled into stack */
 2270		if (size != BPF_REG_SIZE) {
 2271			verbose_linfo(env, insn_idx, "; ");
 2272			verbose(env, "invalid size of register spill\n");
 2273			return -EACCES;
 2274		}
 2275
 2276		if (state != cur && reg->type == PTR_TO_STACK) {
 2277			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
 2278			return -EINVAL;
 2279		}
 2280
 2281		if (!env->bypass_spec_v4) {
 2282			bool sanitize = false;
 2283
 2284			if (state->stack[spi].slot_type[0] == STACK_SPILL &&
 2285			    register_is_const(&state->stack[spi].spilled_ptr))
 2286				sanitize = true;
 2287			for (i = 0; i < BPF_REG_SIZE; i++)
 2288				if (state->stack[spi].slot_type[i] == STACK_MISC) {
 2289					sanitize = true;
 2290					break;
 2291				}
 2292			if (sanitize) {
 2293				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
 2294				int soff = (-spi - 1) * BPF_REG_SIZE;
 2295
  2296				/* detected reuse of an integer stack slot with a pointer,
  2297				 * which means either llvm is reusing the stack slot or
  2298				 * an attacker is trying to exploit CVE-2018-3639
  2299				 * (speculative store bypass).
  2300				 * Have to sanitize that slot with a preemptive
  2301				 * store of zero.
 2302				 */
 2303				if (*poff && *poff != soff) {
 2304					/* disallow programs where single insn stores
 2305					 * into two different stack slots, since verifier
 2306					 * cannot sanitize them
 2307					 */
 2308					verbose(env,
 2309						"insn %d cannot access two stack slots fp%d and fp%d",
 2310						insn_idx, *poff, soff);
 2311					return -EINVAL;
 2312				}
 2313				*poff = soff;
 2314			}
 2315		}
 2316		save_register_state(state, spi, reg);
 2317	} else {
 2318		u8 type = STACK_MISC;
 2319
 2320		/* regular write of data into stack destroys any spilled ptr */
 2321		state->stack[spi].spilled_ptr.type = NOT_INIT;
 2322		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
 2323		if (state->stack[spi].slot_type[0] == STACK_SPILL)
 2324			for (i = 0; i < BPF_REG_SIZE; i++)
 2325				state->stack[spi].slot_type[i] = STACK_MISC;
 2326
 2327		/* only mark the slot as written if all 8 bytes were written
 2328		 * otherwise read propagation may incorrectly stop too soon
 2329		 * when stack slots are partially written.
 2330		 * This heuristic means that read propagation will be
 2331		 * conservative, since it will add reg_live_read marks
  2332		 * to stack slots all the way to the first state when a program
  2333		 * writes+reads less than 8 bytes.
 2334		 */
 2335		if (size == BPF_REG_SIZE)
 2336			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
 2337
 2338		/* when we zero initialize stack slots mark them as such */
 2339		if (reg && register_is_null(reg)) {
 2340			/* backtracking doesn't work for STACK_ZERO yet. */
 2341			err = mark_chain_precision(env, value_regno);
 2342			if (err)
 2343				return err;
 2344			type = STACK_ZERO;
 2345		}
 2346
 2347		/* Mark slots affected by this stack write. */
 2348		for (i = 0; i < size; i++)
 2349			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
 2350				type;
 2351	}
 2352	return 0;
 2353}
 2354
 2355static int check_stack_read(struct bpf_verifier_env *env,
 2356			    struct bpf_func_state *reg_state /* func where register points to */,
 2357			    int off, int size, int value_regno)
 2358{
 2359	struct bpf_verifier_state *vstate = env->cur_state;
 2360	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 2361	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
 2362	struct bpf_reg_state *reg;
 2363	u8 *stype;
 2364
 2365	if (reg_state->allocated_stack <= slot) {
 2366		verbose(env, "invalid read from stack off %d+0 size %d\n",
 2367			off, size);
 2368		return -EACCES;
 2369	}
 2370	stype = reg_state->stack[spi].slot_type;
 2371	reg = &reg_state->stack[spi].spilled_ptr;
 2372
 2373	if (stype[0] == STACK_SPILL) {
 2374		if (size != BPF_REG_SIZE) {
 2375			if (reg->type != SCALAR_VALUE) {
 2376				verbose_linfo(env, env->insn_idx, "; ");
 2377				verbose(env, "invalid size of register fill\n");
 2378				return -EACCES;
 2379			}
 2380			if (value_regno >= 0) {
 2381				mark_reg_unknown(env, state->regs, value_regno);
 2382				state->regs[value_regno].live |= REG_LIVE_WRITTEN;
 2383			}
 2384			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 2385			return 0;
 2386		}
 2387		for (i = 1; i < BPF_REG_SIZE; i++) {
 2388			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
 2389				verbose(env, "corrupted spill memory\n");
 2390				return -EACCES;
 2391			}
 2392		}
 2393
 2394		if (value_regno >= 0) {
 2395			/* restore register state from stack */
 2396			state->regs[value_regno] = *reg;
 2397			/* mark reg as written since spilled pointer state likely
 2398			 * has its liveness marks cleared by is_state_visited()
 2399			 * which resets stack/reg liveness for state transitions
 2400			 */
 2401			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
 2402		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
 2403			/* If value_regno==-1, the caller is asking us whether
 2404			 * it is acceptable to use this value as a SCALAR_VALUE
 2405			 * (e.g. for XADD).
 2406			 * We must not allow unprivileged callers to do that
 2407			 * with spilled pointers.
 2408			 */
 2409			verbose(env, "leaking pointer from stack off %d\n",
 2410				off);
 2411			return -EACCES;
 2412		}
 2413		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 2414	} else {
 2415		int zeros = 0;
 2416
 2417		for (i = 0; i < size; i++) {
 2418			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
 2419				continue;
 2420			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
 2421				zeros++;
 2422				continue;
 2423			}
 2424			verbose(env, "invalid read from stack off %d+%d size %d\n",
 2425				off, i, size);
 2426			return -EACCES;
 2427		}
 2428		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 2429		if (value_regno >= 0) {
 2430			if (zeros == size) {
 2431				/* any size read into register is zero extended,
 2432				 * so the whole register == const_zero
 2433				 */
 2434				__mark_reg_const_zero(&state->regs[value_regno]);
 2435				/* backtracking doesn't support STACK_ZERO yet,
 2436				 * so mark it precise here, so that later
 2437				 * backtracking can stop here.
 2438				 * Backtracking may not need this if this register
 2439				 * doesn't participate in pointer adjustment.
 2440				 * Forward propagation of precise flag is not
 2441				 * necessary either. This mark is only to stop
 2442				 * backtracking. Any register that contributed
 2443				 * to const 0 was marked precise before spill.
 2444				 */
 2445				state->regs[value_regno].precise = true;
 2446			} else {
 2447				/* have read misc data from the stack */
 2448				mark_reg_unknown(env, state->regs, value_regno);
 2449			}
 2450			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
 2451		}
 2452	}
 2453	return 0;
 2454}
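
/* Spill/fill illustration (hypothetical fragment; assume r1 is PTR_TO_CTX
 * and r2 is a scalar):
 *   *(u64 *)(r10 - 8) = r1     // full 8-byte spill: fp-8 becomes STACK_SPILL
 *                              // and r1's pointer state is preserved there
 *   *(u32 *)(r10 - 16) = r2    // partial write: the touched bytes become
 *                              // STACK_MISC and REG_LIVE_WRITTEN is not set
 *   r3 = *(u64 *)(r10 - 8)     // fill: r3 gets back the spilled r1 state
 */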
 2455
 2456static int check_stack_access(struct bpf_verifier_env *env,
 2457			      const struct bpf_reg_state *reg,
 2458			      int off, int size)
 2459{
 2460	/* Stack accesses must be at a fixed offset, so that we
  2461	 * can determine what type of data was returned. See
 2462	 * check_stack_read().
 2463	 */
 2464	if (!tnum_is_const(reg->var_off)) {
 2465		char tn_buf[48];
 2466
 2467		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 2468		verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
 2469			tn_buf, off, size);
 2470		return -EACCES;
 2471	}
 2472
 2473	if (off >= 0 || off < -MAX_BPF_STACK) {
 2474		verbose(env, "invalid stack off=%d size=%d\n", off, size);
 2475		return -EACCES;
 2476	}
 2477
 2478	return 0;
 2479}
 2480
 2481static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
 2482				 int off, int size, enum bpf_access_type type)
 2483{
 2484	struct bpf_reg_state *regs = cur_regs(env);
 2485	struct bpf_map *map = regs[regno].map_ptr;
 2486	u32 cap = bpf_map_flags_to_cap(map);
 2487
 2488	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
 2489		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
 2490			map->value_size, off, size);
 2491		return -EACCES;
 2492	}
 2493
 2494	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
 2495		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
 2496			map->value_size, off, size);
 2497		return -EACCES;
 2498	}
 2499
 2500	return 0;
 2501}
 2502
 2503/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
 2504static int __check_mem_access(struct bpf_verifier_env *env, int regno,
 2505			      int off, int size, u32 mem_size,
 2506			      bool zero_size_allowed)
 2507{
 2508	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
 2509	struct bpf_reg_state *reg;
 2510
 2511	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
 2512		return 0;
 2513
 2514	reg = &cur_regs(env)[regno];
 2515	switch (reg->type) {
 2516	case PTR_TO_MAP_VALUE:
 2517		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
 2518			mem_size, off, size);
 2519		break;
 2520	case PTR_TO_PACKET:
 2521	case PTR_TO_PACKET_META:
 2522	case PTR_TO_PACKET_END:
 2523		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
 2524			off, size, regno, reg->id, off, mem_size);
 2525		break;
 2526	case PTR_TO_MEM:
 2527	default:
 2528		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
 2529			mem_size, off, size);
 2530	}
 2531
 2532	return -EACCES;
 2533}
 2534
 2535/* check read/write into a memory region with possible variable offset */
 2536static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
 2537				   int off, int size, u32 mem_size,
 2538				   bool zero_size_allowed)
 2539{
 2540	struct bpf_verifier_state *vstate = env->cur_state;
 2541	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 2542	struct bpf_reg_state *reg = &state->regs[regno];
 2543	int err;
 2544
  2545	/* We may have adjusted the register pointing to the memory region, so we
 2546	 * need to try adding each of min_value and max_value to off
 2547	 * to make sure our theoretical access will be safe.
 2548	 */
 2549	if (env->log.level & BPF_LOG_LEVEL)
 2550		print_verifier_state(env, state);
 2551
 2552	/* The minimum value is only important with signed
 2553	 * comparisons where we can't assume the floor of a
 2554	 * value is 0.  If we are using signed variables for our
  2555	 * indexes we need to make sure that whatever we use
 2556	 * will have a set floor within our range.
 2557	 */
 2558	if (reg->smin_value < 0 &&
 2559	    (reg->smin_value == S64_MIN ||
 2560	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
 2561	      reg->smin_value + off < 0)) {
 2562		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
 2563			regno);
 2564		return -EACCES;
 2565	}
 2566	err = __check_mem_access(env, regno, reg->smin_value + off, size,
 2567				 mem_size, zero_size_allowed);
 2568	if (err) {
 2569		verbose(env, "R%d min value is outside of the allowed memory range\n",
 2570			regno);
 2571		return err;
 2572	}
 2573
 2574	/* If we haven't set a max value then we need to bail since we can't be
 2575	 * sure we won't do bad things.
 2576	 * If reg->umax_value + off could overflow, treat that as unbounded too.
 2577	 */
 2578	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
 2579		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
 2580			regno);
 2581		return -EACCES;
 2582	}
 2583	err = __check_mem_access(env, regno, reg->umax_value + off, size,
 2584				 mem_size, zero_size_allowed);
 2585	if (err) {
 2586		verbose(env, "R%d max value is outside of the allowed memory range\n",
 2587			regno);
 2588		return err;
 2589	}
 2590
 2591	return 0;
 2592}
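
/* Bounds-check illustration (made-up numbers): for mem_size == 64 and a
 * register whose variable offset is tracked as [smin=0, umax=60], an 8-byte
 * access at off=0 checks both 0 + 0 + 8 <= 64 and 60 + 0 + 8 <= 64; the
 * second check fails, so the program must first clamp the index (e.g. with
 * "if (idx > 56) return 0;") before the access is accepted.
 */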
 2593
 2594/* check read/write into a map element with possible variable offset */
 2595static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 2596			    int off, int size, bool zero_size_allowed)
 2597{
 2598	struct bpf_verifier_state *vstate = env->cur_state;
 2599	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 2600	struct bpf_reg_state *reg = &state->regs[regno];
 2601	struct bpf_map *map = reg->map_ptr;
 2602	int err;
 2603
 2604	err = check_mem_region_access(env, regno, off, size, map->value_size,
 2605				      zero_size_allowed);
 2606	if (err)
 2607		return err;
 2608
 2609	if (map_value_has_spin_lock(map)) {
 2610		u32 lock = map->spin_lock_off;
 2611
 2612		/* if any part of struct bpf_spin_lock can be touched by
 2613		 * load/store reject this program.
 2614		 * To check that [x1, x2) overlaps with [y1, y2)
 2615		 * it is sufficient to check x1 < y2 && y1 < x2.
 2616		 */
 2617		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
 2618		     lock < reg->umax_value + off + size) {
 2619			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
 2620			return -EACCES;
 2621		}
 2622	}
 2623	return err;
 2624}
 2625
 2626#define MAX_PACKET_OFF 0xffff
 2627
 2628static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 2629				       const struct bpf_call_arg_meta *meta,
 2630				       enum bpf_access_type t)
 2631{
 2632	switch (env->prog->type) {
  2633	/* Program types with only direct read access go here! */
 2634	case BPF_PROG_TYPE_LWT_IN:
 2635	case BPF_PROG_TYPE_LWT_OUT:
 2636	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
 2637	case BPF_PROG_TYPE_SK_REUSEPORT:
 2638	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 2639	case BPF_PROG_TYPE_CGROUP_SKB:
 2640		if (t == BPF_WRITE)
 2641			return false;
 2642		/* fallthrough */
 2643
 2644	/* Program types with direct read + write access go here! */
 2645	case BPF_PROG_TYPE_SCHED_CLS:
 2646	case BPF_PROG_TYPE_SCHED_ACT:
 2647	case BPF_PROG_TYPE_XDP:
 2648	case BPF_PROG_TYPE_LWT_XMIT:
 2649	case BPF_PROG_TYPE_SK_SKB:
 2650	case BPF_PROG_TYPE_SK_MSG:
 2651		if (meta)
 2652			return meta->pkt_access;
 2653
 2654		env->seen_direct_write = true;
 2655		return true;
 2656
 2657	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 2658		if (t == BPF_WRITE)
 2659			env->seen_direct_write = true;
 2660
 2661		return true;
 2662
 2663	default:
 2664		return false;
 2665	}
 2666}
 2667
 2668static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 2669			       int size, bool zero_size_allowed)
 2670{
 2671	struct bpf_reg_state *regs = cur_regs(env);
 2672	struct bpf_reg_state *reg = &regs[regno];
 2673	int err;
 2674
 2675	/* We may have added a variable offset to the packet pointer; but any
 2676	 * reg->range we have comes after that.  We are only checking the fixed
 2677	 * offset.
 2678	 */
 2679
 2680	/* We don't allow negative numbers, because we aren't tracking enough
 2681	 * detail to prove they're safe.
 2682	 */
 2683	if (reg->smin_value < 0) {
 2684		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
 2685			regno);
 2686		return -EACCES;
 2687	}
 2688	err = __check_mem_access(env, regno, off, size, reg->range,
 2689				 zero_size_allowed);
 2690	if (err) {
 2691		verbose(env, "R%d offset is outside of the packet\n", regno);
 2692		return err;
 2693	}
 2694
 2695	/* __check_mem_access has made sure "off + size - 1" is within u16.
 2696	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
 2697	 * otherwise find_good_pkt_pointers would have refused to set range info
  2698	 * and __check_mem_access would have rejected this pkt access.
 2699	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
 2700	 */
 2701	env->prog->aux->max_pkt_offset =
 2702		max_t(u32, env->prog->aux->max_pkt_offset,
 2703		      off + reg->umax_value + size - 1);
 2704
 2705	return err;
 2706}
 2707
 2708/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 2709static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 2710			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
 2711			    u32 *btf_id)
 2712{
 2713	struct bpf_insn_access_aux info = {
 2714		.reg_type = *reg_type,
 2715		.log = &env->log,
 2716	};
 2717
 2718	if (env->ops->is_valid_access &&
 2719	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
  2720		/* A non-zero info.ctx_field_size indicates that this field is a
  2721		 * candidate for a later verifier transformation that loads the
  2722		 * whole field and then applies a mask when the access is narrower
  2723		 * than the actual ctx field size. A zero info.ctx_field_size
  2724		 * allows only whole-field access and rejects any other, narrower
  2725		 * access.
 2726		 */
 2727		*reg_type = info.reg_type;
 2728
 2729		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
 2730			*btf_id = info.btf_id;
 2731		else
 2732			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 2733		/* remember the offset of last byte accessed in ctx */
 2734		if (env->prog->aux->max_ctx_offset < off + size)
 2735			env->prog->aux->max_ctx_offset = off + size;
 2736		return 0;
 2737	}
 2738
 2739	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
 2740	return -EACCES;
 2741}
 2742
 2743static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
 2744				  int size)
 2745{
 2746	if (size < 0 || off < 0 ||
 2747	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
 2748		verbose(env, "invalid access to flow keys off=%d size=%d\n",
 2749			off, size);
 2750		return -EACCES;
 2751	}
 2752	return 0;
 2753}
 2754
 2755static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
 2756			     u32 regno, int off, int size,
 2757			     enum bpf_access_type t)
 2758{
 2759	struct bpf_reg_state *regs = cur_regs(env);
 2760	struct bpf_reg_state *reg = &regs[regno];
 2761	struct bpf_insn_access_aux info = {};
 2762	bool valid;
 2763
 2764	if (reg->smin_value < 0) {
 2765		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
 2766			regno);
 2767		return -EACCES;
 2768	}
 2769
 2770	switch (reg->type) {
 2771	case PTR_TO_SOCK_COMMON:
 2772		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
 2773		break;
 2774	case PTR_TO_SOCKET:
 2775		valid = bpf_sock_is_valid_access(off, size, t, &info);
 2776		break;
 2777	case PTR_TO_TCP_SOCK:
 2778		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
 2779		break;
 2780	case PTR_TO_XDP_SOCK:
 2781		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
 2782		break;
 2783	default:
 2784		valid = false;
 2785	}
 2786
 2787
 2788	if (valid) {
 2789		env->insn_aux_data[insn_idx].ctx_field_size =
 2790			info.ctx_field_size;
 2791		return 0;
 2792	}
 2793
 2794	verbose(env, "R%d invalid %s access off=%d size=%d\n",
 2795		regno, reg_type_str[reg->type], off, size);
 2796
 2797	return -EACCES;
 2798}
 2799
 2800static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
 2801{
 2802	return cur_regs(env) + regno;
 2803}
 2804
 2805static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
 2806{
 2807	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
 2808}
 2809
 2810static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
 2811{
 2812	const struct bpf_reg_state *reg = reg_state(env, regno);
 2813
 2814	return reg->type == PTR_TO_CTX;
 2815}
 2816
 2817static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
 2818{
 2819	const struct bpf_reg_state *reg = reg_state(env, regno);
 2820
 2821	return type_is_sk_pointer(reg->type);
 2822}
 2823
 2824static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
 2825{
 2826	const struct bpf_reg_state *reg = reg_state(env, regno);
 2827
 2828	return type_is_pkt_pointer(reg->type);
 2829}
 2830
 2831static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
 2832{
 2833	const struct bpf_reg_state *reg = reg_state(env, regno);
 2834
  2835	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
 2836	return reg->type == PTR_TO_FLOW_KEYS;
 2837}
 2838
 2839static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
 2840				   const struct bpf_reg_state *reg,
 2841				   int off, int size, bool strict)
 2842{
 2843	struct tnum reg_off;
 2844	int ip_align;
 2845
 2846	/* Byte size accesses are always allowed. */
 2847	if (!strict || size == 1)
 2848		return 0;
 2849
 2850	/* For platforms that do not have a Kconfig enabling
 2851	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
 2852	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
 2853	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
 2854	 * to this code only in strict mode where we want to emulate
 2855	 * the NET_IP_ALIGN==2 checking.  Therefore use an
 2856	 * unconditional IP align value of '2'.
 2857	 */
 2858	ip_align = 2;
 2859
 2860	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
 2861	if (!tnum_is_aligned(reg_off, size)) {
 2862		char tn_buf[48];
 2863
 2864		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 2865		verbose(env,
 2866			"misaligned packet access off %d+%s+%d+%d size %d\n",
 2867			ip_align, tn_buf, reg->off, off, size);
 2868		return -EACCES;
 2869	}
 2870
 2871	return 0;
 2872}
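
/* Alignment example (illustrative, assuming a zero variable offset): with
 * the emulated NET_IP_ALIGN of 2, a 4-byte load at pkt + 14 (just past an
 * Ethernet header) evaluates 2 + 14 = 16, which is 4-byte aligned and
 * accepted in strict mode, while the same load at pkt + 13 evaluates 15
 * and is rejected as misaligned.
 */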
 2873
 2874static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
 2875				       const struct bpf_reg_state *reg,
 2876				       const char *pointer_desc,
 2877				       int off, int size, bool strict)
 2878{
 2879	struct tnum reg_off;
 2880
 2881	/* Byte size accesses are always allowed. */
 2882	if (!strict || size == 1)
 2883		return 0;
 2884
 2885	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
 2886	if (!tnum_is_aligned(reg_off, size)) {
 2887		char tn_buf[48];
 2888
 2889		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 2890		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
 2891			pointer_desc, tn_buf, reg->off, off, size);
 2892		return -EACCES;
 2893	}
 2894
 2895	return 0;
 2896}
 2897
 2898static int check_ptr_alignment(struct bpf_verifier_env *env,
 2899			       const struct bpf_reg_state *reg, int off,
 2900			       int size, bool strict_alignment_once)
 2901{
 2902	bool strict = env->strict_alignment || strict_alignment_once;
 2903	const char *pointer_desc = "";
 2904
 2905	switch (reg->type) {
 2906	case PTR_TO_PACKET:
 2907	case PTR_TO_PACKET_META:
 2908		/* Special case, because of NET_IP_ALIGN. Given metadata sits
 2909		 * right in front, treat it the very same way.
 2910		 */
 2911		return check_pkt_ptr_alignment(env, reg, off, size, strict);
 2912	case PTR_TO_FLOW_KEYS:
 2913		pointer_desc = "flow keys ";
 2914		break;
 2915	case PTR_TO_MAP_VALUE:
 2916		pointer_desc = "value ";
 2917		break;
 2918	case PTR_TO_CTX:
 2919		pointer_desc = "context ";
 2920		break;
 2921	case PTR_TO_STACK:
 2922		pointer_desc = "stack ";
 2923		/* The stack spill tracking logic in check_stack_write()
 2924		 * and check_stack_read() relies on stack accesses being
 2925		 * aligned.
 2926		 */
 2927		strict = true;
 2928		break;
 2929	case PTR_TO_SOCKET:
 2930		pointer_desc = "sock ";
 2931		break;
 2932	case PTR_TO_SOCK_COMMON:
 2933		pointer_desc = "sock_common ";
 2934		break;
 2935	case PTR_TO_TCP_SOCK:
 2936		pointer_desc = "tcp_sock ";
 2937		break;
 2938	case PTR_TO_XDP_SOCK:
 2939		pointer_desc = "xdp_sock ";
 2940		break;
 2941	default:
 2942		break;
 2943	}
 2944	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
 2945					   strict);
 2946}
 2947
 2948static int update_stack_depth(struct bpf_verifier_env *env,
 2949			      const struct bpf_func_state *func,
 2950			      int off)
 2951{
 2952	u16 stack = env->subprog_info[func->subprogno].stack_depth;
 2953
 2954	if (stack >= -off)
 2955		return 0;
 2956
 2957	/* update known max for given subprogram */
 2958	env->subprog_info[func->subprogno].stack_depth = -off;
 2959	return 0;
 2960}
 2961
 2962/* starting from main bpf function walk all instructions of the function
  2963 * and recursively walk all callees that the given function can call.
 2964 * Ignore jump and exit insns.
 2965 * Since recursion is prevented by check_cfg() this algorithm
 2966 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
 2967 */
 2968static int check_max_stack_depth(struct bpf_verifier_env *env)
 2969{
 2970	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
 2971	struct bpf_subprog_info *subprog = env->subprog_info;
 2972	struct bpf_insn *insn = env->prog->insnsi;
 2973	int ret_insn[MAX_CALL_FRAMES];
 2974	int ret_prog[MAX_CALL_FRAMES];
 2975
 2976process_func:
  2977	/* round up to 32 bytes, since this is the granularity
  2978	 * of the interpreter stack size
 2979	 */
 2980	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
 2981	if (depth > MAX_BPF_STACK) {
 2982		verbose(env, "combined stack size of %d calls is %d. Too large\n",
 2983			frame + 1, depth);
 2984		return -EACCES;
 2985	}
 2986continue_func:
 2987	subprog_end = subprog[idx + 1].start;
 2988	for (; i < subprog_end; i++) {
 2989		if (insn[i].code != (BPF_JMP | BPF_CALL))
 2990			continue;
 2991		if (insn[i].src_reg != BPF_PSEUDO_CALL)
 2992			continue;
 2993		/* remember insn and function to return to */
 2994		ret_insn[frame] = i + 1;
 2995		ret_prog[frame] = idx;
 2996
 2997		/* find the callee */
 2998		i = i + insn[i].imm + 1;
 2999		idx = find_subprog(env, i);
 3000		if (idx < 0) {
 3001			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
 3002				  i);
 3003			return -EFAULT;
 3004		}
 3005		frame++;
 3006		if (frame >= MAX_CALL_FRAMES) {
 3007			verbose(env, "the call stack of %d frames is too deep !\n",
 3008				frame);
 3009			return -E2BIG;
 3010		}
 3011		goto process_func;
 3012	}
 3013	/* end of for() loop means the last insn of the 'subprog'
 3014	 * was reached. Doesn't matter whether it was JA or EXIT
 3015	 */
 3016	if (frame == 0)
 3017		return 0;
 3018	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
 3019	frame--;
 3020	i = ret_insn[frame];
 3021	idx = ret_prog[frame];
 3022	goto continue_func;
 3023}
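
/* Illustration (hypothetical depths): main uses 128 bytes of stack and
 * calls foo, which uses 384 bytes and in turn calls bar with 64 bytes.
 * The walk above sums the rounded-up depths along that call chain:
 * 128 + 384 + 64 = 576, which exceeds MAX_BPF_STACK (512), so the program
 * is rejected even though each function individually stays within 512.
 */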
 3024
 3025#ifndef CONFIG_BPF_JIT_ALWAYS_ON
 3026static int get_callee_stack_depth(struct bpf_verifier_env *env,
 3027				  const struct bpf_insn *insn, int idx)
 3028{
 3029	int start = idx + insn->imm + 1, subprog;
 3030
 3031	subprog = find_subprog(env, start);
 3032	if (subprog < 0) {
 3033		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
 3034			  start);
 3035		return -EFAULT;
 3036	}
 3037	return env->subprog_info[subprog].stack_depth;
 3038}
 3039#endif
 3040
 3041int check_ctx_reg(struct bpf_verifier_env *env,
 3042		  const struct bpf_reg_state *reg, int regno)
 3043{
 3044	/* Access to ctx or passing it to a helper is only allowed in
 3045	 * its original, unmodified form.
 3046	 */
 3047
 3048	if (reg->off) {
 3049		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
 3050			regno, reg->off);
 3051		return -EACCES;
 3052	}
 3053
 3054	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 3055		char tn_buf[48];
 3056
 3057		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3058		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
 3059		return -EACCES;
 3060	}
 3061
 3062	return 0;
 3063}
 3064
 3065static int __check_buffer_access(struct bpf_verifier_env *env,
 3066				 const char *buf_info,
 3067				 const struct bpf_reg_state *reg,
 3068				 int regno, int off, int size)
 3069{
 3070	if (off < 0) {
 3071		verbose(env,
 3072			"R%d invalid %s buffer access: off=%d, size=%d\n",
 3073			regno, buf_info, off, size);
 3074		return -EACCES;
 3075	}
 3076	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 3077		char tn_buf[48];
 3078
 3079		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3080		verbose(env,
 3081			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
 3082			regno, off, tn_buf);
 3083		return -EACCES;
 3084	}
 3085
 3086	return 0;
 3087}
 3088
 3089static int check_tp_buffer_access(struct bpf_verifier_env *env,
 3090				  const struct bpf_reg_state *reg,
 3091				  int regno, int off, int size)
 3092{
 3093	int err;
 3094
 3095	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
 3096	if (err)
 3097		return err;
 3098
 3099	if (off + size > env->prog->aux->max_tp_access)
 3100		env->prog->aux->max_tp_access = off + size;
 3101
 3102	return 0;
 3103}
 3104
 3105static int check_buffer_access(struct bpf_verifier_env *env,
 3106			       const struct bpf_reg_state *reg,
 3107			       int regno, int off, int size,
 3108			       bool zero_size_allowed,
 3109			       const char *buf_info,
 3110			       u32 *max_access)
 3111{
 3112	int err;
 3113
 3114	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
 3115	if (err)
 3116		return err;
 3117
 3118	if (off + size > *max_access)
 3119		*max_access = off + size;
 3120
 3121	return 0;
 3122}
 3123
  3124/* The BPF architecture zero extends alu32 ops into 64-bit registers */
 3125static void zext_32_to_64(struct bpf_reg_state *reg)
 3126{
 3127	reg->var_off = tnum_subreg(reg->var_off);
 3128	__reg_assign_32_into_64(reg);
 3129}
 3130
 3131/* truncate register to smaller size (in bytes)
 3132 * must be called with size < BPF_REG_SIZE
 3133 */
 3134static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 3135{
 3136	u64 mask;
 3137
 3138	/* clear high bits in bit representation */
 3139	reg->var_off = tnum_cast(reg->var_off, size);
 3140
 3141	/* fix arithmetic bounds */
 3142	mask = ((u64)1 << (size * 8)) - 1;
 3143	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
 3144		reg->umin_value &= mask;
 3145		reg->umax_value &= mask;
 3146	} else {
 3147		reg->umin_value = 0;
 3148		reg->umax_value = mask;
 3149	}
 3150	reg->smin_value = reg->umin_value;
 3151	reg->smax_value = reg->umax_value;
 3152
  3153	/* If the size is smaller than the 32-bit register, the 32-bit
  3154	 * values are also truncated, so push the 64-bit bounds into the
  3155	 * 32-bit bounds. The bounds above were already truncated to < 32 bits.
 3156	 */
 3157	if (size >= 4)
 3158		return;
 3159	__reg_combine_64_into_32(reg);
 3160}
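
/* Truncation example (made-up bounds): coercing a register known to lie in
 * [0, 0x1ffff] down to size == 2 keeps only the low 16 bits. Because 0 and
 * 0x1ffff differ above the 0xffff mask, the bounds collapse to the full
 * [0, 0xffff] range instead of guessing a narrower (and wrong) interval.
 */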
 3161
 3162static bool bpf_map_is_rdonly(const struct bpf_map *map)
 3163{
 3164	return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
 3165}
 3166
 3167static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
 3168{
 3169	void *ptr;
 3170	u64 addr;
 3171	int err;
 3172
 3173	err = map->ops->map_direct_value_addr(map, &addr, off);
 3174	if (err)
 3175		return err;
 3176	ptr = (void *)(long)addr + off;
 3177
 3178	switch (size) {
 3179	case sizeof(u8):
 3180		*val = (u64)*(u8 *)ptr;
 3181		break;
 3182	case sizeof(u16):
 3183		*val = (u64)*(u16 *)ptr;
 3184		break;
 3185	case sizeof(u32):
 3186		*val = (u64)*(u32 *)ptr;
 3187		break;
 3188	case sizeof(u64):
 3189		*val = *(u64 *)ptr;
 3190		break;
 3191	default:
 3192		return -EINVAL;
 3193	}
 3194	return 0;
 3195}
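
/* Usage sketch (illustrative): for a frozen read-only array map, a load
 * such as
 *   r0 = *(u32 *)(r7 + 4)      // r7 = PTR_TO_MAP_VALUE with constant var_off
 * lets check_mem_access() below call bpf_map_direct_read() and mark r0 as a
 * scalar with the known value stored at offset 4, instead of a fully
 * unknown scalar.
 */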
 3196
 3197static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 3198				   struct bpf_reg_state *regs,
 3199				   int regno, int off, int size,
 3200				   enum bpf_access_type atype,
 3201				   int value_regno)
 3202{
 3203	struct bpf_reg_state *reg = regs + regno;
 3204	const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
 3205	const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
 3206	u32 btf_id;
 3207	int ret;
 3208
 3209	if (off < 0) {
 3210		verbose(env,
 3211			"R%d is ptr_%s invalid negative access: off=%d\n",
 3212			regno, tname, off);
 3213		return -EACCES;
 3214	}
 3215	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 3216		char tn_buf[48];
 3217
 3218		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3219		verbose(env,
 3220			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
 3221			regno, tname, off, tn_buf);
 3222		return -EACCES;
 3223	}
 3224
 3225	if (env->ops->btf_struct_access) {
 3226		ret = env->ops->btf_struct_access(&env->log, t, off, size,
 3227						  atype, &btf_id);
 3228	} else {
 3229		if (atype != BPF_READ) {
 3230			verbose(env, "only read is supported\n");
 3231			return -EACCES;
 3232		}
 3233
 3234		ret = btf_struct_access(&env->log, t, off, size, atype,
 3235					&btf_id);
 3236	}
 3237
 3238	if (ret < 0)
 3239		return ret;
 3240
 3241	if (atype == BPF_READ && value_regno >= 0)
 3242		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
 3243
 3244	return 0;
 3245}
 3246
 3247static int check_ptr_to_map_access(struct bpf_verifier_env *env,
 3248				   struct bpf_reg_state *regs,
 3249				   int regno, int off, int size,
 3250				   enum bpf_access_type atype,
 3251				   int value_regno)
 3252{
 3253	struct bpf_reg_state *reg = regs + regno;
 3254	struct bpf_map *map = reg->map_ptr;
 3255	const struct btf_type *t;
 3256	const char *tname;
 3257	u32 btf_id;
 3258	int ret;
 3259
 3260	if (!btf_vmlinux) {
 3261		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
 3262		return -ENOTSUPP;
 3263	}
 3264
 3265	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
 3266		verbose(env, "map_ptr access not supported for map type %d\n",
 3267			map->map_type);
 3268		return -ENOTSUPP;
 3269	}
 3270
 3271	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
 3272	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
 3273
 3274	if (!env->allow_ptr_to_map_access) {
 3275		verbose(env,
 3276			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
 3277			tname);
 3278		return -EPERM;
 3279	}
 3280
 3281	if (off < 0) {
 3282		verbose(env, "R%d is %s invalid negative access: off=%d\n",
 3283			regno, tname, off);
 3284		return -EACCES;
 3285	}
 3286
 3287	if (atype != BPF_READ) {
 3288		verbose(env, "only read from %s is supported\n", tname);
 3289		return -EACCES;
 3290	}
 3291
 3292	ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
 3293	if (ret < 0)
 3294		return ret;
 3295
 3296	if (value_regno >= 0)
 3297		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
 3298
 3299	return 0;
 3300}
 3301
 3302
 3303/* check whether memory at (regno + off) is accessible for t = (read | write)
  3304 * if t==write, value_regno is a register whose value is stored into memory
 3305 * if t==read, value_regno is a register which will receive the value from memory
 3306 * if t==write && value_regno==-1, some unknown value is stored into memory
 3307 * if t==read && value_regno==-1, don't care what we read from memory
 3308 */
 3309static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
 3310			    int off, int bpf_size, enum bpf_access_type t,
 3311			    int value_regno, bool strict_alignment_once)
 3312{
 3313	struct bpf_reg_state *regs = cur_regs(env);
 3314	struct bpf_reg_state *reg = regs + regno;
 3315	struct bpf_func_state *state;
 3316	int size, err = 0;
 3317
 3318	size = bpf_size_to_bytes(bpf_size);
 3319	if (size < 0)
 3320		return size;
 3321
 3322	/* alignment checks will add in reg->off themselves */
 3323	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
 3324	if (err)
 3325		return err;
 3326
 3327	/* for access checks, reg->off is just part of off */
 3328	off += reg->off;
 3329
 3330	if (reg->type == PTR_TO_MAP_VALUE) {
 3331		if (t == BPF_WRITE && value_regno >= 0 &&
 3332		    is_pointer_value(env, value_regno)) {
 3333			verbose(env, "R%d leaks addr into map\n", value_regno);
 3334			return -EACCES;
 3335		}
 3336		err = check_map_access_type(env, regno, off, size, t);
 3337		if (err)
 3338			return err;
 3339		err = check_map_access(env, regno, off, size, false);
 3340		if (!err && t == BPF_READ && value_regno >= 0) {
 3341			struct bpf_map *map = reg->map_ptr;
 3342
 3343			/* if map is read-only, track its contents as scalars */
 3344			if (tnum_is_const(reg->var_off) &&
 3345			    bpf_map_is_rdonly(map) &&
 3346			    map->ops->map_direct_value_addr) {
 3347				int map_off = off + reg->var_off.value;
 3348				u64 val = 0;
 3349
 3350				err = bpf_map_direct_read(map, map_off, size,
 3351							  &val);
 3352				if (err)
 3353					return err;
 3354
 3355				regs[value_regno].type = SCALAR_VALUE;
 3356				__mark_reg_known(&regs[value_regno], val);
 3357			} else {
 3358				mark_reg_unknown(env, regs, value_regno);
 3359			}
 3360		}
 3361	} else if (reg->type == PTR_TO_MEM) {
 3362		if (t == BPF_WRITE && value_regno >= 0 &&
 3363		    is_pointer_value(env, value_regno)) {
 3364			verbose(env, "R%d leaks addr into mem\n", value_regno);
 3365			return -EACCES;
 3366		}
 3367		err = check_mem_region_access(env, regno, off, size,
 3368					      reg->mem_size, false);
 3369		if (!err && t == BPF_READ && value_regno >= 0)
 3370			mark_reg_unknown(env, regs, value_regno);
 3371	} else if (reg->type == PTR_TO_CTX) {
 3372		enum bpf_reg_type reg_type = SCALAR_VALUE;
 3373		u32 btf_id = 0;
 3374
 3375		if (t == BPF_WRITE && value_regno >= 0 &&
 3376		    is_pointer_value(env, value_regno)) {
 3377			verbose(env, "R%d leaks addr into ctx\n", value_regno);
 3378			return -EACCES;
 3379		}
 3380
 3381		err = check_ctx_reg(env, reg, regno);
 3382		if (err < 0)
 3383			return err;
 3384
 3385		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
 3386		if (err)
 3387			verbose_linfo(env, insn_idx, "; ");
 3388		if (!err && t == BPF_READ && value_regno >= 0) {
 3389			/* ctx access returns either a scalar, or a
 3390			 * PTR_TO_PACKET[_META,_END]. In the latter
 3391			 * case, we know the offset is zero.
 3392			 */
 3393			if (reg_type == SCALAR_VALUE) {
 3394				mark_reg_unknown(env, regs, value_regno);
 3395			} else {
 3396				mark_reg_known_zero(env, regs,
 3397						    value_regno);
 3398				if (reg_type_may_be_null(reg_type))
 3399					regs[value_regno].id = ++env->id_gen;
 3400				/* A load of a ctx field could have an actual load
 3401				 * size different from the one encoded in the insn.
 3402				 * When the dst is a pointer, it is for sure not
 3403				 * a sub-register.
 3404				 */
 3405				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
 3406				if (reg_type == PTR_TO_BTF_ID ||
 3407				    reg_type == PTR_TO_BTF_ID_OR_NULL)
 3408					regs[value_regno].btf_id = btf_id;
 3409			}
 3410			regs[value_regno].type = reg_type;
 3411		}
 3412
 3413	} else if (reg->type == PTR_TO_STACK) {
 3414		off += reg->var_off.value;
 3415		err = check_stack_access(env, reg, off, size);
 3416		if (err)
 3417			return err;
 3418
 3419		state = func(env, reg);
 3420		err = update_stack_depth(env, state, off);
 3421		if (err)
 3422			return err;
 3423
 3424		if (t == BPF_WRITE)
 3425			err = check_stack_write(env, state, off, size,
 3426						value_regno, insn_idx);
 3427		else
 3428			err = check_stack_read(env, state, off, size,
 3429					       value_regno);
 3430	} else if (reg_is_pkt_pointer(reg)) {
 3431		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
 3432			verbose(env, "cannot write into packet\n");
 3433			return -EACCES;
 3434		}
 3435		if (t == BPF_WRITE && value_regno >= 0 &&
 3436		    is_pointer_value(env, value_regno)) {
 3437			verbose(env, "R%d leaks addr into packet\n",
 3438				value_regno);
 3439			return -EACCES;
 3440		}
 3441		err = check_packet_access(env, regno, off, size, false);
 3442		if (!err && t == BPF_READ && value_regno >= 0)
 3443			mark_reg_unknown(env, regs, value_regno);
 3444	} else if (reg->type == PTR_TO_FLOW_KEYS) {
 3445		if (t == BPF_WRITE && value_regno >= 0 &&
 3446		    is_pointer_value(env, value_regno)) {
 3447			verbose(env, "R%d leaks addr into flow keys\n",
 3448				value_regno);
 3449			return -EACCES;
 3450		}
 3451
 3452		err = check_flow_keys_access(env, off, size);
 3453		if (!err && t == BPF_READ && value_regno >= 0)
 3454			mark_reg_unknown(env, regs, value_regno);
 3455	} else if (type_is_sk_pointer(reg->type)) {
 3456		if (t == BPF_WRITE) {
 3457			verbose(env, "R%d cannot write into %s\n",
 3458				regno, reg_type_str[reg->type]);
 3459			return -EACCES;
 3460		}
 3461		err = check_sock_access(env, insn_idx, regno, off, size, t);
 3462		if (!err && value_regno >= 0)
 3463			mark_reg_unknown(env, regs, value_regno);
 3464	} else if (reg->type == PTR_TO_TP_BUFFER) {
 3465		err = check_tp_buffer_access(env, reg, regno, off, size);
 3466		if (!err && t == BPF_READ && value_regno >= 0)
 3467			mark_reg_unknown(env, regs, value_regno);
 3468	} else if (reg->type == PTR_TO_BTF_ID) {
 3469		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
 3470					      value_regno);
 3471	} else if (reg->type == CONST_PTR_TO_MAP) {
 3472		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
 3473					      value_regno);
 3474	} else if (reg->type == PTR_TO_RDONLY_BUF) {
 3475		if (t == BPF_WRITE) {
 3476			verbose(env, "R%d cannot write into %s\n",
 3477				regno, reg_type_str[reg->type]);
 3478			return -EACCES;
 3479		}
 3480		err = check_buffer_access(env, reg, regno, off, size, false,
 3481					  "rdonly",
 3482					  &env->prog->aux->max_rdonly_access);
 3483		if (!err && value_regno >= 0)
 3484			mark_reg_unknown(env, regs, value_regno);
 3485	} else if (reg->type == PTR_TO_RDWR_BUF) {
 3486		err = check_buffer_access(env, reg, regno, off, size, false,
 3487					  "rdwr",
 3488					  &env->prog->aux->max_rdwr_access);
 3489		if (!err && t == BPF_READ && value_regno >= 0)
 3490			mark_reg_unknown(env, regs, value_regno);
 3491	} else {
 3492		verbose(env, "R%d invalid mem access '%s'\n", regno,
 3493			reg_type_str[reg->type]);
 3494		return -EACCES;
 3495	}
 3496
 3497	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
 3498	    regs[value_regno].type == SCALAR_VALUE) {
 3499		/* b/h/w load zero-extends, mark upper bits as known 0 */
 3500		coerce_reg_to_size(&regs[value_regno], size);
 3501	}
 3502	return err;
 3503}
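
/* Illustrative sketch (not part of the kernel source; registers and offsets
 * are hypothetical): two loads that take different branches of
 * check_mem_access() above. With R1 of type PTR_TO_CTX the first load is
 * routed through check_ctx_access(), while with R0 of type PTR_TO_MAP_VALUE
 * (e.g. after a successful lookup) the second is bounds-checked by
 * check_map_access():
 *
 *   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
 *   BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
 */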
 3504
 3505static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
 3506{
 3507	int err;
 3508
 3509	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
 3510	    insn->imm != 0) {
 3511		verbose(env, "BPF_XADD uses reserved fields\n");
 3512		return -EINVAL;
 3513	}
 3514
 3515	/* check src1 operand */
 3516	err = check_reg_arg(env, insn->src_reg, SRC_OP);
 3517	if (err)
 3518		return err;
 3519
 3520	/* check src2 operand */
 3521	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 3522	if (err)
 3523		return err;
 3524
 3525	if (is_pointer_value(env, insn->src_reg)) {
 3526		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
 3527		return -EACCES;
 3528	}
 3529
 3530	if (is_ctx_reg(env, insn->dst_reg) ||
 3531	    is_pkt_reg(env, insn->dst_reg) ||
 3532	    is_flow_key_reg(env, insn->dst_reg) ||
 3533	    is_sk_reg(env, insn->dst_reg)) {
 3534		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
 3535			insn->dst_reg,
 3536			reg_type_str[reg_state(env, insn->dst_reg)->type]);
 3537		return -EACCES;
 3538	}
 3539
 3540	/* check whether atomic_add can read the memory */
 3541	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 3542			       BPF_SIZE(insn->code), BPF_READ, -1, true);
 3543	if (err)
 3544		return err;
 3545
 3546	/* check whether atomic_add can write into the same memory */
 3547	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
 3548				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
 3549}
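
/* Illustrative sketch (hypothetical registers/offsets): the kind of sequence
 * check_xadd() accepts - an atomic add into an 8-byte stack slot that was
 * initialized first. The same BPF_STX_XADD with a ctx, packet, flow-keys or
 * socket pointer as the destination is rejected above.
 *
 *   BPF_MOV64_IMM(BPF_REG_2, 1),
 *   BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
 *   BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
 */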
 3550
 3551static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
 3552				  int off, int access_size,
 3553				  bool zero_size_allowed)
 3554{
 3555	struct bpf_reg_state *reg = reg_state(env, regno);
 3556
 3557	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
 3558	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
 3559		if (tnum_is_const(reg->var_off)) {
 3560			verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
 3561				regno, off, access_size);
 3562		} else {
 3563			char tn_buf[48];
 3564
 3565			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3566			verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
 3567				regno, tn_buf, access_size);
 3568		}
 3569		return -EACCES;
 3570	}
 3571	return 0;
 3572}
 3573
 3574	/* when register 'regno' is passed into a function that will read 'access_size'
 3575	 * bytes from that pointer, make sure that it's within the stack boundary
 3576	 * and all elements of the stack are initialized.
 3577 * Unlike most pointer bounds-checking functions, this one doesn't take an
 3578 * 'off' argument, so it has to add in reg->off itself.
 3579 */
 3580static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 3581				int access_size, bool zero_size_allowed,
 3582				struct bpf_call_arg_meta *meta)
 3583{
 3584	struct bpf_reg_state *reg = reg_state(env, regno);
 3585	struct bpf_func_state *state = func(env, reg);
 3586	int err, min_off, max_off, i, j, slot, spi;
 3587
 3588	if (reg->type != PTR_TO_STACK) {
 3589		/* Allow zero-byte read from NULL, regardless of pointer type */
 3590		if (zero_size_allowed && access_size == 0 &&
 3591		    register_is_null(reg))
 3592			return 0;
 3593
 3594		verbose(env, "R%d type=%s expected=%s\n", regno,
 3595			reg_type_str[reg->type],
 3596			reg_type_str[PTR_TO_STACK]);
 3597		return -EACCES;
 3598	}
 3599
 3600	if (tnum_is_const(reg->var_off)) {
 3601		min_off = max_off = reg->var_off.value + reg->off;
 3602		err = __check_stack_boundary(env, regno, min_off, access_size,
 3603					     zero_size_allowed);
 3604		if (err)
 3605			return err;
 3606	} else {
 3607		/* Variable offset is prohibited for unprivileged mode for
 3608		 * simplicity since it requires corresponding support in
 3609		 * Spectre masking for stack ALU.
 3610		 * See also retrieve_ptr_limit().
 3611		 */
 3612		if (!env->bypass_spec_v1) {
 3613			char tn_buf[48];
 3614
 3615			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3616			verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
 3617				regno, tn_buf);
 3618			return -EACCES;
 3619		}
 3620		/* Only an initialized buffer on the stack may be accessed with
 3621		 * a variable offset. With an uninitialized buffer it's hard to
 3622		 * guarantee that the whole memory is marked as initialized on
 3623		 * helper return, since the exact bounds are unknown, which may
 3624		 * cause uninitialized stack data to be leaked.
 3625		 */
 3626		if (meta && meta->raw_mode)
 3627			meta = NULL;
 3628
 3629		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
 3630		    reg->smax_value <= -BPF_MAX_VAR_OFF) {
 3631			verbose(env, "R%d unbounded indirect variable offset stack access\n",
 3632				regno);
 3633			return -EACCES;
 3634		}
 3635		min_off = reg->smin_value + reg->off;
 3636		max_off = reg->smax_value + reg->off;
 3637		err = __check_stack_boundary(env, regno, min_off, access_size,
 3638					     zero_size_allowed);
 3639		if (err) {
 3640			verbose(env, "R%d min value is outside of stack bound\n",
 3641				regno);
 3642			return err;
 3643		}
 3644		err = __check_stack_boundary(env, regno, max_off, access_size,
 3645					     zero_size_allowed);
 3646		if (err) {
 3647			verbose(env, "R%d max value is outside of stack bound\n",
 3648				regno);
 3649			return err;
 3650		}
 3651	}
 3652
 3653	if (meta && meta->raw_mode) {
 3654		meta->access_size = access_size;
 3655		meta->regno = regno;
 3656		return 0;
 3657	}
 3658
 3659	for (i = min_off; i < max_off + access_size; i++) {
 3660		u8 *stype;
 3661
 3662		slot = -i - 1;
 3663		spi = slot / BPF_REG_SIZE;
 3664		if (state->allocated_stack <= slot)
 3665			goto err;
 3666		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
 3667		if (*stype == STACK_MISC)
 3668			goto mark;
 3669		if (*stype == STACK_ZERO) {
 3670			/* helper can write anything into the stack */
 3671			*stype = STACK_MISC;
 3672			goto mark;
 3673		}
 3674
 3675		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
 3676		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
 3677			goto mark;
 3678
 3679		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
 3680		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
 3681			__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
 3682			for (j = 0; j < BPF_REG_SIZE; j++)
 3683				state->stack[spi].slot_type[j] = STACK_MISC;
 3684			goto mark;
 3685		}
 3686
 3687err:
 3688		if (tnum_is_const(reg->var_off)) {
 3689			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
 3690				min_off, i - min_off, access_size);
 3691		} else {
 3692			char tn_buf[48];
 3693
 3694			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 3695			verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
 3696				tn_buf, i - min_off, access_size);
 3697		}
 3698		return -EACCES;
 3699mark:
 3700		/* reading any byte out of 8-byte 'spill_slot' will cause
 3701		 * the whole slot to be marked as 'read'
 3702		 */
 3703		mark_reg_read(env, &state->stack[spi].spilled_ptr,
 3704			      state->stack[spi].spilled_ptr.parent,
 3705			      REG_LIVE_READ64);
 3706	}
 3707	return update_stack_depth(env, state, min_off);
 3708}
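
/* Illustrative sketch (hypothetical offsets): a helper call whose memory
 * argument ends up in check_stack_boundary(). bpf_get_current_comm() takes
 * an ARG_PTR_TO_UNINIT_MEM buffer plus an ARG_CONST_SIZE, so raw_mode is
 * set and the verifier only has to prove that [fp-16, fp) lies inside the
 * stack; for non-raw helpers the per-byte initialization walk above also runs.
 *
 *   BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -16),
 *   BPF_MOV64_IMM(BPF_REG_2, 16),
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_comm),
 */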
 3709
 3710static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 3711				   int access_size, bool zero_size_allowed,
 3712				   struct bpf_call_arg_meta *meta)
 3713{
 3714	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 3715
 3716	switch (reg->type) {
 3717	case PTR_TO_PACKET:
 3718	case PTR_TO_PACKET_META:
 3719		return check_packet_access(env, regno, reg->off, access_size,
 3720					   zero_size_allowed);
 3721	case PTR_TO_MAP_VALUE:
 3722		if (check_map_access_type(env, regno, reg->off, access_size,
 3723					  meta && meta->raw_mode ? BPF_WRITE :
 3724					  BPF_READ))
 3725			return -EACCES;
 3726		return check_map_access(env, regno, reg->off, access_size,
 3727					zero_size_allowed);
 3728	case PTR_TO_MEM:
 3729		return check_mem_region_access(env, regno, reg->off,
 3730					       access_size, reg->mem_size,
 3731					       zero_size_allowed);
 3732	case PTR_TO_RDONLY_BUF:
 3733		if (meta && meta->raw_mode)
 3734			return -EACCES;
 3735		return check_buffer_access(env, reg, regno, reg->off,
 3736					   access_size, zero_size_allowed,
 3737					   "rdonly",
 3738					   &env->prog->aux->max_rdonly_access);
 3739	case PTR_TO_RDWR_BUF:
 3740		return check_buffer_access(env, reg, regno, reg->off,
 3741					   access_size, zero_size_allowed,
 3742					   "rdwr",
 3743					   &env->prog->aux->max_rdwr_access);
 3744	default: /* scalar_value|ptr_to_stack or invalid ptr */
 3745		return check_stack_boundary(env, regno, access_size,
 3746					    zero_size_allowed, meta);
 3747	}
 3748}
 3749
 3750/* Implementation details:
 3751 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
 3752 * Two bpf_map_lookups (even with the same key) will have different reg->id.
 3753 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
 3754 * value_or_null->value transition, since the verifier only cares about
 3755 * the range of access to valid map value pointer and doesn't care about actual
 3756 * address of the map element.
 3757 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
 3758 * reg->id > 0 after value_or_null->value transition. By doing so
 3759 * two bpf_map_lookups will be considered two different pointers that
 3760 * point to different bpf_spin_locks.
 3761	 * The verifier allows taking only one bpf_spin_lock at a time to avoid
 3762	 * deadlocks.
 3763	 * Since only one bpf_spin_lock is allowed, the checks are simpler than
 3764	 * the reg_is_refcounted() logic. The verifier needs to remember only
 3765 * one spin_lock instead of array of acquired_refs.
 3766 * cur_state->active_spin_lock remembers which map value element got locked
 3767 * and clears it after bpf_spin_unlock.
 3768 */
 3769static int process_spin_lock(struct bpf_verifier_env *env, int regno,
 3770			     bool is_lock)
 3771{
 3772	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 3773	struct bpf_verifier_state *cur = env->cur_state;
 3774	bool is_const = tnum_is_const(reg->var_off);
 3775	struct bpf_map *map = reg->map_ptr;
 3776	u64 val = reg->var_off.value;
 3777
 3778	if (reg->type != PTR_TO_MAP_VALUE) {
 3779		verbose(env, "R%d is not a pointer to map_value\n", regno);
 3780		return -EINVAL;
 3781	}
 3782	if (!is_const) {
 3783		verbose(env,
 3784			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
 3785			regno);
 3786		return -EINVAL;
 3787	}
 3788	if (!map->btf) {
 3789		verbose(env,
 3790			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
 3791			map->name);
 3792		return -EINVAL;
 3793	}
 3794	if (!map_value_has_spin_lock(map)) {
 3795		if (map->spin_lock_off == -E2BIG)
 3796			verbose(env,
 3797				"map '%s' has more than one 'struct bpf_spin_lock'\n",
 3798				map->name);
 3799		else if (map->spin_lock_off == -ENOENT)
 3800			verbose(env,
 3801				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
 3802				map->name);
 3803		else
 3804			verbose(env,
 3805				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
 3806				map->name);
 3807		return -EINVAL;
 3808	}
 3809	if (map->spin_lock_off != val + reg->off) {
 3810		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
 3811			val + reg->off);
 3812		return -EINVAL;
 3813	}
 3814	if (is_lock) {
 3815		if (cur->active_spin_lock) {
 3816			verbose(env,
 3817				"Locking two bpf_spin_locks is not allowed\n");
 3818			return -EINVAL;
 3819		}
 3820		cur->active_spin_lock = reg->id;
 3821	} else {
 3822		if (!cur->active_spin_lock) {
 3823			verbose(env, "bpf_spin_unlock without taking a lock\n");
 3824			return -EINVAL;
 3825		}
 3826		if (cur->active_spin_lock != reg->id) {
 3827			verbose(env, "bpf_spin_unlock of different lock\n");
 3828			return -EINVAL;
 3829		}
 3830		cur->active_spin_lock = 0;
 3831	}
 3832	return 0;
 3833}
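
/* Illustrative sketch (names are hypothetical): the map value layout and
 * usage pattern that process_spin_lock() validates. The value type must be
 * described by BTF so map->spin_lock_off can be located.
 *
 *   struct val {
 *           int cnt;
 *           struct bpf_spin_lock lock;
 *   };
 *
 *   struct val *v = bpf_map_lookup_elem(&map, &key);
 *   if (v) {
 *           bpf_spin_lock(&v->lock);
 *           v->cnt++;
 *           bpf_spin_unlock(&v->lock);
 *   }
 */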
 3834
 3835static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
 3836{
 3837	return type == ARG_PTR_TO_MEM ||
 3838	       type == ARG_PTR_TO_MEM_OR_NULL ||
 3839	       type == ARG_PTR_TO_UNINIT_MEM;
 3840}
 3841
 3842static bool arg_type_is_mem_size(enum bpf_arg_type type)
 3843{
 3844	return type == ARG_CONST_SIZE ||
 3845	       type == ARG_CONST_SIZE_OR_ZERO;
 3846}
 3847
 3848static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type)
 3849{
 3850	return type == ARG_PTR_TO_ALLOC_MEM ||
 3851	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
 3852}
 3853
 3854static bool arg_type_is_alloc_size(enum bpf_arg_type type)
 3855{
 3856	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
 3857}
 3858
 3859static bool arg_type_is_int_ptr(enum bpf_arg_type type)
 3860{
 3861	return type == ARG_PTR_TO_INT ||
 3862	       type == ARG_PTR_TO_LONG;
 3863}
 3864
 3865static int int_ptr_type_to_size(enum bpf_arg_type type)
 3866{
 3867	if (type == ARG_PTR_TO_INT)
 3868		return sizeof(u32);
 3869	else if (type == ARG_PTR_TO_LONG)
 3870		return sizeof(u64);
 3871
 3872	return -EINVAL;
 3873}
 3874
 3875static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 3876			  struct bpf_call_arg_meta *meta,
 3877			  const struct bpf_func_proto *fn)
 3878{
 3879	u32 regno = BPF_REG_1 + arg;
 3880	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
 3881	enum bpf_reg_type expected_type, type = reg->type;
 3882	enum bpf_arg_type arg_type = fn->arg_type[arg];
 3883	int err = 0;
 3884
 3885	if (arg_type == ARG_DONTCARE)
 3886		return 0;
 3887
 3888	err = check_reg_arg(env, regno, SRC_OP);
 3889	if (err)
 3890		return err;
 3891
 3892	if (arg_type == ARG_ANYTHING) {
 3893		if (is_pointer_value(env, regno)) {
 3894			verbose(env, "R%d leaks addr into helper function\n",
 3895				regno);
 3896			return -EACCES;
 3897		}
 3898		return 0;
 3899	}
 3900
 3901	if (type_is_pkt_pointer(type) &&
 3902	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
 3903		verbose(env, "helper access to the packet is not allowed\n");
 3904		return -EACCES;
 3905	}
 3906
 3907	if (arg_type == ARG_PTR_TO_MAP_KEY ||
 3908	    arg_type == ARG_PTR_TO_MAP_VALUE ||
 3909	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
 3910	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
 3911		expected_type = PTR_TO_STACK;
 3912		if (register_is_null(reg) &&
 3913		    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
 3914			/* final test in check_stack_boundary() */;
 3915		else if (!type_is_pkt_pointer(type) &&
 3916			 type != PTR_TO_MAP_VALUE &&
 3917			 type != expected_type)
 3918			goto err_type;
 3919	} else if (arg_type == ARG_CONST_SIZE ||
 3920		   arg_type == ARG_CONST_SIZE_OR_ZERO ||
 3921		   arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) {
 3922		expected_type = SCALAR_VALUE;
 3923		if (type != expected_type)
 3924			goto err_type;
 3925	} else if (arg_type == ARG_CONST_MAP_PTR) {
 3926		expected_type = CONST_PTR_TO_MAP;
 3927		if (type != expected_type)
 3928			goto err_type;
 3929	} else if (arg_type == ARG_PTR_TO_CTX ||
 3930		   arg_type == ARG_PTR_TO_CTX_OR_NULL) {
 3931		expected_type = PTR_TO_CTX;
 3932		if (!(register_is_null(reg) &&
 3933		      arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
 3934			if (type != expected_type)
 3935				goto err_type;
 3936			err = check_ctx_reg(env, reg, regno);
 3937			if (err < 0)
 3938				return err;
 3939		}
 3940	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
 3941		expected_type = PTR_TO_SOCK_COMMON;
 3942		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
 3943		if (!type_is_sk_pointer(type))
 3944			goto err_type;
 3945		if (reg->ref_obj_id) {
 3946			if (meta->ref_obj_id) {
 3947				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
 3948					regno, reg->ref_obj_id,
 3949					meta->ref_obj_id);
 3950				return -EFAULT;
 3951			}
 3952			meta->ref_obj_id = reg->ref_obj_id;
 3953		}
 3954	} else if (arg_type == ARG_PTR_TO_SOCKET ||
 3955		   arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
 3956		expected_type = PTR_TO_SOCKET;
 3957		if (!(register_is_null(reg) &&
 3958		      arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
 3959			if (type != expected_type)
 3960				goto err_type;
 3961		}
 3962	} else if (arg_type == ARG_PTR_TO_BTF_ID) {
 3963		expected_type = PTR_TO_BTF_ID;
 3964		if (type != expected_type)
 3965			goto err_type;
 3966		if (!fn->check_btf_id) {
 3967			if (reg->btf_id != meta->btf_id) {
 3968				verbose(env, "Helper has type %s got %s in R%d\n",
 3969					kernel_type_name(meta->btf_id),
 3970					kernel_type_name(reg->btf_id), regno);
 3971
 3972				return -EACCES;
 3973			}
 3974		} else if (!fn->check_btf_id(reg->btf_id, arg)) {
 3975			verbose(env, "Helper does not support %s in R%d\n",
 3976				kernel_type_name(reg->btf_id), regno);
 3977
 3978			return -EACCES;
 3979		}
 3980		if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
 3981			verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
 3982				regno);
 3983			return -EACCES;
 3984		}
 3985	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
 3986		if (meta->func_id == BPF_FUNC_spin_lock) {
 3987			if (process_spin_lock(env, regno, true))
 3988				return -EACCES;
 3989		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
 3990			if (process_spin_lock(env, regno, false))
 3991				return -EACCES;
 3992		} else {
 3993			verbose(env, "verifier internal error\n");
 3994			return -EFAULT;
 3995		}
 3996	} else if (arg_type_is_mem_ptr(arg_type)) {
 3997		expected_type = PTR_TO_STACK;
 3998		/* One exception here. In case the function allows NULL to be
 3999		 * passed in as the argument, it's a SCALAR_VALUE type. The final
 4000		 * test happens during stack boundary checking.
 4001		 */
 4002		if (register_is_null(reg) &&
 4003		    (arg_type == ARG_PTR_TO_MEM_OR_NULL ||
 4004		     arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL))
 4005			/* final test in check_stack_boundary() */;
 4006		else if (!type_is_pkt_pointer(type) &&
 4007			 type != PTR_TO_MAP_VALUE &&
 4008			 type != PTR_TO_MEM &&
 4009			 type != PTR_TO_RDONLY_BUF &&
 4010			 type != PTR_TO_RDWR_BUF &&
 4011			 type != expected_type)
 4012			goto err_type;
 4013		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
 4014	} else if (arg_type_is_alloc_mem_ptr(arg_type)) {
 4015		expected_type = PTR_TO_MEM;
 4016		if (register_is_null(reg) &&
 4017		    arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)
 4018			/* final test in check_stack_boundary() */;
 4019		else if (type != expected_type)
 4020			goto err_type;
 4021		if (meta->ref_obj_id) {
 4022			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
 4023				regno, reg->ref_obj_id,
 4024				meta->ref_obj_id);
 4025			return -EFAULT;
 4026		}
 4027		meta->ref_obj_id = reg->ref_obj_id;
 4028	} else if (arg_type_is_int_ptr(arg_type)) {
 4029		expected_type = PTR_TO_STACK;
 4030		if (!type_is_pkt_pointer(type) &&
 4031		    type != PTR_TO_MAP_VALUE &&
 4032		    type != expected_type)
 4033			goto err_type;
 4034	} else {
 4035		verbose(env, "unsupported arg_type %d\n", arg_type);
 4036		return -EFAULT;
 4037	}
 4038
 4039	if (arg_type == ARG_CONST_MAP_PTR) {
 4040		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
 4041		meta->map_ptr = reg->map_ptr;
 4042	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
 4043		/* bpf_map_xxx(..., map_ptr, ..., key) call:
 4044		 * check that [key, key + map->key_size) are within
 4045		 * stack limits and initialized
 4046		 */
 4047		if (!meta->map_ptr) {
 4048			/* In the function declaration map_ptr must come before
 4049			 * map_key, so that it's verified and known before
 4050			 * we have to check map_key here. Otherwise it means
 4051			 * that the kernel subsystem misconfigured the verifier.
 4052			 */
 4053			verbose(env, "invalid map_ptr to access map->key\n");
 4054			return -EACCES;
 4055		}
 4056		err = check_helper_mem_access(env, regno,
 4057					      meta->map_ptr->key_size, false,
 4058					      NULL);
 4059	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
 4060		   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
 4061		    !register_is_null(reg)) ||
 4062		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
 4063		/* bpf_map_xxx(..., map_ptr, ..., value) call:
 4064		 * check [value, value + map->value_size) validity
 4065		 */
 4066		if (!meta->map_ptr) {
 4067			/* kernel subsystem misconfigured verifier */
 4068			verbose(env, "invalid map_ptr to access map->value\n");
 4069			return -EACCES;
 4070		}
 4071		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
 4072		err = check_helper_mem_access(env, regno,
 4073					      meta->map_ptr->value_size, false,
 4074					      meta);
 4075	} else if (arg_type_is_mem_size(arg_type)) {
 4076		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 4077
 4078		/* This is used to refine r0 return value bounds for helpers
 4079		 * that enforce this value as an upper bound on return values.
 4080		 * See do_refine_retval_range() for helpers that can refine
 4081		 * the return value. The C type of the size argument is u32, so
 4082		 * we pull the register bound from umax_value; if it can be
 4083		 * negative the verifier errors out. Only upper bounds can be
 4084		 * learned because the retval is an int and negative retvals are allowed.
 4085		 */
 4086		meta->msize_max_value = reg->umax_value;
 4087
 4088		/* The register is SCALAR_VALUE; the access check
 4089		 * happens using its boundaries.
 4090		 */
 4091		if (!tnum_is_const(reg->var_off))
 4092			/* For unprivileged variable accesses, disable raw
 4093			 * mode so that the program is required to
 4094			 * initialize all the memory that the helper could
 4095			 * just partially fill up.
 4096			 */
 4097			meta = NULL;
 4098
 4099		if (reg->smin_value < 0) {
 4100			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
 4101				regno);
 4102			return -EACCES;
 4103		}
 4104
 4105		if (reg->umin_value == 0) {
 4106			err = check_helper_mem_access(env, regno - 1, 0,
 4107						      zero_size_allowed,
 4108						      meta);
 4109			if (err)
 4110				return err;
 4111		}
 4112
 4113		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
 4114			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
 4115				regno);
 4116			return -EACCES;
 4117		}
 4118		err = check_helper_mem_access(env, regno - 1,
 4119					      reg->umax_value,
 4120					      zero_size_allowed, meta);
 4121		if (!err)
 4122			err = mark_chain_precision(env, regno);
 4123	} else if (arg_type_is_alloc_size(arg_type)) {
 4124		if (!tnum_is_const(reg->var_off)) {
 4125			verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
 4126				regno);
 4127			return -EACCES;
 4128		}
 4129		meta->mem_size = reg->var_off.value;
 4130	} else if (arg_type_is_int_ptr(arg_type)) {
 4131		int size = int_ptr_type_to_size(arg_type);
 4132
 4133		err = check_helper_mem_access(env, regno, size, false, meta);
 4134		if (err)
 4135			return err;
 4136		err = check_ptr_alignment(env, reg, 0, size, true);
 4137	}
 4138
 4139	return err;
 4140err_type:
 4141	verbose(env, "R%d type=%s expected=%s\n", regno,
 4142		reg_type_str[type], reg_type_str[expected_type]);
 4143	return -EACCES;
 4144}
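
/* Illustrative sketch (map_fd is a placeholder; assumes a map with a 4-byte
 * key and an 8-byte value): the argument pattern check_func_arg() walks for
 * bpf_map_update_elem(map, key, value, flags). R1 must be CONST_PTR_TO_MAP,
 * R2/R3 are stack pointers checked against key_size/value_size (so their
 * slots are initialized first), and R4 is ARG_ANYTHING.
 *
 *   BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
 *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
 *   BPF_LD_MAP_FD(BPF_REG_1, map_fd),
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
 *   BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -16),
 *   BPF_MOV64_IMM(BPF_REG_4, 0),
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
 */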
 4145
 4146static int check_map_func_compatibility(struct bpf_verifier_env *env,
 4147					struct bpf_map *map, int func_id)
 4148{
 4149	if (!map)
 4150		return 0;
 4151
 4152	/* We need a two way check, first is from map perspective ... */
 4153	switch (map->map_type) {
 4154	case BPF_MAP_TYPE_PROG_ARRAY:
 4155		if (func_id != BPF_FUNC_tail_call)
 4156			goto error;
 4157		break;
 4158	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
 4159		if (func_id != BPF_FUNC_perf_event_read &&
 4160		    func_id != BPF_FUNC_perf_event_output &&
 4161		    func_id != BPF_FUNC_skb_output &&
 4162		    func_id != BPF_FUNC_perf_event_read_value &&
 4163		    func_id != BPF_FUNC_xdp_output)
 4164			goto error;
 4165		break;
 4166	case BPF_MAP_TYPE_RINGBUF:
 4167		if (func_id != BPF_FUNC_ringbuf_output &&
 4168		    func_id != BPF_FUNC_ringbuf_reserve &&
 4169		    func_id != BPF_FUNC_ringbuf_submit &&
 4170		    func_id != BPF_FUNC_ringbuf_discard &&
 4171		    func_id != BPF_FUNC_ringbuf_query)
 4172			goto error;
 4173		break;
 4174	case BPF_MAP_TYPE_STACK_TRACE:
 4175		if (func_id != BPF_FUNC_get_stackid)
 4176			goto error;
 4177		break;
 4178	case BPF_MAP_TYPE_CGROUP_ARRAY:
 4179		if (func_id != BPF_FUNC_skb_under_cgroup &&
 4180		    func_id != BPF_FUNC_current_task_under_cgroup)
 4181			goto error;
 4182		break;
 4183	case BPF_MAP_TYPE_CGROUP_STORAGE:
 4184	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
 4185		if (func_id != BPF_FUNC_get_local_storage)
 4186			goto error;
 4187		break;
 4188	case BPF_MAP_TYPE_DEVMAP:
 4189	case BPF_MAP_TYPE_DEVMAP_HASH:
 4190		if (func_id != BPF_FUNC_redirect_map &&
 4191		    func_id != BPF_FUNC_map_lookup_elem)
 4192			goto error;
 4193		break;
 4194	/* Restrict bpf side of cpumap and xskmap, open when use-cases
 4195	 * appear.
 4196	 */
 4197	case BPF_MAP_TYPE_CPUMAP:
 4198		if (func_id != BPF_FUNC_redirect_map)
 4199			goto error;
 4200		break;
 4201	case BPF_MAP_TYPE_XSKMAP:
 4202		if (func_id != BPF_FUNC_redirect_map &&
 4203		    func_id != BPF_FUNC_map_lookup_elem)
 4204			goto error;
 4205		break;
 4206	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 4207	case BPF_MAP_TYPE_HASH_OF_MAPS:
 4208		if (func_id != BPF_FUNC_map_lookup_elem)
 4209			goto error;
 4210		break;
 4211	case BPF_MAP_TYPE_SOCKMAP:
 4212		if (func_id != BPF_FUNC_sk_redirect_map &&
 4213		    func_id != BPF_FUNC_sock_map_update &&
 4214		    func_id != BPF_FUNC_map_delete_elem &&
 4215		    func_id != BPF_FUNC_msg_redirect_map &&
 4216		    func_id != BPF_FUNC_sk_select_reuseport &&
 4217		    func_id != BPF_FUNC_map_lookup_elem)
 4218			goto error;
 4219		break;
 4220	case BPF_MAP_TYPE_SOCKHASH:
 4221		if (func_id != BPF_FUNC_sk_redirect_hash &&
 4222		    func_id != BPF_FUNC_sock_hash_update &&
 4223		    func_id != BPF_FUNC_map_delete_elem &&
 4224		    func_id != BPF_FUNC_msg_redirect_hash &&
 4225		    func_id != BPF_FUNC_sk_select_reuseport &&
 4226		    func_id != BPF_FUNC_map_lookup_elem)
 4227			goto error;
 4228		break;
 4229	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
 4230		if (func_id != BPF_FUNC_sk_select_reuseport)
 4231			goto error;
 4232		break;
 4233	case BPF_MAP_TYPE_QUEUE:
 4234	case BPF_MAP_TYPE_STACK:
 4235		if (func_id != BPF_FUNC_map_peek_elem &&
 4236		    func_id != BPF_FUNC_map_pop_elem &&
 4237		    func_id != BPF_FUNC_map_push_elem)
 4238			goto error;
 4239		break;
 4240	case BPF_MAP_TYPE_SK_STORAGE:
 4241		if (func_id != BPF_FUNC_sk_storage_get &&
 4242		    func_id != BPF_FUNC_sk_storage_delete)
 4243			goto error;
 4244		break;
 4245	default:
 4246		break;
 4247	}
 4248
 4249	/* ... and second from the function itself. */
 4250	switch (func_id) {
 4251	case BPF_FUNC_tail_call:
 4252		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
 4253			goto error;
 4254		if (env->subprog_cnt > 1) {
 4255			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
 4256			return -EINVAL;
 4257		}
 4258		break;
 4259	case BPF_FUNC_perf_event_read:
 4260	case BPF_FUNC_perf_event_output:
 4261	case BPF_FUNC_perf_event_read_value:
 4262	case BPF_FUNC_skb_output:
 4263	case BPF_FUNC_xdp_output:
 4264		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
 4265			goto error;
 4266		break;
 4267	case BPF_FUNC_get_stackid:
 4268		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 4269			goto error;
 4270		break;
 4271	case BPF_FUNC_current_task_under_cgroup:
 4272	case BPF_FUNC_skb_under_cgroup:
 4273		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
 4274			goto error;
 4275		break;
 4276	case BPF_FUNC_redirect_map:
 4277		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
 4278		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
 4279		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
 4280		    map->map_type != BPF_MAP_TYPE_XSKMAP)
 4281			goto error;
 4282		break;
 4283	case BPF_FUNC_sk_redirect_map:
 4284	case BPF_FUNC_msg_redirect_map:
 4285	case BPF_FUNC_sock_map_update:
 4286		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
 4287			goto error;
 4288		break;
 4289	case BPF_FUNC_sk_redirect_hash:
 4290	case BPF_FUNC_msg_redirect_hash:
 4291	case BPF_FUNC_sock_hash_update:
 4292		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
 4293			goto error;
 4294		break;
 4295	case BPF_FUNC_get_local_storage:
 4296		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
 4297		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
 4298			goto error;
 4299		break;
 4300	case BPF_FUNC_sk_select_reuseport:
 4301		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
 4302		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
 4303		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
 4304			goto error;
 4305		break;
 4306	case BPF_FUNC_map_peek_elem:
 4307	case BPF_FUNC_map_pop_elem:
 4308	case BPF_FUNC_map_push_elem:
 4309		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
 4310		    map->map_type != BPF_MAP_TYPE_STACK)
 4311			goto error;
 4312		break;
 4313	case BPF_FUNC_sk_storage_get:
 4314	case BPF_FUNC_sk_storage_delete:
 4315		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 4316			goto error;
 4317		break;
 4318	default:
 4319		break;
 4320	}
 4321
 4322	return 0;
 4323error:
 4324	verbose(env, "cannot pass map_type %d into func %s#%d\n",
 4325		map->map_type, func_id_name(func_id), func_id);
 4326	return -EINVAL;
 4327}
 4328
 4329static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
 4330{
 4331	int count = 0;
 4332
 4333	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
 4334		count++;
 4335	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
 4336		count++;
 4337	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
 4338		count++;
 4339	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
 4340		count++;
 4341	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
 4342		count++;
 4343
 4344	/* We only support one arg being in raw mode at the moment,
 4345	 * which is sufficient for the helper functions we have
 4346	 * right now.
 4347	 */
 4348	return count <= 1;
 4349}
 4350
 4351static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
 4352				    enum bpf_arg_type arg_next)
 4353{
 4354	return (arg_type_is_mem_ptr(arg_curr) &&
 4355	        !arg_type_is_mem_size(arg_next)) ||
 4356	       (!arg_type_is_mem_ptr(arg_curr) &&
 4357		arg_type_is_mem_size(arg_next));
 4358}
 4359
 4360static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
 4361{
 4362	/* bpf_xxx(..., buf, len) call will access 'len'
 4363	 * bytes from memory 'buf'. Both arg types need
 4364	 * to be paired, so make sure there's no buggy
 4365	 * helper function specification.
 4366	 */
 4367	if (arg_type_is_mem_size(fn->arg1_type) ||
 4368	    arg_type_is_mem_ptr(fn->arg5_type)  ||
 4369	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
 4370	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
 4371	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
 4372	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
 4373		return false;
 4374
 4375	return true;
 4376}
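
/* Illustrative sketch: a well-formed (buf, len) pairing of the sort
 * check_arg_pair_ok() expects in a helper proto, e.g. as used by the
 * probe-read style helpers. A size argument without a preceding memory
 * pointer, or the two in swapped order, would be rejected as a buggy
 * helper specification.
 *
 *   .arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *   .arg2_type = ARG_CONST_SIZE_OR_ZERO,
 */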
 4377
 4378static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
 4379{
 4380	int count = 0;
 4381
 4382	if (arg_type_may_be_refcounted(fn->arg1_type))
 4383		count++;
 4384	if (arg_type_may_be_refcounted(fn->arg2_type))
 4385		count++;
 4386	if (arg_type_may_be_refcounted(fn->arg3_type))
 4387		count++;
 4388	if (arg_type_may_be_refcounted(fn->arg4_type))
 4389		count++;
 4390	if (arg_type_may_be_refcounted(fn->arg5_type))
 4391		count++;
 4392
 4393	/* A reference acquiring function cannot acquire
 4394	 * another refcounted ptr.
 4395	 */
 4396	if (may_be_acquire_function(func_id) && count)
 4397		return false;
 4398
 4399	/* We only support one arg being unreferenced at the moment,
 4400	 * which is sufficient for the helper functions we have right now.
 4401	 */
 4402	return count <= 1;
 4403}
 4404
 4405static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
 4406{
 4407	return check_raw_mode_ok(fn) &&
 4408	       check_arg_pair_ok(fn) &&
 4409	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
 4410}
 4411
 4412/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
 4413 * are now invalid, so turn them into unknown SCALAR_VALUE.
 4414 */
 4415static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
 4416				     struct bpf_func_state *state)
 4417{
 4418	struct bpf_reg_state *regs = state->regs, *reg;
 4419	int i;
 4420
 4421	for (i = 0; i < MAX_BPF_REG; i++)
 4422		if (reg_is_pkt_pointer_any(&regs[i]))
 4423			mark_reg_unknown(env, regs, i);
 4424
 4425	bpf_for_each_spilled_reg(i, state, reg) {
 4426		if (!reg)
 4427			continue;
 4428		if (reg_is_pkt_pointer_any(reg))
 4429			__mark_reg_unknown(env, reg);
 4430	}
 4431}
 4432
 4433static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 4434{
 4435	struct bpf_verifier_state *vstate = env->cur_state;
 4436	int i;
 4437
 4438	for (i = 0; i <= vstate->curframe; i++)
 4439		__clear_all_pkt_pointers(env, vstate->frame[i]);
 4440}
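
/* Illustrative sketch: after a data-changing helper such as
 * bpf_skb_pull_data(), every previously derived PTR_TO_PACKET[_META,_END]
 * is scrubbed by clear_all_pkt_pointers(), so a program has to reload the
 * pointers from the ctx before touching packet memory again, e.g.:
 *
 *   data     = (void *)(long)skb->data;
 *   data_end = (void *)(long)skb->data_end;
 */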
 4441
 4442static void release_reg_references(struct bpf_verifier_env *env,
 4443				   struct bpf_func_state *state,
 4444				   int ref_obj_id)
 4445{
 4446	struct bpf_reg_state *regs = state->regs, *reg;
 4447	int i;
 4448
 4449	for (i = 0; i < MAX_BPF_REG; i++)
 4450		if (regs[i].ref_obj_id == ref_obj_id)
 4451			mark_reg_unknown(env, regs, i);
 4452
 4453	bpf_for_each_spilled_reg(i, state, reg) {
 4454		if (!reg)
 4455			continue;
 4456		if (reg->ref_obj_id == ref_obj_id)
 4457			__mark_reg_unknown(env, reg);
 4458	}
 4459}
 4460
 4461/* The pointer with the specified id has released its reference to kernel
 4462 * resources. Identify all copies of the same pointer and clear the reference.
 4463 */
 4464static int release_reference(struct bpf_verifier_env *env,
 4465			     int ref_obj_id)
 4466{
 4467	struct bpf_verifier_state *vstate = env->cur_state;
 4468	int err;
 4469	int i;
 4470
 4471	err = release_reference_state(cur_func(env), ref_obj_id);
 4472	if (err)
 4473		return err;
 4474
 4475	for (i = 0; i <= vstate->curframe; i++)
 4476		release_reg_references(env, vstate->frame[i], ref_obj_id);
 4477
 4478	return 0;
 4479}
 4480
 4481static void clear_caller_saved_regs(struct bpf_verifier_env *env,
 4482				    struct bpf_reg_state *regs)
 4483{
 4484	int i;
 4485
 4486	/* after the call registers r0 - r5 were scratched */
 4487	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 4488		mark_reg_not_init(env, regs, caller_saved[i]);
 4489		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 4490	}
 4491}
 4492
 4493static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 4494			   int *insn_idx)
 4495{
 4496	struct bpf_verifier_state *state = env->cur_state;
 4497	struct bpf_func_info_aux *func_info_aux;
 4498	struct bpf_func_state *caller, *callee;
 4499	int i, err, subprog, target_insn;
 4500	bool is_global = false;
 4501
 4502	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
 4503		verbose(env, "the call stack of %d frames is too deep\n",
 4504			state->curframe + 2);
 4505		return -E2BIG;
 4506	}
 4507
 4508	target_insn = *insn_idx + insn->imm;
 4509	subprog = find_subprog(env, target_insn + 1);
 4510	if (subprog < 0) {
 4511		verbose(env, "verifier bug. No program starts at insn %d\n",
 4512			target_insn + 1);
 4513		return -EFAULT;
 4514	}
 4515
 4516	caller = state->frame[state->curframe];
 4517	if (state->frame[state->curframe + 1]) {
 4518		verbose(env, "verifier bug. Frame %d already allocated\n",
 4519			state->curframe + 1);
 4520		return -EFAULT;
 4521	}
 4522
 4523	func_info_aux = env->prog->aux->func_info_aux;
 4524	if (func_info_aux)
 4525		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
 4526	err = btf_check_func_arg_match(env, subprog, caller->regs);
 4527	if (err == -EFAULT)
 4528		return err;
 4529	if (is_global) {
 4530		if (err) {
 4531			verbose(env, "Caller passes invalid args into func#%d\n",
 4532				subprog);
 4533			return err;
 4534		} else {
 4535			if (env->log.level & BPF_LOG_LEVEL)
 4536				verbose(env,
 4537					"Func#%d is global and valid. Skipping.\n",
 4538					subprog);
 4539			clear_caller_saved_regs(env, caller->regs);
 4540
 4541			/* All global functions return SCALAR_VALUE */
 4542			mark_reg_unknown(env, caller->regs, BPF_REG_0);
 4543
 4544			/* continue with next insn after call */
 4545			return 0;
 4546		}
 4547	}
 4548
 4549	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
 4550	if (!callee)
 4551		return -ENOMEM;
 4552	state->frame[state->curframe + 1] = callee;
 4553
 4554	/* callee cannot access r0, r6 - r9 for reading and has to write
 4555	 * into its own stack before reading from it.
 4556	 * callee can read/write into caller's stack
 4557	 */
 4558	init_func_state(env, callee,
 4559			/* remember the callsite, it will be used by bpf_exit */
 4560			*insn_idx /* callsite */,
 4561			state->curframe + 1 /* frameno within this callchain */,
 4562			subprog /* subprog number within this prog */);
 4563
 4564	/* Transfer references to the callee */
 4565	err = transfer_reference_state(callee, caller);
 4566	if (err)
 4567		return err;
 4568
 4569	/* copy r1 - r5 args that callee can access.  The copy includes parent
 4570	 * pointers, which connects us up to the liveness chain
 4571	 */
 4572	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
 4573		callee->regs[i] = caller->regs[i];
 4574
 4575	clear_caller_saved_regs(env, caller->regs);
 4576
 4577	/* only increment it after check_reg_arg() finished */
 4578	state->curframe++;
 4579
 4580	/* and go analyze first insn of the callee */
 4581	*insn_idx = target_insn;
 4582
 4583	if (env->log.level & BPF_LOG_LEVEL) {
 4584		verbose(env, "caller:\n");
 4585		print_verifier_state(env, caller);
 4586		verbose(env, "callee:\n");
 4587		print_verifier_state(env, callee);
 4588	}
 4589	return 0;
 4590}
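
/* Illustrative sketch: a bpf-to-bpf pseudo call as handled by
 * check_func_call(). src_reg is BPF_PSEUDO_CALL and insn->imm is the offset
 * of the callee's first instruction relative to the instruction following
 * the call (here a hypothetical subprogram three insns further on):
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 3),
 */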
 4591
 4592static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 4593{
 4594	struct bpf_verifier_state *state = env->cur_state;
 4595	struct bpf_func_state *caller, *callee;
 4596	struct bpf_reg_state *r0;
 4597	int err;
 4598
 4599	callee = state->frame[state->curframe];
 4600	r0 = &callee->regs[BPF_REG_0];
 4601	if (r0->type == PTR_TO_STACK) {
 4602		/* technically it's ok to return caller's stack pointer
 4603		 * (or caller's caller's pointer) back to the caller,
 4604		 * since these pointers are valid. Only current stack
 4605		 * pointer will be invalid as soon as function exits,
 4606		 * but let's be conservative
 4607		 */
 4608		verbose(env, "cannot return stack pointer to the caller\n");
 4609		return -EINVAL;
 4610	}
 4611
 4612	state->curframe--;
 4613	caller = state->frame[state->curframe];
 4614	/* return to the caller whatever r0 had in the callee */
 4615	caller->regs[BPF_REG_0] = *r0;
 4616
 4617	/* Transfer references to the caller */
 4618	err = transfer_reference_state(caller, callee);
 4619	if (err)
 4620		return err;
 4621
 4622	*insn_idx = callee->callsite + 1;
 4623	if (env->log.level & BPF_LOG_LEVEL) {
 4624		verbose(env, "returning from callee:\n");
 4625		print_verifier_state(env, callee);
 4626		verbose(env, "to caller at %d:\n", *insn_idx);
 4627		print_verifier_state(env, caller);
 4628	}
 4629	/* clear everything in the callee */
 4630	free_func_state(callee);
 4631	state->frame[state->curframe + 1] = NULL;
 4632	return 0;
 4633}
 4634
 4635static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
 4636				   int func_id,
 4637				   struct bpf_call_arg_meta *meta)
 4638{
 4639	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
 4640
 4641	if (ret_type != RET_INTEGER ||
 4642	    (func_id != BPF_FUNC_get_stack &&
 4643	     func_id != BPF_FUNC_probe_read_str &&
 4644	     func_id != BPF_FUNC_probe_read_kernel_str &&
 4645	     func_id != BPF_FUNC_probe_read_user_str))
 4646		return;
 4647
 4648	ret_reg->smax_value = meta->msize_max_value;
 4649	ret_reg->s32_max_value = meta->msize_max_value;
 4650	__reg_deduce_bounds(ret_reg);
 4651	__reg_bound_offset(ret_reg);
 4652	__update_reg_bounds(ret_reg);
 4653}
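
/* Illustrative worked example (hypothetical buffer size): for
 *
 *   err = bpf_get_stack(ctx, buf, 64, 0);
 *
 * check_func_arg() recorded the size argument's umax_value (64) in
 * meta->msize_max_value, so do_refine_retval_range() clamps R0's
 * smax_value/s32_max_value to 64, while the lower bound stays negative to
 * allow error returns.
 */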
 4654
 4655static int
 4656record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 4657		int func_id, int insn_idx)
 4658{
 4659	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 4660	struct bpf_map *map = meta->map_ptr;
 4661
 4662	if (func_id != BPF_FUNC_tail_call &&
 4663	    func_id != BPF_FUNC_map_lookup_elem &&
 4664	    func_id != BPF_FUNC_map_update_elem &&
 4665	    func_id != BPF_FUNC_map_delete_elem &&
 4666	    func_id != BPF_FUNC_map_push_elem &&
 4667	    func_id != BPF_FUNC_map_pop_elem &&
 4668	    func_id != BPF_FUNC_map_peek_elem)
 4669		return 0;
 4670
 4671	if (map == NULL) {
 4672		verbose(env, "kernel subsystem misconfigured verifier\n");
 4673		return -EINVAL;
 4674	}
 4675
 4676	/* In case of read-only, some additional restrictions
 4677	 * need to be applied in order to prevent altering the
 4678	 * state of the map from the program side.
 4679	 */
 4680	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
 4681	    (func_id == BPF_FUNC_map_delete_elem ||
 4682	     func_id == BPF_FUNC_map_update_elem ||
 4683	     func_id == BPF_FUNC_map_push_elem ||
 4684	     func_id == BPF_FUNC_map_pop_elem)) {
 4685		verbose(env, "write into map forbidden\n");
 4686		return -EACCES;
 4687	}
 4688
 4689	if (!BPF_MAP_PTR(aux->map_ptr_state))
 4690		bpf_map_ptr_store(aux, meta->map_ptr,
 4691				  !meta->map_ptr->bypass_spec_v1);
 4692	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
 4693		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
 4694				  !meta->map_ptr->bypass_spec_v1);
 4695	return 0;
 4696}
 4697
 4698static int
 4699record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 4700		int func_id, int insn_idx)
 4701{
 4702	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 4703	struct bpf_reg_state *regs = cur_regs(env), *reg;
 4704	struct bpf_map *map = meta->map_ptr;
 4705	struct tnum range;
 4706	u64 val;
 4707	int err;
 4708
 4709	if (func_id != BPF_FUNC_tail_call)
 4710		return 0;
 4711	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
 4712		verbose(env, "kernel subsystem misconfigured verifier\n");
 4713		return -EINVAL;
 4714	}
 4715
 4716	range = tnum_range(0, map->max_entries - 1);
 4717	reg = &regs[BPF_REG_3];
 4718
 4719	if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
 4720		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 4721		return 0;
 4722	}
 4723
 4724	err = mark_chain_precision(env, BPF_REG_3);
 4725	if (err)
 4726		return err;
 4727
 4728	val = reg->var_off.value;
 4729	if (bpf_map_key_unseen(aux))
 4730		bpf_map_key_store(aux, val);
 4731	else if (!bpf_map_key_poisoned(aux) &&
 4732		  bpf_map_key_immediate(aux) != val)
 4733		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 4734	return 0;
 4735}
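
/* Illustrative sketch (prog_array_fd is a placeholder): a tail call whose
 * index in R3 is a known constant inside the prog_array bounds. In that
 * case record_func_key() stores the key so the call can later be patched
 * into a direct tail call; a non-constant or out-of-range index poisons
 * the key instead.
 *
 *   BPF_LD_MAP_FD(BPF_REG_2, prog_array_fd),
 *   BPF_MOV64_IMM(BPF_REG_3, 2),
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
 */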
 4736
 4737static int check_reference_leak(struct bpf_verifier_env *env)
 4738{
 4739	struct bpf_func_state *state = cur_func(env);
 4740	int i;
 4741
 4742	for (i = 0; i < state->acquired_refs; i++) {
 4743		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
 4744			state->refs[i].id, state->refs[i].insn_idx);
 4745	}
 4746	return state->acquired_refs ? -EINVAL : 0;
 4747}
 4748
 4749static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 4750{
 4751	const struct bpf_func_proto *fn = NULL;
 4752	struct bpf_reg_state *regs;
 4753	struct bpf_call_arg_meta meta;
 4754	bool changes_data;
 4755	int i, err;
 4756
 4757	/* find function prototype */
 4758	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
 4759		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
 4760			func_id);
 4761		return -EINVAL;
 4762	}
 4763
 4764	if (env->ops->get_func_proto)
 4765		fn = env->ops->get_func_proto(func_id, env->prog);
 4766	if (!fn) {
 4767		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
 4768			func_id);
 4769		return -EINVAL;
 4770	}
 4771
 4772	/* eBPF programs must be GPL compatible to use GPL-ed functions */
 4773	if (!env->prog->gpl_compatible && fn->gpl_only) {
 4774		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
 4775		return -EINVAL;
 4776	}
 4777
 4778	/* With LD_ABS/IND some JITs save/restore skb from r1. */
 4779	changes_data = bpf_helper_changes_pkt_data(fn->func);
 4780	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
 4781		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
 4782			func_id_name(func_id), func_id);
 4783		return -EINVAL;
 4784	}
 4785
 4786	memset(&meta, 0, sizeof(meta));
 4787	meta.pkt_access = fn->pkt_access;
 4788
 4789	err = check_func_proto(fn, func_id);
 4790	if (err) {
 4791		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
 4792			func_id_name(func_id), func_id);
 4793		return err;
 4794	}
 4795
 4796	meta.func_id = func_id;
 4797	/* check args */
 4798	for (i = 0; i < 5; i++) {
 4799		if (!fn->check_btf_id) {
 4800			err = btf_resolve_helper_id(&env->log, fn, i);
 4801			if (err > 0)
 4802				meta.btf_id = err;
 4803		}
 4804		err = check_func_arg(env, i, &meta, fn);
 4805		if (err)
 4806			return err;
 4807	}
 4808
 4809	err = record_func_map(env, &meta, func_id, insn_idx);
 4810	if (err)
 4811		return err;
 4812
 4813	err = record_func_key(env, &meta, func_id, insn_idx);
 4814	if (err)
 4815		return err;
 4816
 4817	/* Mark slots with STACK_MISC in case of raw mode, stack offset
 4818	 * is inferred from register state.
 4819	 */
 4820	for (i = 0; i < meta.access_size; i++) {
 4821		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
 4822				       BPF_WRITE, -1, false);
 4823		if (err)
 4824			return err;
 4825	}
 4826
 4827	if (func_id == BPF_FUNC_tail_call) {
 4828		err = check_reference_leak(env);
 4829		if (err) {
 4830			verbose(env, "tail_call would lead to reference leak\n");
 4831			return err;
 4832		}
 4833	} else if (is_release_function(func_id)) {
 4834		err = release_reference(env, meta.ref_obj_id);
 4835		if (err) {
 4836			verbose(env, "func %s#%d reference has not been acquired before\n",
 4837				func_id_name(func_id), func_id);
 4838			return err;
 4839		}
 4840	}
 4841
 4842	regs = cur_regs(env);
 4843
 4844	/* check that the flags argument in get_local_storage(map, flags) is 0;
 4845	 * this is required because get_local_storage() can't return an error.
 4846	 */
 4847	if (func_id == BPF_FUNC_get_local_storage &&
 4848	    !register_is_null(&regs[BPF_REG_2])) {
 4849		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
 4850		return -EINVAL;
 4851	}
 4852
 4853	/* reset caller saved regs */
 4854	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 4855		mark_reg_not_init(env, regs, caller_saved[i]);
 4856		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 4857	}
 4858
 4859	/* helper call returns 64-bit value. */
 4860	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
 4861
 4862	/* update return register (already marked as written above) */
 4863	if (fn->ret_type == RET_INTEGER) {
 4864		/* sets type to SCALAR_VALUE */
 4865		mark_reg_unknown(env, regs, BPF_REG_0);
 4866	} else if (fn->ret_type == RET_VOID) {
 4867		regs[BPF_REG_0].type = NOT_INIT;
 4868	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
 4869		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
 4870		/* There is no offset yet applied, variable or fixed */
 4871		mark_reg_known_zero(env, regs, BPF_REG_0);
 4872		/* remember map_ptr, so that check_map_access()
 4873		 * can check 'value_size' boundary of memory access
 4874		 * to map element returned from bpf_map_lookup_elem()
 4875		 */
 4876		if (meta.map_ptr == NULL) {
 4877			verbose(env,
 4878				"kernel subsystem misconfigured verifier\n");
 4879			return -EINVAL;
 4880		}
 4881		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 4882		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
 4883			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
 4884			if (map_value_has_spin_lock(meta.map_ptr))
 4885				regs[BPF_REG_0].id = ++env->id_gen;
 4886		} else {
 4887			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
 4888			regs[BPF_REG_0].id = ++env->id_gen;
 4889		}
 4890	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
 4891		mark_reg_known_zero(env, regs, BPF_REG_0);
 4892		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
 4893		regs[BPF_REG_0].id = ++env->id_gen;
 4894	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
 4895		mark_reg_known_zero(env, regs, BPF_REG_0);
 4896		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
 4897		regs[BPF_REG_0].id = ++env->id_gen;
 4898	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
 4899		mark_reg_known_zero(env, regs, BPF_REG_0);
 4900		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
 4901		regs[BPF_REG_0].id = ++env->id_gen;
 4902	} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
 4903		mark_reg_known_zero(env, regs, BPF_REG_0);
 4904		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
 4905		regs[BPF_REG_0].id = ++env->id_gen;
 4906		regs[BPF_REG_0].mem_size = meta.mem_size;
 4907	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
 4908		int ret_btf_id;
 4909
 4910		mark_reg_known_zero(env, regs, BPF_REG_0);
 4911		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
 4912		ret_btf_id = *fn->ret_btf_id;
 4913		if (ret_btf_id == 0) {
 4914			verbose(env, "invalid return type %d of func %s#%d\n",
 4915				fn->ret_type, func_id_name(func_id), func_id);
 4916			return -EINVAL;
 4917		}
 4918		regs[BPF_REG_0].btf_id = ret_btf_id;
 4919	} else {
 4920		verbose(env, "unknown return type %d of func %s#%d\n",
 4921			fn->ret_type, func_id_name(func_id), func_id);
 4922		return -EINVAL;
 4923	}
 4924
 4925	if (is_ptr_cast_function(func_id)) {
 4926		/* For release_reference() */
 4927		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
 4928	} else if (is_acquire_function(func_id, meta.map_ptr)) {
 4929		int id = acquire_reference_state(env, insn_idx);
 4930
 4931		if (id < 0)
 4932			return id;
 4933		/* For mark_ptr_or_null_reg() */
 4934		regs[BPF_REG_0].id = id;
 4935		/* For release_reference() */
 4936		regs[BPF_REG_0].ref_obj_id = id;
 4937	}
 4938
 4939	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
 4940
 4941	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
 4942	if (err)
 4943		return err;
 4944
 4945	if ((func_id == BPF_FUNC_get_stack ||
 4946	     func_id == BPF_FUNC_get_task_stack) &&
 4947	    !env->prog->has_callchain_buf) {
 4948		const char *err_str;
 4949
 4950#ifdef CONFIG_PERF_EVENTS
 4951		err = get_callchain_buffers(sysctl_perf_event_max_stack);
 4952		err_str = "cannot get callchain buffer for func %s#%d\n";
 4953#else
 4954		err = -ENOTSUPP;
 4955		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
 4956#endif
 4957		if (err) {
 4958			verbose(env, err_str, func_id_name(func_id), func_id);
 4959			return err;
 4960		}
 4961
 4962		env->prog->has_callchain_buf = true;
 4963	}
 4964
 4965	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
 4966		env->prog->call_get_stack = true;
 4967
 4968	if (changes_data)
 4969		clear_all_pkt_pointers(env);
 4970	return 0;
 4971}
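
/* For example, a successful bpf_map_lookup_elem() call takes the
 * RET_PTR_TO_MAP_VALUE_OR_NULL branch above: R0 becomes a
 * PTR_TO_MAP_VALUE_OR_NULL with a fresh id, so that a later NULL check can
 * convert this particular pointer (and all copies carrying the same id)
 * into a plain PTR_TO_MAP_VALUE.
 */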
 4972
 4973static bool signed_add_overflows(s64 a, s64 b)
 4974{
 4975	/* Do the add in u64, where overflow is well-defined */
 4976	s64 res = (s64)((u64)a + (u64)b);
 4977
 4978	if (b < 0)
 4979		return res > a;
 4980	return res < a;
 4981}
 4982
 4983	static bool signed_add32_overflows(s32 a, s32 b)
 4984{
 4985	/* Do the add in u32, where overflow is well-defined */
 4986	s32 res = (s32)((u32)a + (u32)b);
 4987
 4988	if (b < 0)
 4989		return res > a;
 4990	return res < a;
 4991}
 4992
 4993	static bool signed_sub_overflows(s64 a, s64 b)
 4994{
 4995	/* Do the sub in u64, where overflow is well-defined */
 4996	s64 res = (s64)((u64)a - (u64)b);
 4997
 4998	if (b < 0)
 4999		return res < a;
 5000	return res > a;
 5001}
 5002
 5003static bool signed_sub32_overflows(s32 a, s32 b)
 5004{
 5005	/* Do the sub in u32, where overflow is well-defined */
 5006	s32 res = (s32)((u32)a - (u32)b);
 5007
 5008	if (b < 0)
 5009		return res < a;
 5010	return res > a;
 5011}
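
/* Example for the overflow helpers above: signed_add_overflows(S64_MAX, 1)
 * performs the addition in u64, where it wraps to (u64)S64_MIN; seen as s64
 * the result is smaller than the first operand even though a positive value
 * was added, so overflow is reported.  Adding a negative value can only
 * overflow downwards, which is why the comparison flips on the sign of 'b'.
 */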
 5012
 5013static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 5014				  const struct bpf_reg_state *reg,
 5015				  enum bpf_reg_type type)
 5016{
 5017	bool known = tnum_is_const(reg->var_off);
 5018	s64 val = reg->var_off.value;
 5019	s64 smin = reg->smin_value;
 5020
 5021	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
 5022		verbose(env, "math between %s pointer and %lld is not allowed\n",
 5023			reg_type_str[type], val);
 5024		return false;
 5025	}
 5026
 5027	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
 5028		verbose(env, "%s pointer offset %d is not allowed\n",
 5029			reg_type_str[type], reg->off);
 5030		return false;
 5031	}
 5032
 5033	if (smin == S64_MIN) {
 5034		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
 5035			reg_type_str[type]);
 5036		return false;
 5037	}
 5038
 5039	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
 5040		verbose(env, "value %lld makes %s pointer be out of bounds\n",
 5041			smin, reg_type_str[type]);
 5042		return false;
 5043	}
 5044
 5045	return true;
 5046}
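
/* For example, an offset register whose smin_value is S64_MIN (a completely
 * unbounded minimum) is rejected above, as is any known constant offset at
 * or beyond +/-BPF_MAX_VAR_OFF, since neither could be bounds-checked
 * meaningfully once folded into a pointer.
 */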
 5047
 5048static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
 5049{
 5050	return &env->insn_aux_data[env->insn_idx];
 5051}
 5052
 5053static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 5054			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
 5055{
 5056	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
 5057			    (opcode == BPF_SUB && !off_is_neg);
 5058	u32 off;
 5059
 5060	switch (ptr_reg->type) {
 5061	case PTR_TO_STACK:
 5062		/* Indirect variable offset stack access is prohibited in
 5063		 * unprivileged mode so it's not handled here.
 5064		 */
 5065		off = ptr_reg->off + ptr_reg->var_off.value;
 5066		if (mask_to_left)
 5067			*ptr_limit = MAX_BPF_STACK + off;
 5068		else
 5069			*ptr_limit = -off;
 5070		return 0;
 5071	case PTR_TO_MAP_VALUE:
 5072		if (mask_to_left) {
 5073			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
 5074		} else {
 5075			off = ptr_reg->smin_value + ptr_reg->off;
 5076			*ptr_limit = ptr_reg->map_ptr->value_size - off;
 5077		}
 5078		return 0;
 5079	default:
 5080		return -EINVAL;
 5081	}
 5082}
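
/* Example: for a PTR_TO_STACK register sitting at a constant offset of -16
 * that is about to have a negative scalar added to it (mask_to_left), the
 * limit above becomes MAX_BPF_STACK + (-16), the distance down to the lowest
 * valid stack slot; in the other direction the limit is simply 16, the
 * distance back up to the frame pointer.
 */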
 5083
 5084static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
 5085				    const struct bpf_insn *insn)
 5086{
 5087	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
 5088}
 5089
 5090static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
 5091				       u32 alu_state, u32 alu_limit)
 5092{
 5093	/* If we arrived here from different branches with different
 5094	 * state or limits to sanitize, then this won't work.
 5095	 */
 5096	if (aux->alu_state &&
 5097	    (aux->alu_state != alu_state ||
 5098	     aux->alu_limit != alu_limit))
 5099		return -EACCES;
 5100
 5101	/* Corresponding fixup done in fixup_bpf_calls(). */
 5102	aux->alu_state = alu_state;
 5103	aux->alu_limit = alu_limit;
 5104	return 0;
 5105}
 5106
 5107static int sanitize_val_alu(struct bpf_verifier_env *env,
 5108			    struct bpf_insn *insn)
 5109{
 5110	struct bpf_insn_aux_data *aux = cur_aux(env);
 5111
 5112	if (can_skip_alu_sanitation(env, insn))
 5113		return 0;
 5114
 5115	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
 5116}
 5117
 5118static int sanitize_ptr_alu(struct bpf_verifier_env *env,
 5119			    struct bpf_insn *insn,
 5120			    const struct bpf_reg_state *ptr_reg,
 5121			    struct bpf_reg_state *dst_reg,
 5122			    bool off_is_neg)
 5123{
 5124	struct bpf_verifier_state *vstate = env->cur_state;
 5125	struct bpf_insn_aux_data *aux = cur_aux(env);
 5126	bool ptr_is_dst_reg = ptr_reg == dst_reg;
 5127	u8 opcode = BPF_OP(insn->code);
 5128	u32 alu_state, alu_limit;
 5129	struct bpf_reg_state tmp;
 5130	bool ret;
 5131
 5132	if (can_skip_alu_sanitation(env, insn))
 5133		return 0;
 5134
 5135	/* We already marked aux for masking from non-speculative
 5136	 * paths, thus we got here in the first place. We only care
 5137	 * to explore bad access from here.
 5138	 */
 5139	if (vstate->speculative)
 5140		goto do_sim;
 5141
 5142	alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
 5143	alu_state |= ptr_is_dst_reg ?
 5144		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
 5145
 5146	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
 5147		return 0;
 5148	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
 5149		return -EACCES;
 5150do_sim:
 5151	/* Simulate and find potential out-of-bounds access under
 5152	 * speculative execution from truncation as a result of
 5153	 * masking when off was not within expected range. If off
 5154	 * sits in dst, then we temporarily need to move ptr there
 5155	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
 5156	 * for cases where we use K-based arithmetic in one direction
 5157	 * and truncated reg-based in the other in order to explore
 5158	 * bad access.
 5159	 */
 5160	if (!ptr_is_dst_reg) {
 5161		tmp = *dst_reg;
 5162		*dst_reg = *ptr_reg;
 5163	}
 5164	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
 5165	if (!ptr_is_dst_reg && ret)
 5166		*dst_reg = tmp;
 5167	return !ret ? -EFAULT : 0;
 5168}
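
/* Illustration: for "r1 = map_value; r1 += r2" with an attacker-influenced
 * scalar in r2, the alu_limit recorded above is later used by
 * fixup_bpf_calls() to emit masking instructions that bound r2 before the
 * pointer arithmetic, while the push_stack() call queues a speculative
 * state so that the masked (possibly truncated) result is itself verified.
 */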
 5169
 5170/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
 5171 * Caller should also handle BPF_MOV case separately.
 5172 * If we return -EACCES, caller may want to try again treating pointer as a
 5173 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
 5174 */
 5175static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 5176				   struct bpf_insn *insn,
 5177				   const struct bpf_reg_state *ptr_reg,
 5178				   const struct bpf_reg_state *off_reg)
 5179{
 5180	struct bpf_verifier_state *vstate = env->cur_state;
 5181	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 5182	struct bpf_reg_state *regs = state->regs, *dst_reg;
 5183	bool known = tnum_is_const(off_reg->var_off);
 5184	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
 5185	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
 5186	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
 5187	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
 5188	u32 dst = insn->dst_reg, src = insn->src_reg;
 5189	u8 opcode = BPF_OP(insn->code);
 5190	int ret;
 5191
 5192	dst_reg = &regs[dst];
 5193
 5194	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
 5195	    smin_val > smax_val || umin_val > umax_val) {
 5196		/* Taint dst register if offset had invalid bounds derived from
 5197		 * e.g. dead branches.
 5198		 */
 5199		__mark_reg_unknown(env, dst_reg);
 5200		return 0;
 5201	}
 5202
 5203	if (BPF_CLASS(insn->code) != BPF_ALU64) {
 5204		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
 5205		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
 5206			__mark_reg_unknown(env, dst_reg);
 5207			return 0;
 5208		}
 5209
 5210		verbose(env,
 5211			"R%d 32-bit pointer arithmetic prohibited\n",
 5212			dst);
 5213		return -EACCES;
 5214	}
 5215
 5216	switch (ptr_reg->type) {
 5217	case PTR_TO_MAP_VALUE_OR_NULL:
 5218		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
 5219			dst, reg_type_str[ptr_reg->type]);
 5220		return -EACCES;
 5221	case CONST_PTR_TO_MAP:
 5222	case PTR_TO_PACKET_END:
 5223	case PTR_TO_SOCKET:
 5224	case PTR_TO_SOCKET_OR_NULL:
 5225	case PTR_TO_SOCK_COMMON:
 5226	case PTR_TO_SOCK_COMMON_OR_NULL:
 5227	case PTR_TO_TCP_SOCK:
 5228	case PTR_TO_TCP_SOCK_OR_NULL:
 5229	case PTR_TO_XDP_SOCK:
 5230		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
 5231			dst, reg_type_str[ptr_reg->type]);
 5232		return -EACCES;
 5233	case PTR_TO_MAP_VALUE:
 5234		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
 5235			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
 5236				off_reg == dst_reg ? dst : src);
 5237			return -EACCES;
 5238		}
 5239		fallthrough;
 5240	default:
 5241		break;
 5242	}
 5243
 5244	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
 5245	 * The id may be overwritten later if we create a new variable offset.
 5246	 */
 5247	dst_reg->type = ptr_reg->type;
 5248	dst_reg->id = ptr_reg->id;
 5249
 5250	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
 5251	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
 5252		return -EINVAL;
 5253
 5254	/* pointer types do not carry 32-bit bounds at the moment. */
 5255	__mark_reg32_unbounded(dst_reg);
 5256
 5257	switch (opcode) {
 5258	case BPF_ADD:
 5259		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
 5260		if (ret < 0) {
 5261			verbose(env, "R%d tried to add from different maps or paths\n", dst);
 5262			return ret;
 5263		}
 5264		/* We can take a fixed offset as long as it doesn't overflow
 5265		 * the s32 'off' field
 5266		 */
 5267		if (known && (ptr_reg->off + smin_val ==
 5268			      (s64)(s32)(ptr_reg->off + smin_val))) {
 5269			/* pointer += K.  Accumulate it into fixed offset */
 5270			dst_reg->smin_value = smin_ptr;
 5271			dst_reg->smax_value = smax_ptr;
 5272			dst_reg->umin_value = umin_ptr;
 5273			dst_reg->umax_value = umax_ptr;
 5274			dst_reg->var_off = ptr_reg->var_off;
 5275			dst_reg->off = ptr_reg->off + smin_val;
 5276			dst_reg->raw = ptr_reg->raw;
 5277			break;
 5278		}
 5279		/* A new variable offset is created.  Note that off_reg->off
 5280		 * == 0, since it's a scalar.
 5281		 * dst_reg gets the pointer type and since some positive
 5282		 * integer value was added to the pointer, give it a new 'id'
 5283		 * if it's a PTR_TO_PACKET.
 5284		 * this creates a new 'base' pointer, off_reg (variable) gets
 5285		 * added into the variable offset, and we copy the fixed offset
 5286		 * from ptr_reg.
 5287		 */
 5288		if (signed_add_overflows(smin_ptr, smin_val) ||
 5289		    signed_add_overflows(smax_ptr, smax_val)) {
 5290			dst_reg->smin_value = S64_MIN;
 5291			dst_reg->smax_value = S64_MAX;
 5292		} else {
 5293			dst_reg->smin_value = smin_ptr + smin_val;
 5294			dst_reg->smax_value = smax_ptr + smax_val;
 5295		}
 5296		if (umin_ptr + umin_val < umin_ptr ||
 5297		    umax_ptr + umax_val < umax_ptr) {
 5298			dst_reg->umin_value = 0;
 5299			dst_reg->umax_value = U64_MAX;
 5300		} else {
 5301			dst_reg->umin_value = umin_ptr + umin_val;
 5302			dst_reg->umax_value = umax_ptr + umax_val;
 5303		}
 5304		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
 5305		dst_reg->off = ptr_reg->off;
 5306		dst_reg->raw = ptr_reg->raw;
 5307		if (reg_is_pkt_pointer(ptr_reg)) {
 5308			dst_reg->id = ++env->id_gen;
 5309			/* something was added to pkt_ptr, set range to zero */
 5310			dst_reg->raw = 0;
 5311		}
 5312		break;
 5313	case BPF_SUB:
 5314		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
 5315		if (ret < 0) {
 5316			verbose(env, "R%d tried to sub from different maps or paths\n", dst);
 5317			return ret;
 5318		}
 5319		if (dst_reg == off_reg) {
 5320			/* scalar -= pointer.  Creates an unknown scalar */
 5321			verbose(env, "R%d tried to subtract pointer from scalar\n",
 5322				dst);
 5323			return -EACCES;
 5324		}
 5325		/* We don't allow subtraction from FP, because (according to the
 5326		 * test_verifier.c test "invalid fp arithmetic") JITs might not
 5327		 * be able to deal with it.
 5328		 */
 5329		if (ptr_reg->type == PTR_TO_STACK) {
 5330			verbose(env, "R%d subtraction from stack pointer prohibited\n",
 5331				dst);
 5332			return -EACCES;
 5333		}
 5334		if (known && (ptr_reg->off - smin_val ==
 5335			      (s64)(s32)(ptr_reg->off - smin_val))) {
 5336			/* pointer -= K.  Subtract it from fixed offset */
 5337			dst_reg->smin_value = smin_ptr;
 5338			dst_reg->smax_value = smax_ptr;
 5339			dst_reg->umin_value = umin_ptr;
 5340			dst_reg->umax_value = umax_ptr;
 5341			dst_reg->var_off = ptr_reg->var_off;
 5342			dst_reg->id = ptr_reg->id;
 5343			dst_reg->off = ptr_reg->off - smin_val;
 5344			dst_reg->raw = ptr_reg->raw;
 5345			break;
 5346		}
 5347		/* A new variable offset is created.  If the subtrahend is known
 5348		 * nonnegative, then any reg->range we had before is still good.
 5349		 */
 5350		if (signed_sub_overflows(smin_ptr, smax_val) ||
 5351		    signed_sub_overflows(smax_ptr, smin_val)) {
 5352			/* Overflow possible, we know nothing */
 5353			dst_reg->smin_value = S64_MIN;
 5354			dst_reg->smax_value = S64_MAX;
 5355		} else {
 5356			dst_reg->smin_value = smin_ptr - smax_val;
 5357			dst_reg->smax_value = smax_ptr - smin_val;
 5358		}
 5359		if (umin_ptr < umax_val) {
 5360			/* Overflow possible, we know nothing */
 5361			dst_reg->umin_value = 0;
 5362			dst_reg->umax_value = U64_MAX;
 5363		} else {
 5364			/* Cannot overflow (as long as bounds are consistent) */
 5365			dst_reg->umin_value = umin_ptr - umax_val;
 5366			dst_reg->umax_value = umax_ptr - umin_val;
 5367		}
 5368		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
 5369		dst_reg->off = ptr_reg->off;
 5370		dst_reg->raw = ptr_reg->raw;
 5371		if (reg_is_pkt_pointer(ptr_reg)) {
 5372			dst_reg->id = ++env->id_gen;
 5373			/* something may have been added to pkt_ptr, set range to zero */
 5374			if (smin_val < 0)
 5375				dst_reg->raw = 0;
 5376		}
 5377		break;
 5378	case BPF_AND:
 5379	case BPF_OR:
 5380	case BPF_XOR:
 5381		/* bitwise ops on pointers are troublesome, prohibit. */
 5382		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
 5383			dst, bpf_alu_string[opcode >> 4]);
 5384		return -EACCES;
 5385	default:
 5386		/* other operators (e.g. MUL,LSH) produce non-pointer results */
 5387		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
 5388			dst, bpf_alu_string[opcode >> 4]);
 5389		return -EACCES;
 5390	}
 5391
 5392	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
 5393		return -EINVAL;
 5394
 5395	__update_reg_bounds(dst_reg);
 5396	__reg_deduce_bounds(dst_reg);
 5397	__reg_bound_offset(dst_reg);
 5398
 5399	/* For unprivileged users we require that the resulting offset be in
 5400	 * bounds in order to be able to sanitize the access later on.
 5401	 */
 5402	if (!env->bypass_spec_v1) {
 5403		if (dst_reg->type == PTR_TO_MAP_VALUE &&
 5404		    check_map_access(env, dst, dst_reg->off, 1, false)) {
 5405			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
 5406				"prohibited for !root\n", dst);
 5407			return -EACCES;
 5408		} else if (dst_reg->type == PTR_TO_STACK &&
 5409			   check_stack_access(env, dst_reg, dst_reg->off +
 5410					      dst_reg->var_off.value, 1)) {
 5411			verbose(env, "R%d stack pointer arithmetic goes out of range, "
 5412				"prohibited for !root\n", dst);
 5413			return -EACCES;
 5414		}
 5415	}
 5416
 5417	return 0;
 5418}
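
/* Worked example: with r1 = PTR_TO_MAP_VALUE (off=0) and r2 a scalar known
 * to lie in [0, 16], "r1 += r2" takes the variable-offset BPF_ADD path
 * above: r1 keeps the map value pointer type, its fixed off stays 0, and
 * its umin/umax become 0 and 16 while var_off widens accordingly, so a
 * later check_map_access() can still verify the access against value_size.
 */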
 5419
 5420static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
 5421				 struct bpf_reg_state *src_reg)
 5422{
 5423	s32 smin_val = src_reg->s32_min_value;
 5424	s32 smax_val = src_reg->s32_max_value;
 5425	u32 umin_val = src_reg->u32_min_value;
 5426	u32 umax_val = src_reg->u32_max_value;
 5427
 5428	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
 5429	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
 5430		dst_reg->s32_min_value = S32_MIN;
 5431		dst_reg->s32_max_value = S32_MAX;
 5432	} else {
 5433		dst_reg->s32_min_value += smin_val;
 5434		dst_reg->s32_max_value += smax_val;
 5435	}
 5436	if (dst_reg->u32_min_value + umin_val < umin_val ||
 5437	    dst_reg->u32_max_value + umax_val < umax_val) {
 5438		dst_reg->u32_min_value = 0;
 5439		dst_reg->u32_max_value = U32_MAX;
 5440	} else {
 5441		dst_reg->u32_min_value += umin_val;
 5442		dst_reg->u32_max_value += umax_val;
 5443	}
 5444}
 5445
 5446static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
 5447			       struct bpf_reg_state *src_reg)
 5448{
 5449	s64 smin_val = src_reg->smin_value;
 5450	s64 smax_val = src_reg->smax_value;
 5451	u64 umin_val = src_reg->umin_value;
 5452	u64 umax_val = src_reg->umax_value;
 5453
 5454	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
 5455	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
 5456		dst_reg->smin_value = S64_MIN;
 5457		dst_reg->smax_value = S64_MAX;
 5458	} else {
 5459		dst_reg->smin_value += smin_val;
 5460		dst_reg->smax_value += smax_val;
 5461	}
 5462	if (dst_reg->umin_value + umin_val < umin_val ||
 5463	    dst_reg->umax_value + umax_val < umax_val) {
 5464		dst_reg->umin_value = 0;
 5465		dst_reg->umax_value = U64_MAX;
 5466	} else {
 5467		dst_reg->umin_value += umin_val;
 5468		dst_reg->umax_value += umax_val;
 5469	}
 5470}
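
/* Example: dst in [10, 20] plus src in [1, 5] yields [11, 25] for both the
 * signed and unsigned bounds; only when an addition could wrap (e.g. a umax
 * close to U64_MAX) do the corresponding bounds collapse to the full range.
 */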
 5471
 5472static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
 5473				 struct bpf_reg_state *src_reg)
 5474{
 5475	s32 smin_val = src_reg->s32_min_value;
 5476	s32 smax_val = src_reg->s32_max_value;
 5477	u32 umin_val = src_reg->u32_min_value;
 5478	u32 umax_val = src_reg->u32_max_value;
 5479
 5480	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
 5481	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
 5482		/* Overflow possible, we know nothing */
 5483		dst_reg->s32_min_value = S32_MIN;
 5484		dst_reg->s32_max_value = S32_MAX;
 5485	} else {
 5486		dst_reg->s32_min_value -= smax_val;
 5487		dst_reg->s32_max_value -= smin_val;
 5488	}
 5489	if (dst_reg->u32_min_value < umax_val) {
 5490		/* Overflow possible, we know nothing */
 5491		dst_reg->u32_min_value = 0;
 5492		dst_reg->u32_max_value = U32_MAX;
 5493	} else {
 5494		/* Cannot overflow (as long as bounds are consistent) */
 5495		dst_reg->u32_min_value -= umax_val;
 5496		dst_reg->u32_max_value -= umin_val;
 5497	}
 5498}
 5499
 5500static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
 5501			       struct bpf_reg_state *src_reg)
 5502{
 5503	s64 smin_val = src_reg->smin_value;
 5504	s64 smax_val = src_reg->smax_value;
 5505	u64 umin_val = src_reg->umin_value;
 5506	u64 umax_val = src_reg->umax_value;
 5507
 5508	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
 5509	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
 5510		/* Overflow possible, we know nothing */
 5511		dst_reg->smin_value = S64_MIN;
 5512		dst_reg->smax_value = S64_MAX;
 5513	} else {
 5514		dst_reg->smin_value -= smax_val;
 5515		dst_reg->smax_value -= smin_val;
 5516	}
 5517	if (dst_reg->umin_value < umax_val) {
 5518		/* Overflow possible, we know nothing */
 5519		dst_reg->umin_value = 0;
 5520		dst_reg->umax_value = U64_MAX;
 5521	} else {
 5522		/* Cannot overflow (as long as bounds are consistent) */
 5523		dst_reg->umin_value -= umax_val;
 5524		dst_reg->umax_value -= umin_val;
 5525	}
 5526}
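
/* Example: dst in [10, 20] minus src in [1, 5] gives signed bounds [5, 19]:
 * the new minimum subtracts the largest possible src and the new maximum the
 * smallest.  For the unsigned bounds, dst->umin_value < src->umax_value
 * means the result may wrap below zero, so they reset to [0, U64_MAX].
 */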
 5527
 5528static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
 5529				 struct bpf_reg_state *src_reg)
 5530{
 5531	s32 smin_val = src_reg->s32_min_value;
 5532	u32 umin_val = src_reg->u32_min_value;
 5533	u32 umax_val = src_reg->u32_max_value;
 5534
 5535	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
 5536		/* Ain't nobody got time to multiply that sign */
 5537		__mark_reg32_unbounded(dst_reg);
 5538		return;
 5539	}
 5540	/* Both values are positive, so we can work with unsigned and
 5541	 * copy the result to signed (unless it exceeds S32_MAX).
 5542	 */
 5543	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
 5544		/* Potential overflow, we know nothing */
 5545		__mark_reg32_unbounded(dst_reg);
 5546		return;
 5547	}
 5548	dst_reg->u32_min_value *= umin_val;
 5549	dst_reg->u32_max_value *= umax_val;
 5550	if (dst_reg->u32_max_value > S32_MAX) {
 5551		/* Overflow possible, we know nothing */
 5552		dst_reg->s32_min_value = S32_MIN;
 5553		dst_reg->s32_max_value = S32_MAX;
 5554	} else {
 5555		dst_reg->s32_min_value = dst_reg->u32_min_value;
 5556		dst_reg->s32_max_value = dst_reg->u32_max_value;
 5557	}
 5558}
 5559
 5560static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
 5561			       struct bpf_reg_state *src_reg)
 5562{
 5563	s64 smin_val = src_reg->smin_value;
 5564	u64 umin_val = src_reg->umin_value;
 5565	u64 umax_val = src_reg->umax_value;
 5566
 5567	if (smin_val < 0 || dst_reg->smin_value < 0) {
 5568		/* Ain't nobody got time to multiply that sign */
 5569		__mark_reg64_unbounded(dst_reg);
 5570		return;
 5571	}
 5572	/* Both values are positive, so we can work with unsigned and
 5573	 * copy the result to signed (unless it exceeds S64_MAX).
 5574	 */
 5575	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
 5576		/* Potential overflow, we know nothing */
 5577		__mark_reg64_unbounded(dst_reg);
 5578		return;
 5579	}
 5580	dst_reg->umin_value *= umin_val;
 5581	dst_reg->umax_value *= umax_val;
 5582	if (dst_reg->umax_value > S64_MAX) {
 5583		/* Overflow possible, we know nothing */
 5584		dst_reg->smin_value = S64_MIN;
 5585		dst_reg->smax_value = S64_MAX;
 5586	} else {
 5587		dst_reg->smin_value = dst_reg->umin_value;
 5588		dst_reg->smax_value = dst_reg->umax_value;
 5589	}
 5590}
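
/* Example: dst in [2, 10] times src in [3, 4], all positive and no larger
 * than U32_MAX, gives unsigned bounds [6, 40], which also fit in s64 and are
 * copied to the signed bounds.  If either umax exceeded U32_MAX the product
 * could overflow 64 bits, hence the unbounded marking above.
 */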
 5591
 5592static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
 5593				 struct bpf_reg_state *src_reg)
 5594{
 5595	bool src_known = tnum_subreg_is_const(src_reg->var_off);
 5596	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
 5597	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
 5598	s32 smin_val = src_reg->s32_min_value;
 5599	u32 umax_val = src_reg->u32_max_value;
 5600
 5601	/* Assuming scalar_min_max_and() will be called so it's safe
 5602	 * to skip updating the register for the known 32-bit case.
 5603	 */
 5604	if (src_known && dst_known)
 5605		return;
 5606
 5607	/* We get our minimum from the var_off, since that's inherently
 5608	 * bitwise.  Our maximum is the minimum of the operands' maxima.
 5609	 */
 5610	dst_reg->u32_min_value = var32_off.value;
 5611	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
 5612	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
 5613		/* Lose signed bounds when ANDing negative numbers,
 5614		 * ain't nobody got time for that.
 5615		 */
 5616		dst_reg->s32_min_value = S32_MIN;
 5617		dst_reg->s32_max_value = S32_MAX;
 5618	} else {
 5619		/* ANDing two positives gives a positive, so safe to
 5620		 * cast result into s32.
 5621		 */
 5622		dst_reg->s32_min_value = dst_reg->u32_min_value;
 5623		dst_reg->s32_max_value = dst_reg->u32_max_value;
 5624	}
 5625
 5626}
 5627
 5628static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
 5629			       struct bpf_reg_state *src_reg)
 5630{
 5631	bool src_known = tnum_is_const(src_reg->var_off);
 5632	bool dst_known = tnum_is_const(dst_reg->var_off);
 5633	s64 smin_val = src_reg->smin_value;
 5634	u64 umax_val = src_reg->umax_value;
 5635
 5636	if (src_known && dst_known) {
 5637		__mark_reg_known(dst_reg, dst_reg->var_off.value &
 5638					  src_reg->var_off.value);
 5639		return;
 5640	}
 5641
 5642	/* We get our minimum from the var_off, since that's inherently
 5643	 * bitwise.  Our maximum is the minimum of the operands' maxima.
 5644	 */
 5645	dst_reg->umin_value = dst_reg->var_off.value;
 5646	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
 5647	if (dst_reg->smin_value < 0 || smin_val < 0) {
 5648		/* Lose signed bounds when ANDing negative numbers,
 5649		 * ain't nobody got time for that.
 5650		 */
 5651		dst_reg->smin_value = S64_MIN;
 5652		dst_reg->smax_value = S64_MAX;
 5653	} else {
 5654		/* ANDing two positives gives a positive, so safe to
 5655		 * cast result into s64.
 5656		 */
 5657		dst_reg->smin_value = dst_reg->umin_value;
 5658		dst_reg->smax_value = dst_reg->umax_value;
 5659	}
 5660	/* We may learn something more from the var_off */
 5661	__update_reg_bounds(dst_reg);
 5662}
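
/* Example: ANDing a register whose var_off has bit 3 known set with a
 * constant 0xff leaves umin_value at least 8 (from the known bits of the
 * resulting var_off) and umax_value at most 0xff (the smaller of the two
 * operands' maxima); if both signed minima are nonnegative the signed
 * bounds simply follow the unsigned ones.
 */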
 5663
 5664static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
 5665				struct bpf_reg_state *src_reg)
 5666{
 5667	bool src_known = tnum_subreg_is_const(src_reg->var_off);
 5668	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
 5669	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
 5670	s32 smin_val = src_reg->s32_min_value;
 5671	u32 umin_val = src_reg->u32_min_value;
 5672
 5673	/* Assuming scalar_min_max_or() will be called so it is safe
 5674	 * to skip updating the register for the known case.
 5675	 */
 5676	if (src_known && dst_known)
 5677		return;
 5678
 5679	/* We get our maximum from the var_off, and our minimum is the
 5680	 * maximum of the operands' minima
 5681	 */
 5682	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
 5683	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
 5684	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
 5685		/* Lose signed bounds when ORing negative numbers,
 5686		 * ain't nobody got time for that.
 5687		 */
 5688		dst_reg->s32_min_value = S32_MIN;
 5689		dst_reg->s32_max_value = S32_MAX;
 5690	} else {
 5691		/* ORing two positives gives a positive, so safe to
 5692		 * cast result into s32.
 5693		 */
 5694		dst_reg->s32_min_value = dst_reg->u32_min_value;
 5695		dst_reg->s32_max_value = dst_reg->u32_max_value;
 5696	}
 5697}
 5698
 5699static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
 5700			      struct bpf_reg_state *src_reg)
 5701{
 5702	bool src_known = tnum_is_const(src_reg->var_off);
 5703	bool dst_known = tnum_is_const(dst_reg->var_off);
 5704	s64 smin_val = src_reg->smin_value;
 5705	u64 umin_val = src_reg->umin_value;
 5706
 5707	if (src_known && dst_known) {
 5708		__mark_reg_known(dst_reg, dst_reg->var_off.value |
 5709					  src_reg->var_off.value);
 5710		return;
 5711	}
 5712
 5713	/* We get our maximum from the var_off, and our minimum is the
 5714	 * maximum of the operands' minima
 5715	 */
 5716	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
 5717	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
 5718	if (dst_reg->smin_value < 0 || smin_val < 0) {
 5719		/* Lose signed bounds when ORing negative numbers,
 5720		 * ain't nobody got time for that.
 5721		 */
 5722		dst_reg->smin_value = S64_MIN;
 5723		dst_reg->smax_value = S64_MAX;
 5724	} else {
 5725		/* ORing two positives gives a positive, so safe to
 5726		 * cast result into s64.
 5727		 */
 5728		dst_reg->smin_value = dst_reg->umin_value;
 5729		dst_reg->smax_value = dst_reg->umax_value;
 5730	}
 5731	/* We may learn something more from the var_off */
 5732	__update_reg_bounds(dst_reg);
 5733}
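
/* Example: ORing a register known to be 4 with one bounded by [0, 3]
 * (var_off mask 0x3) gives umin = max(4, 0) = 4 and, from the resulting
 * var_off, umax = value | mask = 7: OR can only set bits, so the result
 * lies between the larger minimum and the value with all unknown bits set.
 */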
 5734
 5735static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
 5736				   u64 umin_val, u64 umax_val)
 5737{
 5738	/* We lose all sign bit information (except what we can pick
 5739	 * up from var_off)
 5740	 */
 5741	dst_reg->s32_min_value = S32_MIN;
 5742	dst_reg->s32_max_value = S32_MAX;
 5743	/* If we might shift our top bit out, then we know nothing */
 5744	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
 5745		dst_reg->u32_min_value = 0;
 5746		dst_reg->u32_max_value = U32_MAX;
 5747	} else {
 5748		dst_reg->u32_min_value <<= umin_val;
 5749		dst_reg->u32_max_value <<= umax_val;
 5750	}
 5751}
 5752
 5753static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
 5754				 struct bpf_reg_state *src_reg)
 5755{
 5756	u32 umax_val = src_reg->u32_max_value;
 5757	u32 umin_val = src_reg->u32_min_value;
 5758	/* u32 alu operation will zext upper bits */
 5759	struct tnum subreg = tnum_subreg(dst_reg->var_off);
 5760
 5761	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
 5762	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
 5763	/* Not strictly required, but to be careful mark the reg64 bounds as
 5764	 * unknown so that we are forced to pick them up from the tnum and zext
 5765	 * later; if some path skips this step we are still safe.
 5766	 */
 5767	__mark_reg64_unbounded(dst_reg);
 5768	__update_reg32_bounds(dst_reg);
 5769}
 5770
 5771static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
 5772				   u64 umin_val, u64 umax_val)
 5773{
 5774	/* Special case <<32 because it is a common compiler pattern to sign
 5775	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
 5776	 * positive we know this shift will also be positive so we can track
 5777	 * bounds correctly. Otherwise we lose all sign bit information except
 5778	 * what we can pick up from var_off. Perhaps we can generalize this
 5779	 * later to shifts of any length.
 5780	 */
 5781	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
 5782		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
 5783	else
 5784		dst_reg->smax_value = S64_MAX;
 5785
 5786	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
 5787		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
 5788	else
 5789		dst_reg->smin_value = S64_MIN;
 5790
 5791	/* If we might shift our top bit out, then we know nothing */
 5792	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
 5793		dst_reg->umin_value = 0;
 5794		dst_reg->umax_value = U64_MAX;
 5795	} else {
 5796		dst_reg->umin_value <<= umin_val;
 5797		dst_reg->umax_value <<= umax_val;
 5798	}
 5799}
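
/* Example of the special case above: a register holding a 32-bit value in
 * [0, 100] shifted left by exactly 32 gets smin_value = 0 and
 * smax_value = (s64)100 << 32, so the "<<32 s>>32" sign-extension idiom
 * emitted by compilers keeps usable bounds instead of collapsing to
 * [S64_MIN, S64_MAX].
 */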
 5800
 5801static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
 5802			       struct bpf_reg_state *src_reg)
 5803{
 5804	u64 umax_val = src_reg->umax_value;
 5805	u64 umin_val = src_reg->umin_value;
 5806
 5807	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
 5808	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
 5809	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
 5810
 5811	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
 5812	/* We may learn something more from the var_off */
 5813	__update_reg_bounds(dst_reg);
 5814}
 5815
 5816static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
 5817				 struct bpf_reg_state *src_reg)
 5818{
 5819	struct tnum subreg = tnum_subreg(dst_reg->var_off);
 5820	u32 umax_val = src_reg->u32_max_value;
 5821	u32 umin_val = src_reg->u32_min_value;
 5822
 5823	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
 5824	 * be negative, then either:
 5825	 * 1) src_reg might be zero, so the sign bit of the result is
 5826	 *    unknown, so we lose our signed bounds
 5827	 * 2) it's known negative, thus the unsigned bounds capture the
 5828	 *    signed bounds
 5829	 * 3) the signed bounds cross zero, so they tell us nothing
 5830	 *    about the result
 5831	 * If the value in dst_reg is known nonnegative, then again the
 5832	 * unsigned bounds capture the signed bounds.
 5833	 * Thus, in all cases it suffices to blow away our signed bounds
 5834	 * and rely on inferring new ones from the unsigned bounds and
 5835	 * var_off of the result.
 5836	 */
 5837	dst_reg->s32_min_value = S32_MIN;
 5838	dst_reg->s32_max_value = S32_MAX;
 5839
 5840	dst_reg->var_off = tnum_rshift(subreg, umin_val);
 5841	dst_reg->u32_min_value >>= umax_val;
 5842	dst_reg->u32_max_value >>= umin_val;
 5843
 5844	__mark_reg64_unbounded(dst_reg);
 5845	__update_reg32_bounds(dst_reg);
 5846}
 5847
 5848static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
 5849			       struct bpf_reg_state *src_reg)
 5850{
 5851	u64 umax_val = src_reg->umax_value;
 5852	u64 umin_val = src_reg->umin_value;
 5853
 5854	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
 5855	 * be negative, then either:
 5856	 * 1) src_reg might be zero, so the sign bit of the result is
 5857	 *    unknown, so we lose our signed bounds
 5858	 * 2) it's known negative, thus the unsigned bounds capture the
 5859	 *    signed bounds
 5860	 * 3) the signed bounds cross zero, so they tell us nothing
 5861	 *    about the result
 5862	 * If the value in dst_reg is known nonnegative, then again the
 5863	 * unsigned bounds capture the signed bounds.
 5864	 * Thus, in all cases it suffices to blow away our signed bounds
 5865	 * and rely on inferring new ones from the unsigned bounds and
 5866	 * var_off of the result.
 5867	 */
 5868	dst_reg->smin_value = S64_MIN;
 5869	dst_reg->smax_value = S64_MAX;
 5870	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
 5871	dst_reg->umin_value >>= umax_val;
 5872	dst_reg->umax_value >>= umin_val;
 5873
 5874	/* It's not easy to operate on alu32 bounds here because it depends
 5875	 * on bits being shifted in. Take the easy way out and mark unbounded
 5876	 * so we can recalculate later from tnum.
 5877	 */
 5878	__mark_reg32_unbounded(dst_reg);
 5879	__update_reg_bounds(dst_reg);
 5880}
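
/* Example: shifting a register in [16, 256] right by an amount known to lie
 * in [2, 4] yields umin = 16 >> 4 = 1 and umax = 256 >> 2 = 64: the smallest
 * result pairs the smallest value with the largest shift, and vice versa.
 * The signed bounds are then rederived from the unsigned bounds and var_off.
 */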
 5881
 5882static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
 5883				  struct bpf_reg_state *src_reg)
 5884{
 5885	u64 umin_val = src_reg->u32_min_value;
 5886
 5887	/* Upon reaching here, src_known is true and
 5888	 * umax_val is equal to umin_val.
 5889	 */
 5890	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
 5891	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
 5892
 5893	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
 5894
 5895	/* blow away the dst_reg umin_value/umax_value and rely on
 5896	 * dst_reg var_off to refine the result.
 5897	 */
 5898	dst_reg->u32_min_value = 0;
 5899	dst_reg->u32_max_value = U32_MAX;
 5900
 5901	__mark_reg64_unbounded(dst_reg);
 5902	__update_reg32_bounds(dst_reg);
 5903}
 5904
 5905static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
 5906				struct bpf_reg_state *src_reg)
 5907{
 5908	u64 umin_val = src_reg->umin_value;
 5909
 5910	/* Upon reaching here, src_known is true and umax_val is equal
 5911	 * to umin_val.
 5912	 */
 5913	dst_reg->smin_value >>= umin_val;
 5914	dst_reg->smax_value >>= umin_val;
 5915
 5916	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
 5917
 5918	/* blow away the dst_reg umin_value/umax_value and rely on
 5919	 * dst_reg var_off to refine the result.
 5920	 */
 5921	dst_reg->umin_value = 0;
 5922	dst_reg->umax_value = U64_MAX;
 5923
 5924	/* It's not easy to operate on alu32 bounds here because it depends
 5925	 * on bits being shifted in from the upper 32 bits. Take the easy way out
 5926	 * and mark unbounded so we can recalculate later from tnum.
 5927	 */
 5928	__mark_reg32_unbounded(dst_reg);
 5929	__update_reg_bounds(dst_reg);
 5930}
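
/* Example: an arithmetic right shift by a known constant 4 of a register
 * with signed bounds [-64, 64] gives [-4, 4], since the sign is preserved;
 * the unsigned bounds are deliberately discarded and recomputed from the
 * tnum, as they cannot be shifted this directly.
 */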
 5931
 5932/* WARNING: This function does calculations on 64-bit values, but the actual
 5933 * execution may occur on 32-bit values. Therefore, things like bitshifts
 5934 * need extra checks in the 32-bit case.
 5935 */
 5936static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 5937				      struct bpf_insn *insn,
 5938				      struct bpf_reg_state *dst_reg,
 5939				      struct bpf_reg_state src_reg)
 5940{
 5941	struct bpf_reg_state *regs = cur_regs(env);
 5942	u8 opcode = BPF_OP(insn->code);
 5943	bool src_known;
 5944	s64 smin_val, smax_val;
 5945	u64 umin_val, umax_val;
 5946	s32 s32_min_val, s32_max_val;
 5947	u32 u32_min_val, u32_max_val;
 5948	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
 5949	u32 dst = insn->dst_reg;
 5950	int ret;
 5951	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
 5952
 5953	smin_val = src_reg.smin_value;
 5954	smax_val = src_reg.smax_value;
 5955	umin_val = src_reg.umin_value;
 5956	umax_val = src_reg.umax_value;
 5957
 5958	s32_min_val = src_reg.s32_min_value;
 5959	s32_max_val = src_reg.s32_max_value;
 5960	u32_min_val = src_reg.u32_min_value;
 5961	u32_max_val = src_reg.u32_max_value;
 5962
 5963	if (alu32) {
 5964		src_known = tnum_subreg_is_const(src_reg.var_off);
 5965		if ((src_known &&
 5966		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
 5967		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
 5968			/* Taint dst register if offset had invalid bounds
 5969			 * derived from e.g. dead branches.
 5970			 */
 5971			__mark_reg_unknown(env, dst_reg);
 5972			return 0;
 5973		}
 5974	} else {
 5975		src_known = tnum_is_const(src_reg.var_off);
 5976		if ((src_known &&
 5977		     (smin_val != smax_val || umin_val != umax_val)) ||
 5978		    smin_val > smax_val || umin_val > umax_val) {
 5979			/* Taint dst register if offset had invalid bounds
 5980			 * derived from e.g. dead branches.
 5981			 */
 5982			__mark_reg_unknown(env, dst_reg);
 5983			return 0;
 5984		}
 5985	}
 5986
 5987	if (!src_known &&
 5988	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
 5989		__mark_reg_unknown(env, dst_reg);
 5990		return 0;
 5991	}
 5992
 5993	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
 5994	 * There are two classes of instructions: for the first class we track
 5995	 * both alu32 and alu64 sign/unsigned bounds independently; this provides
 5996	 * the greatest amount of precision when alu operations are mixed with
 5997	 * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
 5998	 * BPF_AND, and BPF_OR. This is possible because these ops have fairly
 5999	 * easy-to-understand and easy-to-calculate behavior in both 32-bit and
 6000	 * 64-bit alu ops. See the alu32 verifier tests for examples. The second
 6001	 * class of operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, is not
 6002	 * so easy with regard to tracking sign/unsigned bounds because the bits may
 6003	 * cross subreg boundaries in the alu64 case. When this happens we mark
 6004	 * the reg unbounded in the subreg bound space and use the resulting
 6005	 * tnum to calculate an approximation of the sign/unsigned bounds.
 6006	 */
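	/* For instance, BPF_ADD below updates the 32-bit and 64-bit bounds
	 * separately and computes the tnum with tnum_add(), whereas a 64-bit
	 * BPF_RSH keeps only the 64-bit bounds and marks the 32-bit ones
	 * unbounded so they can be recovered from the resulting tnum later.
	 */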
 6007	switch (opcode) {
 6008	case BPF_ADD:
 6009		ret = sanitize_val_alu(env, insn);
 6010		if (ret < 0) {
 6011			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
 6012			return ret;
 6013		}
 6014		scalar32_min_max_add(dst_reg, &src_reg);
 6015		scalar_min_max_add(dst_reg, &src_reg);
 6016		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
 6017		break;
 6018	case BPF_SUB:
 6019		ret = sanitize_val_alu(env, insn);
 6020		if (ret < 0) {
 6021			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
 6022			return ret;
 6023		}
 6024		scalar32_min_max_sub(dst_reg, &src_reg);
 6025		scalar_min_max_sub(dst_reg, &src_reg);
 6026		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
 6027		break;
 6028	case BPF_MUL:
 6029		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
 6030		scalar32_min_max_mul(dst_reg, &src_reg);
 6031		scalar_min_max_mul(dst_reg, &src_reg);
 6032		break;
 6033	case BPF_AND:
 6034		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
 6035		scalar32_min_max_and(dst_reg, &src_reg);
 6036		scalar_min_max_and(dst_reg, &src_reg);
 6037		break;
 6038	case BPF_OR:
 6039		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
 6040		scalar32_min_max_or(dst_reg, &src_reg);
 6041		scalar_min_max_or(dst_reg, &src_reg);
 6042		break;
 6043	case BPF_LSH:
 6044		if (umax_val >= insn_bitness) {
 6045			/* Shifts greater than 31 or 63 are undefined.
 6046			 * This includes shifts by a negative number.
 6047			 */
 6048			mark_reg_unknown(env, regs, insn->dst_reg);
 6049			break;
 6050		}
 6051		if (alu32)
 6052			scalar32_min_max_lsh(dst_reg, &src_reg);
 6053		else
 6054			scalar_min_max_lsh(dst_reg, &src_reg);
 6055		break;
 6056	case BPF_RSH:
 6057		if (umax_val >= insn_bitness) {
 6058			/* Shifts greater than 31 or 63 are undefined.
 6059			 * This includes shifts by a negative number.
 6060			 */
 6061			mark_reg_unknown(env, regs, insn->dst_reg);
 6062			break;
 6063		}
 6064		if (alu32)
 6065			scalar32_min_max_rsh(dst_reg, &src_reg);
 6066		else
 6067			scalar_min_max_rsh(dst_reg, &src_reg);
 6068		break;
 6069	case BPF_ARSH:
 6070		if (umax_val >= insn_bitness) {
 6071			/* Shifts greater than 31 or 63 are undefined.
 6072			 * This includes shifts by a negative number.
 6073			 */
 6074			mark_reg_unknown(env, regs, insn->dst_reg);
 6075			break;
 6076		}
 6077		if (alu32)
 6078			scalar32_min_max_arsh(dst_reg, &src_reg);
 6079		else
 6080			scalar_min_max_arsh(dst_reg, &src_reg);
 6081		break;
 6082	default:
 6083		mark_reg_unknown(env, regs, insn->dst_reg);
 6084		break;
 6085	}
 6086
 6087	/* ALU32 ops are zero extended into 64bit register */
 6088	if (alu32)
 6089		zext_32_to_64(dst_reg);
 6090
 6091	__update_reg_bounds(dst_reg);
 6092	__reg_deduce_bounds(dst_reg);
 6093	__reg_bound_offset(dst_reg);
 6094	return 0;
 6095}
 6096
 6097/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
 6098 * and var_off.
 6099 */
 6100static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
 6101				   struct bpf_insn *insn)
 6102{
 6103	struct bpf_verifier_state *vstate = env->cur_state;
 6104	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 6105	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
 6106	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
 6107	u8 opcode = BPF_OP(insn->code);
 6108	int err;
 6109
 6110	dst_reg = &regs[insn->dst_reg];
 6111	src_reg = NULL;
 6112	if (dst_reg->type != SCALAR_VALUE)
 6113		ptr_reg = dst_reg;
 6114	if (BPF_SRC(insn->code) == BPF_X) {
 6115		src_reg = &regs[insn->src_reg];
 6116		if (src_reg->type != SCALAR_VALUE) {
 6117			if (dst_reg->type != SCALAR_VALUE) {
 6118				/* Combining two pointers by any ALU op yields
 6119				 * an arbitrary scalar. Disallow all math except
 6120				 * pointer subtraction
 6121				 */
 6122				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
 6123					mark_reg_unknown(env, regs, insn->dst_reg);
 6124					return 0;
 6125				}
 6126				verbose(env, "R%d pointer %s pointer prohibited\n",
 6127					insn->dst_reg,
 6128					bpf_alu_string[opcode >> 4]);
 6129				return -EACCES;
 6130			} else {
 6131				/* scalar += pointer
 6132				 * This is legal, but we have to reverse our
 6133				 * src/dest handling in computing the range
 6134				 */
 6135				err = mark_chain_precision(env, insn->dst_reg);
 6136				if (err)
 6137					return err;
 6138				return adjust_ptr_min_max_vals(env, insn,
 6139							       src_reg, dst_reg);
 6140			}
 6141		} else if (ptr_reg) {
 6142			/* pointer += scalar */
 6143			err = mark_chain_precision(env, insn->src_reg);
 6144			if (err)
 6145				return err;
 6146			return adjust_ptr_min_max_vals(env, insn,
 6147						       dst_reg, src_reg);
 6148		}
 6149	} else {
 6150		/* Pretend the src is a reg with a known value, since we only
 6151		 * need to be able to read from this state.
 6152		 */
 6153		off_reg.type = SCALAR_VALUE;
 6154		__mark_reg_known(&off_reg, insn->imm);
 6155		src_reg = &off_reg;
 6156		if (ptr_reg) /* pointer += K */
 6157			return adjust_ptr_min_max_vals(env, insn,
 6158						       ptr_reg, src_reg);
 6159	}
 6160
 6161	/* Got here implies adding two SCALAR_VALUEs */
 6162	if (WARN_ON_ONCE(ptr_reg)) {
 6163		print_verifier_state(env, state);
 6164		verbose(env, "verifier internal error: unexpected ptr_reg\n");
 6165		return -EINVAL;
 6166	}
 6167	if (WARN_ON(!src_reg)) {
 6168		print_verifier_state(env, state);
 6169		verbose(env, "verifier internal error: no src_reg\n");
 6170		return -EINVAL;
 6171	}
 6172	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
 6173}
 6174
 6175/* check validity of 32-bit and 64-bit arithmetic operations */
 6176static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
 6177{
 6178	struct bpf_reg_state *regs = cur_regs(env);
 6179	u8 opcode = BPF_OP(insn->code);
 6180	int err;
 6181
 6182	if (opcode == BPF_END || opcode == BPF_NEG) {
 6183		if (opcode == BPF_NEG) {
 6184			if (BPF_SRC(insn->code) != 0 ||
 6185			    insn->src_reg != BPF_REG_0 ||
 6186			    insn->off != 0 || insn->imm != 0) {
 6187				verbose(env, "BPF_NEG uses reserved fields\n");
 6188				return -EINVAL;
 6189			}
 6190		} else {
 6191			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
 6192			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
 6193			    BPF_CLASS(insn->code) == BPF_ALU64) {
 6194				verbose(env, "BPF_END uses reserved fields\n");
 6195				return -EINVAL;
 6196			}
 6197		}
 6198
 6199		/* check src operand */
 6200		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 6201		if (err)
 6202			return err;
 6203
 6204		if (is_pointer_value(env, insn->dst_reg)) {
 6205			verbose(env, "R%d pointer arithmetic prohibited\n",
 6206				insn->dst_reg);
 6207			return -EACCES;
 6208		}
 6209
 6210		/* check dest operand */
 6211		err = check_reg_arg(env, insn->dst_reg, DST_OP);
 6212		if (err)
 6213			return err;
 6214
 6215	} else if (opcode == BPF_MOV) {
 6216
 6217		if (BPF_SRC(insn->code) == BPF_X) {
 6218			if (insn->imm != 0 || insn->off != 0) {
 6219				verbose(env, "BPF_MOV uses reserved fields\n");
 6220				return -EINVAL;
 6221			}
 6222
 6223			/* check src operand */
 6224			err = check_reg_arg(env, insn->src_reg, SRC_OP);
 6225			if (err)
 6226				return err;
 6227		} else {
 6228			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
 6229				verbose(env, "BPF_MOV uses reserved fields\n");
 6230				return -EINVAL;
 6231			}
 6232		}
 6233
 6234		/* check dest operand, mark as required later */
 6235		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
 6236		if (err)
 6237			return err;
 6238
 6239		if (BPF_SRC(insn->code) == BPF_X) {
 6240			struct bpf_reg_state *src_reg = regs + insn->src_reg;
 6241			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
 6242
 6243			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 6244				/* case: R1 = R2
 6245				 * copy register state to dest reg
 6246				 */
 6247				*dst_reg = *src_reg;
 6248				dst_reg->live |= REG_LIVE_WRITTEN;
 6249				dst_reg->subreg_def = DEF_NOT_SUBREG;
 6250			} else {
 6251				/* R1 = (u32) R2 */
 6252				if (is_pointer_value(env, insn->src_reg)) {
 6253					verbose(env,
 6254						"R%d partial copy of pointer\n",
 6255						insn->src_reg);
 6256					return -EACCES;
 6257				} else if (src_reg->type == SCALAR_VALUE) {
 6258					*dst_reg = *src_reg;
 6259					dst_reg->live |= REG_LIVE_WRITTEN;
 6260					dst_reg->subreg_def = env->insn_idx + 1;
 6261				} else {
 6262					mark_reg_unknown(env, regs,
 6263							 insn->dst_reg);
 6264				}
 6265				zext_32_to_64(dst_reg);
 6266			}
 6267		} else {
 6268			/* case: R = imm
 6269			 * remember the value we stored into this reg
 6270			 */
 6271			/* clear any state __mark_reg_known doesn't set */
 6272			mark_reg_unknown(env, regs, insn->dst_reg);
 6273			regs[insn->dst_reg].type = SCALAR_VALUE;
 6274			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 6275				__mark_reg_known(regs + insn->dst_reg,
 6276						 insn->imm);
 6277			} else {
 6278				__mark_reg_known(regs + insn->dst_reg,
 6279						 (u32)insn->imm);
 6280			}
 6281		}
 6282
 6283	} else if (opcode > BPF_END) {
 6284		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
 6285		return -EINVAL;
 6286
 6287	} else {	/* all other ALU ops: and, sub, xor, add, ... */
 6288
 6289		if (BPF_SRC(insn->code) == BPF_X) {
 6290			if (insn->imm != 0 || insn->off != 0) {
 6291				verbose(env, "BPF_ALU uses reserved fields\n");
 6292				return -EINVAL;
 6293			}
 6294			/* check src1 operand */
 6295			err = check_reg_arg(env, insn->src_reg, SRC_OP);
 6296			if (err)
 6297				return err;
 6298		} else {
 6299			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
 6300				verbose(env, "BPF_ALU uses reserved fields\n");
 6301				return -EINVAL;
 6302			}
 6303		}
 6304
 6305		/* check src2 operand */
 6306		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 6307		if (err)
 6308			return err;
 6309
 6310		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
 6311		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
 6312			verbose(env, "div by zero\n");
 6313			return -EINVAL;
 6314		}
 6315
 6316		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
 6317		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
 6318			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
 6319
 6320			if (insn->imm < 0 || insn->imm >= size) {
 6321				verbose(env, "invalid shift %d\n", insn->imm);
 6322				return -EINVAL;
 6323			}
 6324		}
 6325
 6326		/* check dest operand */
 6327		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
 6328		if (err)
 6329			return err;
 6330
 6331		return adjust_reg_min_max_vals(env, insn);
 6332	}
 6333
 6334	return 0;
 6335}
 6336
 6337static void __find_good_pkt_pointers(struct bpf_func_state *state,
 6338				     struct bpf_reg_state *dst_reg,
 6339				     enum bpf_reg_type type, u16 new_range)
 6340{
 6341	struct bpf_reg_state *reg;
 6342	int i;
 6343
 6344	for (i = 0; i < MAX_BPF_REG; i++) {
 6345		reg = &state->regs[i];
 6346		if (reg->type == type && reg->id == dst_reg->id)
 6347			/* keep the maximum range already checked */
 6348			reg->range = max(reg->range, new_range);
 6349	}
 6350
 6351	bpf_for_each_spilled_reg(i, state, reg) {
 6352		if (!reg)
 6353			continue;
 6354		if (reg->type == type && reg->id == dst_reg->id)
 6355			reg->range = max(reg->range, new_range);
 6356	}
 6357}
 6358
 6359static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
 6360				   struct bpf_reg_state *dst_reg,
 6361				   enum bpf_reg_type type,
 6362				   bool range_right_open)
 6363{
 6364	u16 new_range;
 6365	int i;
 6366
 6367	if (dst_reg->off < 0 ||
 6368	    (dst_reg->off == 0 && range_right_open))
 6369		/* This doesn't give us any range */
 6370		return;
 6371
 6372	if (dst_reg->umax_value > MAX_PACKET_OFF ||
 6373	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
 6374		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
 6375		 * than pkt_end, but that's because it's also less than pkt.
 6376		 */
 6377		return;
 6378
 6379	new_range = dst_reg->off;
 6380	if (range_right_open)
 6381		new_range--;
 6382
 6383	/* Examples for register markings:
 6384	 *
 6385	 * pkt_data in dst register:
 6386	 *
 6387	 *   r2 = r3;
 6388	 *   r2 += 8;
 6389	 *   if (r2 > pkt_end) goto <handle exception>
 6390	 *   <access okay>
 6391	 *
 6392	 *   r2 = r3;
 6393	 *   r2 += 8;
 6394	 *   if (r2 < pkt_end) goto <access okay>
 6395	 *   <handle exception>
 6396	 *
 6397	 *   Where:
 6398	 *     r2 == dst_reg, pkt_end == src_reg
 6399	 *     r2=pkt(id=n,off=8,r=0)
 6400	 *     r3=pkt(id=n,off=0,r=0)
 6401	 *
 6402	 * pkt_data in src register:
 6403	 *
 6404	 *   r2 = r3;
 6405	 *   r2 += 8;
 6406	 *   if (pkt_end >= r2) goto <access okay>
 6407	 *   <handle exception>
 6408	 *
 6409	 *   r2 = r3;
 6410	 *   r2 += 8;
 6411	 *   if (pkt_end <= r2) goto <handle exception>
 6412	 *   <access okay>
 6413	 *
 6414	 *   Where:
 6415	 *     pkt_end == dst_reg, r2 == src_reg
 6416	 *     r2=pkt(id=n,off=8,r=0)
 6417	 *     r3=pkt(id=n,off=0,r=0)
 6418	 *
 6419	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
 6420	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
 6421	 * and [r3, r3 + 8-1) respectively is safe to access depending on
 6422	 * the check.
 6423	 */
 6424
 6425	/* If our ids match, then we must have the same max_value.  And we
 6426	 * don't care about the other reg's fixed offset, since if it's too big
 6427	 * the range won't allow anything.
 6428	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
 6429	 */
 6430	for (i = 0; i <= vstate->curframe; i++)
 6431		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
 6432					 new_range);
 6433}
 6434
 6435static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
 6436{
 6437	struct tnum subreg = tnum_subreg(reg->var_off);
 6438	s32 sval = (s32)val;
 6439
 6440	switch (opcode) {
 6441	case BPF_JEQ:
 6442		if (tnum_is_const(subreg))
 6443			return !!tnum_equals_const(subreg, val);
 6444		break;
 6445	case BPF_JNE:
 6446		if (tnum_is_const(subreg))
 6447			return !tnum_equals_const(subreg, val);
 6448		break;
 6449	case BPF_JSET:
 6450		if ((~subreg.mask & subreg.value) & val)
 6451			return 1;
 6452		if (!((subreg.mask | subreg.value) & val))
 6453			return 0;
 6454		break;
 6455	case BPF_JGT:
 6456		if (reg->u32_min_value > val)
 6457			return 1;
 6458		else if (reg->u32_max_value <= val)
 6459			return 0;
 6460		break;
 6461	case BPF_JSGT:
 6462		if (reg->s32_min_value > sval)
 6463			return 1;
 6464		else if (reg->s32_max_value < sval)
 6465			return 0;
 6466		break;
 6467	case BPF_JLT:
 6468		if (reg->u32_max_value < val)
 6469			return 1;
 6470		else if (reg->u32_min_value >= val)
 6471			return 0;
 6472		break;
 6473	case BPF_JSLT:
 6474		if (reg->s32_max_value < sval)
 6475			return 1;
 6476		else if (reg->s32_min_value >= sval)
 6477			return 0;
 6478		break;
 6479	case BPF_JGE:
 6480		if (reg->u32_min_value >= val)
 6481			return 1;
 6482		else if (reg->u32_max_value < val)
 6483			return 0;
 6484		break;
 6485	case BPF_JSGE:
 6486		if (reg->s32_min_value >= sval)
 6487			return 1;
 6488		else if (reg->s32_max_value < sval)
 6489			return 0;
 6490		break;
 6491	case BPF_JLE:
 6492		if (reg->u32_max_value <= val)
 6493			return 1;
 6494		else if (reg->u32_min_value > val)
 6495			return 0;
 6496		break;
 6497	case BPF_JSLE:
 6498		if (reg->s32_max_value <= sval)
 6499			return 1;
 6500		else if (reg->s32_min_value > sval)
 6501			return 0;
 6502		break;
 6503	}
 6504
 6505	return -1;
 6506}
 6507
 6508
 6509static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 6510{
 6511	s64 sval = (s64)val;
 6512
 6513	switch (opcode) {
 6514	case BPF_JEQ:
 6515		if (tnum_is_const(reg->var_off))
 6516			return !!tnum_equals_const(reg->var_off, val);
 6517		break;
 6518	case BPF_JNE:
 6519		if (tnum_is_const(reg->var_off))
 6520			return !tnum_equals_const(reg->var_off, val);
 6521		break;
 6522	case BPF_JSET:
 6523		if ((~reg->var_off.mask & reg->var_off.value) & val)
 6524			return 1;
 6525		if (!((reg->var_off.mask | reg->var_off.value) & val))
 6526			return 0;
 6527		break;
 6528	case BPF_JGT:
 6529		if (reg->umin_value > val)
 6530			return 1;
 6531		else if (reg->umax_value <= val)
 6532			return 0;
 6533		break;
 6534	case BPF_JSGT:
 6535		if (reg->smin_value > sval)
 6536			return 1;
 6537		else if (reg->smax_value < sval)
 6538			return 0;
 6539		break;
 6540	case BPF_JLT:
 6541		if (reg->umax_value < val)
 6542			return 1;
 6543		else if (reg->umin_value >= val)
 6544			return 0;
 6545		break;
 6546	case BPF_JSLT:
 6547		if (reg->smax_value < sval)
 6548			return 1;
 6549		else if (reg->smin_value >= sval)
 6550			return 0;
 6551		break;
 6552	case BPF_JGE:
 6553		if (reg->umin_value >= val)
 6554			return 1;
 6555		else if (reg->umax_value < val)
 6556			return 0;
 6557		break;
 6558	case BPF_JSGE:
 6559		if (reg->smin_value >= sval)
 6560			return 1;
 6561		else if (reg->smax_value < sval)
 6562			return 0;
 6563		break;
 6564	case BPF_JLE:
 6565		if (reg->umax_value <= val)
 6566			return 1;
 6567		else if (reg->umin_value > val)
 6568			return 0;
 6569		break;
 6570	case BPF_JSLE:
 6571		if (reg->smax_value <= sval)
 6572			return 1;
 6573		else if (reg->smin_value > sval)
 6574			return 0;
 6575		break;
 6576	}
 6577
 6578	return -1;
 6579}
 6580
 6581/* compute branch direction of the expression "if (reg opcode val) goto target;"
 6582 * and return:
 6583 *  1 - branch will be taken and "goto target" will be executed
 6584	 *  0 - branch will not be taken and fall through to the next insn
 6585	 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
 6586	 *      value range is [0,10]
 6587 */
 6588static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
 6589			   bool is_jmp32)
 6590{
 6591	if (__is_pointer_value(false, reg)) {
 6592		if (!reg_type_not_null(reg->type))
 6593			return -1;
 6594
  6595		/* If the pointer is valid, tests against zero will fail, so we
  6596		 * can use this to determine the branch taken.
 6597		 */
 6598		if (val != 0)
 6599			return -1;
 6600
 6601		switch (opcode) {
 6602		case BPF_JEQ:
 6603			return 0;
 6604		case BPF_JNE:
 6605			return 1;
 6606		default:
 6607			return -1;
 6608		}
 6609	}
 6610
 6611	if (is_jmp32)
 6612		return is_branch32_taken(reg, val, opcode);
 6613	return is_branch64_taken(reg, val, opcode);
 6614}
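/* Example (illustrative values): for "if r1 > 5" on a 64-bit scalar r1,
 * is_branch64_taken() returns 1 when umin_value > 5 (e.g. r1 in [7, 20]),
 * 0 when umax_value <= 5 (e.g. r1 in [0, 5]), and -1 when the bounds
 * straddle the constant (e.g. r1 in [0, 10]), in which case both branches
 * must be explored.
 */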
 6615
 6616/* Adjusts the register min/max values in the case that the dst_reg is the
 6617 * variable register that we are working on, and src_reg is a constant or we're
 6618 * simply doing a BPF_K check.
 6619 * In JEQ/JNE cases we also adjust the var_off values.
 6620 */
 6621static void reg_set_min_max(struct bpf_reg_state *true_reg,
 6622			    struct bpf_reg_state *false_reg,
 6623			    u64 val, u32 val32,
 6624			    u8 opcode, bool is_jmp32)
 6625{
 6626	struct tnum false_32off = tnum_subreg(false_reg->var_off);
 6627	struct tnum false_64off = false_reg->var_off;
 6628	struct tnum true_32off = tnum_subreg(true_reg->var_off);
 6629	struct tnum true_64off = true_reg->var_off;
 6630	s64 sval = (s64)val;
 6631	s32 sval32 = (s32)val32;
 6632
 6633	/* If the dst_reg is a pointer, we can't learn anything about its
 6634	 * variable offset from the compare (unless src_reg were a pointer into
  6635	 * the same object, but we don't bother with that).
 6636	 * Since false_reg and true_reg have the same type by construction, we
 6637	 * only need to check one of them for pointerness.
 6638	 */
 6639	if (__is_pointer_value(false, false_reg))
 6640		return;
 6641
 6642	switch (opcode) {
 6643	case BPF_JEQ:
 6644	case BPF_JNE:
 6645	{
 6646		struct bpf_reg_state *reg =
 6647			opcode == BPF_JEQ ? true_reg : false_reg;
 6648
 6649		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
 6650		 * if it is true we know the value for sure. Likewise for
 6651		 * BPF_JNE.
 6652		 */
 6653		if (is_jmp32)
 6654			__mark_reg32_known(reg, val32);
 6655		else
 6656			__mark_reg_known(reg, val);
 6657		break;
 6658	}
 6659	case BPF_JSET:
 6660		if (is_jmp32) {
 6661			false_32off = tnum_and(false_32off, tnum_const(~val32));
 6662			if (is_power_of_2(val32))
 6663				true_32off = tnum_or(true_32off,
 6664						     tnum_const(val32));
 6665		} else {
 6666			false_64off = tnum_and(false_64off, tnum_const(~val));
 6667			if (is_power_of_2(val))
 6668				true_64off = tnum_or(true_64off,
 6669						     tnum_const(val));
 6670		}
 6671		break;
 6672	case BPF_JGE:
 6673	case BPF_JGT:
 6674	{
 6675		if (is_jmp32) {
 6676			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
 6677			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
 6678
 6679			false_reg->u32_max_value = min(false_reg->u32_max_value,
 6680						       false_umax);
 6681			true_reg->u32_min_value = max(true_reg->u32_min_value,
 6682						      true_umin);
 6683		} else {
 6684			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
 6685			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
 6686
 6687			false_reg->umax_value = min(false_reg->umax_value, false_umax);
 6688			true_reg->umin_value = max(true_reg->umin_value, true_umin);
 6689		}
 6690		break;
 6691	}
 6692	case BPF_JSGE:
 6693	case BPF_JSGT:
 6694	{
 6695		if (is_jmp32) {
 6696			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
 6697			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
 6698
 6699			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
 6700			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
 6701		} else {
 6702			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
 6703			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
 6704
 6705			false_reg->smax_value = min(false_reg->smax_value, false_smax);
 6706			true_reg->smin_value = max(true_reg->smin_value, true_smin);
 6707		}
 6708		break;
 6709	}
 6710	case BPF_JLE:
 6711	case BPF_JLT:
 6712	{
 6713		if (is_jmp32) {
 6714			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
 6715			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
 6716
 6717			false_reg->u32_min_value = max(false_reg->u32_min_value,
 6718						       false_umin);
 6719			true_reg->u32_max_value = min(true_reg->u32_max_value,
 6720						      true_umax);
 6721		} else {
 6722			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
 6723			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
 6724
 6725			false_reg->umin_value = max(false_reg->umin_value, false_umin);
 6726			true_reg->umax_value = min(true_reg->umax_value, true_umax);
 6727		}
 6728		break;
 6729	}
 6730	case BPF_JSLE:
 6731	case BPF_JSLT:
 6732	{
 6733		if (is_jmp32) {
 6734			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
 6735			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
 6736
 6737			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
 6738			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
 6739		} else {
 6740			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
 6741			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
 6742
 6743			false_reg->smin_value = max(false_reg->smin_value, false_smin);
 6744			true_reg->smax_value = min(true_reg->smax_value, true_smax);
 6745		}
 6746		break;
 6747	}
 6748	default:
 6749		return;
 6750	}
 6751
 6752	if (is_jmp32) {
 6753		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
 6754					     tnum_subreg(false_32off));
 6755		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
 6756					    tnum_subreg(true_32off));
 6757		__reg_combine_32_into_64(false_reg);
 6758		__reg_combine_32_into_64(true_reg);
 6759	} else {
 6760		false_reg->var_off = false_64off;
 6761		true_reg->var_off = true_64off;
 6762		__reg_combine_64_into_32(false_reg);
 6763		__reg_combine_64_into_32(true_reg);
 6764	}
 6765}
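/* Example: for "if r1 > 5" (BPF_JGT against constant 5) on a 64-bit
 * scalar, the false (fall-through) branch learns umax_value <= 5 and the
 * true branch learns umin_value >= 6, each intersected with the bounds
 * r1 already had.
 */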
 6766
 6767/* Same as above, but for the case that dst_reg holds a constant and src_reg is
 6768 * the variable reg.
 6769 */
 6770static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
 6771				struct bpf_reg_state *false_reg,
 6772				u64 val, u32 val32,
 6773				u8 opcode, bool is_jmp32)
 6774{
 6775	/* How can we transform "a <op> b" into "b <op> a"? */
 6776	static const u8 opcode_flip[16] = {
 6777		/* these stay the same */
 6778		[BPF_JEQ  >> 4] = BPF_JEQ,
 6779		[BPF_JNE  >> 4] = BPF_JNE,
 6780		[BPF_JSET >> 4] = BPF_JSET,
 6781		/* these swap "lesser" and "greater" (L and G in the opcodes) */
 6782		[BPF_JGE  >> 4] = BPF_JLE,
 6783		[BPF_JGT  >> 4] = BPF_JLT,
 6784		[BPF_JLE  >> 4] = BPF_JGE,
 6785		[BPF_JLT  >> 4] = BPF_JGT,
 6786		[BPF_JSGE >> 4] = BPF_JSLE,
 6787		[BPF_JSGT >> 4] = BPF_JSLT,
 6788		[BPF_JSLE >> 4] = BPF_JSGE,
 6789		[BPF_JSLT >> 4] = BPF_JSGT
 6790	};
 6791	opcode = opcode_flip[opcode >> 4];
 6792	/* This uses zero as "not present in table"; luckily the zero opcode,
 6793	 * BPF_JA, can't get here.
 6794	 */
 6795	if (opcode)
 6796		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
 6797}
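/* Example: for "if r2 < r1" where r2 is known to be the constant 5 and
 * r1 is the variable register, BPF_JLT is flipped to BPF_JGT and r1's
 * bounds are adjusted exactly as for "if r1 > 5" above.
 */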
 6798
 6799/* Regs are known to be equal, so intersect their min/max/var_off */
 6800static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
 6801				  struct bpf_reg_state *dst_reg)
 6802{
 6803	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
 6804							dst_reg->umin_value);
 6805	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
 6806							dst_reg->umax_value);
 6807	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
 6808							dst_reg->smin_value);
 6809	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
 6810							dst_reg->smax_value);
 6811	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
 6812							     dst_reg->var_off);
 6813	/* We might have learned new bounds from the var_off. */
 6814	__update_reg_bounds(src_reg);
 6815	__update_reg_bounds(dst_reg);
 6816	/* We might have learned something about the sign bit. */
 6817	__reg_deduce_bounds(src_reg);
 6818	__reg_deduce_bounds(dst_reg);
 6819	/* We might have learned some bits from the bounds. */
 6820	__reg_bound_offset(src_reg);
 6821	__reg_bound_offset(dst_reg);
 6822	/* Intersecting with the old var_off might have improved our bounds
 6823	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
 6824	 * then new var_off is (0; 0x7f...fc) which improves our umax.
 6825	 */
 6826	__update_reg_bounds(src_reg);
 6827	__update_reg_bounds(dst_reg);
 6828}
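/* Example: if r1 is known to be in [0, 10] and r2 in [5, 20], then in the
 * branch where "r1 == r2" holds both registers are narrowed to [5, 10]
 * (max of the minimums, min of the maximums) and their var_off becomes
 * the intersection of the two tnums.
 */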
 6829
 6830static void reg_combine_min_max(struct bpf_reg_state *true_src,
 6831				struct bpf_reg_state *true_dst,
 6832				struct bpf_reg_state *false_src,
 6833				struct bpf_reg_state *false_dst,
 6834				u8 opcode)
 6835{
 6836	switch (opcode) {
 6837	case BPF_JEQ:
 6838		__reg_combine_min_max(true_src, true_dst);
 6839		break;
 6840	case BPF_JNE:
 6841		__reg_combine_min_max(false_src, false_dst);
 6842		break;
 6843	}
 6844}
 6845
 6846static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 6847				 struct bpf_reg_state *reg, u32 id,
 6848				 bool is_null)
 6849{
 6850	if (reg_type_may_be_null(reg->type) && reg->id == id) {
 6851		/* Old offset (both fixed and variable parts) should
 6852		 * have been known-zero, because we don't allow pointer
 6853		 * arithmetic on pointers that might be NULL.
 6854		 */
 6855		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
 6856				 !tnum_equals_const(reg->var_off, 0) ||
 6857				 reg->off)) {
 6858			__mark_reg_known_zero(reg);
 6859			reg->off = 0;
 6860		}
 6861		if (is_null) {
 6862			reg->type = SCALAR_VALUE;
 6863		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
 6864			const struct bpf_map *map = reg->map_ptr;
 6865
 6866			if (map->inner_map_meta) {
 6867				reg->type = CONST_PTR_TO_MAP;
 6868				reg->map_ptr = map->inner_map_meta;
 6869			} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 6870				reg->type = PTR_TO_XDP_SOCK;
 6871			} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
 6872				   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
 6873				reg->type = PTR_TO_SOCKET;
 6874			} else {
 6875				reg->type = PTR_TO_MAP_VALUE;
 6876			}
 6877		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
 6878			reg->type = PTR_TO_SOCKET;
 6879		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
 6880			reg->type = PTR_TO_SOCK_COMMON;
 6881		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
 6882			reg->type = PTR_TO_TCP_SOCK;
 6883		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
 6884			reg->type = PTR_TO_BTF_ID;
 6885		} else if (reg->type == PTR_TO_MEM_OR_NULL) {
 6886			reg->type = PTR_TO_MEM;
 6887		} else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
 6888			reg->type = PTR_TO_RDONLY_BUF;
 6889		} else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
 6890			reg->type = PTR_TO_RDWR_BUF;
 6891		}
 6892		if (is_null) {
 6893			/* We don't need id and ref_obj_id from this point
  6894			 * onwards anymore, so reset them so that state pruning
  6895			 * has a chance to take effect.
 6896			 */
 6897			reg->id = 0;
 6898			reg->ref_obj_id = 0;
 6899		} else if (!reg_may_point_to_spin_lock(reg)) {
 6900			/* For not-NULL ptr, reg->ref_obj_id will be reset
 6901			 * in release_reg_references().
 6902			 *
 6903			 * reg->id is still used by spin_lock ptr. Other
 6904			 * than spin_lock ptr type, reg->id can be reset.
 6905			 */
 6906			reg->id = 0;
 6907		}
 6908	}
 6909}
 6910
 6911static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
 6912				    bool is_null)
 6913{
 6914	struct bpf_reg_state *reg;
 6915	int i;
 6916
 6917	for (i = 0; i < MAX_BPF_REG; i++)
 6918		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
 6919
 6920	bpf_for_each_spilled_reg(i, state, reg) {
 6921		if (!reg)
 6922			continue;
 6923		mark_ptr_or_null_reg(state, reg, id, is_null);
 6924	}
 6925}
 6926
 6927/* The logic is similar to find_good_pkt_pointers(), both could eventually
 6928 * be folded together at some point.
 6929 */
 6930static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 6931				  bool is_null)
 6932{
 6933	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 6934	struct bpf_reg_state *regs = state->regs;
 6935	u32 ref_obj_id = regs[regno].ref_obj_id;
 6936	u32 id = regs[regno].id;
 6937	int i;
 6938
 6939	if (ref_obj_id && ref_obj_id == id && is_null)
 6940		/* regs[regno] is in the " == NULL" branch.
 6941		 * No one could have freed the reference state before
 6942		 * doing the NULL check.
 6943		 */
 6944		WARN_ON_ONCE(release_reference_state(state, id));
 6945
 6946	for (i = 0; i <= vstate->curframe; i++)
 6947		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
 6948}
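/* Example: after
 *   r0 = bpf_map_lookup_elem(...)
 *   if r0 == 0 goto err
 * r0 has type PTR_TO_MAP_VALUE_OR_NULL before the check; in the
 * fall-through branch it (and every register sharing its id) becomes
 * PTR_TO_MAP_VALUE, while in the taken branch it becomes a SCALAR_VALUE
 * since it is known to be NULL.
 */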
 6949
 6950static bool try_match_pkt_pointers(const struct bpf_insn *insn,
 6951				   struct bpf_reg_state *dst_reg,
 6952				   struct bpf_reg_state *src_reg,
 6953				   struct bpf_verifier_state *this_branch,
 6954				   struct bpf_verifier_state *other_branch)
 6955{
 6956	if (BPF_SRC(insn->code) != BPF_X)
 6957		return false;
 6958
 6959	/* Pointers are always 64-bit. */
 6960	if (BPF_CLASS(insn->code) == BPF_JMP32)
 6961		return false;
 6962
 6963	switch (BPF_OP(insn->code)) {
 6964	case BPF_JGT:
 6965		if ((dst_reg->type == PTR_TO_PACKET &&
 6966		     src_reg->type == PTR_TO_PACKET_END) ||
 6967		    (dst_reg->type == PTR_TO_PACKET_META &&
 6968		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
 6969			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
 6970			find_good_pkt_pointers(this_branch, dst_reg,
 6971					       dst_reg->type, false);
 6972		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 6973			    src_reg->type == PTR_TO_PACKET) ||
 6974			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
 6975			    src_reg->type == PTR_TO_PACKET_META)) {
 6976			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
 6977			find_good_pkt_pointers(other_branch, src_reg,
 6978					       src_reg->type, true);
 6979		} else {
 6980			return false;
 6981		}
 6982		break;
 6983	case BPF_JLT:
 6984		if ((dst_reg->type == PTR_TO_PACKET &&
 6985		     src_reg->type == PTR_TO_PACKET_END) ||
 6986		    (dst_reg->type == PTR_TO_PACKET_META &&
 6987		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
 6988			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
 6989			find_good_pkt_pointers(other_branch, dst_reg,
 6990					       dst_reg->type, true);
 6991		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 6992			    src_reg->type == PTR_TO_PACKET) ||
 6993			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
 6994			    src_reg->type == PTR_TO_PACKET_META)) {
  6995			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
 6996			find_good_pkt_pointers(this_branch, src_reg,
 6997					       src_reg->type, false);
 6998		} else {
 6999			return false;
 7000		}
 7001		break;
 7002	case BPF_JGE:
 7003		if ((dst_reg->type == PTR_TO_PACKET &&
 7004		     src_reg->type == PTR_TO_PACKET_END) ||
 7005		    (dst_reg->type == PTR_TO_PACKET_META &&
 7006		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
 7007			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
 7008			find_good_pkt_pointers(this_branch, dst_reg,
 7009					       dst_reg->type, true);
 7010		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 7011			    src_reg->type == PTR_TO_PACKET) ||
 7012			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
 7013			    src_reg->type == PTR_TO_PACKET_META)) {
 7014			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
 7015			find_good_pkt_pointers(other_branch, src_reg,
 7016					       src_reg->type, false);
 7017		} else {
 7018			return false;
 7019		}
 7020		break;
 7021	case BPF_JLE:
 7022		if ((dst_reg->type == PTR_TO_PACKET &&
 7023		     src_reg->type == PTR_TO_PACKET_END) ||
 7024		    (dst_reg->type == PTR_TO_PACKET_META &&
 7025		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
 7026			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
 7027			find_good_pkt_pointers(other_branch, dst_reg,
 7028					       dst_reg->type, false);
 7029		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
 7030			    src_reg->type == PTR_TO_PACKET) ||
 7031			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
 7032			    src_reg->type == PTR_TO_PACKET_META)) {
 7033			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
 7034			find_good_pkt_pointers(this_branch, src_reg,
 7035					       src_reg->type, true);
 7036		} else {
 7037			return false;
 7038		}
 7039		break;
 7040	default:
 7041		return false;
 7042	}
 7043
 7044	return true;
 7045}
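/* Example: for the common bounds check
 *   r2 = pkt_data
 *   r2 += 8
 *   if r2 > pkt_end goto drop
 * the fall-through branch knows that r2 <= pkt_end, so
 * find_good_pkt_pointers() gives the packet pointers a range of 8 bytes
 * there and loads within that range become legal.
 */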
 7046
 7047static int check_cond_jmp_op(struct bpf_verifier_env *env,
 7048			     struct bpf_insn *insn, int *insn_idx)
 7049{
 7050	struct bpf_verifier_state *this_branch = env->cur_state;
 7051	struct bpf_verifier_state *other_branch;
 7052	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
 7053	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
 7054	u8 opcode = BPF_OP(insn->code);
 7055	bool is_jmp32;
 7056	int pred = -1;
 7057	int err;
 7058
 7059	/* Only conditional jumps are expected to reach here. */
 7060	if (opcode == BPF_JA || opcode > BPF_JSLE) {
 7061		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
 7062		return -EINVAL;
 7063	}
 7064
 7065	if (BPF_SRC(insn->code) == BPF_X) {
 7066		if (insn->imm != 0) {
 7067			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 7068			return -EINVAL;
 7069		}
 7070
 7071		/* check src1 operand */
 7072		err = check_reg_arg(env, insn->src_reg, SRC_OP);
 7073		if (err)
 7074			return err;
 7075
 7076		if (is_pointer_value(env, insn->src_reg)) {
 7077			verbose(env, "R%d pointer comparison prohibited\n",
 7078				insn->src_reg);
 7079			return -EACCES;
 7080		}
 7081		src_reg = &regs[insn->src_reg];
 7082	} else {
 7083		if (insn->src_reg != BPF_REG_0) {
 7084			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
 7085			return -EINVAL;
 7086		}
 7087	}
 7088
 7089	/* check src2 operand */
 7090	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 7091	if (err)
 7092		return err;
 7093
 7094	dst_reg = &regs[insn->dst_reg];
 7095	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
 7096
 7097	if (BPF_SRC(insn->code) == BPF_K) {
 7098		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
 7099	} else if (src_reg->type == SCALAR_VALUE &&
 7100		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
 7101		pred = is_branch_taken(dst_reg,
 7102				       tnum_subreg(src_reg->var_off).value,
 7103				       opcode,
 7104				       is_jmp32);
 7105	} else if (src_reg->type == SCALAR_VALUE &&
 7106		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
 7107		pred = is_branch_taken(dst_reg,
 7108				       src_reg->var_off.value,
 7109				       opcode,
 7110				       is_jmp32);
 7111	}
 7112
 7113	if (pred >= 0) {
 7114		/* If we get here with a dst_reg pointer type it is because
 7115		 * above is_branch_taken() special cased the 0 comparison.
 7116		 */
 7117		if (!__is_pointer_value(false, dst_reg))
 7118			err = mark_chain_precision(env, insn->dst_reg);
 7119		if (BPF_SRC(insn->code) == BPF_X && !err)
 7120			err = mark_chain_precision(env, insn->src_reg);
 7121		if (err)
 7122			return err;
 7123	}
 7124	if (pred == 1) {
 7125		/* only follow the goto, ignore fall-through */
 7126		*insn_idx += insn->off;
 7127		return 0;
 7128	} else if (pred == 0) {
 7129		/* only follow fall-through branch, since
 7130		 * that's where the program will go
 7131		 */
 7132		return 0;
 7133	}
 7134
 7135	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
 7136				  false);
 7137	if (!other_branch)
 7138		return -EFAULT;
 7139	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
 7140
 7141	/* detect if we are comparing against a constant value so we can adjust
 7142	 * our min/max values for our dst register.
 7143	 * this is only legit if both are scalars (or pointers to the same
 7144	 * object, I suppose, but we don't support that right now), because
 7145	 * otherwise the different base pointers mean the offsets aren't
 7146	 * comparable.
 7147	 */
 7148	if (BPF_SRC(insn->code) == BPF_X) {
 7149		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
 7150
 7151		if (dst_reg->type == SCALAR_VALUE &&
 7152		    src_reg->type == SCALAR_VALUE) {
 7153			if (tnum_is_const(src_reg->var_off) ||
 7154			    (is_jmp32 &&
 7155			     tnum_is_const(tnum_subreg(src_reg->var_off))))
 7156				reg_set_min_max(&other_branch_regs[insn->dst_reg],
 7157						dst_reg,
 7158						src_reg->var_off.value,
 7159						tnum_subreg(src_reg->var_off).value,
 7160						opcode, is_jmp32);
 7161			else if (tnum_is_const(dst_reg->var_off) ||
 7162				 (is_jmp32 &&
 7163				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
 7164				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
 7165						    src_reg,
 7166						    dst_reg->var_off.value,
 7167						    tnum_subreg(dst_reg->var_off).value,
 7168						    opcode, is_jmp32);
 7169			else if (!is_jmp32 &&
 7170				 (opcode == BPF_JEQ || opcode == BPF_JNE))
 7171				/* Comparing for equality, we can combine knowledge */
 7172				reg_combine_min_max(&other_branch_regs[insn->src_reg],
 7173						    &other_branch_regs[insn->dst_reg],
 7174						    src_reg, dst_reg, opcode);
 7175		}
 7176	} else if (dst_reg->type == SCALAR_VALUE) {
 7177		reg_set_min_max(&other_branch_regs[insn->dst_reg],
 7178					dst_reg, insn->imm, (u32)insn->imm,
 7179					opcode, is_jmp32);
 7180	}
 7181
 7182	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
  7183	 * NOTE: the optimizations below are related to pointer comparisons,
  7184	 *       which will never be JMP32.
 7185	 */
 7186	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
 7187	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
 7188	    reg_type_may_be_null(dst_reg->type)) {
 7189		/* Mark all identical registers in each branch as either
  7190		 * safe or unknown depending on the R == 0 or R != 0 condition.
 7191		 */
 7192		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
 7193				      opcode == BPF_JNE);
 7194		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
 7195				      opcode == BPF_JEQ);
 7196	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
 7197					   this_branch, other_branch) &&
 7198		   is_pointer_value(env, insn->dst_reg)) {
 7199		verbose(env, "R%d pointer comparison prohibited\n",
 7200			insn->dst_reg);
 7201		return -EACCES;
 7202	}
 7203	if (env->log.level & BPF_LOG_LEVEL)
 7204		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
 7205	return 0;
 7206}
 7207
 7208/* verify BPF_LD_IMM64 instruction */
 7209static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
 7210{
 7211	struct bpf_insn_aux_data *aux = cur_aux(env);
 7212	struct bpf_reg_state *regs = cur_regs(env);
 7213	struct bpf_map *map;
 7214	int err;
 7215
 7216	if (BPF_SIZE(insn->code) != BPF_DW) {
 7217		verbose(env, "invalid BPF_LD_IMM insn\n");
 7218		return -EINVAL;
 7219	}
 7220	if (insn->off != 0) {
 7221		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
 7222		return -EINVAL;
 7223	}
 7224
 7225	err = check_reg_arg(env, insn->dst_reg, DST_OP);
 7226	if (err)
 7227		return err;
 7228
 7229	if (insn->src_reg == 0) {
 7230		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
 7231
 7232		regs[insn->dst_reg].type = SCALAR_VALUE;
 7233		__mark_reg_known(&regs[insn->dst_reg], imm);
 7234		return 0;
 7235	}
 7236
 7237	map = env->used_maps[aux->map_index];
 7238	mark_reg_known_zero(env, regs, insn->dst_reg);
 7239	regs[insn->dst_reg].map_ptr = map;
 7240
 7241	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
 7242		regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
 7243		regs[insn->dst_reg].off = aux->map_off;
 7244		if (map_value_has_spin_lock(map))
 7245			regs[insn->dst_reg].id = ++env->id_gen;
 7246	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
 7247		regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
 7248	} else {
 7249		verbose(env, "bpf verifier is misconfigured\n");
 7250		return -EINVAL;
 7251	}
 7252
 7253	return 0;
 7254}
 7255
 7256static bool may_access_skb(enum bpf_prog_type type)
 7257{
 7258	switch (type) {
 7259	case BPF_PROG_TYPE_SOCKET_FILTER:
 7260	case BPF_PROG_TYPE_SCHED_CLS:
 7261	case BPF_PROG_TYPE_SCHED_ACT:
 7262		return true;
 7263	default:
 7264		return false;
 7265	}
 7266}
 7267
 7268/* verify safety of LD_ABS|LD_IND instructions:
 7269 * - they can only appear in the programs where ctx == skb
 7270 * - since they are wrappers of function calls, they scratch R1-R5 registers,
 7271 *   preserve R6-R9, and store return value into R0
 7272 *
 7273 * Implicit input:
 7274 *   ctx == skb == R6 == CTX
 7275 *
 7276 * Explicit input:
 7277 *   SRC == any register
 7278 *   IMM == 32-bit immediate
 7279 *
 7280 * Output:
 7281 *   R0 - 8/16/32-bit skb data converted to cpu endianness
 7282 */
 7283static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
 7284{
 7285	struct bpf_reg_state *regs = cur_regs(env);
 7286	static const int ctx_reg = BPF_REG_6;
 7287	u8 mode = BPF_MODE(insn->code);
 7288	int i, err;
 7289
 7290	if (!may_access_skb(env->prog->type)) {
 7291		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
 7292		return -EINVAL;
 7293	}
 7294
 7295	if (!env->ops->gen_ld_abs) {
 7296		verbose(env, "bpf verifier is misconfigured\n");
 7297		return -EINVAL;
 7298	}
 7299
 7300	if (env->subprog_cnt > 1) {
  7301		/* when a program has an LD_ABS insn, JITs and the interpreter
  7302		 * assume that r1 == ctx == skb, which is not the case for callees
  7303		 * that can have arbitrary arguments. It's problematic
  7304		 * for the main prog as well, since JITs would need to analyze
  7305		 * all functions in order to make proper register save/restore
  7306		 * decisions in the main prog. Hence disallow LD_ABS with calls.
 7307		 */
 7308		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
 7309		return -EINVAL;
 7310	}
 7311
 7312	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
 7313	    BPF_SIZE(insn->code) == BPF_DW ||
 7314	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
 7315		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
 7316		return -EINVAL;
 7317	}
 7318
 7319	/* check whether implicit source operand (register R6) is readable */
 7320	err = check_reg_arg(env, ctx_reg, SRC_OP);
 7321	if (err)
 7322		return err;
 7323
 7324	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
 7325	 * gen_ld_abs() may terminate the program at runtime, leading to
 7326	 * reference leak.
 7327	 */
 7328	err = check_reference_leak(env);
 7329	if (err) {
 7330		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
 7331		return err;
 7332	}
 7333
 7334	if (env->cur_state->active_spin_lock) {
 7335		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
 7336		return -EINVAL;
 7337	}
 7338
 7339	if (regs[ctx_reg].type != PTR_TO_CTX) {
 7340		verbose(env,
 7341			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
 7342		return -EINVAL;
 7343	}
 7344
 7345	if (mode == BPF_IND) {
 7346		/* check explicit source operand */
 7347		err = check_reg_arg(env, insn->src_reg, SRC_OP);
 7348		if (err)
 7349			return err;
 7350	}
 7351
 7352	err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
 7353	if (err < 0)
 7354		return err;
 7355
 7356	/* reset caller saved regs to unreadable */
 7357	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 7358		mark_reg_not_init(env, regs, caller_saved[i]);
 7359		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
 7360	}
 7361
 7362	/* mark destination R0 register as readable, since it contains
 7363	 * the value fetched from the packet.
 7364	 * Already marked as written above.
 7365	 */
 7366	mark_reg_unknown(env, regs, BPF_REG_0);
  7367	/* ld_abs loads up to 32 bits of skb data. */
 7368	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
 7369	return 0;
 7370}
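/* Example: the classic socket-filter idiom BPF_LD_ABS(BPF_H, 12) reads
 * the 16-bit Ethernet protocol field at skb offset 12; after
 * check_ld_abs() the caller-saved registers R1-R5 are scratched, R6 must
 * still hold the skb context and R0 holds the loaded value converted to
 * CPU endianness.
 */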
 7371
 7372static int check_return_code(struct bpf_verifier_env *env)
 7373{
 7374	struct tnum enforce_attach_type_range = tnum_unknown;
 7375	const struct bpf_prog *prog = env->prog;
 7376	struct bpf_reg_state *reg;
 7377	struct tnum range = tnum_range(0, 1);
 7378	int err;
 7379
 7380	/* LSM and struct_ops func-ptr's return type could be "void" */
 7381	if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
 7382	     env->prog->type == BPF_PROG_TYPE_LSM) &&
 7383	    !prog->aux->attach_func_proto->type)
 7384		return 0;
 7385
  7386	/* eBPF calling convention is such that R0 is used
  7387	 * to return the value from the eBPF program.
  7388	 * Make sure that it's readable at this point,
  7389	 * i.e. at bpf_exit, which means that the program wrote
  7390	 * something into it earlier.
 7391	 */
 7392	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
 7393	if (err)
 7394		return err;
 7395
 7396	if (is_pointer_value(env, BPF_REG_0)) {
 7397		verbose(env, "R0 leaks addr as return value\n");
 7398		return -EACCES;
 7399	}
 7400
 7401	switch (env->prog->type) {
 7402	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 7403		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
 7404		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
 7405		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
 7406		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
 7407		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
 7408		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
 7409			range = tnum_range(1, 1);
 7410		break;
 7411	case BPF_PROG_TYPE_CGROUP_SKB:
 7412		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
 7413			range = tnum_range(0, 3);
 7414			enforce_attach_type_range = tnum_range(2, 3);
 7415		}
 7416		break;
 7417	case BPF_PROG_TYPE_CGROUP_SOCK:
 7418	case BPF_PROG_TYPE_SOCK_OPS:
 7419	case BPF_PROG_TYPE_CGROUP_DEVICE:
 7420	case BPF_PROG_TYPE_CGROUP_SYSCTL:
 7421	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 7422		break;
 7423	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 7424		if (!env->prog->aux->attach_btf_id)
 7425			return 0;
 7426		range = tnum_const(0);
 7427		break;
 7428	case BPF_PROG_TYPE_TRACING:
 7429		switch (env->prog->expected_attach_type) {
 7430		case BPF_TRACE_FENTRY:
 7431		case BPF_TRACE_FEXIT:
 7432			range = tnum_const(0);
 7433			break;
 7434		case BPF_TRACE_RAW_TP:
 7435		case BPF_MODIFY_RETURN:
 7436			return 0;
 7437		case BPF_TRACE_ITER:
 7438			break;
 7439		default:
 7440			return -ENOTSUPP;
 7441		}
 7442		break;
 7443	case BPF_PROG_TYPE_SK_LOOKUP:
 7444		range = tnum_range(SK_DROP, SK_PASS);
 7445		break;
 7446	case BPF_PROG_TYPE_EXT:
 7447		/* freplace program can return anything as its return value
 7448		 * depends on the to-be-replaced kernel func or bpf program.
 7449		 */
 7450	default:
 7451		return 0;
 7452	}
 7453
 7454	reg = cur_regs(env) + BPF_REG_0;
 7455	if (reg->type != SCALAR_VALUE) {
 7456		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
 7457			reg_type_str[reg->type]);
 7458		return -EINVAL;
 7459	}
 7460
 7461	if (!tnum_in(range, reg->var_off)) {
 7462		char tn_buf[48];
 7463
 7464		verbose(env, "At program exit the register R0 ");
 7465		if (!tnum_is_unknown(reg->var_off)) {
 7466			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 7467			verbose(env, "has value %s", tn_buf);
 7468		} else {
 7469			verbose(env, "has unknown scalar value");
 7470		}
 7471		tnum_strn(tn_buf, sizeof(tn_buf), range);
 7472		verbose(env, " should have been in %s\n", tn_buf);
 7473		return -EINVAL;
 7474	}
 7475
 7476	if (!tnum_is_unknown(enforce_attach_type_range) &&
 7477	    tnum_in(enforce_attach_type_range, reg->var_off))
 7478		env->prog->enforce_expected_attach_type = 1;
 7479	return 0;
 7480}
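/* Example: a BPF_PROG_TYPE_CGROUP_SOCK program must exit with R0 in the
 * range [0, 1]; if the verifier can only prove e.g. R0 in [0, 255] at
 * bpf_exit, the program is rejected with an "At program exit the
 * register R0 ..." error.
 */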
 7481
 7482/* non-recursive DFS pseudo code
 7483 * 1  procedure DFS-iterative(G,v):
 7484 * 2      label v as discovered
 7485 * 3      let S be a stack
 7486 * 4      S.push(v)
 7487 * 5      while S is not empty
 7488 * 6            t <- S.pop()
 7489 * 7            if t is what we're looking for:
 7490 * 8                return t
 7491 * 9            for all edges e in G.adjacentEdges(t) do
 7492 * 10               if edge e is already labelled
 7493 * 11                   continue with the next edge
 7494 * 12               w <- G.adjacentVertex(t,e)
 7495 * 13               if vertex w is not discovered and not explored
 7496 * 14                   label e as tree-edge
 7497 * 15                   label w as discovered
 7498 * 16                   S.push(w)
 7499 * 17                   continue at 5
 7500 * 18               else if vertex w is discovered
 7501 * 19                   label e as back-edge
 7502 * 20               else
 7503 * 21                   // vertex w is explored
 7504 * 22                   label e as forward- or cross-edge
 7505 * 23           label t as explored
 7506 * 24           S.pop()
 7507 *
 7508 * convention:
 7509 * 0x10 - discovered
 7510 * 0x11 - discovered and fall-through edge labelled
 7511 * 0x12 - discovered and fall-through and branch edges labelled
 7512 * 0x20 - explored
 7513 */
 7514
 7515enum {
 7516	DISCOVERED = 0x10,
 7517	EXPLORED = 0x20,
 7518	FALLTHROUGH = 1,
 7519	BRANCH = 2,
 7520};
 7521
 7522static u32 state_htab_size(struct bpf_verifier_env *env)
 7523{
 7524	return env->prog->len;
 7525}
 7526
 7527static struct bpf_verifier_state_list **explored_state(
 7528					struct bpf_verifier_env *env,
 7529					int idx)
 7530{
 7531	struct bpf_verifier_state *cur = env->cur_state;
 7532	struct bpf_func_state *state = cur->frame[cur->curframe];
 7533
 7534	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
 7535}
 7536
 7537static void init_explored_state(struct bpf_verifier_env *env, int idx)
 7538{
 7539	env->insn_aux_data[idx].prune_point = true;
 7540}
 7541
 7542/* t, w, e - match pseudo-code above:
 7543 * t - index of current instruction
 7544 * w - next instruction
 7545 * e - edge
 7546 */
 7547static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
 7548		     bool loop_ok)
 7549{
 7550	int *insn_stack = env->cfg.insn_stack;
 7551	int *insn_state = env->cfg.insn_state;
 7552
 7553	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
 7554		return 0;
 7555
 7556	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
 7557		return 0;
 7558
 7559	if (w < 0 || w >= env->prog->len) {
 7560		verbose_linfo(env, t, "%d: ", t);
 7561		verbose(env, "jump out of range from insn %d to %d\n", t, w);
 7562		return -EINVAL;
 7563	}
 7564
 7565	if (e == BRANCH)
 7566		/* mark branch target for state pruning */
 7567		init_explored_state(env, w);
 7568
 7569	if (insn_state[w] == 0) {
 7570		/* tree-edge */
 7571		insn_state[t] = DISCOVERED | e;
 7572		insn_state[w] = DISCOVERED;
 7573		if (env->cfg.cur_stack >= env->prog->len)
 7574			return -E2BIG;
 7575		insn_stack[env->cfg.cur_stack++] = w;
 7576		return 1;
 7577	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
 7578		if (loop_ok && env->bpf_capable)
 7579			return 0;
 7580		verbose_linfo(env, t, "%d: ", t);
 7581		verbose_linfo(env, w, "%d: ", w);
 7582		verbose(env, "back-edge from insn %d to %d\n", t, w);
 7583		return -EINVAL;
 7584	} else if (insn_state[w] == EXPLORED) {
 7585		/* forward- or cross-edge */
 7586		insn_state[t] = DISCOVERED | e;
 7587	} else {
 7588		verbose(env, "insn state internal bug\n");
 7589		return -EFAULT;
 7590	}
 7591	return 0;
 7592}
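/* Example: for a two-instruction loop like
 *   0: r1 += 1
 *   1: if r1 < 10 goto 0
 * the conditional jump's branch edge points back at a still-DISCOVERED
 * insn, so push_insn() sees a back-edge; with loop_ok and bpf_capable the
 * edge is tolerated here and bounded-loop handling is left to the later
 * state-exploration pass.
 */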
 7593
 7594/* non-recursive depth-first-search to detect loops in BPF program
 7595 * loop == back-edge in directed graph
 7596 */
 7597static int check_cfg(struct bpf_verifier_env *env)
 7598{
 7599	struct bpf_insn *insns = env->prog->insnsi;
 7600	int insn_cnt = env->prog->len;
 7601	int *insn_stack, *insn_state;
 7602	int ret = 0;
 7603	int i, t;
 7604
 7605	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 7606	if (!insn_state)
 7607		return -ENOMEM;
 7608
 7609	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 7610	if (!insn_stack) {
 7611		kvfree(insn_state);
 7612		return -ENOMEM;
 7613	}
 7614
 7615	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
 7616	insn_stack[0] = 0; /* 0 is the first instruction */
 7617	env->cfg.cur_stack = 1;
 7618
 7619peek_stack:
 7620	if (env->cfg.cur_stack == 0)
 7621		goto check_state;
 7622	t = insn_stack[env->cfg.cur_stack - 1];
 7623
 7624	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
 7625	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
 7626		u8 opcode = BPF_OP(insns[t].code);
 7627
 7628		if (opcode == BPF_EXIT) {
 7629			goto mark_explored;
 7630		} else if (opcode == BPF_CALL) {
 7631			ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
 7632			if (ret == 1)
 7633				goto peek_stack;
 7634			else if (ret < 0)
 7635				goto err_free;
 7636			if (t + 1 < insn_cnt)
 7637				init_explored_state(env, t + 1);
 7638			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
 7639				init_explored_state(env, t);
 7640				ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
 7641						env, false);
 7642				if (ret == 1)
 7643					goto peek_stack;
 7644				else if (ret < 0)
 7645					goto err_free;
 7646			}
 7647		} else if (opcode == BPF_JA) {
 7648			if (BPF_SRC(insns[t].code) != BPF_K) {
 7649				ret = -EINVAL;
 7650				goto err_free;
 7651			}
 7652			/* unconditional jump with single edge */
 7653			ret = push_insn(t, t + insns[t].off + 1,
 7654					FALLTHROUGH, env, true);
 7655			if (ret == 1)
 7656				goto peek_stack;
 7657			else if (ret < 0)
 7658				goto err_free;
 7659			/* unconditional jmp is not a good pruning point,
 7660			 * but it's marked, since backtracking needs
 7661			 * to record jmp history in is_state_visited().
 7662			 */
 7663			init_explored_state(env, t + insns[t].off + 1);
 7664			/* tell verifier to check for equivalent states
 7665			 * after every call and jump
 7666			 */
 7667			if (t + 1 < insn_cnt)
 7668				init_explored_state(env, t + 1);
 7669		} else {
 7670			/* conditional jump with two edges */
 7671			init_explored_state(env, t);
 7672			ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
 7673			if (ret == 1)
 7674				goto peek_stack;
 7675			else if (ret < 0)
 7676				goto err_free;
 7677
 7678			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
 7679			if (ret == 1)
 7680				goto peek_stack;
 7681			else if (ret < 0)
 7682				goto err_free;
 7683		}
 7684	} else {
 7685		/* all other non-branch instructions with single
 7686		 * fall-through edge
 7687		 */
 7688		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
 7689		if (ret == 1)
 7690			goto peek_stack;
 7691		else if (ret < 0)
 7692			goto err_free;
 7693	}
 7694
 7695mark_explored:
 7696	insn_state[t] = EXPLORED;
 7697	if (env->cfg.cur_stack-- <= 0) {
 7698		verbose(env, "pop stack internal bug\n");
 7699		ret = -EFAULT;
 7700		goto err_free;
 7701	}
 7702	goto peek_stack;
 7703
 7704check_state:
 7705	for (i = 0; i < insn_cnt; i++) {
 7706		if (insn_state[i] != EXPLORED) {
 7707			verbose(env, "unreachable insn %d\n", i);
 7708			ret = -EINVAL;
 7709			goto err_free;
 7710		}
 7711	}
 7712	ret = 0; /* cfg looks good */
 7713
 7714err_free:
 7715	kvfree(insn_state);
 7716	kvfree(insn_stack);
 7717	env->cfg.insn_state = env->cfg.insn_stack = NULL;
 7718	return ret;
 7719}
 7720
 7721/* The minimum supported BTF func info size */
 7722#define MIN_BPF_FUNCINFO_SIZE	8
 7723#define MAX_FUNCINFO_REC_SIZE	252
 7724
 7725static int check_btf_func(struct bpf_verifier_env *env,
 7726			  const union bpf_attr *attr,
 7727			  union bpf_attr __user *uattr)
 7728{
 7729	u32 i, nfuncs, urec_size, min_size;
 7730	u32 krec_size = sizeof(struct bpf_func_info);
 7731	struct bpf_func_info *krecord;
 7732	struct bpf_func_info_aux *info_aux = NULL;
 7733	const struct btf_type *type;
 7734	struct bpf_prog *prog;
 7735	const struct btf *btf;
 7736	void __user *urecord;
 7737	u32 prev_offset = 0;
 7738	int ret = -ENOMEM;
 7739
 7740	nfuncs = attr->func_info_cnt;
 7741	if (!nfuncs)
 7742		return 0;
 7743
 7744	if (nfuncs != env->subprog_cnt) {
 7745		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
 7746		return -EINVAL;
 7747	}
 7748
 7749	urec_size = attr->func_info_rec_size;
 7750	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
 7751	    urec_size > MAX_FUNCINFO_REC_SIZE ||
 7752	    urec_size % sizeof(u32)) {
 7753		verbose(env, "invalid func info rec size %u\n", urec_size);
 7754		return -EINVAL;
 7755	}
 7756
 7757	prog = env->prog;
 7758	btf = prog->aux->btf;
 7759
 7760	urecord = u64_to_user_ptr(attr->func_info);
 7761	min_size = min_t(u32, krec_size, urec_size);
 7762
 7763	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
 7764	if (!krecord)
 7765		return -ENOMEM;
 7766	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
 7767	if (!info_aux)
 7768		goto err_free;
 7769
 7770	for (i = 0; i < nfuncs; i++) {
 7771		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
 7772		if (ret) {
 7773			if (ret == -E2BIG) {
 7774				verbose(env, "nonzero tailing record in func info");
  7775				/* set the size the kernel expects so the loader can zero
 7776				 * out the rest of the record.
 7777				 */
 7778				if (put_user(min_size, &uattr->func_info_rec_size))
 7779					ret = -EFAULT;
 7780			}
 7781			goto err_free;
 7782		}
 7783
 7784		if (copy_from_user(&krecord[i], urecord, min_size)) {
 7785			ret = -EFAULT;
 7786			goto err_free;
 7787		}
 7788
 7789		/* check insn_off */
 7790		if (i == 0) {
 7791			if (krecord[i].insn_off) {
 7792				verbose(env,
 7793					"nonzero insn_off %u for the first func info record",
 7794					krecord[i].insn_off);
 7795				ret = -EINVAL;
 7796				goto err_free;
 7797			}
 7798		} else if (krecord[i].insn_off <= prev_offset) {
 7799			verbose(env,
 7800				"same or smaller insn offset (%u) than previous func info record (%u)",
 7801				krecord[i].insn_off, prev_offset);
 7802			ret = -EINVAL;
 7803			goto err_free;
 7804		}
 7805
 7806		if (env->subprog_info[i].start != krecord[i].insn_off) {
 7807			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
 7808			ret = -EINVAL;
 7809			goto err_free;
 7810		}
 7811
 7812		/* check type_id */
 7813		type = btf_type_by_id(btf, krecord[i].type_id);
 7814		if (!type || !btf_type_is_func(type)) {
 7815			verbose(env, "invalid type id %d in func info",
 7816				krecord[i].type_id);
 7817			ret = -EINVAL;
 7818			goto err_free;
 7819		}
 7820		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
 7821		prev_offset = krecord[i].insn_off;
 7822		urecord += urec_size;
 7823	}
 7824
 7825	prog->aux->func_info = krecord;
 7826	prog->aux->func_info_cnt = nfuncs;
 7827	prog->aux->func_info_aux = info_aux;
 7828	return 0;
 7829
 7830err_free:
 7831	kvfree(krecord);
 7832	kfree(info_aux);
 7833	return ret;
 7834}
 7835
 7836static void adjust_btf_func(struct bpf_verifier_env *env)
 7837{
 7838	struct bpf_prog_aux *aux = env->prog->aux;
 7839	int i;
 7840
 7841	if (!aux->func_info)
 7842		return;
 7843
 7844	for (i = 0; i < env->subprog_cnt; i++)
 7845		aux->func_info[i].insn_off = env->subprog_info[i].start;
 7846}
 7847
 7848#define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
 7849		sizeof(((struct bpf_line_info *)(0))->line_col))
 7850#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
 7851
 7852static int check_btf_line(struct bpf_verifier_env *env,
 7853			  const union bpf_attr *attr,
 7854			  union bpf_attr __user *uattr)
 7855{
 7856	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
 7857	struct bpf_subprog_info *sub;
 7858	struct bpf_line_info *linfo;
 7859	struct bpf_prog *prog;
 7860	const struct btf *btf;
 7861	void __user *ulinfo;
 7862	int err;
 7863
 7864	nr_linfo = attr->line_info_cnt;
 7865	if (!nr_linfo)
 7866		return 0;
 7867
 7868	rec_size = attr->line_info_rec_size;
 7869	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
 7870	    rec_size > MAX_LINEINFO_REC_SIZE ||
 7871	    rec_size & (sizeof(u32) - 1))
 7872		return -EINVAL;
 7873
  7874	/* Need to zero it in case userspace passes in
  7875	 * a smaller bpf_line_info object.
 7876	 */
 7877	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
 7878			 GFP_KERNEL | __GFP_NOWARN);
 7879	if (!linfo)
 7880		return -ENOMEM;
 7881
 7882	prog = env->prog;
 7883	btf = prog->aux->btf;
 7884
 7885	s = 0;
 7886	sub = env->subprog_info;
 7887	ulinfo = u64_to_user_ptr(attr->line_info);
 7888	expected_size = sizeof(struct bpf_line_info);
 7889	ncopy = min_t(u32, expected_size, rec_size);
 7890	for (i = 0; i < nr_linfo; i++) {
 7891		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
 7892		if (err) {
 7893			if (err == -E2BIG) {
 7894				verbose(env, "nonzero tailing record in line_info");
 7895				if (put_user(expected_size,
 7896					     &uattr->line_info_rec_size))
 7897					err = -EFAULT;
 7898			}
 7899			goto err_free;
 7900		}
 7901
 7902		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
 7903			err = -EFAULT;
 7904			goto err_free;
 7905		}
 7906
 7907		/*
 7908		 * Check insn_off to ensure
 7909		 * 1) strictly increasing AND
 7910		 * 2) bounded by prog->len
 7911		 *
 7912		 * The linfo[0].insn_off == 0 check logically falls into
 7913		 * the later "missing bpf_line_info for func..." case
  7914		 * because the first linfo[0].insn_off must also be the
  7915		 * start of the first subprog, and the first subprog must have
 7916		 * subprog_info[0].start == 0.
 7917		 */
 7918		if ((i && linfo[i].insn_off <= prev_offset) ||
 7919		    linfo[i].insn_off >= prog->len) {
 7920			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
 7921				i, linfo[i].insn_off, prev_offset,
 7922				prog->len);
 7923			err = -EINVAL;
 7924			goto err_free;
 7925		}
 7926
 7927		if (!prog->insnsi[linfo[i].insn_off].code) {
 7928			verbose(env,
 7929				"Invalid insn code at line_info[%u].insn_off\n",
 7930				i);
 7931			err = -EINVAL;
 7932			goto err_free;
 7933		}
 7934
 7935		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
 7936		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
 7937			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
 7938			err = -EINVAL;
 7939			goto err_free;
 7940		}
 7941
 7942		if (s != env->subprog_cnt) {
 7943			if (linfo[i].insn_off == sub[s].start) {
 7944				sub[s].linfo_idx = i;
 7945				s++;
 7946			} else if (sub[s].start < linfo[i].insn_off) {
 7947				verbose(env, "missing bpf_line_info for func#%u\n", s);
 7948				err = -EINVAL;
 7949				goto err_free;
 7950			}
 7951		}
 7952
 7953		prev_offset = linfo[i].insn_off;
 7954		ulinfo += rec_size;
 7955	}
 7956
 7957	if (s != env->subprog_cnt) {
 7958		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
 7959			env->subprog_cnt - s, s);
 7960		err = -EINVAL;
 7961		goto err_free;
 7962	}
 7963
 7964	prog->aux->linfo = linfo;
 7965	prog->aux->nr_linfo = nr_linfo;
 7966
 7967	return 0;
 7968
 7969err_free:
 7970	kvfree(linfo);
 7971	return err;
 7972}
 7973
 7974static int check_btf_info(struct bpf_verifier_env *env,
 7975			  const union bpf_attr *attr,
 7976			  union bpf_attr __user *uattr)
 7977{
 7978	struct btf *btf;
 7979	int err;
 7980
 7981	if (!attr->func_info_cnt && !attr->line_info_cnt)
 7982		return 0;
 7983
 7984	btf = btf_get_by_fd(attr->prog_btf_fd);
 7985	if (IS_ERR(btf))
 7986		return PTR_ERR(btf);
 7987	env->prog->aux->btf = btf;
 7988
 7989	err = check_btf_func(env, attr, uattr);
 7990	if (err)
 7991		return err;
 7992
 7993	err = check_btf_line(env, attr, uattr);
 7994	if (err)
 7995		return err;
 7996
 7997	return 0;
 7998}
 7999
 8000/* check %cur's range satisfies %old's */
 8001static bool range_within(struct bpf_reg_state *old,
 8002			 struct bpf_reg_state *cur)
 8003{
 8004	return old->umin_value <= cur->umin_value &&
 8005	       old->umax_value >= cur->umax_value &&
 8006	       old->smin_value <= cur->smin_value &&
 8007	       old->smax_value >= cur->smax_value;
 8008}
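/* Example: an old (already verified) scalar with bounds [2, 10] covers a
 * current scalar with bounds [4, 6], so the current range is within the
 * old one; bounds of [0, 12] would not be within and verification of the
 * path must continue.
 */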
 8009
 8010/* Maximum number of register states that can exist at once */
 8011#define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
 8012struct idpair {
 8013	u32 old;
 8014	u32 cur;
 8015};
 8016
 8017/* If in the old state two registers had the same id, then they need to have
 8018 * the same id in the new state as well.  But that id could be different from
 8019 * the old state, so we need to track the mapping from old to new ids.
 8020 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
 8021 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
 8022 * regs with a different old id could still have new id 9, we don't care about
 8023 * that.
 8024 * So we look through our idmap to see if this old id has been seen before.  If
 8025 * so, we require the new id to match; otherwise, we add the id pair to the map.
 8026 */
 8027static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
 8028{
 8029	unsigned int i;
 8030
 8031	for (i = 0; i < ID_MAP_SIZE; i++) {
 8032		if (!idmap[i].old) {
 8033			/* Reached an empty slot; haven't seen this id before */
 8034			idmap[i].old = old_id;
 8035			idmap[i].cur = cur_id;
 8036			return true;
 8037		}
 8038		if (idmap[i].old == old_id)
 8039			return idmap[i].cur == cur_id;
 8040	}
 8041	/* We ran out of idmap slots, which should be impossible */
 8042	WARN_ON_ONCE(1);
 8043	return false;
 8044}
 8045
 8046static void clean_func_state(struct bpf_verifier_env *env,
 8047			     struct bpf_func_state *st)
 8048{
 8049	enum bpf_reg_liveness live;
 8050	int i, j;
 8051
 8052	for (i = 0; i < BPF_REG_FP; i++) {
 8053		live = st->regs[i].live;
 8054		/* liveness must not touch this register anymore */
 8055		st->regs[i].live |= REG_LIVE_DONE;
 8056		if (!(live & REG_LIVE_READ))
 8057			/* since the register is unused, clear its state
 8058			 * to make further comparison simpler
 8059			 */
 8060			__mark_reg_not_init(env, &st->regs[i]);
 8061	}
 8062
 8063	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
 8064		live = st->stack[i].spilled_ptr.live;
 8065		/* liveness must not touch this stack slot anymore */
 8066		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
 8067		if (!(live & REG_LIVE_READ)) {
 8068			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
 8069			for (j = 0; j < BPF_REG_SIZE; j++)
 8070				st->stack[i].slot_type[j] = STACK_INVALID;
 8071		}
 8072	}
 8073}
 8074
 8075static void clean_verifier_state(struct bpf_verifier_env *env,
 8076				 struct bpf_verifier_state *st)
 8077{
 8078	int i;
 8079
 8080	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
 8081		/* all regs in this state in all frames were already marked */
 8082		return;
 8083
 8084	for (i = 0; i <= st->curframe; i++)
 8085		clean_func_state(env, st->frame[i]);
 8086}
 8087
 8088/* the parentage chains form a tree.
 8089 * the verifier states are added to state lists at given insn and
 8090 * pushed into state stack for future exploration.
  8091 * when the verifier reaches the bpf_exit insn some of the verifier states
 8092 * stored in the state lists have their final liveness state already,
 8093 * but a lot of states will get revised from liveness point of view when
 8094 * the verifier explores other branches.
 8095 * Example:
 8096 * 1: r0 = 1
 8097 * 2: if r1 == 100 goto pc+1
 8098 * 3: r0 = 2
 8099 * 4: exit
 8100 * when the verifier reaches exit insn the register r0 in the state list of
 8101 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
 8102 * of insn 2 and goes exploring further. At the insn 4 it will walk the
 8103 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
 8104 *
 8105 * Since the verifier pushes the branch states as it sees them while exploring
  8106 * the program, walking the branch instruction for the second
  8107 * time means that all states below this branch were already explored and
  8108 * their final liveness marks are already propagated.
 8109 * Hence when the verifier completes the search of state list in is_state_visited()
 8110 * we can call this clean_live_states() function to mark all liveness states
 8111 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
 8112 * will not be used.
 8113 * This function also clears the registers and stack for states that !READ
 8114 * to simplify state merging.
 8115 *
  8116 * An important note here is that walking the same branch instruction in the
  8117 * callee doesn't mean that the states are DONE. The verifier has to compare
  8118 * the callsites as well.
 8119 */
 8120static void clean_live_states(struct bpf_verifier_env *env, int insn,
 8121			      struct bpf_verifier_state *cur)
 8122{
 8123	struct bpf_verifier_state_list *sl;
 8124	int i;
 8125
 8126	sl = *explored_state(env, insn);
 8127	while (sl) {
 8128		if (sl->state.branches)
 8129			goto next;
 8130		if (sl->state.insn_idx != insn ||
 8131		    sl->state.curframe != cur->curframe)
 8132			goto next;
 8133		for (i = 0; i <= cur->curframe; i++)
 8134			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
 8135				goto next;
 8136		clean_verifier_state(env, &sl->state);
 8137next:
 8138		sl = sl->next;
 8139	}
 8140}
 8141
 8142/* Returns true if (rold safe implies rcur safe) */
 8143static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
 8144		    struct idpair *idmap)
 8145{
 8146	bool equal;
 8147
 8148	if (!(rold->live & REG_LIVE_READ))
 8149		/* explored state didn't use this */
 8150		return true;
 8151
 8152	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
 8153
 8154	if (rold->type == PTR_TO_STACK)
 8155		/* two stack pointers are equal only if they're pointing to
 8156		 * the same stack frame, since fp-8 in foo != fp-8 in bar
 8157		 */
 8158		return equal && rold->frameno == rcur->frameno;
 8159
 8160	if (equal)
 8161		return true;
 8162
 8163	if (rold->type == NOT_INIT)
 8164		/* explored state can't have used this */
 8165		return true;
 8166	if (rcur->type == NOT_INIT)
 8167		return false;
 8168	switch (rold->type) {
 8169	case SCALAR_VALUE:
 8170		if (rcur->type == SCALAR_VALUE) {
 8171			if (!rold->precise && !rcur->precise)
 8172				return true;
 8173			/* new val must satisfy old val knowledge */
 8174			return range_within(rold, rcur) &&
 8175			       tnum_in(rold->var_off, rcur->var_off);
 8176		} else {
 8177			/* We're trying to use a pointer in place of a scalar.
 8178			 * Even if the scalar was unbounded, this could lead to
 8179			 * pointer leaks because scalars are allowed to leak
 8180			 * while pointers are not. We could make this safe in
 8181			 * special cases if root is calling us, but it's
 8182			 * probably not worth the hassle.
 8183			 */
 8184			return false;
 8185		}
 8186	case PTR_TO_MAP_VALUE:
 8187		/* If the new min/max/var_off satisfy the old ones and
 8188		 * everything else matches, we are OK.
 8189		 * 'id' is not compared, since it's only used for maps with
 8190		 * bpf_spin_lock inside map element and in such cases if
 8191		 * the rest of the prog is valid for one map element then
 8192		 * it's valid for all map elements regardless of the key
 8193		 * used in bpf_map_lookup()
 8194		 */
 8195		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
 8196		       range_within(rold, rcur) &&
 8197		       tnum_in(rold->var_off, rcur->var_off);
 8198	case PTR_TO_MAP_VALUE_OR_NULL:
 8199		/* a PTR_TO_MAP_VALUE could be safe to use as a
 8200		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
 8201		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
 8202		 * checked, doing so could have affected others with the same
 8203		 * id, and we can't check for that because we lost the id when
 8204		 * we converted to a PTR_TO_MAP_VALUE.
 8205		 */
 8206		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
 8207			return false;
 8208		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
 8209			return false;
 8210		/* Check our ids match any regs they're supposed to */
 8211		return check_ids(rold->id, rcur->id, idmap);
 8212	case PTR_TO_PACKET_META:
 8213	case PTR_TO_PACKET:
 8214		if (rcur->type != rold->type)
 8215			return false;
 8216		/* We must have at least as much range as the old ptr
 8217		 * did, so that any accesses which were safe before are
 8218		 * still safe.  This is true even if old range < old off,
 8219		 * since someone could have accessed through (ptr - k), or
 8220		 * even done ptr -= k in a register, to get a safe access.
 8221		 */
 8222		if (rold->range > rcur->range)
 8223			return false;
 8224		/* If the offsets don't match, we can't trust our alignment;
 8225		 * nor can we be sure that we won't fall out of range.
 8226		 */
 8227		if (rold->off != rcur->off)
 8228			return false;
 8229		/* id relations must be preserved */
 8230		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
 8231			return false;
 8232		/* new val must satisfy old val knowledge */
 8233		return range_within(rold, rcur) &&
 8234		       tnum_in(rold->var_off, rcur->var_off);
 8235	case PTR_TO_CTX:
 8236	case CONST_PTR_TO_MAP:
 8237	case PTR_TO_PACKET_END:
 8238	case PTR_TO_FLOW_KEYS:
 8239	case PTR_TO_SOCKET:
 8240	case PTR_TO_SOCKET_OR_NULL:
 8241	case PTR_TO_SOCK_COMMON:
 8242	case PTR_TO_SOCK_COMMON_OR_NULL:
 8243	case PTR_TO_TCP_SOCK:
 8244	case PTR_TO_TCP_SOCK_OR_NULL:
 8245	case PTR_TO_XDP_SOCK:
 8246		/* Only valid matches are exact, which memcmp() above
 8247		 * would have accepted
 8248		 */
 8249	default:
 8250		/* Don't know what's going on, just say it's not safe */
 8251		return false;
 8252	}
 8253
 8254	/* Shouldn't get here; if we do, say it's not safe */
 8255	WARN_ON_ONCE(1);
 8256	return false;
 8257}
 8258
 8259static bool stacksafe(struct bpf_func_state *old,
 8260		      struct bpf_func_state *cur,
 8261		      struct idpair *idmap)
 8262{
 8263	int i, spi;
 8264
 8265	/* walk slots of the explored stack and ignore any additional
  8266	 * slots in the current stack, since the explored (safe) state
 8267	 * didn't use them
 8268	 */
 8269	for (i = 0; i < old->allocated_stack; i++) {
 8270		spi = i / BPF_REG_SIZE;
 8271
 8272		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
 8273			i += BPF_REG_SIZE - 1;
 8274			/* explored state didn't use this */
 8275			continue;
 8276		}
 8277
 8278		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
 8279			continue;
 8280
 8281		/* explored stack has more populated slots than current stack
 8282		 * and these slots were used
 8283		 */
 8284		if (i >= cur->allocated_stack)
 8285			return false;
 8286
 8287		/* if old state was safe with misc data in the stack
 8288		 * it will be safe with zero-initialized stack.
 8289		 * The opposite is not true
 8290		 */
 8291		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
 8292		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
 8293			continue;
 8294		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
 8295		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
 8296			/* Ex: old explored (safe) state has STACK_SPILL in
 8297			 * this stack slot, but current has STACK_MISC ->
  8298			 * these verifier states are not equivalent,
 8299			 * return false to continue verification of this path
 8300			 */
 8301			return false;
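      		/* The spilled register state is tracked once per 8-byte slot in
      		 * spilled_ptr, so compare it only at the start of a slot and
      		 * only when the old slot actually holds a spilled register.
      		 */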
 8302		if (i % BPF_REG_SIZE)
 8303			continue;
 8304		if (old->stack[spi].slot_type[0] != STACK_SPILL)
 8305			continue;
 8306		if (!regsafe(&old->stack[spi].spilled_ptr,
 8307			     &cur->stack[spi].spilled_ptr,
 8308			     idmap))
 8309			/* when explored and current stack slot are both storing
  8310			 * spilled registers, check that the stored pointer types
 8311			 * are the same as well.
 8312			 * Ex: explored safe path could have stored
 8313			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
 8314			 * but current path has stored:
 8315			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
 8316			 * such verifier states are not equivalent.
 8317			 * return false to continue verification of this path
 8318			 */
 8319			return false;
 8320	}
 8321	return true;
 8322}
 8323
 8324static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
 8325{
 8326	if (old->acquired_refs != cur->acquired_refs)
 8327		return false;
 8328	return !memcmp(old->refs, cur->refs,
 8329		       sizeof(*old->refs) * old->acquired_refs);
 8330}
 8331
 8332/* compare two verifier states
 8333 *
 8334 * all states stored in state_list are known to be valid, since
 8335 * verifier reached 'bpf_exit' instruction through them
 8336 *
 8337 * this function is called when verifier exploring different branches of
 8338 * execution popped from the state stack. If it sees an old state that has
 8339 * more strict register state and more strict stack state then this execution
 8340 * branch doesn't need to be explored further, since verifier already
 8341 * concluded that more strict state leads to valid finish.
 8342 *
  8343 * Therefore two states are equivalent if the explored register state and
  8344 * the explored stack state are more conservative than the current ones.
 8345 * Example:
 8346 *       explored                   current
 8347 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
 8348 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
 8349 *
  8350 * In other words, if the current stack state (the one being explored) has
  8351 * more valid slots than the old one that already passed validation, the
  8352 * verifier can stop exploring and conclude that the current state is valid too
 8353 *
 8354 * Similarly with registers. If explored state has register type as invalid
 8355 * whereas register type in current state is meaningful, it means that
 8356 * the current state will reach 'bpf_exit' instruction safely
 8357 */
 8358static bool func_states_equal(struct bpf_func_state *old,
 8359			      struct bpf_func_state *cur)
 8360{
 8361	struct idpair *idmap;
 8362	bool ret = false;
 8363	int i;
 8364
 8365	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
 8366	/* If we failed to allocate the idmap, just say it's not safe */
 8367	if (!idmap)
 8368		return false;
 8369
 8370	for (i = 0; i < MAX_BPF_REG; i++) {
 8371		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
 8372			goto out_free;
 8373	}
 8374
 8375	if (!stacksafe(old, cur, idmap))
 8376		goto out_free;
 8377
 8378	if (!refsafe(old, cur))
 8379		goto out_free;
 8380	ret = true;
 8381out_free:
 8382	kfree(idmap);
 8383	return ret;
 8384}
 8385
 8386static bool states_equal(struct bpf_verifier_env *env,
 8387			 struct bpf_verifier_state *old,
 8388			 struct bpf_verifier_state *cur)
 8389{
 8390	int i;
 8391
 8392	if (old->curframe != cur->curframe)
 8393		return false;
 8394
 8395	/* Verification state from speculative execution simulation
 8396	 * must never prune a non-speculative execution one.
 8397	 */
 8398	if (old->speculative && !cur->speculative)
 8399		return false;
 8400
 8401	if (old->active_spin_lock != cur->active_spin_lock)
 8402		return false;
 8403
 8404	/* for states to be equal callsites have to be the same
 8405	 * and all frame states need to be equivalent
 8406	 */
 8407	for (i = 0; i <= old->curframe; i++) {
 8408		if (old->frame[i]->callsite != cur->frame[i]->callsite)
 8409			return false;
 8410		if (!func_states_equal(old->frame[i], cur->frame[i]))
 8411			return false;
 8412	}
 8413	return true;
 8414}
 8415
 8416/* Return 0 if no propagation happened. Return negative error code if error
 8417 * happened. Otherwise, return the propagated bit.
 8418 */
 8419static int propagate_liveness_reg(struct bpf_verifier_env *env,
 8420				  struct bpf_reg_state *reg,
 8421				  struct bpf_reg_state *parent_reg)
 8422{
 8423	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
 8424	u8 flag = reg->live & REG_LIVE_READ;
 8425	int err;
 8426
  8427	/* By the time we get here, the read flags of PARENT_REG or REG could be
  8428	 * any of REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
  8429	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
 8430	 */
 8431	if (parent_flag == REG_LIVE_READ64 ||
 8432	    /* Or if there is no read flag from REG. */
 8433	    !flag ||
 8434	    /* Or if the read flag from REG is the same as PARENT_REG. */
 8435	    parent_flag == flag)
 8436		return 0;
 8437
 8438	err = mark_reg_read(env, reg, parent_reg, flag);
 8439	if (err)
 8440		return err;
 8441
 8442	return flag;
 8443}
 8444
 8445/* A write screens off any subsequent reads; but write marks come from the
 8446 * straight-line code between a state and its parent.  When we arrive at an
 8447 * equivalent state (jump target or such) we didn't arrive by the straight-line
 8448 * code, so read marks in the state must propagate to the parent regardless
 8449 * of the state's write marks. That's what 'parent == state->parent' comparison
 8450 * in mark_reg_read() is for.
 8451 */
 8452static int propagate_liveness(struct bpf_verifier_env *env,
 8453			      const struct bpf_verifier_state *vstate,
 8454			      struct bpf_verifier_state *vparent)
 8455{
 8456	struct bpf_reg_state *state_reg, *parent_reg;
 8457	struct bpf_func_state *state, *parent;
 8458	int i, frame, err = 0;
 8459
 8460	if (vparent->curframe != vstate->curframe) {
 8461		WARN(1, "propagate_live: parent frame %d current frame %d\n",
 8462		     vparent->curframe, vstate->curframe);
 8463		return -EFAULT;
 8464	}
 8465	/* Propagate read liveness of registers... */
 8466	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
 8467	for (frame = 0; frame <= vstate->curframe; frame++) {
 8468		parent = vparent->frame[frame];
 8469		state = vstate->frame[frame];
 8470		parent_reg = parent->regs;
 8471		state_reg = state->regs;
 8472		/* We don't need to worry about FP liveness, it's read-only */
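      		/* In caller frames only the callee-saved registers r6-r9 can
      		 * still be live across the call, so start at BPF_REG_6 there;
      		 * the innermost frame propagates r0-r9.
      		 */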
 8473		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
 8474			err = propagate_liveness_reg(env, &state_reg[i],
 8475						     &parent_reg[i]);
 8476			if (err < 0)
 8477				return err;
 8478			if (err == REG_LIVE_READ64)
 8479				mark_insn_zext(env, &parent_reg[i]);
 8480		}
 8481
 8482		/* Propagate stack slots. */
 8483		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
 8484			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
 8485			parent_reg = &parent->stack[i].spilled_ptr;
 8486			state_reg = &state->stack[i].spilled_ptr;
 8487			err = propagate_liveness_reg(env, state_reg,
 8488						     parent_reg);
 8489			if (err < 0)
 8490				return err;
 8491		}
 8492	}
 8493	return 0;
 8494}
 8495
 8496/* find precise scalars in the previous equivalent state and
 8497 * propagate them into the current state
 8498 */
 8499static int propagate_precision(struct bpf_verifier_env *env,
 8500			       const struct bpf_verifier_state *old)
 8501{
 8502	struct bpf_reg_state *state_reg;
 8503	struct bpf_func_state *state;
 8504	int i, err = 0;
 8505
 8506	state = old->frame[old->curframe];
 8507	state_reg = state->regs;
 8508	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
 8509		if (state_reg->type != SCALAR_VALUE ||
 8510		    !state_reg->precise)
 8511			continue;
 8512		if (env->log.level & BPF_LOG_LEVEL2)
 8513			verbose(env, "propagating r%d\n", i);
 8514		err = mark_chain_precision(env, i);
 8515		if (err < 0)
 8516			return err;
 8517	}
 8518
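      	/* Do the same for scalar values spilled to the stack; stack slot i
      	 * corresponds to the fp-(i+1)*8 offset printed in the log below.
      	 */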
 8519	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 8520		if (state->stack[i].slot_type[0] != STACK_SPILL)
 8521			continue;
 8522		state_reg = &state->stack[i].spilled_ptr;
 8523		if (state_reg->type != SCALAR_VALUE ||
 8524		    !state_reg->precise)
 8525			continue;
 8526		if (env->log.level & BPF_LOG_LEVEL2)
 8527			verbose(env, "propagating fp%d\n",
 8528				(-i - 1) * BPF_REG_SIZE);
 8529		err = mark_chain_precision_stack(env, i);
 8530		if (err < 0)
 8531			return err;
 8532	}
 8533	return 0;
 8534}
 8535
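      /* Heuristic check used by is_state_visited(): if all registers in the
       * current frame compare byte-for-byte equal to the old state (ignoring
       * the liveness/parentage fields), the program has likely not made any
       * progress since the old state and may be looping.
       */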
 8536static bool states_maybe_looping(struct bpf_verifier_state *old,
 8537				 struct bpf_verifier_state *cur)
 8538{
 8539	struct bpf_func_state *fold, *fcur;
 8540	int i, fr = cur->curframe;
 8541
 8542	if (old->curframe != fr)
 8543		return false;
 8544
 8545	fold = old->frame[fr];
 8546	fcur = cur->frame[fr];
 8547	for (i = 0; i < MAX_BPF_REG; i++)
 8548		if (memcmp(&fold->regs[i], &fcur->regs[i],
 8549			   offsetof(struct bpf_reg_state, parent)))
 8550			return false;
 8551	return true;
 8552}
 8553
 8554
 8555static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 8556{
 8557	struct bpf_verifier_state_list *new_sl;
 8558	struct bpf_verifier_state_list *sl, **pprev;
 8559	struct bpf_verifier_state *cur = env->cur_state, *new;
 8560	int i, j, err, states_cnt = 0;
 8561	bool add_new_state = env->test_state_freq ? true : false;
 8562
 8563	cur->last_insn_idx = env->prev_insn_idx;
 8564	if (!env->insn_aux_data[insn_idx].prune_point)
 8565		/* this 'insn_idx' instruction wasn't marked, so we will not
 8566		 * be doing state search here
 8567		 */
 8568		return 0;
 8569
  8570	/* bpf progs typically have a pruning point every 4 instructions
 8571	 * http://vger.kernel.org/bpfconf2019.html#session-1
 8572	 * Do not add new state for future pruning if the verifier hasn't seen
 8573	 * at least 2 jumps and at least 8 instructions.
  8574	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
  8575	 * In tests that amounts to up to a 50% reduction in total verifier
  8576	 * memory consumption and a 20% verifier time speedup.
 8577	 */
 8578	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
 8579	    env->insn_processed - env->prev_insn_processed >= 8)
 8580		add_new_state = true;
 8581
 8582	pprev = explored_state(env, insn_idx);
 8583	sl = *pprev;
 8584
 8585	clean_live_states(env, insn_idx, cur);
 8586
 8587	while (sl) {
 8588		states_cnt++;
 8589		if (sl->state.insn_idx != insn_idx)
 8590			goto next;
 8591		if (sl->state.branches) {
 8592			if (states_maybe_looping(&sl->state, cur) &&
 8593			    states_equal(env, &sl->state, cur)) {
 8594				verbose_linfo(env, insn_idx, "; ");
 8595				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
 8596				return -EINVAL;
 8597			}
 8598			/* if the verifier is processing a loop, avoid adding new state
 8599			 * too often, since different loop iterations have distinct
 8600			 * states and may not help future pruning.
 8601			 * This threshold shouldn't be too low to make sure that
 8602			 * a loop with large bound will be rejected quickly.
 8603			 * The most abusive loop will be:
 8604			 * r1 += 1
 8605			 * if r1 < 1000000 goto pc-2
  8606			 * 1M insn_processed limit / 100 == 10k peak states.
 8607			 * This threshold shouldn't be too high either, since states
 8608			 * at the end of the loop are likely to be useful in pruning.
 8609			 */
 8610			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
 8611			    env->insn_processed - env->prev_insn_processed < 100)
 8612				add_new_state = false;
 8613			goto miss;
 8614		}
 8615		if (states_equal(env, &sl->state, cur)) {
 8616			sl->hit_cnt++;
 8617			/* reached equivalent register/stack state,
 8618			 * prune the search.
 8619			 * Registers read by the continuation are read by us.
 8620			 * If we have any write marks in env->cur_state, they
 8621			 * will prevent corresponding reads in the continuation
 8622			 * from reaching our parent (an explored_state).  Our
 8623			 * own state will get the read marks recorded, but
 8624			 * they'll be immediately forgotten as we're pruning
 8625			 * this state and will pop a new one.
 8626			 */
 8627			err = propagate_liveness(env, &sl->state, cur);
 8628
 8629			/* if previous state reached the exit with precision and
  8630			 * current state is equivalent to it (except for precision marks)
  8631			 * the precision needs to be propagated back into
  8632			 * the current state.
 8633			 */
 8634			err = err ? : push_jmp_history(env, cur);
 8635			err = err ? : propagate_precision(env, &sl->state);
 8636			if (err)
 8637				return err;
 8638			return 1;
 8639		}
 8640miss:
  8641		/* when a new state is not going to be added, do not increase the miss count.
 8642		 * Otherwise several loop iterations will remove the state
 8643		 * recorded earlier. The goal of these heuristics is to have
 8644		 * states from some iterations of the loop (some in the beginning
 8645		 * and some at the end) to help pruning.
 8646		 */
 8647		if (add_new_state)
 8648			sl->miss_cnt++;
 8649		/* heuristic to determine whether this state is beneficial
 8650		 * to keep checking from state equivalence point of view.
 8651		 * Higher numbers increase max_states_per_insn and verification time,
 8652		 * but do not meaningfully decrease insn_processed.
 8653		 */
 8654		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
 8655			/* the state is unlikely to be useful. Remove it to
 8656			 * speed up verification
 8657			 */
 8658			*pprev = sl->next;
 8659			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
 8660				u32 br = sl->state.branches;
 8661
 8662				WARN_ONCE(br,
 8663					  "BUG live_done but branches_to_explore %d\n",
 8664					  br);
 8665				free_verifier_state(&sl->state, false);
 8666				kfree(sl);
 8667				env->peak_states--;
 8668			} else {
 8669				/* cannot free this state, since parentage chain may
 8670				 * walk it later. Add it for free_list instead to
 8671				 * be freed at the end of verification
 8672				 */
 8673				sl->next = env->free_list;
 8674				env->free_list = sl;
 8675			}
 8676			sl = *pprev;
 8677			continue;
 8678		}
 8679next:
 8680		pprev = &sl->next;
 8681		sl = *pprev;
 8682	}
 8683
 8684	if (env->max_states_per_insn < states_cnt)
 8685		env->max_states_per_insn = states_cnt;
 8686
 8687	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
 8688		return push_jmp_history(env, cur);
 8689
 8690	if (!add_new_state)
 8691		return push_jmp_history(env, cur);
 8692
 8693	/* There were no equivalent states, remember the current one.
 8694	 * Technically the current state is not proven to be safe yet,
  8695	 * but it will either reach the outermost bpf_exit (which means it's safe)
 8696	 * or it will be rejected. When there are no loops the verifier won't be
 8697	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
 8698	 * again on the way to bpf_exit.
 8699	 * When looping the sl->state.branches will be > 0 and this state
 8700	 * will not be considered for equivalence until branches == 0.
 8701	 */
 8702	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
 8703	if (!new_sl)
 8704		return -ENOMEM;
 8705	env->total_states++;
 8706	env->peak_states++;
 8707	env->prev_jmps_processed = env->jmps_processed;
 8708	env->prev_insn_processed = env->insn_processed;
 8709
 8710	/* add new state to the head of linked list */
 8711	new = &new_sl->state;
 8712	err = copy_verifier_state(new, cur);
 8713	if (err) {
 8714		free_verifier_state(new, false);
 8715		kfree(new_sl);
 8716		return err;
 8717	}
 8718	new->insn_idx = insn_idx;
 8719	WARN_ONCE(new->branches != 1,
 8720		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
 8721
 8722	cur->parent = new;
 8723	cur->first_insn_idx = insn_idx;
 8724	clear_jmp_history(cur);
 8725	new_sl->next = *explored_state(env, insn_idx);
 8726	*explored_state(env, insn_idx) = new_sl;
 8727	/* connect new state to parentage chain. Current frame needs all
 8728	 * registers connected. Only r6 - r9 of the callers are alive (pushed
 8729	 * to the stack implicitly by JITs) so in callers' frames connect just
 8730	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
 8731	 * the state of the call instruction (with WRITTEN set), and r0 comes
 8732	 * from callee with its full parentage chain, anyway.
 8733	 */
 8734	/* clear write marks in current state: the writes we did are not writes
 8735	 * our child did, so they don't screen off its reads from us.
 8736	 * (There are no read marks in current state, because reads always mark
 8737	 * their parent and current state never has children yet.  Only
 8738	 * explored_states can get read marks.)
 8739	 */
 8740	for (j = 0; j <= cur->curframe; j++) {
 8741		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
 8742			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
 8743		for (i = 0; i < BPF_REG_FP; i++)
 8744			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
 8745	}
 8746
 8747	/* all stack frames are accessible from callee, clear them all */
 8748	for (j = 0; j <= cur->curframe; j++) {
 8749		struct bpf_func_state *frame = cur->frame[j];
 8750		struct bpf_func_state *newframe = new->frame[j];
 8751
 8752		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
 8753			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
 8754			frame->stack[i].spilled_ptr.parent =
 8755						&newframe->stack[i].spilled_ptr;
 8756		}
 8757	}
 8758	return 0;
 8759}
 8760
 8761/* Return true if it's OK to have the same insn return a different type. */
 8762static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 8763{
 8764	switch (type) {
 8765	case PTR_TO_CTX:
 8766	case PTR_TO_SOCKET:
 8767	case PTR_TO_SOCKET_OR_NULL:
 8768	case PTR_TO_SOCK_COMMON:
 8769	case PTR_TO_SOCK_COMMON_OR_NULL:
 8770	case PTR_TO_TCP_SOCK:
 8771	case PTR_TO_TCP_SOCK_OR_NULL:
 8772	case PTR_TO_XDP_SOCK:
 8773	case PTR_TO_BTF_ID:
 8774	case PTR_TO_BTF_ID_OR_NULL:
 8775		return false;
 8776	default:
 8777		return true;
 8778	}
 8779}
 8780
 8781/* If an instruction was previously used with particular pointer types, then we
 8782 * need to be careful to avoid cases such as the below, where it may be ok
  8783 * for one branch to access the pointer, but not for the other branch:
 8784 *
 8785 * R1 = sock_ptr
 8786 * goto X;
 8787 * ...
 8788 * R1 = some_other_valid_ptr;
 8789 * goto X;
 8790 * ...
 8791 * R2 = *(u32 *)(R1 + 0);
 8792 */
 8793static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
 8794{
 8795	return src != prev && (!reg_type_mismatch_ok(src) ||
 8796			       !reg_type_mismatch_ok(prev));
 8797}
 8798
 8799static int do_check(struct bpf_verifier_env *env)
 8800{
 8801	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
 8802	struct bpf_verifier_state *state = env->cur_state;
 8803	struct bpf_insn *insns = env->prog->insnsi;
 8804	struct bpf_reg_state *regs;
 8805	int insn_cnt = env->prog->len;
 8806	bool do_print_state = false;
 8807	int prev_insn_idx = -1;
 8808
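      	/* Main verification loop: process one instruction at a time and
      	 * simulate its effect on the register/stack state.  env->insn_idx is
      	 * advanced explicitly by jumps, calls and exits below.
      	 */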
 8809	for (;;) {
 8810		struct bpf_insn *insn;
 8811		u8 class;
 8812		int err;
 8813
 8814		env->prev_insn_idx = prev_insn_idx;
 8815		if (env->insn_idx >= insn_cnt) {
 8816			verbose(env, "invalid insn idx %d insn_cnt %d\n",
 8817				env->insn_idx, insn_cnt);
 8818			return -EFAULT;
 8819		}
 8820
 8821		insn = &insns[env->insn_idx];
 8822		class = BPF_CLASS(insn->code);
 8823
 8824		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
 8825			verbose(env,
 8826				"BPF program is too large. Processed %d insn\n",
 8827				env->insn_processed);
 8828			return -E2BIG;
 8829		}
 8830
 8831		err = is_state_visited(env, env->insn_idx);
 8832		if (err < 0)
 8833			return err;
 8834		if (err == 1) {
 8835			/* found equivalent state, can prune the search */
 8836			if (env->log.level & BPF_LOG_LEVEL) {
 8837				if (do_print_state)
 8838					verbose(env, "\nfrom %d to %d%s: safe\n",
 8839						env->prev_insn_idx, env->insn_idx,
 8840						env->cur_state->speculative ?
 8841						" (speculative execution)" : "");
 8842				else
 8843					verbose(env, "%d: safe\n", env->insn_idx);
 8844			}
 8845			goto process_bpf_exit;
 8846		}
 8847
 8848		if (signal_pending(current))
 8849			return -EAGAIN;
 8850
 8851		if (need_resched())
 8852			cond_resched();
 8853
 8854		if (env->log.level & BPF_LOG_LEVEL2 ||
 8855		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
 8856			if (env->log.level & BPF_LOG_LEVEL2)
 8857				verbose(env, "%d:", env->insn_idx);
 8858			else
 8859				verbose(env, "\nfrom %d to %d%s:",
 8860					env->prev_insn_idx, env->insn_idx,
 8861					env->cur_state->speculative ?
 8862					" (speculative execution)" : "");
 8863			print_verifier_state(env, state->frame[state->curframe]);
 8864			do_print_state = false;
 8865		}
 8866
 8867		if (env->log.level & BPF_LOG_LEVEL) {
 8868			const struct bpf_insn_cbs cbs = {
 8869				.cb_print	= verbose,
 8870				.private_data	= env,
 8871			};
 8872
 8873			verbose_linfo(env, env->insn_idx, "; ");
 8874			verbose(env, "%d: ", env->insn_idx);
 8875			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
 8876		}
 8877
 8878		if (bpf_prog_is_dev_bound(env->prog->aux)) {
 8879			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
 8880							   env->prev_insn_idx);
 8881			if (err)
 8882				return err;
 8883		}
 8884
 8885		regs = cur_regs(env);
 8886		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
 8887		prev_insn_idx = env->insn_idx;
 8888
 8889		if (class == BPF_ALU || class == BPF_ALU64) {
 8890			err = check_alu_op(env, insn);
 8891			if (err)
 8892				return err;
 8893
 8894		} else if (class == BPF_LDX) {
 8895			enum bpf_reg_type *prev_src_type, src_reg_type;
 8896
 8897			/* check for reserved fields is already done */
 8898
 8899			/* check src operand */
 8900			err = check_reg_arg(env, insn->src_reg, SRC_OP);
 8901			if (err)
 8902				return err;
 8903
 8904			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
 8905			if (err)
 8906				return err;
 8907
 8908			src_reg_type = regs[insn->src_reg].type;
 8909
 8910			/* check that memory (src_reg + off) is readable,
 8911			 * the state of dst_reg will be updated by this func
 8912			 */
 8913			err = check_mem_access(env, env->insn_idx, insn->src_reg,
 8914					       insn->off, BPF_SIZE(insn->code),
 8915					       BPF_READ, insn->dst_reg, false);
 8916			if (err)
 8917				return err;
 8918
 8919			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
 8920
 8921			if (*prev_src_type == NOT_INIT) {
 8922				/* saw a valid insn
 8923				 * dst_reg = *(u32 *)(src_reg + off)
 8924				 * save type to validate intersecting paths
 8925				 */
 8926				*prev_src_type = src_reg_type;
 8927
 8928			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
  8929				/* An abuser program is trying to use the same insn
 8930				 * dst_reg = *(u32*) (src_reg + off)
 8931				 * with different pointer types:
 8932				 * src_reg == ctx in one branch and
 8933				 * src_reg == stack|map in some other branch.
 8934				 * Reject it.
 8935				 */
 8936				verbose(env, "same insn cannot be used with different pointers\n");
 8937				return -EINVAL;
 8938			}
 8939
 8940		} else if (class == BPF_STX) {
 8941			enum bpf_reg_type *prev_dst_type, dst_reg_type;
 8942
 8943			if (BPF_MODE(insn->code) == BPF_XADD) {
 8944				err = check_xadd(env, env->insn_idx, insn);
 8945				if (err)
 8946					return err;
 8947				env->insn_idx++;
 8948				continue;
 8949			}
 8950
 8951			/* check src1 operand */
 8952			err = check_reg_arg(env, insn->src_reg, SRC_OP);
 8953			if (err)
 8954				return err;
 8955			/* check src2 operand */
 8956			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 8957			if (err)
 8958				return err;
 8959
 8960			dst_reg_type = regs[insn->dst_reg].type;
 8961
 8962			/* check that memory (dst_reg + off) is writeable */
 8963			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
 8964					       insn->off, BPF_SIZE(insn->code),
 8965					       BPF_WRITE, insn->src_reg, false);
 8966			if (err)
 8967				return err;
 8968
 8969			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
 8970
 8971			if (*prev_dst_type == NOT_INIT) {
 8972				*prev_dst_type = dst_reg_type;
 8973			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
 8974				verbose(env, "same insn cannot be used with different pointers\n");
 8975				return -EINVAL;
 8976			}
 8977
 8978		} else if (class == BPF_ST) {
 8979			if (BPF_MODE(insn->code) != BPF_MEM ||
 8980			    insn->src_reg != BPF_REG_0) {
 8981				verbose(env, "BPF_ST uses reserved fields\n");
 8982				return -EINVAL;
 8983			}
 8984			/* check src operand */
 8985			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
 8986			if (err)
 8987				return err;
 8988
 8989			if (is_ctx_reg(env, insn->dst_reg)) {
 8990				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
 8991					insn->dst_reg,
 8992					reg_type_str[reg_state(env, insn->dst_reg)->type]);
 8993				return -EACCES;
 8994			}
 8995
 8996			/* check that memory (dst_reg + off) is writeable */
 8997			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
 8998					       insn->off, BPF_SIZE(insn->code),
 8999					       BPF_WRITE, -1, false);
 9000			if (err)
 9001				return err;
 9002
 9003		} else if (class == BPF_JMP || class == BPF_JMP32) {
 9004			u8 opcode = BPF_OP(insn->code);
 9005
 9006			env->jmps_processed++;
 9007			if (opcode == BPF_CALL) {
 9008				if (BPF_SRC(insn->code) != BPF_K ||
 9009				    insn->off != 0 ||
 9010				    (insn->src_reg != BPF_REG_0 &&
 9011				     insn->src_reg != BPF_PSEUDO_CALL) ||
 9012				    insn->dst_reg != BPF_REG_0 ||
 9013				    class == BPF_JMP32) {
 9014					verbose(env, "BPF_CALL uses reserved fields\n");
 9015					return -EINVAL;
 9016				}
 9017
 9018				if (env->cur_state->active_spin_lock &&
 9019				    (insn->src_reg == BPF_PSEUDO_CALL ||
 9020				     insn->imm != BPF_FUNC_spin_unlock)) {
 9021					verbose(env, "function calls are not allowed while holding a lock\n");
 9022					return -EINVAL;
 9023				}
 9024				if (insn->src_reg == BPF_PSEUDO_CALL)
 9025					err = check_func_call(env, insn, &env->insn_idx);
 9026				else
 9027					err = check_helper_call(env, insn->imm, env->insn_idx);
 9028				if (err)
 9029					return err;
 9030
 9031			} else if (opcode == BPF_JA) {
 9032				if (BPF_SRC(insn->code) != BPF_K ||
 9033				    insn->imm != 0 ||
 9034				    insn->src_reg != BPF_REG_0 ||
 9035				    insn->dst_reg != BPF_REG_0 ||
 9036				    class == BPF_JMP32) {
 9037					verbose(env, "BPF_JA uses reserved fields\n");
 9038					return -EINVAL;
 9039				}
 9040
 9041				env->insn_idx += insn->off + 1;
 9042				continue;
 9043
 9044			} else if (opcode == BPF_EXIT) {
 9045				if (BPF_SRC(insn->code) != BPF_K ||
 9046				    insn->imm != 0 ||
 9047				    insn->src_reg != BPF_REG_0 ||
 9048				    insn->dst_reg != BPF_REG_0 ||
 9049				    class == BPF_JMP32) {
 9050					verbose(env, "BPF_EXIT uses reserved fields\n");
 9051					return -EINVAL;
 9052				}
 9053
 9054				if (env->cur_state->active_spin_lock) {
 9055					verbose(env, "bpf_spin_unlock is missing\n");
 9056					return -EINVAL;
 9057				}
 9058
 9059				if (state->curframe) {
 9060					/* exit from nested function */
 9061					err = prepare_func_exit(env, &env->insn_idx);
 9062					if (err)
 9063						return err;
 9064					do_print_state = true;
 9065					continue;
 9066				}
 9067
 9068				err = check_reference_leak(env);
 9069				if (err)
 9070					return err;
 9071
 9072				err = check_return_code(env);
 9073				if (err)
 9074					return err;
 9075process_bpf_exit:
 9076				update_branch_counts(env, env->cur_state);
 9077				err = pop_stack(env, &prev_insn_idx,
 9078						&env->insn_idx, pop_log);
 9079				if (err < 0) {
 9080					if (err != -ENOENT)
 9081						return err;
 9082					break;
 9083				} else {
 9084					do_print_state = true;
 9085					continue;
 9086				}
 9087			} else {
 9088				err = check_cond_jmp_op(env, insn, &env->insn_idx);
 9089				if (err)
 9090					return err;
 9091			}
 9092		} else if (class == BPF_LD) {
 9093			u8 mode = BPF_MODE(insn->code);
 9094
 9095			if (mode == BPF_ABS || mode == BPF_IND) {
 9096				err = check_ld_abs(env, insn);
 9097				if (err)
 9098					return err;
 9099
 9100			} else if (mode == BPF_IMM) {
 9101				err = check_ld_imm(env, insn);
 9102				if (err)
 9103					return err;
 9104
 9105				env->insn_idx++;
 9106				env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
 9107			} else {
 9108				verbose(env, "invalid BPF_LD mode\n");
 9109				return -EINVAL;
 9110			}
 9111		} else {
 9112			verbose(env, "unknown insn class %d\n", class);
 9113			return -EINVAL;
 9114		}
 9115
 9116		env->insn_idx++;
 9117	}
 9118
 9119	return 0;
 9120}
 9121
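      /* A map counts as preallocated unless it is a hash, per-cpu hash or
       * hash-of-maps map created with the BPF_F_NO_PREALLOC flag.
       */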
 9122static int check_map_prealloc(struct bpf_map *map)
 9123{
 9124	return (map->map_type != BPF_MAP_TYPE_HASH &&
 9125		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
 9126		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
 9127		!(map->map_flags & BPF_F_NO_PREALLOC);
 9128}
 9129
 9130static bool is_tracing_prog_type(enum bpf_prog_type type)
 9131{
 9132	switch (type) {
 9133	case BPF_PROG_TYPE_KPROBE:
 9134	case BPF_PROG_TYPE_TRACEPOINT:
 9135	case BPF_PROG_TYPE_PERF_EVENT:
 9136	case BPF_PROG_TYPE_RAW_TRACEPOINT:
 9137		return true;
 9138	default:
 9139		return false;
 9140	}
 9141}
 9142
 9143static bool is_preallocated_map(struct bpf_map *map)
 9144{
 9145	if (!check_map_prealloc(map))
 9146		return false;
 9147	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
 9148		return false;
 9149	return true;
 9150}
 9151
 9152static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 9153					struct bpf_map *map,
 9154					struct bpf_prog *prog)
 9155
 9156{
 9157	/*
 9158	 * Validate that trace type programs use preallocated hash maps.
 9159	 *
 9160	 * For programs attached to PERF events this is mandatory as the
 9161	 * perf NMI can hit any arbitrary code sequence.
 9162	 *
  9163	 * All other trace types using non-preallocated hash maps are unsafe as
 9164	 * well because tracepoint or kprobes can be inside locked regions
 9165	 * of the memory allocator or at a place where a recursion into the
 9166	 * memory allocator would see inconsistent state.
 9167	 *
 9168	 * On RT enabled kernels run-time allocation of all trace type
 9169	 * programs is strictly prohibited due to lock type constraints. On
 9170	 * !RT kernels it is allowed for backwards compatibility reasons for
 9171	 * now, but warnings are emitted so developers are made aware of
 9172	 * the unsafety and can fix their programs before this is enforced.
 9173	 */
 9174	if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) {
 9175		if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
 9176			verbose(env, "perf_event programs can only use preallocated hash map\n");
 9177			return -EINVAL;
 9178		}
 9179		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
 9180			verbose(env, "trace type programs can only use preallocated hash map\n");
 9181			return -EINVAL;
 9182		}
 9183		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
 9184		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
 9185	}
 9186
 9187	if ((is_tracing_prog_type(prog->type) ||
 9188	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
 9189	    map_value_has_spin_lock(map)) {
 9190		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
 9191		return -EINVAL;
 9192	}
 9193
 9194	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
 9195	    !bpf_offload_prog_map_match(prog, map)) {
 9196		verbose(env, "offload device mismatch between prog and map\n");
 9197		return -EINVAL;
 9198	}
 9199
 9200	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
 9201		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
 9202		return -EINVAL;
 9203	}
 9204
 9205	return 0;
 9206}
 9207
 9208static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
 9209{
 9210	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
 9211		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
 9212}
 9213
 9214/* look for pseudo eBPF instructions that access map FDs and
 9215 * replace them with actual map pointers
 9216 */
 9217static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
 9218{
 9219	struct bpf_insn *insn = env->prog->insnsi;
 9220	int insn_cnt = env->prog->len;
 9221	int i, j, err;
 9222
 9223	err = bpf_prog_calc_tag(env->prog);
 9224	if (err)
 9225		return err;
 9226
 9227	for (i = 0; i < insn_cnt; i++, insn++) {
 9228		if (BPF_CLASS(insn->code) == BPF_LDX &&
 9229		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
 9230			verbose(env, "BPF_LDX uses reserved fields\n");
 9231			return -EINVAL;
 9232		}
 9233
 9234		if (BPF_CLASS(insn->code) == BPF_STX &&
 9235		    ((BPF_MODE(insn->code) != BPF_MEM &&
 9236		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
 9237			verbose(env, "BPF_STX uses reserved fields\n");
 9238			return -EINVAL;
 9239		}
 9240
 9241		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
 9242			struct bpf_insn_aux_data *aux;
 9243			struct bpf_map *map;
 9244			struct fd f;
 9245			u64 addr;
 9246
 9247			if (i == insn_cnt - 1 || insn[1].code != 0 ||
 9248			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
 9249			    insn[1].off != 0) {
 9250				verbose(env, "invalid bpf_ld_imm64 insn\n");
 9251				return -EINVAL;
 9252			}
 9253
 9254			if (insn[0].src_reg == 0)
 9255				/* valid generic load 64-bit imm */
 9256				goto next_insn;
 9257
  9258			/* In the final convert_pseudo_ld_imm64() step, this is
  9259			 * converted into a regular 64-bit imm load insn.
  9260			 */
 9261			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
 9262			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
 9263			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
 9264			     insn[1].imm != 0)) {
 9265				verbose(env,
 9266					"unrecognized bpf_ld_imm64 insn\n");
 9267				return -EINVAL;
 9268			}
 9269
 9270			f = fdget(insn[0].imm);
 9271			map = __bpf_map_get(f);
 9272			if (IS_ERR(map)) {
 9273				verbose(env, "fd %d is not pointing to valid bpf_map\n",
 9274					insn[0].imm);
 9275				return PTR_ERR(map);
 9276			}
 9277
 9278			err = check_map_prog_compatibility(env, map, env->prog);
 9279			if (err) {
 9280				fdput(f);
 9281				return err;
 9282			}
 9283
 9284			aux = &env->insn_aux_data[i];
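      			/* BPF_PSEUDO_MAP_FD loads the map pointer itself;
      			 * BPF_PSEUDO_MAP_VALUE loads the address of a value
      			 * inside the map at the offset given in insn[1].imm.
      			 */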
 9285			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
 9286				addr = (unsigned long)map;
 9287			} else {
 9288				u32 off = insn[1].imm;
 9289
 9290				if (off >= BPF_MAX_VAR_OFF) {
 9291					verbose(env, "direct value offset of %u is not allowed\n", off);
 9292					fdput(f);
 9293					return -EINVAL;
 9294				}
 9295
 9296				if (!map->ops->map_direct_value_addr) {
 9297					verbose(env, "no direct value access support for this map type\n");
 9298					fdput(f);
 9299					return -EINVAL;
 9300				}
 9301
 9302				err = map->ops->map_direct_value_addr(map, &addr, off);
 9303				if (err) {
 9304					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
 9305						map->value_size, off);
 9306					fdput(f);
 9307					return err;
 9308				}
 9309
 9310				aux->map_off = off;
 9311				addr += off;
 9312			}
 9313
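      			/* Split the resulting 64-bit address across the two
      			 * 32-bit imm fields of the ld_imm64 instruction pair.
      			 */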
 9314			insn[0].imm = (u32)addr;
 9315			insn[1].imm = addr >> 32;
 9316
 9317			/* check whether we recorded this map already */
 9318			for (j = 0; j < env->used_map_cnt; j++) {
 9319				if (env->used_maps[j] == map) {
 9320					aux->map_index = j;
 9321					fdput(f);
 9322					goto next_insn;
 9323				}
 9324			}
 9325
 9326			if (env->used_map_cnt >= MAX_USED_MAPS) {
 9327				fdput(f);
 9328				return -E2BIG;
 9329			}
 9330
 9331			/* hold the map. If the program is rejected by verifier,
 9332			 * the map will be released by release_maps() or it
 9333			 * will be used by the valid program until it's unloaded
 9334			 * and all maps are released in free_used_maps()
 9335			 */
 9336			bpf_map_inc(map);
 9337
 9338			aux->map_index = env->used_map_cnt;
 9339			env->used_maps[env->used_map_cnt++] = map;
 9340
 9341			if (bpf_map_is_cgroup_storage(map) &&
 9342			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
 9343				verbose(env, "only one cgroup storage of each type is allowed\n");
 9344				fdput(f);
 9345				return -EBUSY;
 9346			}
 9347
 9348			fdput(f);
 9349next_insn:
 9350			insn++;
 9351			i++;
 9352			continue;
 9353		}
 9354
 9355		/* Basic sanity check before we invest more work here. */
 9356		if (!bpf_opcode_in_insntable(insn->code)) {
 9357			verbose(env, "unknown opcode %02x\n", insn->code);
 9358			return -EINVAL;
 9359		}
 9360	}
 9361
 9362	/* now all pseudo BPF_LD_IMM64 instructions load valid
 9363	 * 'struct bpf_map *' into a register instead of user map_fd.
 9364	 * These pointers will be used later by verifier to validate map access.
 9365	 */
 9366	return 0;
 9367}
 9368
 9369/* drop refcnt of maps used by the rejected program */
 9370static void release_maps(struct bpf_verifier_env *env)
 9371{
 9372	__bpf_free_used_maps(env->prog->aux, env->used_maps,
 9373			     env->used_map_cnt);
 9374}
 9375
 9376/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
 9377static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
 9378{
 9379	struct bpf_insn *insn = env->prog->insnsi;
 9380	int insn_cnt = env->prog->len;
 9381	int i;
 9382
 9383	for (i = 0; i < insn_cnt; i++, insn++)
 9384		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
 9385			insn->src_reg = 0;
 9386}
 9387
 9388/* single env->prog->insni[off] instruction was replaced with the range
 9389 * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
 9390 * [0, off) and [off, end) to new locations, so the patched range stays zero
 9391 */
 9392static int adjust_insn_aux_data(struct bpf_verifier_env *env,
 9393				struct bpf_prog *new_prog, u32 off, u32 cnt)
 9394{
 9395	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
 9396	struct bpf_insn *insn = new_prog->insnsi;
 9397	u32 prog_len;
 9398	int i;
 9399
  9400	/* aux info at OFF always needs adjustment, no matter whether the fast path
  9401	 * (cnt == 1) is taken or not. There is no guarantee that the INSN at OFF is
  9402	 * the original insn of the old prog.
 9403	 */
 9404	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
 9405
 9406	if (cnt == 1)
 9407		return 0;
 9408	prog_len = new_prog->len;
 9409	new_data = vzalloc(array_size(prog_len,
 9410				      sizeof(struct bpf_insn_aux_data)));
 9411	if (!new_data)
 9412		return -ENOMEM;
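      	/* Entries [0, off) keep their position, the entry of the original
      	 * insn at OFF now describes the last instruction of the patch, and
      	 * the remaining tail is shifted by cnt - 1.  The newly inserted
      	 * range gets zeroed entries, marked as seen in this pass below.
      	 */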
 9413	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
 9414	memcpy(new_data + off + cnt - 1, old_data + off,
 9415	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
 9416	for (i = off; i < off + cnt - 1; i++) {
 9417		new_data[i].seen = env->pass_cnt;
 9418		new_data[i].zext_dst = insn_has_def32(env, insn + i);
 9419	}
 9420	env->insn_aux_data = new_data;
 9421	vfree(old_data);
 9422	return 0;
 9423}
 9424
 9425static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
 9426{
 9427	int i;
 9428
 9429	if (len == 1)
 9430		return;
 9431	/* NOTE: fake 'exit' subprog should be updated as well. */
 9432	for (i = 0; i <= env->subprog_cnt; i++) {
 9433		if (env->subprog_info[i].start <= off)
 9434			continue;
 9435		env->subprog_info[i].start += len - 1;
 9436	}
 9437}
 9438
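      /* Patch the single instruction at OFF with the LEN-insn sequence PATCH and
       * keep insn_aux_data and the subprog boundaries in sync with the new layout.
       */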
 9439static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
 9440					    const struct bpf_insn *patch, u32 len)
 9441{
 9442	struct bpf_prog *new_prog;
 9443
 9444	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
 9445	if (IS_ERR(new_prog)) {
 9446		if (PTR_ERR(new_prog) == -ERANGE)
 9447			verbose(env,
 9448				"insn %d cannot be patched due to 16-bit range\n",
 9449				env->insn_aux_data[off].orig_idx);
 9450		return NULL;
 9451	}
 9452	if (adjust_insn_aux_data(env, new_prog, off, len))
 9453		return NULL;
 9454	adjust_subprog_starts(env, off, len);
 9455	return new_prog;
 9456}
 9457
 9458static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
 9459					      u32 off, u32 cnt)
 9460{
 9461	int i, j;
 9462
 9463	/* find first prog starting at or after off (first to remove) */
 9464	for (i = 0; i < env->subprog_cnt; i++)
 9465		if (env->subprog_info[i].start >= off)
 9466			break;
 9467	/* find first prog starting at or after off + cnt (first to stay) */
 9468	for (j = i; j < env->subprog_cnt; j++)
 9469		if (env->subprog_info[j].start >= off + cnt)
 9470			break;
 9471	/* if j doesn't start exactly at off + cnt, we are just removing
 9472	 * the front of previous prog
 9473	 */
 9474	if (env->subprog_info[j].start != off + cnt)
 9475		j--;
 9476
 9477	if (j > i) {
 9478		struct bpf_prog_aux *aux = env->prog->aux;
 9479		int move;
 9480
 9481		/* move fake 'exit' subprog as well */
 9482		move = env->subprog_cnt + 1 - j;
 9483
 9484		memmove(env->subprog_info + i,
 9485			env->subprog_info + j,
 9486			sizeof(*env->subprog_info) * move);
 9487		env->subprog_cnt -= j - i;
 9488
 9489		/* remove func_info */
 9490		if (aux->func_info) {
 9491			move = aux->func_info_cnt - j;
 9492
 9493			memmove(aux->func_info + i,
 9494				aux->func_info + j,
 9495				sizeof(*aux->func_info) * move);
 9496			aux->func_info_cnt -= j - i;
 9497			/* func_info->insn_off is set after all code rewrites,
 9498			 * in adjust_btf_func() - no need to adjust
 9499			 */
 9500		}
 9501	} else {
 9502		/* convert i from "first prog to remove" to "first to adjust" */
 9503		if (env->subprog_info[i].start == off)
 9504			i++;
 9505	}
 9506
 9507	/* update fake 'exit' subprog as well */
 9508	for (; i <= env->subprog_cnt; i++)
 9509		env->subprog_info[i].start -= cnt;
 9510
 9511	return 0;
 9512}
 9513
 9514static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
 9515				      u32 cnt)
 9516{
 9517	struct bpf_prog *prog = env->prog;
 9518	u32 i, l_off, l_cnt, nr_linfo;
 9519	struct bpf_line_info *linfo;
 9520
 9521	nr_linfo = prog->aux->nr_linfo;
 9522	if (!nr_linfo)
 9523		return 0;
 9524
 9525	linfo = prog->aux->linfo;
 9526
 9527	/* find first line info to remove, count lines to be removed */
 9528	for (i = 0; i < nr_linfo; i++)
 9529		if (linfo[i].insn_off >= off)
 9530			break;
 9531
 9532	l_off = i;
 9533	l_cnt = 0;
 9534	for (; i < nr_linfo; i++)
 9535		if (linfo[i].insn_off < off + cnt)
 9536			l_cnt++;
 9537		else
 9538			break;
 9539
  9540	/* If the first live insn doesn't match the first live linfo, it needs to
  9541	 * "inherit" the last removed linfo.  prog is already modified, so prog->len
  9542	 * == off means no live instructions follow (the tail was removed).
  9543	 */
 9544	if (prog->len != off && l_cnt &&
 9545	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
 9546		l_cnt--;
 9547		linfo[--i].insn_off = off + cnt;
 9548	}
 9549
  9550	/* remove the line info entries which refer to the removed instructions */
 9551	if (l_cnt) {
 9552		memmove(linfo + l_off, linfo + i,
 9553			sizeof(*linfo) * (nr_linfo - i));
 9554
 9555		prog->aux->nr_linfo -= l_cnt;
 9556		nr_linfo = prog->aux->nr_linfo;
 9557	}
 9558
 9559	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
 9560	for (i = l_off; i < nr_linfo; i++)
 9561		linfo[i].insn_off -= cnt;
 9562
 9563	/* fix up all subprogs (incl. 'exit') which start >= off */
 9564	for (i = 0; i <= env->subprog_cnt; i++)
 9565		if (env->subprog_info[i].linfo_idx > l_off) {
 9566			/* program may have started in the removed region but
 9567			 * may not be fully removed
 9568			 */
 9569			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
 9570				env->subprog_info[i].linfo_idx -= l_cnt;
 9571			else
 9572				env->subprog_info[i].linfo_idx = l_off;
 9573		}
 9574
 9575	return 0;
 9576}
 9577
 9578static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
 9579{
 9580	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
 9581	unsigned int orig_prog_len = env->prog->len;
 9582	int err;
 9583
 9584	if (bpf_prog_is_dev_bound(env->prog->aux))
 9585		bpf_prog_offload_remove_insns(env, off, cnt);
 9586
 9587	err = bpf_remove_insns(env->prog, off, cnt);
 9588	if (err)
 9589		return err;
 9590
 9591	err = adjust_subprog_starts_after_remove(env, off, cnt);
 9592	if (err)
 9593		return err;
 9594
 9595	err = bpf_adj_linfo_after_remove(env, off, cnt);
 9596	if (err)
 9597		return err;
 9598
 9599	memmove(aux_data + off,	aux_data + off + cnt,
 9600		sizeof(*aux_data) * (orig_prog_len - off - cnt));
 9601
 9602	return 0;
 9603}
 9604
 9605/* The verifier does more data flow analysis than llvm and will not
 9606 * explore branches that are dead at run time. Malicious programs can
 9607 * have dead code too. Therefore replace all dead at-run-time code
 9608 * with 'ja -1'.
 9609 *
  9610 * Plain nops would not be optimal: if they sat at the end of the
  9611 * program and, through another bug, we managed to jump there, we
  9612 * would execute beyond program memory. Returning an exception code
  9613 * also wouldn't work, since we can have subprogs where the dead
  9614 * code could be located.
 9615 */
 9616static void sanitize_dead_code(struct bpf_verifier_env *env)
 9617{
 9618	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
 9619	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
 9620	struct bpf_insn *insn = env->prog->insnsi;
 9621	const int insn_cnt = env->prog->len;
 9622	int i;
 9623
 9624	for (i = 0; i < insn_cnt; i++) {
 9625		if (aux_data[i].seen)
 9626			continue;
 9627		memcpy(insn + i, &trap, sizeof(trap));
 9628	}
 9629}
 9630
 9631static bool insn_is_cond_jump(u8 code)
 9632{
 9633	u8 op;
 9634
 9635	if (BPF_CLASS(code) == BPF_JMP32)
 9636		return true;
 9637
 9638	if (BPF_CLASS(code) != BPF_JMP)
 9639		return false;
 9640
 9641	op = BPF_OP(code);
 9642	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
 9643}
 9644
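      /* If one side of a conditional jump was proven dead, hard-wire the jump:
       * either always take the branch (ja off) when the fall-through insn was
       * never reached, or always fall through (ja 0) when the target was not.
       */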
 9645static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
 9646{
 9647	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
 9648	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
 9649	struct bpf_insn *insn = env->prog->insnsi;
 9650	const int insn_cnt = env->prog->len;
 9651	int i;
 9652
 9653	for (i = 0; i < insn_cnt; i++, insn++) {
 9654		if (!insn_is_cond_jump(insn->code))
 9655			continue;
 9656
 9657		if (!aux_data[i + 1].seen)
 9658			ja.off = insn->off;
 9659		else if (!aux_data[i + 1 + insn->off].seen)
 9660			ja.off = 0;
 9661		else
 9662			continue;
 9663
 9664		if (bpf_prog_is_dev_bound(env->prog->aux))
 9665			bpf_prog_offload_replace_insn(env, i, &ja);
 9666
 9667		memcpy(insn, &ja, sizeof(ja));
 9668	}
 9669}
 9670
 9671static int opt_remove_dead_code(struct bpf_verifier_env *env)
 9672{
 9673	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
 9674	int insn_cnt = env->prog->len;
 9675	int i, err;
 9676
 9677	for (i = 0; i < insn_cnt; i++) {
 9678		int j;
 9679
 9680		j = 0;
 9681		while (i + j < insn_cnt && !aux_data[i + j].seen)
 9682			j++;
 9683		if (!j)
 9684			continue;
 9685
 9686		err = verifier_remove_insns(env, i, j);
 9687		if (err)
 9688			return err;
 9689		insn_cnt = env->prog->len;
 9690	}
 9691
 9692	return 0;
 9693}
 9694
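      /* Remove 'ja 0' instructions (jumps to the very next insn), which are
       * effectively nops, e.g. the ones opt_hard_wire_dead_code_branches()
       * above may have produced.
       */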
 9695static int opt_remove_nops(struct bpf_verifier_env *env)
 9696{
 9697	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
 9698	struct bpf_insn *insn = env->prog->insnsi;
 9699	int insn_cnt = env->prog->len;
 9700	int i, err;
 9701
 9702	for (i = 0; i < insn_cnt; i++) {
 9703		if (memcmp(&insn[i], &ja, sizeof(ja)))
 9704			continue;
 9705
 9706		err = verifier_remove_insns(env, i, 1);
 9707		if (err)
 9708			return err;
 9709		insn_cnt--;
 9710		i--;
 9711	}
 9712
 9713	return 0;
 9714}
 9715
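      /* Insert an explicit zero-extension after each instruction whose 32-bit
       * destination sub-register was marked as needing one, but only when the
       * JIT relies on the verifier for zero extension.  With BPF_F_TEST_RND_HI32,
       * 32-bit defs that were not marked instead get their upper 32 bits filled
       * with a random value, to exercise the verifier's zext analysis.
       */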
 9716static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 9717					 const union bpf_attr *attr)
 9718{
 9719	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
 9720	struct bpf_insn_aux_data *aux = env->insn_aux_data;
 9721	int i, patch_len, delta = 0, len = env->prog->len;
 9722	struct bpf_insn *insns = env->prog->insnsi;
 9723	struct bpf_prog *new_prog;
 9724	bool rnd_hi32;
 9725
 9726	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
 9727	zext_patch[1] = BPF_ZEXT_REG(0);
 9728	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
 9729	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
 9730	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
 9731	for (i = 0; i < len; i++) {
 9732		int adj_idx = i + delta;
 9733		struct bpf_insn insn;
 9734
 9735		insn = insns[adj_idx];
 9736		if (!aux[adj_idx].zext_dst) {
 9737			u8 code, class;
 9738			u32 imm_rnd;
 9739
 9740			if (!rnd_hi32)
 9741				continue;
 9742
 9743			code = insn.code;
 9744			class = BPF_CLASS(code);
 9745			if (insn_no_def(&insn))
 9746				continue;
 9747
 9748			/* NOTE: arg "reg" (the fourth one) is only used for
  9749			 *       BPF_STX, which has been ruled out by the check
  9750			 *       above, so it is safe to pass NULL here.
 9751			 */
 9752			if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
 9753				if (class == BPF_LD &&
 9754				    BPF_MODE(code) == BPF_IMM)
 9755					i++;
 9756				continue;
 9757			}
 9758
  9759			/* a ctx load could be transformed into a wider load. */
 9760			if (class == BPF_LDX &&
 9761			    aux[adj_idx].ptr_type == PTR_TO_CTX)
 9762				continue;
 9763
 9764			imm_rnd = get_random_int();
 9765			rnd_hi32_patch[0] = insn;
 9766			rnd_hi32_patch[1].imm = imm_rnd;
 9767			rnd_hi32_patch[3].dst_reg = insn.dst_reg;
 9768			patch = rnd_hi32_patch;
 9769			patch_len = 4;
 9770			goto apply_patch_buffer;
 9771		}
 9772
 9773		if (!bpf_jit_needs_zext())
 9774			continue;
 9775
 9776		zext_patch[0] = insn;
 9777		zext_patch[1].dst_reg = insn.dst_reg;
 9778		zext_patch[1].src_reg = insn.dst_reg;
 9779		patch = zext_patch;
 9780		patch_len = 2;
 9781apply_patch_buffer:
 9782		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
 9783		if (!new_prog)
 9784			return -ENOMEM;
 9785		env->prog = new_prog;
 9786		insns = new_prog->insnsi;
 9787		aux = env->insn_aux_data;
 9788		delta += patch_len - 1;
 9789	}
 9790
 9791	return 0;
 9792}
 9793
 9794/* convert load instructions that access fields of a context type into a
 9795 * sequence of instructions that access fields of the underlying structure:
 9796 *     struct __sk_buff    -> struct sk_buff
 9797 *     struct bpf_sock_ops -> struct sock
 9798 */
 9799static int convert_ctx_accesses(struct bpf_verifier_env *env)
 9800{
 9801	const struct bpf_verifier_ops *ops = env->ops;
 9802	int i, cnt, size, ctx_field_size, delta = 0;
 9803	const int insn_cnt = env->prog->len;
 9804	struct bpf_insn insn_buf[16], *insn;
 9805	u32 target_size, size_default, off;
 9806	struct bpf_prog *new_prog;
 9807	enum bpf_access_type type;
 9808	bool is_narrower_load;
 9809
 9810	if (ops->gen_prologue || env->seen_direct_write) {
 9811		if (!ops->gen_prologue) {
 9812			verbose(env, "bpf verifier is misconfigured\n");
 9813			return -EINVAL;
 9814		}
 9815		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
 9816					env->prog);
 9817		if (cnt >= ARRAY_SIZE(insn_buf)) {
 9818			verbose(env, "bpf verifier is misconfigured\n");
 9819			return -EINVAL;
 9820		} else if (cnt) {
 9821			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
 9822			if (!new_prog)
 9823				return -ENOMEM;
 9824
 9825			env->prog = new_prog;
 9826			delta += cnt - 1;
 9827		}
 9828	}
 9829
 9830	if (bpf_prog_is_dev_bound(env->prog->aux))
 9831		return 0;
 9832
 9833	insn = env->prog->insnsi + delta;
 9834
 9835	for (i = 0; i < insn_cnt; i++, insn++) {
 9836		bpf_convert_ctx_access_t convert_ctx_access;
 9837
 9838		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
 9839		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
 9840		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
 9841		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
 9842			type = BPF_READ;
 9843		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
 9844			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
 9845			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
 9846			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
 9847			type = BPF_WRITE;
 9848		else
 9849			continue;
 9850
 9851		if (type == BPF_WRITE &&
 9852		    env->insn_aux_data[i + delta].sanitize_stack_off) {
 9853			struct bpf_insn patch[] = {
 9854				/* Sanitize suspicious stack slot with zero.
 9855				 * There are no memory dependencies for this store,
 9856				 * since it's only using frame pointer and immediate
 9857				 * constant of zero
 9858				 */
 9859				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
 9860					   env->insn_aux_data[i + delta].sanitize_stack_off,
 9861					   0),
 9862				/* the original STX instruction will immediately
 9863				 * overwrite the same stack slot with appropriate value
 9864				 */
 9865				*insn,
 9866			};
 9867
 9868			cnt = ARRAY_SIZE(patch);
 9869			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
 9870			if (!new_prog)
 9871				return -ENOMEM;
 9872
 9873			delta    += cnt - 1;
 9874			env->prog = new_prog;
 9875			insn      = new_prog->insnsi + i + delta;
 9876			continue;
 9877		}
 9878
 9879		switch (env->insn_aux_data[i + delta].ptr_type) {
 9880		case PTR_TO_CTX:
 9881			if (!ops->convert_ctx_access)
 9882				continue;
 9883			convert_ctx_access = ops->convert_ctx_access;
 9884			break;
 9885		case PTR_TO_SOCKET:
 9886		case PTR_TO_SOCK_COMMON:
 9887			convert_ctx_access = bpf_sock_convert_ctx_access;
 9888			break;
 9889		case PTR_TO_TCP_SOCK:
 9890			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
 9891			break;
 9892		case PTR_TO_XDP_SOCK:
 9893			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
 9894			break;
 9895		case PTR_TO_BTF_ID:
 9896			if (type == BPF_READ) {
 9897				insn->code = BPF_LDX | BPF_PROBE_MEM |
 9898					BPF_SIZE((insn)->code);
 9899				env->prog->aux->num_exentries++;
 9900			} else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
 9901				verbose(env, "Writes through BTF pointers are not allowed\n");
 9902				return -EINVAL;
 9903			}
 9904			continue;
 9905		default:
 9906			continue;
 9907		}
 9908
 9909		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
 9910		size = BPF_LDST_BYTES(insn);
 9911
  9912		/* If the read access is a narrower load of the field,
  9913		 * convert it to a 4/8-byte load, to minimize program type
  9914		 * specific convert_ctx_access changes. If the conversion is
  9915		 * successful, we will apply the proper mask to the result.
  9916		 */
 9917		is_narrower_load = size < ctx_field_size;
 9918		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
 9919		off = insn->off;
 9920		if (is_narrower_load) {
 9921			u8 size_code;
 9922
 9923			if (type == BPF_WRITE) {
 9924				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
 9925				return -EINVAL;
 9926			}
 9927
 9928			size_code = BPF_H;
 9929			if (ctx_field_size == 4)
 9930				size_code = BPF_W;
 9931			else if (ctx_field_size == 8)
 9932				size_code = BPF_DW;
 9933
 9934			insn->off = off & ~(size_default - 1);
 9935			insn->code = BPF_LDX | BPF_MEM | size_code;
 9936		}
 9937
 9938		target_size = 0;
 9939		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
 9940					 &target_size);
 9941		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
 9942		    (ctx_field_size && !target_size)) {
 9943			verbose(env, "bpf verifier is misconfigured\n");
 9944			return -EINVAL;
 9945		}
 9946
 9947		if (is_narrower_load && size < target_size) {
 9948			u8 shift = bpf_ctx_narrow_access_offset(
 9949				off, size, size_default) * 8;
 9950			if (ctx_field_size <= 4) {
 9951				if (shift)
 9952					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
 9953									insn->dst_reg,
 9954									shift);
 9955				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
 9956								(1 << size * 8) - 1);
 9957			} else {
 9958				if (shift)
 9959					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
 9960									insn->dst_reg,
 9961									shift);
 9962				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
 9963								(1ULL << size * 8) - 1);
 9964			}
 9965		}
 9966
 9967		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 9968		if (!new_prog)
 9969			return -ENOMEM;
 9970
 9971		delta += cnt - 1;
 9972
 9973		/* keep walking new program and skip insns we just inserted */
 9974		env->prog = new_prog;
 9975		insn      = new_prog->insnsi + i + delta;
 9976	}
 9977
 9978	return 0;
 9979}
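/* Illustrative sketch of the narrow-load rewrite performed above (not the
 * output of any particular prog type's convert_ctx_access; it assumes a
 * little-endian host, a 4-byte ctx field at an aligned offset 'off' and a
 * program reading its second byte, i.e. r2 = *(u8 *)(r1 + off + 1)):
 *
 *    BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, off),
 *       ... prog type specific rewrite of that load ...
 *    BPF_ALU32_IMM(BPF_RSH, BPF_REG_2, 8),
 *    BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xff),
 *
 * i.e. the load is widened and aligned first, then shifted by the amount
 * returned by bpf_ctx_narrow_access_offset() and masked down to the
 * originally requested size.
 */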
 9980
 9981static int jit_subprogs(struct bpf_verifier_env *env)
 9982{
 9983	struct bpf_prog *prog = env->prog, **func, *tmp;
 9984	int i, j, subprog_start, subprog_end = 0, len, subprog;
 9985	struct bpf_insn *insn;
 9986	void *old_bpf_func;
 9987	int err, num_exentries;
 9988
 9989	if (env->subprog_cnt <= 1)
 9990		return 0;
 9991
 9992	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 9993		if (insn->code != (BPF_JMP | BPF_CALL) ||
 9994		    insn->src_reg != BPF_PSEUDO_CALL)
 9995			continue;
 9996		/* Upon error here we cannot fall back to interpreter but
 9997		 * need a hard reject of the program. Thus -EFAULT is
 9998		 * propagated in any case.
 9999		 */
10000		subprog = find_subprog(env, i + insn->imm + 1);
10001		if (subprog < 0) {
10002			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
10003				  i + insn->imm + 1);
10004			return -EFAULT;
10005		}
10006		/* temporarily remember subprog id inside insn instead of
10007		 * aux_data, since next loop will split up all insns into funcs
10008		 */
10009		insn->off = subprog;
 10010		/* remember original imm in case JIT fails and a fallback
 10011		 * to the interpreter is needed
10012		 */
10013		env->insn_aux_data[i].call_imm = insn->imm;
10014		/* point imm to __bpf_call_base+1 from JITs point of view */
10015		insn->imm = 1;
10016	}
10017
10018	err = bpf_prog_alloc_jited_linfo(prog);
10019	if (err)
10020		goto out_undo_insn;
10021
10022	err = -ENOMEM;
10023	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
10024	if (!func)
10025		goto out_undo_insn;
10026
10027	for (i = 0; i < env->subprog_cnt; i++) {
10028		subprog_start = subprog_end;
10029		subprog_end = env->subprog_info[i + 1].start;
10030
10031		len = subprog_end - subprog_start;
10032		/* BPF_PROG_RUN doesn't call subprogs directly,
10033		 * hence main prog stats include the runtime of subprogs.
 10034		 * subprogs don't have IDs and are not reachable via prog_get_next_id,
 10035		 * so func[i]->aux->stats will never be accessed and stays NULL
10036		 */
10037		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
10038		if (!func[i])
10039			goto out_free;
10040		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
10041		       len * sizeof(struct bpf_insn));
10042		func[i]->type = prog->type;
10043		func[i]->len = len;
10044		if (bpf_prog_calc_tag(func[i]))
10045			goto out_free;
10046		func[i]->is_func = 1;
10047		func[i]->aux->func_idx = i;
10048		/* the btf and func_info will be freed only at prog->aux */
10049		func[i]->aux->btf = prog->aux->btf;
10050		func[i]->aux->func_info = prog->aux->func_info;
10051
10052		/* Use bpf_prog_F_tag to indicate functions in stack traces.
 10053		 * Long term, debug info would be needed to populate names
10054		 */
10055		func[i]->aux->name[0] = 'F';
10056		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
10057		func[i]->jit_requested = 1;
10058		func[i]->aux->linfo = prog->aux->linfo;
10059		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
10060		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
10061		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
10062		num_exentries = 0;
10063		insn = func[i]->insnsi;
10064		for (j = 0; j < func[i]->len; j++, insn++) {
10065			if (BPF_CLASS(insn->code) == BPF_LDX &&
10066			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
10067				num_exentries++;
10068		}
10069		func[i]->aux->num_exentries = num_exentries;
10070		func[i] = bpf_int_jit_compile(func[i]);
10071		if (!func[i]->jited) {
10072			err = -ENOTSUPP;
10073			goto out_free;
10074		}
10075		cond_resched();
10076	}
10077	/* at this point all bpf functions were successfully JITed
10078	 * now populate all bpf_calls with correct addresses and
10079	 * run last pass of JIT
10080	 */
10081	for (i = 0; i < env->subprog_cnt; i++) {
10082		insn = func[i]->insnsi;
10083		for (j = 0; j < func[i]->len; j++, insn++) {
10084			if (insn->code != (BPF_JMP | BPF_CALL) ||
10085			    insn->src_reg != BPF_PSEUDO_CALL)
10086				continue;
10087			subprog = insn->off;
10088			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
10089				    __bpf_call_base;
10090		}
10091
10092		/* we use the aux data to keep a list of the start addresses
10093		 * of the JITed images for each function in the program
10094		 *
10095		 * for some architectures, such as powerpc64, the imm field
10096		 * might not be large enough to hold the offset of the start
10097		 * address of the callee's JITed image from __bpf_call_base
10098		 *
 10099		 * in such cases, we can look up the start address of a callee
10100		 * by using its subprog id, available from the off field of
10101		 * the call instruction, as an index for this list
10102		 */
10103		func[i]->aux->func = func;
10104		func[i]->aux->func_cnt = env->subprog_cnt;
10105	}
10106	for (i = 0; i < env->subprog_cnt; i++) {
10107		old_bpf_func = func[i]->bpf_func;
10108		tmp = bpf_int_jit_compile(func[i]);
10109		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
10110			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
10111			err = -ENOTSUPP;
10112			goto out_free;
10113		}
10114		cond_resched();
10115	}
10116
10117	/* finally lock prog and jit images for all functions and
 10118	 * populate kallsyms
10119	 */
10120	for (i = 0; i < env->subprog_cnt; i++) {
10121		bpf_prog_lock_ro(func[i]);
10122		bpf_prog_kallsyms_add(func[i]);
10123	}
10124
10125	/* Last step: make now unused interpreter insns from main
 10126	 * prog consistent for later dump requests, so they
 10127	 * look the same as if they were interpreted only.
10128	 */
10129	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
10130		if (insn->code != (BPF_JMP | BPF_CALL) ||
10131		    insn->src_reg != BPF_PSEUDO_CALL)
10132			continue;
10133		insn->off = env->insn_aux_data[i].call_imm;
10134		subprog = find_subprog(env, i + insn->off + 1);
10135		insn->imm = subprog;
10136	}
10137
10138	prog->jited = 1;
10139	prog->bpf_func = func[0]->bpf_func;
10140	prog->aux->func = func;
10141	prog->aux->func_cnt = env->subprog_cnt;
10142	bpf_prog_free_unused_jited_linfo(prog);
10143	return 0;
10144out_free:
10145	for (i = 0; i < env->subprog_cnt; i++)
10146		if (func[i])
10147			bpf_jit_free(func[i]);
10148	kfree(func);
10149out_undo_insn:
10150	/* cleanup main prog to be interpreted */
10151	prog->jit_requested = 0;
10152	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
10153		if (insn->code != (BPF_JMP | BPF_CALL) ||
10154		    insn->src_reg != BPF_PSEUDO_CALL)
10155			continue;
10156		insn->off = 0;
10157		insn->imm = env->insn_aux_data[i].call_imm;
10158	}
10159	bpf_prog_free_jited_linfo(prog);
10160	return err;
10161}
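/* Illustrative life cycle of a single bpf-to-bpf call insn across the
 * passes above (values are examples only):
 *
 *   as verified:        { .code = BPF_JMP | BPF_CALL,
 *                         .src_reg = BPF_PSEUDO_CALL, .imm = 5 }
 *   before JITing:      .off = callee subprog id, .imm = 1,
 *                       original imm saved in insn_aux_data[].call_imm
 *   in func[i]'s copy, once all subprogs are JITed:
 *                       .imm = func[callee]->bpf_func - __bpf_call_base
 *   main prog, kept only for later dumps:
 *                       .off = original imm, .imm = callee subprog id
 */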
10162
10163static int fixup_call_args(struct bpf_verifier_env *env)
10164{
10165#ifndef CONFIG_BPF_JIT_ALWAYS_ON
10166	struct bpf_prog *prog = env->prog;
10167	struct bpf_insn *insn = prog->insnsi;
10168	int i, depth;
10169#endif
10170	int err = 0;
10171
10172	if (env->prog->jit_requested &&
10173	    !bpf_prog_is_dev_bound(env->prog->aux)) {
10174		err = jit_subprogs(env);
10175		if (err == 0)
10176			return 0;
10177		if (err == -EFAULT)
10178			return err;
10179	}
10180#ifndef CONFIG_BPF_JIT_ALWAYS_ON
10181	for (i = 0; i < prog->len; i++, insn++) {
10182		if (insn->code != (BPF_JMP | BPF_CALL) ||
10183		    insn->src_reg != BPF_PSEUDO_CALL)
10184			continue;
10185		depth = get_callee_stack_depth(env, insn, i);
10186		if (depth < 0)
10187			return depth;
10188		bpf_patch_call_args(insn, depth);
10189	}
10190	err = 0;
10191#endif
10192	return err;
10193}
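/* Summary of the outcomes above (no new behaviour, just the code paths):
 *   jit_subprogs() == 0       - all calls resolved by the JIT, nothing to do
 *   jit_subprogs() == -EFAULT - verifier bug, the program is hard-rejected
 *   any other error           - without CONFIG_BPF_JIT_ALWAYS_ON the pseudo
 *                               calls are instead patched for the interpreter
 *                               with the callee's stack depth
 */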
10194
10195/* fixup insn->imm field of bpf_call instructions
10196 * and inline eligible helpers as explicit sequence of BPF instructions
10197 *
10198 * this function is called after eBPF program passed verification
10199 */
10200static int fixup_bpf_calls(struct bpf_verifier_env *env)
10201{
10202	struct bpf_prog *prog = env->prog;
10203	bool expect_blinding = bpf_jit_blinding_enabled(prog);
10204	struct bpf_insn *insn = prog->insnsi;
10205	const struct bpf_func_proto *fn;
10206	const int insn_cnt = prog->len;
10207	const struct bpf_map_ops *ops;
10208	struct bpf_insn_aux_data *aux;
10209	struct bpf_insn insn_buf[16];
10210	struct bpf_prog *new_prog;
10211	struct bpf_map *map_ptr;
10212	int i, ret, cnt, delta = 0;
10213
10214	for (i = 0; i < insn_cnt; i++, insn++) {
10215		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
10216		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
10217		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
10218		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
10219			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
10220			struct bpf_insn mask_and_div[] = {
10221				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
10222				/* Rx div 0 -> 0 */
10223				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
10224				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
10225				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
10226				*insn,
10227			};
10228			struct bpf_insn mask_and_mod[] = {
10229				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
10230				/* Rx mod 0 -> Rx */
10231				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
10232				*insn,
10233			};
10234			struct bpf_insn *patchlet;
10235
10236			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
10237			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
10238				patchlet = mask_and_div + (is64 ? 1 : 0);
10239				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
10240			} else {
10241				patchlet = mask_and_mod + (is64 ? 1 : 0);
10242				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
10243			}
10244
10245			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
10246			if (!new_prog)
10247				return -ENOMEM;
10248
10249			delta    += cnt - 1;
10250			env->prog = prog = new_prog;
10251			insn      = new_prog->insnsi + i + delta;
10252			continue;
10253		}
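		/* Illustrative expansion of the patch above for a 32-bit
		 * "r0 /= r1" (is64 == false, so all of mask_and_div[] is used):
		 *
		 *    BPF_MOV32_REG(BPF_REG_1, BPF_REG_1)           truncate divisor
		 *    BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2)         non-zero divisor?
		 *    BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0)  r0 = 0
		 *    BPF_JMP_IMM(BPF_JA, 0, 0, 1)                  skip the division
		 *    BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1)  original insn
		 *
		 * For 64-bit ops the leading 32-bit truncation is skipped by
		 * starting the patchlet one insn later.
		 */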
10254
10255		if (BPF_CLASS(insn->code) == BPF_LD &&
10256		    (BPF_MODE(insn->code) == BPF_ABS ||
10257		     BPF_MODE(insn->code) == BPF_IND)) {
10258			cnt = env->ops->gen_ld_abs(insn, insn_buf);
10259			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
10260				verbose(env, "bpf verifier is misconfigured\n");
10261				return -EINVAL;
10262			}
10263
10264			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10265			if (!new_prog)
10266				return -ENOMEM;
10267
10268			delta    += cnt - 1;
10269			env->prog = prog = new_prog;
10270			insn      = new_prog->insnsi + i + delta;
10271			continue;
10272		}
10273
10274		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
10275		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
10276			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
10277			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
10278			struct bpf_insn insn_buf[16];
10279			struct bpf_insn *patch = &insn_buf[0];
10280			bool issrc, isneg;
10281			u32 off_reg;
10282
10283			aux = &env->insn_aux_data[i + delta];
10284			if (!aux->alu_state ||
10285			    aux->alu_state == BPF_ALU_NON_POINTER)
10286				continue;
10287
10288			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
10289			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
10290				BPF_ALU_SANITIZE_SRC;
10291
10292			off_reg = issrc ? insn->src_reg : insn->dst_reg;
10293			if (isneg)
10294				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
10295			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
10296			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
10297			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
10298			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
10299			*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
10300			if (issrc) {
10301				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
10302							 off_reg);
10303				insn->src_reg = BPF_REG_AX;
10304			} else {
10305				*patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
10306							 BPF_REG_AX);
10307			}
10308			if (isneg)
10309				insn->code = insn->code == code_add ?
10310					     code_sub : code_add;
10311			*patch++ = *insn;
10312			if (issrc && isneg)
10313				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
10314			cnt = patch - insn_buf;
10315
10316			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10317			if (!new_prog)
10318				return -ENOMEM;
10319
10320			delta    += cnt - 1;
10321			env->prog = prog = new_prog;
10322			insn      = new_prog->insnsi + i + delta;
10323			continue;
10324		}
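		/* Rough sketch of how the masking sequence above works, with
		 * off = the offset register (optionally sign-flipped first when
		 * isneg) and limit = aux->alu_limit:
		 *
		 *    AX = (limit - 1) - off
		 *    AX |= off
		 *    AX = -AX
		 *    AX s>>= 63
		 *
		 * AX ends up as an all-ones mask when off lies within
		 * [0, limit - 1] (with the harmless corner case of off == 0,
		 * where zeroing changes nothing) and as 0 otherwise, so the AND
		 * (applied to AX or to the offset register itself, depending on
		 * which operand carries the offset) either leaves the offset
		 * intact or forces it to zero before the patched ADD/SUB runs,
		 * preventing speculative out-of-bounds pointer arithmetic.
		 */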
10325
10326		if (insn->code != (BPF_JMP | BPF_CALL))
10327			continue;
10328		if (insn->src_reg == BPF_PSEUDO_CALL)
10329			continue;
10330
10331		if (insn->imm == BPF_FUNC_get_route_realm)
10332			prog->dst_needed = 1;
10333		if (insn->imm == BPF_FUNC_get_prandom_u32)
10334			bpf_user_rnd_init_once();
10335		if (insn->imm == BPF_FUNC_override_return)
10336			prog->kprobe_override = 1;
10337		if (insn->imm == BPF_FUNC_tail_call) {
10338			/* If we tail call into other programs, we
10339			 * cannot make any assumptions since they can
10340			 * be replaced dynamically during runtime in
10341			 * the program array.
10342			 */
10343			prog->cb_access = 1;
10344			env->prog->aux->stack_depth = MAX_BPF_STACK;
10345			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
10346
 10347			/* mark bpf_tail_call as a different opcode to avoid
 10348			 * conditional branch in the interpreter for every normal
10349			 * call and to prevent accidental JITing by JIT compiler
10350			 * that doesn't support bpf_tail_call yet
10351			 */
10352			insn->imm = 0;
10353			insn->code = BPF_JMP | BPF_TAIL_CALL;
10354
10355			aux = &env->insn_aux_data[i + delta];
10356			if (env->bpf_capable && !expect_blinding &&
10357			    prog->jit_requested &&
10358			    !bpf_map_key_poisoned(aux) &&
10359			    !bpf_map_ptr_poisoned(aux) &&
10360			    !bpf_map_ptr_unpriv(aux)) {
10361				struct bpf_jit_poke_descriptor desc = {
10362					.reason = BPF_POKE_REASON_TAIL_CALL,
10363					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
10364					.tail_call.key = bpf_map_key_immediate(aux),
10365				};
10366
10367				ret = bpf_jit_add_poke_descriptor(prog, &desc);
10368				if (ret < 0) {
10369					verbose(env, "adding tail call poke descriptor failed\n");
10370					return ret;
10371				}
10372
10373				insn->imm = ret + 1;
10374				continue;
10375			}
10376
10377			if (!bpf_map_ptr_unpriv(aux))
10378				continue;
10379
10380			/* instead of changing every JIT dealing with tail_call
10381			 * emit two extra insns:
10382			 * if (index >= max_entries) goto out;
10383			 * index &= array->index_mask;
10384			 * to avoid out-of-bounds cpu speculation
10385			 */
10386			if (bpf_map_ptr_poisoned(aux)) {
10387				verbose(env, "tail_call abusing map_ptr\n");
10388				return -EINVAL;
10389			}
10390
10391			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
10392			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
10393						  map_ptr->max_entries, 2);
10394			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
10395						    container_of(map_ptr,
10396								 struct bpf_array,
10397								 map)->index_mask);
10398			insn_buf[2] = *insn;
10399			cnt = 3;
10400			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10401			if (!new_prog)
10402				return -ENOMEM;
10403
10404			delta    += cnt - 1;
10405			env->prog = prog = new_prog;
10406			insn      = new_prog->insnsi + i + delta;
10407			continue;
10408		}
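		/* Summary of the three tail_call encodings chosen above:
		 *   - constant key into a tracked map with JIT requested:
		 *     insn->imm = poke descriptor index + 1, so the JIT can emit
		 *     a direct jump that map_poke_run() later re-patches when
		 *     the map slot changes;
		 *   - unprivileged map pointer: the bound check and index mask
		 *     from insn_buf[] above are emitted in front of the
		 *     BPF_TAIL_CALL insn;
		 *   - otherwise: a plain BPF_JMP | BPF_TAIL_CALL with imm == 0.
		 */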
10409
10410		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
10411		 * and other inlining handlers are currently limited to 64 bit
10412		 * only.
10413		 */
10414		if (prog->jit_requested && BITS_PER_LONG == 64 &&
10415		    (insn->imm == BPF_FUNC_map_lookup_elem ||
10416		     insn->imm == BPF_FUNC_map_update_elem ||
10417		     insn->imm == BPF_FUNC_map_delete_elem ||
10418		     insn->imm == BPF_FUNC_map_push_elem   ||
10419		     insn->imm == BPF_FUNC_map_pop_elem    ||
10420		     insn->imm == BPF_FUNC_map_peek_elem)) {
10421			aux = &env->insn_aux_data[i + delta];
10422			if (bpf_map_ptr_poisoned(aux))
10423				goto patch_call_imm;
10424
10425			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
10426			ops = map_ptr->ops;
10427			if (insn->imm == BPF_FUNC_map_lookup_elem &&
10428			    ops->map_gen_lookup) {
10429				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
10430				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
10431					verbose(env, "bpf verifier is misconfigured\n");
10432					return -EINVAL;
10433				}
10434
10435				new_prog = bpf_patch_insn_data(env, i + delta,
10436							       insn_buf, cnt);
10437				if (!new_prog)
10438					return -ENOMEM;
10439
10440				delta    += cnt - 1;
10441				env->prog = prog = new_prog;
10442				insn      = new_prog->insnsi + i + delta;
10443				continue;
10444			}
10445
10446			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
10447				     (void *(*)(struct bpf_map *map, void *key))NULL));
10448			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
10449				     (int (*)(struct bpf_map *map, void *key))NULL));
10450			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
10451				     (int (*)(struct bpf_map *map, void *key, void *value,
10452					      u64 flags))NULL));
10453			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
10454				     (int (*)(struct bpf_map *map, void *value,
10455					      u64 flags))NULL));
10456			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
10457				     (int (*)(struct bpf_map *map, void *value))NULL));
10458			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
10459				     (int (*)(struct bpf_map *map, void *value))NULL));
10460
10461			switch (insn->imm) {
10462			case BPF_FUNC_map_lookup_elem:
10463				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
10464					    __bpf_call_base;
10465				continue;
10466			case BPF_FUNC_map_update_elem:
10467				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
10468					    __bpf_call_base;
10469				continue;
10470			case BPF_FUNC_map_delete_elem:
10471				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
10472					    __bpf_call_base;
10473				continue;
10474			case BPF_FUNC_map_push_elem:
10475				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
10476					    __bpf_call_base;
10477				continue;
10478			case BPF_FUNC_map_pop_elem:
10479				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
10480					    __bpf_call_base;
10481				continue;
10482			case BPF_FUNC_map_peek_elem:
10483				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
10484					    __bpf_call_base;
10485				continue;
10486			}
10487
10488			goto patch_call_imm;
10489		}
10490
10491		if (prog->jit_requested && BITS_PER_LONG == 64 &&
10492		    insn->imm == BPF_FUNC_jiffies64) {
10493			struct bpf_insn ld_jiffies_addr[2] = {
10494				BPF_LD_IMM64(BPF_REG_0,
10495					     (unsigned long)&jiffies),
10496			};
10497
10498			insn_buf[0] = ld_jiffies_addr[0];
10499			insn_buf[1] = ld_jiffies_addr[1];
10500			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
10501						  BPF_REG_0, 0);
10502			cnt = 3;
10503
10504			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
10505						       cnt);
10506			if (!new_prog)
10507				return -ENOMEM;
10508
10509			delta    += cnt - 1;
10510			env->prog = prog = new_prog;
10511			insn      = new_prog->insnsi + i + delta;
10512			continue;
10513		}
10514
10515patch_call_imm:
10516		fn = env->ops->get_func_proto(insn->imm, env->prog);
 10517		/* all functions that have a prototype and that the verifier
 10518		 * allowed programs to call must be real in-kernel functions
10519		 */
10520		if (!fn->func) {
10521			verbose(env,
10522				"kernel subsystem misconfigured func %s#%d\n",
10523				func_id_name(insn->imm), insn->imm);
10524			return -EFAULT;
10525		}
10526		insn->imm = fn->func - __bpf_call_base;
10527	}
10528
10529	/* Since poke tab is now finalized, publish aux to tracker. */
10530	for (i = 0; i < prog->aux->size_poke_tab; i++) {
10531		map_ptr = prog->aux->poke_tab[i].tail_call.map;
10532		if (!map_ptr->ops->map_poke_track ||
10533		    !map_ptr->ops->map_poke_untrack ||
10534		    !map_ptr->ops->map_poke_run) {
10535			verbose(env, "bpf verifier is misconfigured\n");
10536			return -EINVAL;
10537		}
10538
10539		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
10540		if (ret < 0) {
10541			verbose(env, "tracking tail call prog failed\n");
10542			return ret;
10543		}
10544	}
10545
10546	return 0;
10547}
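/* Final encoding of a non-inlined helper call after the loop above, e.g.
 * for bpf_ktime_get_ns() (illustrative):
 *
 *    insn->code    == BPF_JMP | BPF_CALL
 *    insn->src_reg == 0
 *    insn->imm     == fn->func - __bpf_call_base
 *
 * so both the interpreter and the JITs can resolve the call target as
 * __bpf_call_base + insn->imm.
 */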
10548
10549static void free_states(struct bpf_verifier_env *env)
10550{
10551	struct bpf_verifier_state_list *sl, *sln;
10552	int i;
10553
10554	sl = env->free_list;
10555	while (sl) {
10556		sln = sl->next;
10557		free_verifier_state(&sl->state, false);
10558		kfree(sl);
10559		sl = sln;
10560	}
10561	env->free_list = NULL;
10562
10563	if (!env->explored_states)
10564		return;
10565
10566	for (i = 0; i < state_htab_size(env); i++) {
10567		sl = env->explored_states[i];
10568
10569		while (sl) {
10570			sln = sl->next;
10571			free_verifier_state(&sl->state, false);
10572			kfree(sl);
10573			sl = sln;
10574		}
10575		env->explored_states[i] = NULL;
10576	}
10577}
10578
10579/* The verifier is using insn_aux_data[] to store temporary data during
10580 * verification and to store information for passes that run after the
10581 * verification like dead code sanitization. do_check_common() for subprogram N
10582 * may analyze many other subprograms. sanitize_insn_aux_data() clears all
10583 * temporary data after do_check_common() finds that subprogram N cannot be
10584 * verified independently. pass_cnt counts the number of times
 10585 * do_check_common() was run and insn->aux->seen tells the pass number in which
 10586 * insn_aux_data was touched. These variables are compared to clear temporary
 10587 * data from a failed pass. For testing and experiments do_check_common() can be
10588 * run multiple times even when prior attempt to verify is unsuccessful.
10589 */
10590static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
10591{
10592	struct bpf_insn *insn = env->prog->insnsi;
10593	struct bpf_insn_aux_data *aux;
10594	int i, class;
10595
10596	for (i = 0; i < env->prog->len; i++) {
10597		class = BPF_CLASS(insn[i].code);
10598		if (class != BPF_LDX && class != BPF_STX)
10599			continue;
10600		aux = &env->insn_aux_data[i];
10601		if (aux->seen != env->pass_cnt)
10602			continue;
10603		memset(aux, 0, offsetof(typeof(*aux), orig_idx));
10604	}
10605}
10606
10607static int do_check_common(struct bpf_verifier_env *env, int subprog)
10608{
10609	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
10610	struct bpf_verifier_state *state;
10611	struct bpf_reg_state *regs;
10612	int ret, i;
10613
10614	env->prev_linfo = NULL;
10615	env->pass_cnt++;
10616
10617	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
10618	if (!state)
10619		return -ENOMEM;
10620	state->curframe = 0;
10621	state->speculative = false;
10622	state->branches = 1;
10623	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
10624	if (!state->frame[0]) {
10625		kfree(state);
10626		return -ENOMEM;
10627	}
10628	env->cur_state = state;
10629	init_func_state(env, state->frame[0],
10630			BPF_MAIN_FUNC /* callsite */,
10631			0 /* frameno */,
10632			subprog);
10633
10634	regs = state->frame[state->curframe]->regs;
10635	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
10636		ret = btf_prepare_func_args(env, subprog, regs);
10637		if (ret)
10638			goto out;
10639		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
10640			if (regs[i].type == PTR_TO_CTX)
10641				mark_reg_known_zero(env, regs, i);
10642			else if (regs[i].type == SCALAR_VALUE)
10643				mark_reg_unknown(env, regs, i);
10644		}
10645	} else {
10646		/* 1st arg to a function */
10647		regs[BPF_REG_1].type = PTR_TO_CTX;
10648		mark_reg_known_zero(env, regs, BPF_REG_1);
10649		ret = btf_check_func_arg_match(env, subprog, regs);
10650		if (ret == -EFAULT)
10651			/* unlikely verifier bug. abort.
10652			 * ret == 0 and ret < 0 are sadly acceptable for
 10653			 * the main() function due to backward compatibility.
 10654			 * E.g. a socket filter program may be written as:
10655			 * int bpf_prog(struct pt_regs *ctx)
10656			 * and never dereference that ctx in the program.
10657			 * 'struct pt_regs' is a type mismatch for socket
10658			 * filter that should be using 'struct __sk_buff'.
10659			 */
10660			goto out;
10661	}
10662
10663	ret = do_check(env);
10664out:
10665	/* check for NULL is necessary, since cur_state can be freed inside
10666	 * do_check() under memory pressure.
10667	 */
10668	if (env->cur_state) {
10669		free_verifier_state(env->cur_state, true);
10670		env->cur_state = NULL;
10671	}
10672	while (!pop_stack(env, NULL, NULL, false));
10673	if (!ret && pop_log)
10674		bpf_vlog_reset(&env->log, 0);
10675	free_states(env);
10676	if (ret)
10677		/* clean aux data in case subprog was rejected */
10678		sanitize_insn_aux_data(env);
10679	return ret;
10680}
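/* Illustrative example of the argument setup above for a global function
 * verified on its own (hypothetical prototype, assuming a program type
 * whose context is struct __sk_buff):
 *
 *    int foo(struct __sk_buff *skb, int len);
 *
 * btf_prepare_func_args() types R1 as PTR_TO_CTX (then additionally marked
 * via mark_reg_known_zero() above) and R2 as an unknown SCALAR_VALUE;
 * argument registers beyond the BTF prototype are left untouched by this
 * setup.
 */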
10681
10682/* Verify all global functions in a BPF program one by one based on their BTF.
10683 * All global functions must pass verification. Otherwise the whole program is rejected.
10684 * Consider:
10685 * int bar(int);
10686 * int foo(int f)
10687 * {
10688 *    return bar(f);
10689 * }
10690 * int bar(int b)
10691 * {
10692 *    ...
10693 * }
10694 * foo() will be verified first for R1=any_scalar_value. During verification it
 10695 * will be assumed that bar() was already verified successfully and the call to bar()
10696 * from foo() will be checked for type match only. Later bar() will be verified
10697 * independently to check that it's safe for R1=any_scalar_value.
10698 */
10699static int do_check_subprogs(struct bpf_verifier_env *env)
10700{
10701	struct bpf_prog_aux *aux = env->prog->aux;
10702	int i, ret;
10703
10704	if (!aux->func_info)
10705		return 0;
10706
10707	for (i = 1; i < env->subprog_cnt; i++) {
10708		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
10709			continue;
10710		env->insn_idx = env->subprog_info[i].start;
10711		WARN_ON_ONCE(env->insn_idx == 0);
10712		ret = do_check_common(env, i);
10713		if (ret) {
10714			return ret;
10715		} else if (env->log.level & BPF_LOG_LEVEL) {
10716			verbose(env,
10717				"Func#%d is safe for any args that match its prototype\n",
10718				i);
10719		}
10720	}
10721	return 0;
10722}
10723
10724static int do_check_main(struct bpf_verifier_env *env)
10725{
10726	int ret;
10727
10728	env->insn_idx = 0;
10729	ret = do_check_common(env, 0);
10730	if (!ret)
10731		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
10732	return ret;
10733}
10734
10735
10736static void print_verification_stats(struct bpf_verifier_env *env)
10737{
10738	int i;
10739
10740	if (env->log.level & BPF_LOG_STATS) {
10741		verbose(env, "verification time %lld usec\n",
10742			div_u64(env->verification_time, 1000));
10743		verbose(env, "stack depth ");
10744		for (i = 0; i < env->subprog_cnt; i++) {
10745			u32 depth = env->subprog_info[i].stack_depth;
10746
10747			verbose(env, "%d", depth);
10748			if (i + 1 < env->subprog_cnt)
10749				verbose(env, "+");
10750		}
10751		verbose(env, "\n");
10752	}
10753	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
10754		"total_states %d peak_states %d mark_read %d\n",
10755		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
10756		env->max_states_per_insn, env->total_states,
10757		env->peak_states, env->longest_mark_read_walk);
10758}
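/* Example of the resulting log output (numbers are made up); the first two
 * lines appear only when BPF_LOG_STATS was requested, the last one with any
 * requested log level:
 *
 *    verification time 1134 usec
 *    stack depth 64+0+32
 *    processed 4521 insns (limit 1000000) max_states_per_insn 4 total_states 113 peak_states 113 mark_read 27
 */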
10759
10760static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
10761{
10762	const struct btf_type *t, *func_proto;
10763	const struct bpf_struct_ops *st_ops;
10764	const struct btf_member *member;
10765	struct bpf_prog *prog = env->prog;
10766	u32 btf_id, member_idx;
10767	const char *mname;
10768
10769	btf_id = prog->aux->attach_btf_id;
10770	st_ops = bpf_struct_ops_find(btf_id);
10771	if (!st_ops) {
10772		verbose(env, "attach_btf_id %u is not a supported struct\n",
10773			btf_id);
10774		return -ENOTSUPP;
10775	}
10776
10777	t = st_ops->type;
10778	member_idx = prog->expected_attach_type;
10779	if (member_idx >= btf_type_vlen(t)) {
10780		verbose(env, "attach to invalid member idx %u of struct %s\n",
10781			member_idx, st_ops->name);
10782		return -EINVAL;
10783	}
10784
10785	member = &btf_type_member(t)[member_idx];
10786	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
10787	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
10788					       NULL);
10789	if (!func_proto) {
10790		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
10791			mname, member_idx, st_ops->name);
10792		return -EINVAL;
10793	}
10794
10795	if (st_ops->check_member) {
10796		int err = st_ops->check_member(t, member);
10797
10798		if (err) {
10799			verbose(env, "attach to unsupported member %s of struct %s\n",
10800				mname, st_ops->name);
10801			return err;
10802		}
10803	}
10804
10805	prog->aux->attach_func_proto = func_proto;
10806	prog->aux->attach_func_name = mname;
10807	env->ops = st_ops->verifier_ops;
10808
10809	return 0;
10810}
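/* Example of the lookup above (illustrative): for a BPF_PROG_TYPE_STRUCT_OPS
 * program implementing tcp_congestion_ops::cong_avoid, attach_btf_id names
 * the "tcp_congestion_ops" type in vmlinux BTF, expected_attach_type holds
 * the member index of "cong_avoid", and that member's resolved func proto
 * becomes prog->aux->attach_func_proto.
 */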
10811#define SECURITY_PREFIX "security_"
10812
10813static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
10814{
10815	if (within_error_injection_list(addr) ||
10816	    !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
10817		     sizeof(SECURITY_PREFIX) - 1))
10818		return 0;
10819
10820	return -EINVAL;
10821}
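/* Illustrative examples of what passes the check above: an LSM hook such as
 * security_inode_create() matches the "security_" prefix, and any kernel
 * function annotated with ALLOW_ERROR_INJECTION() satisfies
 * within_error_injection_list().
 */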
10822
10823static int check_attach_btf_id(struct bpf_verifier_env *env)
10824{
10825	struct bpf_prog *prog = env->prog;
10826	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
10827	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
10828	u32 btf_id = prog->aux->attach_btf_id;
10829	const char prefix[] = "btf_trace_";
10830	struct btf_func_model fmodel;
10831	int ret = 0, subprog = -1, i;
10832	struct bpf_trampoline *tr;
10833	const struct btf_type *t;
10834	bool conservative = true;
10835	const char *tname;
10836	struct btf *btf;
10837	long addr;
10838	u64 key;
10839
10840	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
10841		return check_struct_ops_btf_id(env);
10842
10843	if (prog->type != BPF_PROG_TYPE_TRACING &&
10844	    prog->type != BPF_PROG_TYPE_LSM &&
10845	    !prog_extension)
10846		return 0;
10847
10848	if (!btf_id) {
10849		verbose(env, "Tracing programs must provide btf_id\n");
10850		return -EINVAL;
10851	}
10852	btf = bpf_prog_get_target_btf(prog);
10853	if (!btf) {
10854		verbose(env,
10855			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
10856		return -EINVAL;
10857	}
10858	t = btf_type_by_id(btf, btf_id);
10859	if (!t) {
10860		verbose(env, "attach_btf_id %u is invalid\n", btf_id);
10861		return -EINVAL;
10862	}
10863	tname = btf_name_by_offset(btf, t->name_off);
10864	if (!tname) {
10865		verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
10866		return -EINVAL;
10867	}
10868	if (tgt_prog) {
10869		struct bpf_prog_aux *aux = tgt_prog->aux;
10870
10871		for (i = 0; i < aux->func_info_cnt; i++)
10872			if (aux->func_info[i].type_id == btf_id) {
10873				subprog = i;
10874				break;
10875			}
10876		if (subprog == -1) {
10877			verbose(env, "Subprog %s doesn't exist\n", tname);
10878			return -EINVAL;
10879		}
10880		conservative = aux->func_info_aux[subprog].unreliable;
10881		if (prog_extension) {
10882			if (conservative) {
10883				verbose(env,
10884					"Cannot replace static functions\n");
10885				return -EINVAL;
10886			}
10887			if (!prog->jit_requested) {
10888				verbose(env,
10889					"Extension programs should be JITed\n");
10890				return -EINVAL;
10891			}
10892			env->ops = bpf_verifier_ops[tgt_prog->type];
10893			prog->expected_attach_type = tgt_prog->expected_attach_type;
10894		}
10895		if (!tgt_prog->jited) {
10896			verbose(env, "Can attach to only JITed progs\n");
10897			return -EINVAL;
10898		}
10899		if (tgt_prog->type == prog->type) {
10900			/* Cannot fentry/fexit another fentry/fexit program.
10901			 * Cannot attach program extension to another extension.
10902			 * It's ok to attach fentry/fexit to extension program.
10903			 */
10904			verbose(env, "Cannot recursively attach\n");
10905			return -EINVAL;
10906		}
10907		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
10908		    prog_extension &&
10909		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
10910		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
10911			/* Program extensions can extend all program types
10912			 * except fentry/fexit. The reason is the following.
10913			 * The fentry/fexit programs are used for performance
10914			 * analysis, stats and can be attached to any program
 10915			 * type except themselves. When an extension program
 10916			 * replaces an XDP function it is necessary to allow
 10917			 * performance analysis of all functions: both the original
 10918			 * XDP program and its program extension. Hence
10919			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
10920			 * allowed. If extending of fentry/fexit was allowed it
10921			 * would be possible to create long call chain
10922			 * fentry->extension->fentry->extension beyond
10923			 * reasonable stack size. Hence extending fentry is not
10924			 * allowed.
10925			 */
10926			verbose(env, "Cannot extend fentry/fexit\n");
10927			return -EINVAL;
10928		}
10929		key = ((u64)aux->id) << 32 | btf_id;
10930	} else {
10931		if (prog_extension) {
10932			verbose(env, "Cannot replace kernel functions\n");
10933			return -EINVAL;
10934		}
10935		key = btf_id;
10936	}
10937
10938	switch (prog->expected_attach_type) {
10939	case BPF_TRACE_RAW_TP:
10940		if (tgt_prog) {
10941			verbose(env,
10942				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
10943			return -EINVAL;
10944		}
10945		if (!btf_type_is_typedef(t)) {
10946			verbose(env, "attach_btf_id %u is not a typedef\n",
10947				btf_id);
10948			return -EINVAL;
10949		}
10950		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
10951			verbose(env, "attach_btf_id %u points to wrong type name %s\n",
10952				btf_id, tname);
10953			return -EINVAL;
10954		}
10955		tname += sizeof(prefix) - 1;
10956		t = btf_type_by_id(btf, t->type);
10957		if (!btf_type_is_ptr(t))
10958			/* should never happen in valid vmlinux build */
10959			return -EINVAL;
10960		t = btf_type_by_id(btf, t->type);
10961		if (!btf_type_is_func_proto(t))
10962			/* should never happen in valid vmlinux build */
10963			return -EINVAL;
10964
 10965		/* remember two read-only pointers that are valid for
 10966		 * the lifetime of the kernel
10967		 */
10968		prog->aux->attach_func_name = tname;
10969		prog->aux->attach_func_proto = t;
10970		prog->aux->attach_btf_trace = true;
10971		return 0;
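		/* Example of the raw_tp resolution above (illustrative): for a
		 * program attached to the sched_switch tracepoint,
		 * attach_btf_id resolves to the "btf_trace_sched_switch"
		 * typedef, tname is advanced past the "btf_trace_" prefix to
		 * "sched_switch", and the pointed-to func proto describes the
		 * tracepoint arguments.
		 */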
10972	case BPF_TRACE_ITER:
10973		if (!btf_type_is_func(t)) {
10974			verbose(env, "attach_btf_id %u is not a function\n",
10975				btf_id);
10976			return -EINVAL;
10977		}
10978		t = btf_type_by_id(btf, t->type);
10979		if (!btf_type_is_func_proto(t))
10980			return -EINVAL;
10981		prog->aux->attach_func_name = tname;
10982		prog->aux->attach_func_proto = t;
10983		if (!bpf_iter_prog_supported(prog))
10984			return -EINVAL;
10985		ret = btf_distill_func_proto(&env->log, btf, t,
10986					     tname, &fmodel);
10987		return ret;
10988	default:
10989		if (!prog_extension)
10990			return -EINVAL;
10991		fallthrough;
10992	case BPF_MODIFY_RETURN:
10993	case BPF_LSM_MAC:
10994	case BPF_TRACE_FENTRY:
10995	case BPF_TRACE_FEXIT:
10996		prog->aux->attach_func_name = tname;
10997		if (prog->type == BPF_PROG_TYPE_LSM) {
10998			ret = bpf_lsm_verify_prog(&env->log, prog);
10999			if (ret < 0)
11000				return ret;
11001		}
11002
11003		if (!btf_type_is_func(t)) {
11004			verbose(env, "attach_btf_id %u is not a function\n",
11005				btf_id);
11006			return -EINVAL;
11007		}
11008		if (prog_extension &&
11009		    btf_check_type_match(env, prog, btf, t))
11010			return -EINVAL;
11011		t = btf_type_by_id(btf, t->type);
11012		if (!btf_type_is_func_proto(t))
11013			return -EINVAL;
11014		tr = bpf_trampoline_lookup(key);
11015		if (!tr)
11016			return -ENOMEM;
11017		/* t is either vmlinux type or another program's type */
11018		prog->aux->attach_func_proto = t;
11019		mutex_lock(&tr->mutex);
11020		if (tr->func.addr) {
11021			prog->aux->trampoline = tr;
11022			goto out;
11023		}
11024		if (tgt_prog && conservative) {
11025			prog->aux->attach_func_proto = NULL;
11026			t = NULL;
11027		}
11028		ret = btf_distill_func_proto(&env->log, btf, t,
11029					     tname, &tr->func.model);
11030		if (ret < 0)
11031			goto out;
11032		if (tgt_prog) {
11033			if (subprog == 0)
11034				addr = (long) tgt_prog->bpf_func;
11035			else
11036				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
11037		} else {
11038			addr = kallsyms_lookup_name(tname);
11039			if (!addr) {
11040				verbose(env,
11041					"The address of function %s cannot be found\n",
11042					tname);
11043				ret = -ENOENT;
11044				goto out;
11045			}
11046		}
11047
11048		if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
11049			ret = check_attach_modify_return(prog, addr);
11050			if (ret)
11051				verbose(env, "%s() is not modifiable\n",
11052					prog->aux->attach_func_name);
11053		}
11054
11055		if (ret)
11056			goto out;
11057		tr->func.addr = (void *)addr;
11058		prog->aux->trampoline = tr;
11059out:
11060		mutex_unlock(&tr->mutex);
11061		if (ret)
11062			bpf_trampoline_put(tr);
11063		return ret;
11064	}
11065}
11066
11067int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
11068	      union bpf_attr __user *uattr)
11069{
11070	u64 start_time = ktime_get_ns();
11071	struct bpf_verifier_env *env;
11072	struct bpf_verifier_log *log;
11073	int i, len, ret = -EINVAL;
11074	bool is_priv;
11075
11076	/* no program is valid */
11077	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
11078		return -EINVAL;
11079
11080	/* 'struct bpf_verifier_env' can be global, but since it's not small,
11081	 * allocate/free it every time bpf_check() is called
11082	 */
11083	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
11084	if (!env)
11085		return -ENOMEM;
11086	log = &env->log;
11087
11088	len = (*prog)->len;
11089	env->insn_aux_data =
11090		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
11091	ret = -ENOMEM;
11092	if (!env->insn_aux_data)
11093		goto err_free_env;
11094	for (i = 0; i < len; i++)
11095		env->insn_aux_data[i].orig_idx = i;
11096	env->prog = *prog;
11097	env->ops = bpf_verifier_ops[env->prog->type];
11098	is_priv = bpf_capable();
11099
11100	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
11101		mutex_lock(&bpf_verifier_lock);
11102		if (!btf_vmlinux)
11103			btf_vmlinux = btf_parse_vmlinux();
11104		mutex_unlock(&bpf_verifier_lock);
11105	}
11106
 11107	/* grab the mutex to protect a few globals used by the verifier */
11108	if (!is_priv)
11109		mutex_lock(&bpf_verifier_lock);
11110
11111	if (attr->log_level || attr->log_buf || attr->log_size) {
11112		/* user requested verbose verifier output
11113		 * and supplied buffer to store the verification trace
11114		 */
11115		log->level = attr->log_level;
11116		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
11117		log->len_total = attr->log_size;
11118
11119		ret = -EINVAL;
11120		/* log attributes have to be sane */
11121		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
11122		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
11123			goto err_unlock;
11124	}
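	/* Example of log attributes that satisfy the checks above
	 * (illustrative userspace values; ptr_to_u64() is a typical helper,
	 * not something defined here):
	 *
	 *    attr.log_level = 1;                  BPF_LOG_LEVEL1
	 *    attr.log_buf   = ptr_to_u64(buf);    non-NULL user buffer
	 *    attr.log_size  = 1 << 20;            128 <= size <= UINT_MAX >> 2
	 */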
11125
11126	if (IS_ERR(btf_vmlinux)) {
 11127		/* Either gcc or pahole or the kernel is broken. */
11128		verbose(env, "in-kernel BTF is malformed\n");
11129		ret = PTR_ERR(btf_vmlinux);
11130		goto skip_full_check;
11131	}
11132
11133	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
11134	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
11135		env->strict_alignment = true;
11136	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
11137		env->strict_alignment = false;
11138
11139	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
11140	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
11141	env->bypass_spec_v1 = bpf_bypass_spec_v1();
11142	env->bypass_spec_v4 = bpf_bypass_spec_v4();
11143	env->bpf_capable = bpf_capable();
11144
11145	if (is_priv)
11146		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
11147
11148	ret = replace_map_fd_with_map_ptr(env);
11149	if (ret < 0)
11150		goto skip_full_check;
11151
11152	if (bpf_prog_is_dev_bound(env->prog->aux)) {
11153		ret = bpf_prog_offload_verifier_prep(env->prog);
11154		if (ret)
11155			goto skip_full_check;
11156	}
11157
11158	env->explored_states = kvcalloc(state_htab_size(env),
11159				       sizeof(struct bpf_verifier_state_list *),
11160				       GFP_USER);
11161	ret = -ENOMEM;
11162	if (!env->explored_states)
11163		goto skip_full_check;
11164
11165	ret = check_subprogs(env);
11166	if (ret < 0)
11167		goto skip_full_check;
11168
11169	ret = check_btf_info(env, attr, uattr);
11170	if (ret < 0)
11171		goto skip_full_check;
11172
11173	ret = check_attach_btf_id(env);
11174	if (ret)
11175		goto skip_full_check;
11176
11177	ret = check_cfg(env);
11178	if (ret < 0)
11179		goto skip_full_check;
11180
11181	ret = do_check_subprogs(env);
11182	ret = ret ?: do_check_main(env);
11183
11184	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
11185		ret = bpf_prog_offload_finalize(env);
11186
11187skip_full_check:
11188	kvfree(env->explored_states);
11189
11190	if (ret == 0)
11191		ret = check_max_stack_depth(env);
11192
11193	/* instruction rewrites happen after this point */
11194	if (is_priv) {
11195		if (ret == 0)
11196			opt_hard_wire_dead_code_branches(env);
11197		if (ret == 0)
11198			ret = opt_remove_dead_code(env);
11199		if (ret == 0)
11200			ret = opt_remove_nops(env);
11201	} else {
11202		if (ret == 0)
11203			sanitize_dead_code(env);
11204	}
11205
11206	if (ret == 0)
11207		/* program is valid, convert *(u32*)(ctx + off) accesses */
11208		ret = convert_ctx_accesses(env);
11209
11210	if (ret == 0)
11211		ret = fixup_bpf_calls(env);
11212
 11213	/* do 32-bit optimization after insn patching is done so that those
 11214	 * patched insns can be handled correctly.
11215	 */
11216	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
11217		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
11218		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
11219								     : false;
11220	}
11221
11222	if (ret == 0)
11223		ret = fixup_call_args(env);
11224
11225	env->verification_time = ktime_get_ns() - start_time;
11226	print_verification_stats(env);
11227
11228	if (log->level && bpf_verifier_log_full(log))
11229		ret = -ENOSPC;
11230	if (log->level && !log->ubuf) {
11231		ret = -EFAULT;
11232		goto err_release_maps;
11233	}
11234
11235	if (ret == 0 && env->used_map_cnt) {
11236		/* if program passed verifier, update used_maps in bpf_prog_info */
11237		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
11238							  sizeof(env->used_maps[0]),
11239							  GFP_KERNEL);
11240
11241		if (!env->prog->aux->used_maps) {
11242			ret = -ENOMEM;
11243			goto err_release_maps;
11244		}
11245
11246		memcpy(env->prog->aux->used_maps, env->used_maps,
11247		       sizeof(env->used_maps[0]) * env->used_map_cnt);
11248		env->prog->aux->used_map_cnt = env->used_map_cnt;
11249
11250		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
11251		 * bpf_ld_imm64 instructions
11252		 */
11253		convert_pseudo_ld_imm64(env);
11254	}
11255
11256	if (ret == 0)
11257		adjust_btf_func(env);
11258
11259err_release_maps:
11260	if (!env->prog->aux->used_maps)
11261		/* if we didn't copy map pointers into bpf_prog_info, release
11262		 * them now. Otherwise free_used_maps() will release them.
11263		 */
11264		release_maps(env);
11265
11266	/* extension progs temporarily inherit the attach_type of their targets
 11267	 * for verification purposes, so set it back to zero before returning
11268	 */
11269	if (env->prog->type == BPF_PROG_TYPE_EXT)
11270		env->prog->expected_attach_type = 0;
11271
11272	*prog = env->prog;
11273err_unlock:
11274	if (!is_priv)
11275		mutex_unlock(&bpf_verifier_lock);
11276	vfree(env->insn_aux_data);
11277err_free_env:
11278	kfree(env);
11279	return ret;
11280}