   1// SPDX-License-Identifier: GPL-2.0-only
   2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3 * Copyright (c) 2016 Facebook
   4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5 */
   6#include <uapi/linux/btf.h>
   7#include <linux/kernel.h>
   8#include <linux/types.h>
   9#include <linux/slab.h>
  10#include <linux/bpf.h>
  11#include <linux/btf.h>
  12#include <linux/bpf_verifier.h>
  13#include <linux/filter.h>
  14#include <net/netlink.h>
  15#include <linux/file.h>
  16#include <linux/vmalloc.h>
  17#include <linux/stringify.h>
  18#include <linux/bsearch.h>
  19#include <linux/sort.h>
  20#include <linux/perf_event.h>
  21#include <linux/ctype.h>
  22
  23#include "disasm.h"
  24
  25static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  26#define BPF_PROG_TYPE(_id, _name) \
  27	[_id] = & _name ## _verifier_ops,
  28#define BPF_MAP_TYPE(_id, _ops)
  29#include <linux/bpf_types.h>
  30#undef BPF_PROG_TYPE
  31#undef BPF_MAP_TYPE
  32};
  33
  34/* bpf_check() is a static code analyzer that walks eBPF program
  35 * instruction by instruction and updates register/stack state.
  36 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  37 *
  38 * The first pass is depth-first-search to check that the program is a DAG.
  39 * It rejects the following programs:
  40 * - larger than BPF_MAXINSNS insns
  41 * - if loop is present (detected via back-edge)
  42 * - unreachable insns exist (shouldn't be a forest. program = one function)
  43 * - out of bounds or malformed jumps
  44 * The second pass is all possible path descent from the 1st insn.
  45 * Since it's analyzing all paths through the program, the length of the
  46 * analysis is limited to 64k insn, which may be hit even if the total number of
  47 * insns is less than 4K but there are too many branches that change stack/regs.
  48 * Number of 'branches to be analyzed' is limited to 1k
  49 *
  50 * On entry to each instruction, each register has a type, and the instruction
  51 * changes the types of the registers depending on instruction semantics.
  52 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  53 * copied to R1.
  54 *
  55 * All registers are 64-bit.
  56 * R0 - return register
  57 * R1-R5 argument passing registers
  58 * R6-R9 callee saved registers
  59 * R10 - frame pointer read-only
  60 *
  61 * At the start of BPF program the register R1 contains a pointer to bpf_context
  62 * and has type PTR_TO_CTX.
  63 *
  64 * Verifier tracks arithmetic operations on pointers in case:
  65 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  66 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  67 * 1st insn copies R10 (which has FRAME_PTR) type into R1
  68 * and 2nd arithmetic instruction is pattern matched to recognize
  69 * that it wants to construct a pointer to some element within stack.
  70 * So after 2nd insn, the register R1 has type PTR_TO_STACK
  71 * (and -20 constant is saved for further stack bounds checking).
  72 * Meaning that this reg is a pointer to stack plus known immediate constant.
  73 *
  74 * Most of the time the registers have SCALAR_VALUE type, which
  75 * means the register has some value, but it's not a valid pointer.
  76 * (like pointer minus pointer becomes SCALAR_VALUE type)
  77 *
  78 * When verifier sees load or store instructions the type of base register
  79 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  80 * four pointer types recognized by check_mem_access() function.
  81 *
  82 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  83 * and the range of [ptr, ptr + map's value_size) is accessible.
  84 *
  85 * Registers used to pass values to function calls are checked against
  86 * function argument constraints.
  87 *
  88 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  89 * It means that the register type passed to this function must be
  90 * PTR_TO_STACK and it will be used inside the function as
  91 * 'pointer to map element key'
  92 *
  93 * For example the argument constraints for bpf_map_lookup_elem():
  94 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
  95 *   .arg1_type = ARG_CONST_MAP_PTR,
  96 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
  97 *
  98 * ret_type says that this function returns 'pointer to map elem value or null',
  99 * the function expects the 1st argument to be a const pointer to 'struct bpf_map' and
 100 * the 2nd argument to be a pointer to stack, which will be used inside
 101 * the helper function as a pointer to the map element key.
 102 *
 103 * On the kernel side the helper function looks like:
 104 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 105 * {
 106 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 107 *    void *key = (void *) (unsigned long) r2;
 108 *    void *value;
 109 *
 110 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 111 *    [key, key + map->key_size) bytes are valid and were initialized on
 112 *    the stack of eBPF program.
 113 * }
 114 *
 115 * Corresponding eBPF program may look like:
 116 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 117 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 118 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 119 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 120 * here verifier looks at prototype of map_lookup_elem() and sees:
 121 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 122 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 123 *
 124 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 125 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 126 * and were initialized prior to this call.
 127 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 128 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 129 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 130 * returns either a pointer to the map value or NULL.
 131 *
 132 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 133 * insn, the register holding that pointer in the true branch changes state to
 134 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 135 * branch. See check_cond_jmp_op().
 136 *
 137 * After the call R0 is set to return type of the function and registers R1-R5
 138 * are set to NOT_INIT to indicate that they are no longer readable.
 139 *
 140 * The following reference types represent a potential reference to a kernel
 141 * resource which, after first being allocated, must be checked and freed by
 142 * the BPF program:
 143 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 144 *
 145 * When the verifier sees a helper call return a reference type, it allocates a
 146 * pointer id for the reference and stores it in the current function state.
 147 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 148 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 149 * passes through a NULL-check conditional. For the branch wherein the state is
 150 * changed to CONST_IMM, the verifier releases the reference.
 151 *
 152 * For each helper function that allocates a reference, such as
 153 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 154 * bpf_sk_release(). When a reference type passes into the release function,
 155 * the verifier also releases the reference. If any unchecked or unreleased
 156 * reference remains at the end of the program, the verifier rejects it.
 157 */
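/* Illustrative sketch (not from the original source): the reference tracking
 * described above, for a hypothetical program type that is allowed to call
 * bpf_sk_lookup_tcp(). Argument setup for R1-R5 is omitted for brevity, so
 * this exact sequence would not pass verification as-is:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *        // R0 is PTR_TO_SOCKET_OR_NULL with a freshly allocated ref_obj_id
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 *        // taken branch: R0 == NULL, the verifier releases the reference
 *        // fall-through: R0 becomes PTR_TO_SOCKET
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *        // the reference is handed to the release function and dropped
 *    BPF_EXIT_INSN(),
 * Dropping the bpf_sk_release() call would leave an unreleased reference at
 * BPF_EXIT and the program would be rejected.
 */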
 158
 159/* verifier_state + insn_idx are pushed to stack when branch is encountered */
 160struct bpf_verifier_stack_elem {
 161	/* verifier state is 'st'
 162	 * before processing instruction 'insn_idx'
 163	 * and after processing instruction 'prev_insn_idx'
 164	 */
 165	struct bpf_verifier_state st;
 166	int insn_idx;
 167	int prev_insn_idx;
 168	struct bpf_verifier_stack_elem *next;
 169};
 170
 171#define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
 172#define BPF_COMPLEXITY_LIMIT_STATES	64
 173
 174#define BPF_MAP_PTR_UNPRIV	1UL
 175#define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
 176					  POISON_POINTER_DELTA))
 177#define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
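/* Illustration (an assumption for clarity, not part of the original code):
 * struct bpf_map allocations are at least word aligned, so bit 0 of the
 * pointer is always clear and can double as the "unpriv" flag packed into
 * aux->map_state:
 *
 *    aux->map_state = (unsigned long)map | BPF_MAP_PTR_UNPRIV;
 *    map    = BPF_MAP_PTR(aux->map_state);          // bit 0 masked off again
 *    unpriv = aux->map_state & BPF_MAP_PTR_UNPRIV;  // non-zero if set
 *
 * BPF_MAP_PTR_POISON deliberately keeps bit 0 clear so the two encodings do
 * not collide (see the BUILD_BUG_ON below).
 */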
 178
 179static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 180{
 181	return BPF_MAP_PTR(aux->map_state) == BPF_MAP_PTR_POISON;
 182}
 183
 184static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 185{
 186	return aux->map_state & BPF_MAP_PTR_UNPRIV;
 187}
 188
 189static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 190			      const struct bpf_map *map, bool unpriv)
 191{
 192	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 193	unpriv |= bpf_map_ptr_unpriv(aux);
 194	aux->map_state = (unsigned long)map |
 195			 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 196}
 197
 198struct bpf_call_arg_meta {
 199	struct bpf_map *map_ptr;
 200	bool raw_mode;
 201	bool pkt_access;
 202	int regno;
 203	int access_size;
 204	s64 msize_smax_value;
 205	u64 msize_umax_value;
 206	int ref_obj_id;
 207	int func_id;
 208};
 209
 210static DEFINE_MUTEX(bpf_verifier_lock);
 211
 212static const struct bpf_line_info *
 213find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 214{
 215	const struct bpf_line_info *linfo;
 216	const struct bpf_prog *prog;
 217	u32 i, nr_linfo;
 218
 219	prog = env->prog;
 220	nr_linfo = prog->aux->nr_linfo;
 221
 222	if (!nr_linfo || insn_off >= prog->len)
 223		return NULL;
 224
 225	linfo = prog->aux->linfo;
 226	for (i = 1; i < nr_linfo; i++)
 227		if (insn_off < linfo[i].insn_off)
 228			break;
 229
 230	return &linfo[i - 1];
 231}
 232
 233void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 234		       va_list args)
 235{
 236	unsigned int n;
 237
 238	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 239
 240	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 241		  "verifier log line truncated - local buffer too short\n");
 242
 243	n = min(log->len_total - log->len_used - 1, n);
 244	log->kbuf[n] = '\0';
 245
 246	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 247		log->len_used += n;
 248	else
 249		log->ubuf = NULL;
 250}
 251
 252/* log_level controls verbosity level of eBPF verifier.
 253 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 254 * so the user can figure out what's wrong with the program
 255 */
 256__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 257					   const char *fmt, ...)
 258{
 259	va_list args;
 260
 261	if (!bpf_verifier_log_needed(&env->log))
 262		return;
 263
 264	va_start(args, fmt);
 265	bpf_verifier_vlog(&env->log, fmt, args);
 266	va_end(args);
 267}
 268EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
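/* Usage sketch (the condition and message text are hypothetical, shown only
 * for illustration):
 *
 *    if (off < 0)
 *        bpf_verifier_log_write(env, "invalid negative offset %d\n", off);
 *
 * The text lands in the user-supplied log buffer via bpf_verifier_vlog(),
 * which clamps each line to the space remaining in log->len_total.
 */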
 269
 270__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 271{
 272	struct bpf_verifier_env *env = private_data;
 273	va_list args;
 274
 275	if (!bpf_verifier_log_needed(&env->log))
 276		return;
 277
 278	va_start(args, fmt);
 279	bpf_verifier_vlog(&env->log, fmt, args);
 280	va_end(args);
 281}
 282
 283static const char *ltrim(const char *s)
 284{
 285	while (isspace(*s))
 286		s++;
 287
 288	return s;
 289}
 290
 291__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 292					 u32 insn_off,
 293					 const char *prefix_fmt, ...)
 294{
 295	const struct bpf_line_info *linfo;
 296
 297	if (!bpf_verifier_log_needed(&env->log))
 298		return;
 299
 300	linfo = find_linfo(env, insn_off);
 301	if (!linfo || linfo == env->prev_linfo)
 302		return;
 303
 304	if (prefix_fmt) {
 305		va_list args;
 306
 307		va_start(args, prefix_fmt);
 308		bpf_verifier_vlog(&env->log, prefix_fmt, args);
 309		va_end(args);
 310	}
 311
 312	verbose(env, "%s\n",
 313		ltrim(btf_name_by_offset(env->prog->aux->btf,
 314					 linfo->line_off)));
 315
 316	env->prev_linfo = linfo;
 317}
 318
 319static bool type_is_pkt_pointer(enum bpf_reg_type type)
 320{
 321	return type == PTR_TO_PACKET ||
 322	       type == PTR_TO_PACKET_META;
 323}
 324
 325static bool type_is_sk_pointer(enum bpf_reg_type type)
 326{
 327	return type == PTR_TO_SOCKET ||
 328		type == PTR_TO_SOCK_COMMON ||
 329		type == PTR_TO_TCP_SOCK ||
 330		type == PTR_TO_XDP_SOCK;
 331}
 332
 333static bool reg_type_may_be_null(enum bpf_reg_type type)
 334{
 335	return type == PTR_TO_MAP_VALUE_OR_NULL ||
 336	       type == PTR_TO_SOCKET_OR_NULL ||
 337	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
 338	       type == PTR_TO_TCP_SOCK_OR_NULL;
 339}
 340
 341static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 342{
 343	return reg->type == PTR_TO_MAP_VALUE &&
 344		map_value_has_spin_lock(reg->map_ptr);
 345}
 346
 347static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 348{
 349	return type == PTR_TO_SOCKET ||
 350		type == PTR_TO_SOCKET_OR_NULL ||
 351		type == PTR_TO_TCP_SOCK ||
 352		type == PTR_TO_TCP_SOCK_OR_NULL;
 353}
 354
 355static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 356{
 357	return type == ARG_PTR_TO_SOCK_COMMON;
 358}
 359
 360/* Determine whether the function releases some resources allocated by another
 361 * function call. The first reference type argument will be assumed to be
 362 * released by release_reference().
 363 */
 364static bool is_release_function(enum bpf_func_id func_id)
 365{
 366	return func_id == BPF_FUNC_sk_release;
 367}
 368
 369static bool is_acquire_function(enum bpf_func_id func_id)
 370{
 371	return func_id == BPF_FUNC_sk_lookup_tcp ||
 372		func_id == BPF_FUNC_sk_lookup_udp ||
 373		func_id == BPF_FUNC_skc_lookup_tcp;
 374}
 375
 376static bool is_ptr_cast_function(enum bpf_func_id func_id)
 377{
 378	return func_id == BPF_FUNC_tcp_sock ||
 379		func_id == BPF_FUNC_sk_fullsock;
 380}
 381
 382/* string representation of 'enum bpf_reg_type' */
 383static const char * const reg_type_str[] = {
 384	[NOT_INIT]		= "?",
 385	[SCALAR_VALUE]		= "inv",
 386	[PTR_TO_CTX]		= "ctx",
 387	[CONST_PTR_TO_MAP]	= "map_ptr",
 388	[PTR_TO_MAP_VALUE]	= "map_value",
 389	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 390	[PTR_TO_STACK]		= "fp",
 391	[PTR_TO_PACKET]		= "pkt",
 392	[PTR_TO_PACKET_META]	= "pkt_meta",
 393	[PTR_TO_PACKET_END]	= "pkt_end",
 394	[PTR_TO_FLOW_KEYS]	= "flow_keys",
 395	[PTR_TO_SOCKET]		= "sock",
 396	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 397	[PTR_TO_SOCK_COMMON]	= "sock_common",
 398	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 399	[PTR_TO_TCP_SOCK]	= "tcp_sock",
 400	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 401	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 402	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 403};
 404
 405static char slot_type_char[] = {
 406	[STACK_INVALID]	= '?',
 407	[STACK_SPILL]	= 'r',
 408	[STACK_MISC]	= 'm',
 409	[STACK_ZERO]	= '0',
 410};
 411
 412static void print_liveness(struct bpf_verifier_env *env,
 413			   enum bpf_reg_liveness live)
 414{
 415	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 416		verbose(env, "_");
 417	if (live & REG_LIVE_READ)
 418		verbose(env, "r");
 419	if (live & REG_LIVE_WRITTEN)
 420		verbose(env, "w");
 421	if (live & REG_LIVE_DONE)
 422		verbose(env, "D");
 423}
 424
 425static struct bpf_func_state *func(struct bpf_verifier_env *env,
 426				   const struct bpf_reg_state *reg)
 427{
 428	struct bpf_verifier_state *cur = env->cur_state;
 429
 430	return cur->frame[reg->frameno];
 431}
 432
 433static void print_verifier_state(struct bpf_verifier_env *env,
 434				 const struct bpf_func_state *state)
 435{
 436	const struct bpf_reg_state *reg;
 437	enum bpf_reg_type t;
 438	int i;
 439
 440	if (state->frameno)
 441		verbose(env, " frame%d:", state->frameno);
 442	for (i = 0; i < MAX_BPF_REG; i++) {
 443		reg = &state->regs[i];
 444		t = reg->type;
 445		if (t == NOT_INIT)
 446			continue;
 447		verbose(env, " R%d", i);
 448		print_liveness(env, reg->live);
 449		verbose(env, "=%s", reg_type_str[t]);
 450		if (t == SCALAR_VALUE && reg->precise)
 451			verbose(env, "P");
 452		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 453		    tnum_is_const(reg->var_off)) {
 454			/* reg->off should be 0 for SCALAR_VALUE */
 455			verbose(env, "%lld", reg->var_off.value + reg->off);
 456		} else {
 457			verbose(env, "(id=%d", reg->id);
 458			if (reg_type_may_be_refcounted_or_null(t))
 459				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 460			if (t != SCALAR_VALUE)
 461				verbose(env, ",off=%d", reg->off);
 462			if (type_is_pkt_pointer(t))
 463				verbose(env, ",r=%d", reg->range);
 464			else if (t == CONST_PTR_TO_MAP ||
 465				 t == PTR_TO_MAP_VALUE ||
 466				 t == PTR_TO_MAP_VALUE_OR_NULL)
 467				verbose(env, ",ks=%d,vs=%d",
 468					reg->map_ptr->key_size,
 469					reg->map_ptr->value_size);
 470			if (tnum_is_const(reg->var_off)) {
 471				/* Typically an immediate SCALAR_VALUE, but
 472				 * could be a pointer whose offset is too big
 473				 * for reg->off
 474				 */
 475				verbose(env, ",imm=%llx", reg->var_off.value);
 476			} else {
 477				if (reg->smin_value != reg->umin_value &&
 478				    reg->smin_value != S64_MIN)
 479					verbose(env, ",smin_value=%lld",
 480						(long long)reg->smin_value);
 481				if (reg->smax_value != reg->umax_value &&
 482				    reg->smax_value != S64_MAX)
 483					verbose(env, ",smax_value=%lld",
 484						(long long)reg->smax_value);
 485				if (reg->umin_value != 0)
 486					verbose(env, ",umin_value=%llu",
 487						(unsigned long long)reg->umin_value);
 488				if (reg->umax_value != U64_MAX)
 489					verbose(env, ",umax_value=%llu",
 490						(unsigned long long)reg->umax_value);
 491				if (!tnum_is_unknown(reg->var_off)) {
 492					char tn_buf[48];
 493
 494					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 495					verbose(env, ",var_off=%s", tn_buf);
 496				}
 497			}
 498			verbose(env, ")");
 499		}
 500	}
 501	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 502		char types_buf[BPF_REG_SIZE + 1];
 503		bool valid = false;
 504		int j;
 505
 506		for (j = 0; j < BPF_REG_SIZE; j++) {
 507			if (state->stack[i].slot_type[j] != STACK_INVALID)
 508				valid = true;
 509			types_buf[j] = slot_type_char[
 510					state->stack[i].slot_type[j]];
 511		}
 512		types_buf[BPF_REG_SIZE] = 0;
 513		if (!valid)
 514			continue;
 515		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 516		print_liveness(env, state->stack[i].spilled_ptr.live);
 517		if (state->stack[i].slot_type[0] == STACK_SPILL) {
 518			reg = &state->stack[i].spilled_ptr;
 519			t = reg->type;
 520			verbose(env, "=%s", reg_type_str[t]);
 521			if (t == SCALAR_VALUE && reg->precise)
 522				verbose(env, "P");
 523			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 524				verbose(env, "%lld", reg->var_off.value + reg->off);
 525		} else {
 526			verbose(env, "=%s", types_buf);
 527		}
 528	}
 529	if (state->acquired_refs && state->refs[0].id) {
 530		verbose(env, " refs=%d", state->refs[0].id);
 531		for (i = 1; i < state->acquired_refs; i++)
 532			if (state->refs[i].id)
 533				verbose(env, ",%d", state->refs[i].id);
 534	}
 535	verbose(env, "\n");
 536}
 537
 538#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
 539static int copy_##NAME##_state(struct bpf_func_state *dst,		\
 540			       const struct bpf_func_state *src)	\
 541{									\
 542	if (!src->FIELD)						\
 543		return 0;						\
 544	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
 545		/* internal bug, make state invalid to reject the program */ \
 546		memset(dst, 0, sizeof(*dst));				\
 547		return -EFAULT;						\
 548	}								\
 549	memcpy(dst->FIELD, src->FIELD,					\
 550	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
 551	return 0;							\
 552}
 553/* copy_reference_state() */
 554COPY_STATE_FN(reference, acquired_refs, refs, 1)
 555/* copy_stack_state() */
 556COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 557#undef COPY_STATE_FN
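/* For readability, COPY_STATE_FN(reference, acquired_refs, refs, 1) above
 * expands to roughly the following (shown only as an illustration):
 *
 *    static int copy_reference_state(struct bpf_func_state *dst,
 *                                    const struct bpf_func_state *src)
 *    {
 *        if (!src->refs)
 *            return 0;
 *        if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *            memset(dst, 0, sizeof(*dst));
 *            return -EFAULT;
 *        }
 *        memcpy(dst->refs, src->refs,
 *               sizeof(*src->refs) * (src->acquired_refs / 1));
 *        return 0;
 *    }
 */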
 558
 559#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
 560static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
 561				  bool copy_old)			\
 562{									\
 563	u32 old_size = state->COUNT;					\
 564	struct bpf_##NAME##_state *new_##FIELD;				\
 565	int slot = size / SIZE;						\
 566									\
 567	if (size <= old_size || !size) {				\
 568		if (copy_old)						\
 569			return 0;					\
 570		state->COUNT = slot * SIZE;				\
 571		if (!size && old_size) {				\
 572			kfree(state->FIELD);				\
 573			state->FIELD = NULL;				\
 574		}							\
 575		return 0;						\
 576	}								\
 577	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
 578				    GFP_KERNEL);			\
 579	if (!new_##FIELD)						\
 580		return -ENOMEM;						\
 581	if (copy_old) {							\
 582		if (state->FIELD)					\
 583			memcpy(new_##FIELD, state->FIELD,		\
 584			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
 585		memset(new_##FIELD + old_size / SIZE, 0,		\
 586		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
 587	}								\
 588	state->COUNT = slot * SIZE;					\
 589	kfree(state->FIELD);						\
 590	state->FIELD = new_##FIELD;					\
 591	return 0;							\
 592}
 593/* realloc_reference_state() */
 594REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
 595/* realloc_stack_state() */
 596REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 597#undef REALLOC_STATE_FN
 598
 599/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 600 * make it consume a minimal amount of memory. check_stack_write(), called on
 601 * stack accesses from the program, calls into realloc_func_state() to grow the stack size.
 602 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 603 * which realloc_stack_state() copies over. It points to previous
 604 * bpf_verifier_state which is never reallocated.
 605 */
 606static int realloc_func_state(struct bpf_func_state *state, int stack_size,
 607			      int refs_size, bool copy_old)
 608{
 609	int err = realloc_reference_state(state, refs_size, copy_old);
 610	if (err)
 611		return err;
 612	return realloc_stack_state(state, stack_size, copy_old);
 613}
 614
 615/* Acquire a pointer id from the env and update the state->refs to include
 616 * this new pointer reference.
 618 * On success, returns a valid pointer id to associate with the register.
 618 * On failure, returns a negative errno.
 619 */
 620static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 621{
 622	struct bpf_func_state *state = cur_func(env);
 623	int new_ofs = state->acquired_refs;
 624	int id, err;
 625
 626	err = realloc_reference_state(state, state->acquired_refs + 1, true);
 627	if (err)
 628		return err;
 629	id = ++env->id_gen;
 630	state->refs[new_ofs].id = id;
 631	state->refs[new_ofs].insn_idx = insn_idx;
 632
 633	return id;
 634}
 635
 636/* release function corresponding to acquire_reference_state(). Idempotent. */
 637static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 638{
 639	int i, last_idx;
 640
 641	last_idx = state->acquired_refs - 1;
 642	for (i = 0; i < state->acquired_refs; i++) {
 643		if (state->refs[i].id == ptr_id) {
 644			if (last_idx && i != last_idx)
 645				memcpy(&state->refs[i], &state->refs[last_idx],
 646				       sizeof(*state->refs));
 647			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 648			state->acquired_refs--;
 649			return 0;
 650		}
 651	}
 652	return -EINVAL;
 653}
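/* Example of the swap-with-last removal above (the ids are hypothetical):
 * with refs = [{id=2}, {id=5}, {id=7}] and acquired_refs == 3, releasing
 * ptr_id 5 copies the last entry over slot 1 and zeroes the tail, leaving
 * refs = [{id=2}, {id=7}] and acquired_refs == 2. Releasing an id that is
 * not tracked returns -EINVAL.
 */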
 654
 655static int transfer_reference_state(struct bpf_func_state *dst,
 656				    struct bpf_func_state *src)
 657{
 658	int err = realloc_reference_state(dst, src->acquired_refs, false);
 659	if (err)
 660		return err;
 661	err = copy_reference_state(dst, src);
 662	if (err)
 663		return err;
 664	return 0;
 665}
 666
 667static void free_func_state(struct bpf_func_state *state)
 668{
 669	if (!state)
 670		return;
 671	kfree(state->refs);
 672	kfree(state->stack);
 673	kfree(state);
 674}
 675
 676static void clear_jmp_history(struct bpf_verifier_state *state)
 677{
 678	kfree(state->jmp_history);
 679	state->jmp_history = NULL;
 680	state->jmp_history_cnt = 0;
 681}
 682
 683static void free_verifier_state(struct bpf_verifier_state *state,
 684				bool free_self)
 685{
 686	int i;
 687
 688	for (i = 0; i <= state->curframe; i++) {
 689		free_func_state(state->frame[i]);
 690		state->frame[i] = NULL;
 691	}
 692	clear_jmp_history(state);
 693	if (free_self)
 694		kfree(state);
 695}
 696
 697/* copy verifier state from src to dst, growing dst stack space
 698 * when necessary to accommodate a larger src stack
 699 */
 700static int copy_func_state(struct bpf_func_state *dst,
 701			   const struct bpf_func_state *src)
 702{
 703	int err;
 704
 705	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
 706				 false);
 707	if (err)
 708		return err;
 709	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 710	err = copy_reference_state(dst, src);
 711	if (err)
 712		return err;
 713	return copy_stack_state(dst, src);
 714}
 715
 716static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 717			       const struct bpf_verifier_state *src)
 718{
 719	struct bpf_func_state *dst;
 720	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
 721	int i, err;
 722
 723	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
 724		kfree(dst_state->jmp_history);
 725		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
 726		if (!dst_state->jmp_history)
 727			return -ENOMEM;
 728	}
 729	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
 730	dst_state->jmp_history_cnt = src->jmp_history_cnt;
 731
 732	/* if dst has more stack frames than src, free them */
 733	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 734		free_func_state(dst_state->frame[i]);
 735		dst_state->frame[i] = NULL;
 736	}
 737	dst_state->speculative = src->speculative;
 738	dst_state->curframe = src->curframe;
 739	dst_state->active_spin_lock = src->active_spin_lock;
 740	dst_state->branches = src->branches;
 741	dst_state->parent = src->parent;
 742	dst_state->first_insn_idx = src->first_insn_idx;
 743	dst_state->last_insn_idx = src->last_insn_idx;
 744	for (i = 0; i <= src->curframe; i++) {
 745		dst = dst_state->frame[i];
 746		if (!dst) {
 747			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 748			if (!dst)
 749				return -ENOMEM;
 750			dst_state->frame[i] = dst;
 751		}
 752		err = copy_func_state(dst, src->frame[i]);
 753		if (err)
 754			return err;
 755	}
 756	return 0;
 757}
 758
 759static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 760{
 761	while (st) {
 762		u32 br = --st->branches;
 763
 764		/* WARN_ON(br > 1) technically makes sense here,
 765		 * but see comment in push_stack(), hence:
 766		 */
 767		WARN_ONCE((int)br < 0,
 768			  "BUG update_branch_counts:branches_to_explore=%d\n",
 769			  br);
 770		if (br)
 771			break;
 772		st = st->parent;
 773	}
 774}
 775
 776static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 777		     int *insn_idx)
 778{
 779	struct bpf_verifier_state *cur = env->cur_state;
 780	struct bpf_verifier_stack_elem *elem, *head = env->head;
 781	int err;
 782
 783	if (env->head == NULL)
 784		return -ENOENT;
 785
 786	if (cur) {
 787		err = copy_verifier_state(cur, &head->st);
 788		if (err)
 789			return err;
 790	}
 791	if (insn_idx)
 792		*insn_idx = head->insn_idx;
 793	if (prev_insn_idx)
 794		*prev_insn_idx = head->prev_insn_idx;
 795	elem = head->next;
 796	free_verifier_state(&head->st, false);
 797	kfree(head);
 798	env->head = elem;
 799	env->stack_size--;
 800	return 0;
 801}
 802
 803static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 804					     int insn_idx, int prev_insn_idx,
 805					     bool speculative)
 806{
 807	struct bpf_verifier_state *cur = env->cur_state;
 808	struct bpf_verifier_stack_elem *elem;
 809	int err;
 810
 811	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 812	if (!elem)
 813		goto err;
 814
 815	elem->insn_idx = insn_idx;
 816	elem->prev_insn_idx = prev_insn_idx;
 817	elem->next = env->head;
 818	env->head = elem;
 819	env->stack_size++;
 820	err = copy_verifier_state(&elem->st, cur);
 821	if (err)
 822		goto err;
 823	elem->st.speculative |= speculative;
 824	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 825		verbose(env, "The sequence of %d jumps is too complex.\n",
 826			env->stack_size);
 827		goto err;
 828	}
 829	if (elem->st.parent) {
 830		++elem->st.parent->branches;
 831		/* WARN_ON(branches > 2) technically makes sense here,
 832		 * but
 833		 * 1. speculative states will bump 'branches' for non-branch
 834		 * instructions
 835		 * 2. is_state_visited() heuristics may decide not to create
 836		 * a new state for a sequence of branches and all such current
 837		 * and cloned states will be pointing to a single parent state
 838		 * which might have large 'branches' count.
 839		 */
 840	}
 841	return &elem->st;
 842err:
 843	free_verifier_state(env->cur_state, true);
 844	env->cur_state = NULL;
 845	/* pop all elements and return */
 846	while (!pop_stack(env, NULL, NULL));
 847	return NULL;
 848}
 849
 850#define CALLER_SAVED_REGS 6
 851static const int caller_saved[CALLER_SAVED_REGS] = {
 852	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
 853};
 854
 855static void __mark_reg_not_init(struct bpf_reg_state *reg);
 856
 857/* Mark the unknown part of a register (variable offset or scalar value) as
 858 * known to have the value @imm.
 859 */
 860static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
 861{
 862	/* Clear id, off, and union(map_ptr, range) */
 863	memset(((u8 *)reg) + sizeof(reg->type), 0,
 864	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
 865	reg->var_off = tnum_const(imm);
 866	reg->smin_value = (s64)imm;
 867	reg->smax_value = (s64)imm;
 868	reg->umin_value = imm;
 869	reg->umax_value = imm;
 870}
 871
 872/* Mark the 'variable offset' part of a register as zero.  This should be
 873 * used only on registers holding a pointer type.
 874 */
 875static void __mark_reg_known_zero(struct bpf_reg_state *reg)
 876{
 877	__mark_reg_known(reg, 0);
 878}
 879
 880static void __mark_reg_const_zero(struct bpf_reg_state *reg)
 881{
 882	__mark_reg_known(reg, 0);
 883	reg->type = SCALAR_VALUE;
 884}
 885
 886static void mark_reg_known_zero(struct bpf_verifier_env *env,
 887				struct bpf_reg_state *regs, u32 regno)
 888{
 889	if (WARN_ON(regno >= MAX_BPF_REG)) {
 890		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
 891		/* Something bad happened, let's kill all regs */
 892		for (regno = 0; regno < MAX_BPF_REG; regno++)
 893			__mark_reg_not_init(regs + regno);
 894		return;
 895	}
 896	__mark_reg_known_zero(regs + regno);
 897}
 898
 899static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
 900{
 901	return type_is_pkt_pointer(reg->type);
 902}
 903
 904static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
 905{
 906	return reg_is_pkt_pointer(reg) ||
 907	       reg->type == PTR_TO_PACKET_END;
 908}
 909
 910/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
 911static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
 912				    enum bpf_reg_type which)
 913{
 914	/* The register can already have a range from prior markings.
 915	 * This is fine as long as it hasn't been advanced from its
 916	 * origin.
 917	 */
 918	return reg->type == which &&
 919	       reg->id == 0 &&
 920	       reg->off == 0 &&
 921	       tnum_equals_const(reg->var_off, 0);
 922}
 923
 924/* Attempts to improve min/max values based on var_off information */
 925static void __update_reg_bounds(struct bpf_reg_state *reg)
 926{
 927	/* min signed is max(sign bit) | min(other bits) */
 928	reg->smin_value = max_t(s64, reg->smin_value,
 929				reg->var_off.value | (reg->var_off.mask & S64_MIN));
 930	/* max signed is min(sign bit) | max(other bits) */
 931	reg->smax_value = min_t(s64, reg->smax_value,
 932				reg->var_off.value | (reg->var_off.mask & S64_MAX));
 933	reg->umin_value = max(reg->umin_value, reg->var_off.value);
 934	reg->umax_value = min(reg->umax_value,
 935			      reg->var_off.value | reg->var_off.mask);
 936}
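/* Worked example (the values are hypothetical): with var_off = (value=0x10,
 * mask=0x6) only bits 1-2 are unknown, so every concrete value has the form
 * 0x10 | x with x in {0, 2, 4, 6}. The clamps above therefore give
 * umin_value >= 0x10 (all unknown bits zero) and umax_value <= 0x16 (all
 * unknown bits one); the signed bounds are tightened the same way, with the
 * sign bit handled separately.
 */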
 937
 938/* Uses signed min/max values to inform unsigned, and vice-versa */
 939static void __reg_deduce_bounds(struct bpf_reg_state *reg)
 940{
 941	/* Learn sign from signed bounds.
 942	 * If we cannot cross the sign boundary, then signed and unsigned bounds
 943	 * are the same, so combine.  This works even in the negative case, e.g.
 944	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
 945	 */
 946	if (reg->smin_value >= 0 || reg->smax_value < 0) {
 947		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 948							  reg->umin_value);
 949		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 950							  reg->umax_value);
 951		return;
 952	}
 953	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
 954	 * boundary, so we must be careful.
 955	 */
 956	if ((s64)reg->umax_value >= 0) {
 957		/* Positive.  We can't learn anything from the smin, but smax
 958		 * is positive, hence safe.
 959		 */
 960		reg->smin_value = reg->umin_value;
 961		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
 962							  reg->umax_value);
 963	} else if ((s64)reg->umin_value < 0) {
 964		/* Negative.  We can't learn anything from the smax, but smin
 965		 * is negative, hence safe.
 966		 */
 967		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
 968							  reg->umin_value);
 969		reg->smax_value = reg->umax_value;
 970	}
 971}
 972
 973/* Attempts to improve var_off based on unsigned min/max information */
 974static void __reg_bound_offset(struct bpf_reg_state *reg)
 975{
 976	reg->var_off = tnum_intersect(reg->var_off,
 977				      tnum_range(reg->umin_value,
 978						 reg->umax_value));
 979}
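/* Sketch of the reverse direction (hypothetical bounds): if the verifier
 * already knows 4 <= reg <= 7, then tnum_range(4, 7) is (value=0x4, mask=0x3),
 * i.e. bit 2 is known to be set and only bits 0-1 remain unknown.
 * tnum_intersect() with the existing var_off can only keep or add known
 * bits, never discard them.
 */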
 980
 981/* Reset the min/max bounds of a register */
 982static void __mark_reg_unbounded(struct bpf_reg_state *reg)
 983{
 984	reg->smin_value = S64_MIN;
 985	reg->smax_value = S64_MAX;
 986	reg->umin_value = 0;
 987	reg->umax_value = U64_MAX;
 988}
 989
 990/* Mark a register as having a completely unknown (scalar) value. */
 991static void __mark_reg_unknown(struct bpf_reg_state *reg)
 992{
 993	/*
 994	 * Clear type, id, off, and union(map_ptr, range) and
 995	 * padding between 'type' and union
 996	 */
 997	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
 998	reg->type = SCALAR_VALUE;
 999	reg->var_off = tnum_unknown;
1000	reg->frameno = 0;
1001	__mark_reg_unbounded(reg);
1002}
1003
1004static void mark_reg_unknown(struct bpf_verifier_env *env,
1005			     struct bpf_reg_state *regs, u32 regno)
1006{
1007	if (WARN_ON(regno >= MAX_BPF_REG)) {
1008		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1009		/* Something bad happened, let's kill all regs except FP */
1010		for (regno = 0; regno < BPF_REG_FP; regno++)
1011			__mark_reg_not_init(regs + regno);
1012		return;
1013	}
1014	regs += regno;
1015	__mark_reg_unknown(regs);
1016	/* constant backtracking is enabled for root without bpf2bpf calls */
1017	regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
1018			true : false;
1019}
1020
1021static void __mark_reg_not_init(struct bpf_reg_state *reg)
1022{
1023	__mark_reg_unknown(reg);
1024	reg->type = NOT_INIT;
1025}
1026
1027static void mark_reg_not_init(struct bpf_verifier_env *env,
1028			      struct bpf_reg_state *regs, u32 regno)
1029{
1030	if (WARN_ON(regno >= MAX_BPF_REG)) {
1031		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1032		/* Something bad happened, let's kill all regs except FP */
1033		for (regno = 0; regno < BPF_REG_FP; regno++)
1034			__mark_reg_not_init(regs + regno);
1035		return;
1036	}
1037	__mark_reg_not_init(regs + regno);
1038}
1039
1040#define DEF_NOT_SUBREG	(0)
1041static void init_reg_state(struct bpf_verifier_env *env,
1042			   struct bpf_func_state *state)
1043{
1044	struct bpf_reg_state *regs = state->regs;
1045	int i;
1046
1047	for (i = 0; i < MAX_BPF_REG; i++) {
1048		mark_reg_not_init(env, regs, i);
1049		regs[i].live = REG_LIVE_NONE;
1050		regs[i].parent = NULL;
1051		regs[i].subreg_def = DEF_NOT_SUBREG;
1052	}
1053
1054	/* frame pointer */
1055	regs[BPF_REG_FP].type = PTR_TO_STACK;
1056	mark_reg_known_zero(env, regs, BPF_REG_FP);
1057	regs[BPF_REG_FP].frameno = state->frameno;
1058
1059	/* 1st arg to a function */
1060	regs[BPF_REG_1].type = PTR_TO_CTX;
1061	mark_reg_known_zero(env, regs, BPF_REG_1);
1062}
1063
1064#define BPF_MAIN_FUNC (-1)
1065static void init_func_state(struct bpf_verifier_env *env,
1066			    struct bpf_func_state *state,
1067			    int callsite, int frameno, int subprogno)
1068{
1069	state->callsite = callsite;
1070	state->frameno = frameno;
1071	state->subprogno = subprogno;
1072	init_reg_state(env, state);
1073}
1074
1075enum reg_arg_type {
1076	SRC_OP,		/* register is used as source operand */
1077	DST_OP,		/* register is used as destination operand */
1078	DST_OP_NO_MARK	/* same as above, check only, don't mark */
1079};
1080
1081static int cmp_subprogs(const void *a, const void *b)
1082{
1083	return ((struct bpf_subprog_info *)a)->start -
1084	       ((struct bpf_subprog_info *)b)->start;
1085}
1086
1087static int find_subprog(struct bpf_verifier_env *env, int off)
1088{
1089	struct bpf_subprog_info *p;
1090
1091	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1092		    sizeof(env->subprog_info[0]), cmp_subprogs);
1093	if (!p)
1094		return -ENOENT;
1095	return p - env->subprog_info;
1096
1097}
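/* Illustrative layout (hypothetical program): if functions start at insns
 * 0, 5 and 12, env->subprog_info[].start is the sorted array {0, 5, 12};
 * find_subprog(env, 5) returns 1 and find_subprog(env, 6) returns -ENOENT.
 * Passing &off directly as the bsearch() key works because cmp_subprogs()
 * only looks at 'start', the first member of struct bpf_subprog_info.
 */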
1098
1099static int add_subprog(struct bpf_verifier_env *env, int off)
1100{
1101	int insn_cnt = env->prog->len;
1102	int ret;
1103
1104	if (off >= insn_cnt || off < 0) {
1105		verbose(env, "call to invalid destination\n");
1106		return -EINVAL;
1107	}
1108	ret = find_subprog(env, off);
1109	if (ret >= 0)
1110		return 0;
1111	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1112		verbose(env, "too many subprograms\n");
1113		return -E2BIG;
1114	}
1115	env->subprog_info[env->subprog_cnt++].start = off;
1116	sort(env->subprog_info, env->subprog_cnt,
1117	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1118	return 0;
1119}
1120
1121static int check_subprogs(struct bpf_verifier_env *env)
1122{
1123	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1124	struct bpf_subprog_info *subprog = env->subprog_info;
1125	struct bpf_insn *insn = env->prog->insnsi;
1126	int insn_cnt = env->prog->len;
1127
1128	/* Add entry function. */
1129	ret = add_subprog(env, 0);
1130	if (ret < 0)
1131		return ret;
1132
1133	/* determine subprog starts. The end is one before the next starts */
1134	for (i = 0; i < insn_cnt; i++) {
1135		if (insn[i].code != (BPF_JMP | BPF_CALL))
1136			continue;
1137		if (insn[i].src_reg != BPF_PSEUDO_CALL)
1138			continue;
1139		if (!env->allow_ptr_leaks) {
1140			verbose(env, "function calls to other bpf functions are allowed for root only\n");
1141			return -EPERM;
1142		}
1143		ret = add_subprog(env, i + insn[i].imm + 1);
1144		if (ret < 0)
1145			return ret;
1146	}
1147
1148	/* Add a fake 'exit' subprog which could simplify subprog iteration
1149	 * logic. 'subprog_cnt' should not be increased.
1150	 */
1151	subprog[env->subprog_cnt].start = insn_cnt;
1152
1153	if (env->log.level & BPF_LOG_LEVEL2)
1154		for (i = 0; i < env->subprog_cnt; i++)
1155			verbose(env, "func#%d @%d\n", i, subprog[i].start);
1156
1157	/* now check that all jumps are within the same subprog */
1158	subprog_start = subprog[cur_subprog].start;
1159	subprog_end = subprog[cur_subprog + 1].start;
1160	for (i = 0; i < insn_cnt; i++) {
1161		u8 code = insn[i].code;
1162
1163		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1164			goto next;
1165		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1166			goto next;
1167		off = i + insn[i].off + 1;
1168		if (off < subprog_start || off >= subprog_end) {
1169			verbose(env, "jump out of range from insn %d to %d\n", i, off);
1170			return -EINVAL;
1171		}
1172next:
1173		if (i == subprog_end - 1) {
1174			/* to avoid fall-through from one subprog into another
1175			 * the last insn of the subprog should be either exit
1176			 * or unconditional jump back
1177			 */
1178			if (code != (BPF_JMP | BPF_EXIT) &&
1179			    code != (BPF_JMP | BPF_JA)) {
1180				verbose(env, "last insn is not an exit or jmp\n");
1181				return -EINVAL;
1182			}
1183			subprog_start = subprog_end;
1184			cur_subprog++;
1185			if (cur_subprog < env->subprog_cnt)
1186				subprog_end = subprog[cur_subprog + 1].start;
1187		}
1188	}
1189	return 0;
1190}
1191
1192/* Parentage chain of this register (or stack slot) should take care of all
1193 * issues like callee-saved registers, stack slot allocation time, etc.
1194 */
1195static int mark_reg_read(struct bpf_verifier_env *env,
1196			 const struct bpf_reg_state *state,
1197			 struct bpf_reg_state *parent, u8 flag)
1198{
1199	bool writes = parent == state->parent; /* Observe write marks */
1200	int cnt = 0;
1201
1202	while (parent) {
1203		/* if read wasn't screened by an earlier write ... */
1204		if (writes && state->live & REG_LIVE_WRITTEN)
1205			break;
1206		if (parent->live & REG_LIVE_DONE) {
1207			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1208				reg_type_str[parent->type],
1209				parent->var_off.value, parent->off);
1210			return -EFAULT;
1211		}
1212		/* The first condition is more likely to be true than the
1213		 * second, so check it first.
1214		 */
1215		if ((parent->live & REG_LIVE_READ) == flag ||
1216		    parent->live & REG_LIVE_READ64)
1217			/* The parentage chain never changes and
1218			 * this parent was already marked as LIVE_READ.
1219			 * There is no need to keep walking the chain again and
1220			 * keep re-marking all parents as LIVE_READ.
1221			 * This case happens when the same register is read
1222			 * multiple times without writes into it in-between.
1223			 * Also, if parent has the stronger REG_LIVE_READ64 set,
1224			 * then no need to set the weak REG_LIVE_READ32.
1225			 */
1226			break;
1227		/* ... then we depend on parent's value */
1228		parent->live |= flag;
1229		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1230		if (flag == REG_LIVE_READ64)
1231			parent->live &= ~REG_LIVE_READ32;
1232		state = parent;
1233		parent = state->parent;
1234		writes = true;
1235		cnt++;
1236	}
1237
1238	if (env->longest_mark_read_walk < cnt)
1239		env->longest_mark_read_walk = cnt;
1240	return 0;
1241}
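/* Illustrative walk (state names are hypothetical): when state C reads r6,
 * mark_reg_read() sets the read flag on r6 in C's parent B, then in B's
 * parent A, and so on up the parentage chain, stopping early once it hits a
 * state that wrote r6 (REG_LIVE_WRITTEN screens the read) or one that
 * already carries an equal or stronger read mark. These liveness marks are
 * what later lets state pruning ignore registers nobody will read.
 */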
1242
1243/* This function is supposed to be used by the following 32-bit optimization
1244 * code only. It returns TRUE if the source or destination register operates
1245 * on 64-bit, otherwise it returns FALSE.
1246 */
1247static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1248		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1249{
1250	u8 code, class, op;
1251
1252	code = insn->code;
1253	class = BPF_CLASS(code);
1254	op = BPF_OP(code);
1255	if (class == BPF_JMP) {
1256		/* BPF_EXIT for "main" will reach here. Return TRUE
1257		 * conservatively.
1258		 */
1259		if (op == BPF_EXIT)
1260			return true;
1261		if (op == BPF_CALL) {
1262			/* BPF-to-BPF calls will reach here because caller-saved
1263			 * registers are clobbered with DST_OP_NO_MARK; we don't
1264			 * care about their register defs because they are
1265			 * already marked as NOT_INIT.
1266			 */
1267			if (insn->src_reg == BPF_PSEUDO_CALL)
1268				return false;
1269			/* Helper call will reach here because of arg type
1270			 * check, conservatively return TRUE.
1271			 */
1272			if (t == SRC_OP)
1273				return true;
1274
1275			return false;
1276		}
1277	}
1278
1279	if (class == BPF_ALU64 || class == BPF_JMP ||
1280	    /* BPF_END always uses BPF_ALU class. */
1281	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1282		return true;
1283
1284	if (class == BPF_ALU || class == BPF_JMP32)
1285		return false;
1286
1287	if (class == BPF_LDX) {
1288		if (t != SRC_OP)
1289			return BPF_SIZE(code) == BPF_DW;
1290		/* LDX source must be ptr. */
1291		return true;
1292	}
1293
1294	if (class == BPF_STX) {
1295		if (reg->type != SCALAR_VALUE)
1296			return true;
1297		return BPF_SIZE(code) == BPF_DW;
1298	}
1299
1300	if (class == BPF_LD) {
1301		u8 mode = BPF_MODE(code);
1302
1303		/* LD_IMM64 */
1304		if (mode == BPF_IMM)
1305			return true;
1306
1307		/* Both LD_IND and LD_ABS return 32-bit data. */
1308		if (t != SRC_OP)
1309			return  false;
1310
1311		/* Implicit ctx ptr. */
1312		if (regno == BPF_REG_6)
1313			return true;
1314
1315		/* Explicit source could be any width. */
1316		return true;
1317	}
1318
1319	if (class == BPF_ST)
1320		/* The only source register for BPF_ST is a ptr. */
1321		return true;
1322
1323	/* Conservatively return true at default. */
1324	return true;
1325}
1326
1327/* Return TRUE if INSN doesn't explicitly define a value. */
1328static bool insn_no_def(struct bpf_insn *insn)
1329{
1330	u8 class = BPF_CLASS(insn->code);
1331
1332	return (class == BPF_JMP || class == BPF_JMP32 ||
1333		class == BPF_STX || class == BPF_ST);
1334}
1335
1336/* Return TRUE if INSN has defined any 32-bit value explicitly. */
1337static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1338{
1339	if (insn_no_def(insn))
1340		return false;
1341
1342	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1343}
1344
1345static void mark_insn_zext(struct bpf_verifier_env *env,
1346			   struct bpf_reg_state *reg)
1347{
1348	s32 def_idx = reg->subreg_def;
1349
1350	if (def_idx == DEF_NOT_SUBREG)
1351		return;
1352
1353	env->insn_aux_data[def_idx - 1].zext_dst = true;
1354	/* The dst will be zero extended, so won't be sub-register anymore. */
1355	reg->subreg_def = DEF_NOT_SUBREG;
1356}
1357
1358static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1359			 enum reg_arg_type t)
1360{
1361	struct bpf_verifier_state *vstate = env->cur_state;
1362	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1363	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1364	struct bpf_reg_state *reg, *regs = state->regs;
1365	bool rw64;
1366
1367	if (regno >= MAX_BPF_REG) {
1368		verbose(env, "R%d is invalid\n", regno);
1369		return -EINVAL;
1370	}
1371
1372	reg = &regs[regno];
1373	rw64 = is_reg64(env, insn, regno, reg, t);
1374	if (t == SRC_OP) {
1375		/* check whether register used as source operand can be read */
1376		if (reg->type == NOT_INIT) {
1377			verbose(env, "R%d !read_ok\n", regno);
1378			return -EACCES;
1379		}
1380		/* We don't need to worry about FP liveness because it's read-only */
1381		if (regno == BPF_REG_FP)
1382			return 0;
1383
1384		if (rw64)
1385			mark_insn_zext(env, reg);
1386
1387		return mark_reg_read(env, reg, reg->parent,
1388				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1389	} else {
1390		/* check whether register used as dest operand can be written to */
1391		if (regno == BPF_REG_FP) {
1392			verbose(env, "frame pointer is read only\n");
1393			return -EACCES;
1394		}
1395		reg->live |= REG_LIVE_WRITTEN;
1396		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1397		if (t == DST_OP)
1398			mark_reg_unknown(env, regs, regno);
1399	}
1400	return 0;
1401}
1402
1403/* for any branch, call or exit, record the history of jmps in the given state */
1404static int push_jmp_history(struct bpf_verifier_env *env,
1405			    struct bpf_verifier_state *cur)
1406{
1407	u32 cnt = cur->jmp_history_cnt;
1408	struct bpf_idx_pair *p;
1409
1410	cnt++;
1411	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1412	if (!p)
1413		return -ENOMEM;
1414	p[cnt - 1].idx = env->insn_idx;
1415	p[cnt - 1].prev_idx = env->prev_insn_idx;
1416	cur->jmp_history = p;
1417	cur->jmp_history_cnt = cnt;
1418	return 0;
1419}
1420
1421/* Backtrack one insn at a time. If idx is not at the top of recorded
1422 * history then the previous instruction came from straight-line execution.
1423 */
1424static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1425			     u32 *history)
1426{
1427	u32 cnt = *history;
1428
1429	if (cnt && st->jmp_history[cnt - 1].idx == i) {
1430		i = st->jmp_history[cnt - 1].prev_idx;
1431		(*history)--;
1432	} else {
1433		i--;
1434	}
1435	return i;
1436}
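/* Example walk (hypothetical indexes): with jmp_history = [{idx=7, prev_idx=3}]
 * and *history == 1, backtracking from insn 7 returns 3 (the jump source) and
 * decrements the count; from any other insn i it simply returns i - 1, i.e.
 * straight-line execution.
 */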
1437
1438/* For given verifier state backtrack_insn() is called from the last insn to
1439 * the first insn. Its purpose is to compute a bitmask of registers and
1440 * stack slots that need precision in the parent verifier state.
1441 */
1442static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1443			  u32 *reg_mask, u64 *stack_mask)
1444{
1445	const struct bpf_insn_cbs cbs = {
1446		.cb_print	= verbose,
1447		.private_data	= env,
1448	};
1449	struct bpf_insn *insn = env->prog->insnsi + idx;
1450	u8 class = BPF_CLASS(insn->code);
1451	u8 opcode = BPF_OP(insn->code);
1452	u8 mode = BPF_MODE(insn->code);
1453	u32 dreg = 1u << insn->dst_reg;
1454	u32 sreg = 1u << insn->src_reg;
1455	u32 spi;
1456
1457	if (insn->code == 0)
1458		return 0;
1459	if (env->log.level & BPF_LOG_LEVEL) {
1460		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1461		verbose(env, "%d: ", idx);
1462		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1463	}
1464
1465	if (class == BPF_ALU || class == BPF_ALU64) {
1466		if (!(*reg_mask & dreg))
1467			return 0;
1468		if (opcode == BPF_MOV) {
1469			if (BPF_SRC(insn->code) == BPF_X) {
1470				/* dreg = sreg
1471				 * dreg needs precision after this insn
1472				 * sreg needs precision before this insn
1473				 */
1474				*reg_mask &= ~dreg;
1475				*reg_mask |= sreg;
1476			} else {
1477				/* dreg = K
1478				 * dreg needs precision after this insn.
1479				 * Corresponding register is already marked
1480				 * as precise=true in this verifier state.
1481				 * No further markings in parent are necessary
1482				 */
1483				*reg_mask &= ~dreg;
1484			}
1485		} else {
1486			if (BPF_SRC(insn->code) == BPF_X) {
1487				/* dreg += sreg
1488				 * both dreg and sreg need precision
1489				 * before this insn
1490				 */
1491				*reg_mask |= sreg;
1492			} /* else dreg += K
1493			   * dreg still needs precision before this insn
1494			   */
1495		}
1496	} else if (class == BPF_LDX) {
1497		if (!(*reg_mask & dreg))
1498			return 0;
1499		*reg_mask &= ~dreg;
1500
1501		/* scalars can only be spilled into stack w/o losing precision.
1502		 * Load from any other memory can be zero extended.
1503		 * The desire to keep that precision is already indicated
1504		 * by 'precise' mark in corresponding register of this state.
1505		 * No further tracking necessary.
1506		 */
1507		if (insn->src_reg != BPF_REG_FP)
1508			return 0;
1509		if (BPF_SIZE(insn->code) != BPF_DW)
1510			return 0;
1511
1512		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
1513		 * that [fp - off] slot contains scalar that needs to be
1514		 * tracked with precision
1515		 */
1516		spi = (-insn->off - 1) / BPF_REG_SIZE;
1517		if (spi >= 64) {
1518			verbose(env, "BUG spi %d\n", spi);
1519			WARN_ONCE(1, "verifier backtracking bug");
1520			return -EFAULT;
1521		}
1522		*stack_mask |= 1ull << spi;
1523	} else if (class == BPF_STX || class == BPF_ST) {
1524		if (*reg_mask & dreg)
1525			/* stx & st shouldn't be using _scalar_ dst_reg
1526			 * to access memory. It means backtracking
1527			 * encountered a case of pointer subtraction.
1528			 */
1529			return -ENOTSUPP;
1530		/* scalars can only be spilled into stack */
1531		if (insn->dst_reg != BPF_REG_FP)
1532			return 0;
1533		if (BPF_SIZE(insn->code) != BPF_DW)
1534			return 0;
1535		spi = (-insn->off - 1) / BPF_REG_SIZE;
1536		if (spi >= 64) {
1537			verbose(env, "BUG spi %d\n", spi);
1538			WARN_ONCE(1, "verifier backtracking bug");
1539			return -EFAULT;
1540		}
1541		if (!(*stack_mask & (1ull << spi)))
1542			return 0;
1543		*stack_mask &= ~(1ull << spi);
1544		if (class == BPF_STX)
1545			*reg_mask |= sreg;
1546	} else if (class == BPF_JMP || class == BPF_JMP32) {
1547		if (opcode == BPF_CALL) {
1548			if (insn->src_reg == BPF_PSEUDO_CALL)
1549				return -ENOTSUPP;
1550			/* regular helper call sets R0 */
1551			*reg_mask &= ~1;
1552			if (*reg_mask & 0x3f) {
1553				/* if backtracking was looking for registers R1-R5
1554				 * they should have been found already.
1555				 */
1556				verbose(env, "BUG regs %x\n", *reg_mask);
1557				WARN_ONCE(1, "verifier backtracking bug");
1558				return -EFAULT;
1559			}
1560		} else if (opcode == BPF_EXIT) {
1561			return -ENOTSUPP;
1562		}
1563	} else if (class == BPF_LD) {
1564		if (!(*reg_mask & dreg))
1565			return 0;
1566		*reg_mask &= ~dreg;
1567		/* It's ld_imm64 or ld_abs or ld_ind.
1568		 * For ld_imm64 no further tracking of precision
1569		 * into parent is necessary
1570		 */
1571		if (mode == BPF_IND || mode == BPF_ABS)
1572			/* to be analyzed */
1573			return -ENOTSUPP;
1574	}
1575	return 0;
1576}
1577
1578/* the scalar precision tracking algorithm:
1579 * . at the start all registers have precise=false.
1580 * . scalar ranges are tracked as normal through alu and jmp insns.
1581 * . once precise value of the scalar register is used in:
1582 *   .  ptr + scalar alu
1583 *   . if (scalar cond K|scalar)
1584 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
1585 *   backtrack through the verifier states and mark all registers and
1586 *   stack slots with spilled constants that these scalar registers
1587 *   should be precise.
1588 * . during state pruning two registers (or spilled stack slots)
1589 *   are equivalent if both are not precise.
1590 *
1591 * Note the verifier cannot simply walk register parentage chain,
1592 * since many different registers and stack slots could have been
1593 * used to compute a single precise scalar.
1594 *
1595 * The approach of starting with precise=true for all registers and then
1596 * backtrack to mark a register as not precise when the verifier detects
1597 * that the program doesn't care about a specific value (e.g., when a helper
1598 * takes register as ARG_ANYTHING parameter) is not safe.
1599 *
1600 * It's ok to walk single parentage chain of the verifier states.
1601 * It's possible that this backtracking will go all the way till 1st insn.
1602 * All other branches will be explored for needing precision later.
1603 *
1604 * The backtracking needs to deal with cases like:
1605 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1606 * r9 -= r8
1607 * r5 = r9
1608 * if r5 > 0x79f goto pc+7
1609 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1610 * r5 += 1
1611 * ...
1612 * call bpf_perf_event_output#25
1613 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1614 *
1615 * and this case:
1616 * r6 = 1
1617 * call foo // uses callee's r6 inside to compute r0
1618 * r0 += r6
1619 * if r0 == 0 goto
1620 *
1621 * to track above reg_mask/stack_mask needs to be independent for each frame.
1622 *
1623 * Also if parent's curframe > frame where backtracking started,
1624 * the verifier needs to mark registers in both frames, otherwise callees
1625 * may incorrectly prune callers. This is similar to
1626 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1627 *
1628 * For now backtracking falls back into conservative marking.
1629 */
1630static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1631				     struct bpf_verifier_state *st)
1632{
1633	struct bpf_func_state *func;
1634	struct bpf_reg_state *reg;
1635	int i, j;
1636
1637	/* big hammer: mark all scalars precise in this path.
1638	 * pop_stack may still get !precise scalars.
1639	 */
1640	for (; st; st = st->parent)
1641		for (i = 0; i <= st->curframe; i++) {
1642			func = st->frame[i];
1643			for (j = 0; j < BPF_REG_FP; j++) {
1644				reg = &func->regs[j];
1645				if (reg->type != SCALAR_VALUE)
1646					continue;
1647				reg->precise = true;
1648			}
1649			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
1650				if (func->stack[j].slot_type[0] != STACK_SPILL)
1651					continue;
1652				reg = &func->stack[j].spilled_ptr;
1653				if (reg->type != SCALAR_VALUE)
1654					continue;
1655				reg->precise = true;
1656			}
1657		}
1658}
1659
1660static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
1661				  int spi)
1662{
1663	struct bpf_verifier_state *st = env->cur_state;
1664	int first_idx = st->first_insn_idx;
1665	int last_idx = env->insn_idx;
1666	struct bpf_func_state *func;
1667	struct bpf_reg_state *reg;
1668	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
1669	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
1670	bool skip_first = true;
1671	bool new_marks = false;
1672	int i, err;
1673
1674	if (!env->allow_ptr_leaks)
1675		/* backtracking is root only for now */
1676		return 0;
1677
1678	func = st->frame[st->curframe];
1679	if (regno >= 0) {
1680		reg = &func->regs[regno];
1681		if (reg->type != SCALAR_VALUE) {
1682			WARN_ONCE(1, "backtracking misuse");
1683			return -EFAULT;
1684		}
1685		if (!reg->precise)
1686			new_marks = true;
1687		else
1688			reg_mask = 0;
1689		reg->precise = true;
1690	}
1691
1692	while (spi >= 0) {
1693		if (func->stack[spi].slot_type[0] != STACK_SPILL) {
1694			stack_mask = 0;
1695			break;
1696		}
1697		reg = &func->stack[spi].spilled_ptr;
1698		if (reg->type != SCALAR_VALUE) {
1699			stack_mask = 0;
1700			break;
1701		}
1702		if (!reg->precise)
1703			new_marks = true;
1704		else
1705			stack_mask = 0;
1706		reg->precise = true;
1707		break;
1708	}
1709
1710	if (!new_marks)
1711		return 0;
1712	if (!reg_mask && !stack_mask)
1713		return 0;
1714	for (;;) {
1715		DECLARE_BITMAP(mask, 64);
1716		u32 history = st->jmp_history_cnt;
1717
1718		if (env->log.level & BPF_LOG_LEVEL)
1719			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
1720		for (i = last_idx;;) {
1721			if (skip_first) {
1722				err = 0;
1723				skip_first = false;
1724			} else {
1725				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
1726			}
1727			if (err == -ENOTSUPP) {
1728				mark_all_scalars_precise(env, st);
1729				return 0;
1730			} else if (err) {
1731				return err;
1732			}
1733			if (!reg_mask && !stack_mask)
1734				/* Found assignment(s) into tracked register in this state.
1735				 * Since this state is already marked, just return.
1736				 * Nothing to be tracked further in the parent state.
1737				 */
1738				return 0;
1739			if (i == first_idx)
1740				break;
1741			i = get_prev_insn_idx(st, i, &history);
1742			if (i >= env->prog->len) {
1743				/* This can happen if backtracking reached insn 0
1744				 * and there are still reg_mask or stack_mask
1745				 * to backtrack.
1746				 * It means the backtracking missed the spot where
1747				 * particular register was initialized with a constant.
1748				 */
1749				verbose(env, "BUG backtracking idx %d\n", i);
1750				WARN_ONCE(1, "verifier backtracking bug");
1751				return -EFAULT;
1752			}
1753		}
1754		st = st->parent;
1755		if (!st)
1756			break;
1757
1758		new_marks = false;
1759		func = st->frame[st->curframe];
1760		bitmap_from_u64(mask, reg_mask);
1761		for_each_set_bit(i, mask, 32) {
1762			reg = &func->regs[i];
1763			if (reg->type != SCALAR_VALUE) {
1764				reg_mask &= ~(1u << i);
1765				continue;
1766			}
1767			if (!reg->precise)
1768				new_marks = true;
1769			reg->precise = true;
1770		}
1771
1772		bitmap_from_u64(mask, stack_mask);
1773		for_each_set_bit(i, mask, 64) {
1774			if (i >= func->allocated_stack / BPF_REG_SIZE) {
1775				/* the sequence of instructions:
1776				 * 2: (bf) r3 = r10
1777				 * 3: (7b) *(u64 *)(r3 -8) = r0
1778				 * 4: (79) r4 = *(u64 *)(r10 -8)
1779				 * doesn't contain jmps. It's backtracked
1780				 * as a single block.
1781				 * During backtracking insn 3 is not recognized as
1782				 * stack access, so at the end of backtracking
1783				 * stack slot fp-8 is still marked in stack_mask.
1784				 * However the parent state may not have accessed
1785				 * fp-8 and it's "unallocated" stack space.
1786				 * In such a case, fall back to conservative marking.
1787				 */
1788				mark_all_scalars_precise(env, st);
1789				return 0;
1790			}
1791
1792			if (func->stack[i].slot_type[0] != STACK_SPILL) {
1793				stack_mask &= ~(1ull << i);
1794				continue;
1795			}
1796			reg = &func->stack[i].spilled_ptr;
1797			if (reg->type != SCALAR_VALUE) {
1798				stack_mask &= ~(1ull << i);
1799				continue;
1800			}
1801			if (!reg->precise)
1802				new_marks = true;
1803			reg->precise = true;
1804		}
1805		if (env->log.level & BPF_LOG_LEVEL) {
1806			print_verifier_state(env, func);
1807			verbose(env, "parent %s regs=%x stack=%llx marks\n",
1808				new_marks ? "didn't have" : "already had",
1809				reg_mask, stack_mask);
1810		}
1811
1812		if (!reg_mask && !stack_mask)
1813			break;
1814		if (!new_marks)
1815			break;
1816
1817		last_idx = st->last_insn_idx;
1818		first_idx = st->first_insn_idx;
1819	}
1820	return 0;
1821}
1822
1823static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
1824{
1825	return __mark_chain_precision(env, regno, -1);
1826}
1827
1828static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
1829{
1830	return __mark_chain_precision(env, -1, spi);
1831}
1832
1833static bool is_spillable_regtype(enum bpf_reg_type type)
1834{
1835	switch (type) {
1836	case PTR_TO_MAP_VALUE:
1837	case PTR_TO_MAP_VALUE_OR_NULL:
1838	case PTR_TO_STACK:
1839	case PTR_TO_CTX:
1840	case PTR_TO_PACKET:
1841	case PTR_TO_PACKET_META:
1842	case PTR_TO_PACKET_END:
1843	case PTR_TO_FLOW_KEYS:
1844	case CONST_PTR_TO_MAP:
1845	case PTR_TO_SOCKET:
1846	case PTR_TO_SOCKET_OR_NULL:
1847	case PTR_TO_SOCK_COMMON:
1848	case PTR_TO_SOCK_COMMON_OR_NULL:
1849	case PTR_TO_TCP_SOCK:
1850	case PTR_TO_TCP_SOCK_OR_NULL:
1851	case PTR_TO_XDP_SOCK:
1852		return true;
1853	default:
1854		return false;
1855	}
1856}
1857
1858/* Does this register contain a constant zero? */
1859static bool register_is_null(struct bpf_reg_state *reg)
1860{
1861	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
1862}
1863
1864static bool register_is_const(struct bpf_reg_state *reg)
1865{
1866	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
1867}
1868
1869static void save_register_state(struct bpf_func_state *state,
1870				int spi, struct bpf_reg_state *reg)
1871{
1872	int i;
1873
1874	state->stack[spi].spilled_ptr = *reg;
1875	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1876
1877	for (i = 0; i < BPF_REG_SIZE; i++)
1878		state->stack[spi].slot_type[i] = STACK_SPILL;
1879}
1880
1881/* check_stack_read/write functions track spill/fill of registers,
1882 * stack boundary and alignment are checked in check_mem_access()
1883 */
1884static int check_stack_write(struct bpf_verifier_env *env,
1885			     struct bpf_func_state *state, /* func where register points to */
1886			     int off, int size, int value_regno, int insn_idx)
1887{
1888	struct bpf_func_state *cur; /* state of the current function */
1889	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
1890	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
1891	struct bpf_reg_state *reg = NULL;
1892
1893	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
1894				 state->acquired_refs, true);
1895	if (err)
1896		return err;
1897	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
1898	 * so it's aligned access and [off, off + size) are within stack limits
1899	 */
1900	if (!env->allow_ptr_leaks &&
1901	    state->stack[spi].slot_type[0] == STACK_SPILL &&
1902	    size != BPF_REG_SIZE) {
1903		verbose(env, "attempt to corrupt spilled pointer on stack\n");
1904		return -EACCES;
1905	}
1906
1907	cur = env->cur_state->frame[env->cur_state->curframe];
1908	if (value_regno >= 0)
1909		reg = &cur->regs[value_regno];
1910
1911	if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
1912	    !register_is_null(reg) && env->allow_ptr_leaks) {
1913		if (dst_reg != BPF_REG_FP) {
1914			/* The backtracking logic can only recognize explicit
1915			 * stack slot addresses like [fp - 8]. Other spills of a
1916			 * scalar via a different register have to be conservative.
1917			 * Backtrack from here and mark all registers as precise
1918			 * that contributed to 'reg' being a constant.
1919			 */
1920			err = mark_chain_precision(env, value_regno);
1921			if (err)
1922				return err;
1923		}
1924		save_register_state(state, spi, reg);
1925	} else if (reg && is_spillable_regtype(reg->type)) {
1926		/* register containing pointer is being spilled into stack */
1927		if (size != BPF_REG_SIZE) {
1928			verbose_linfo(env, insn_idx, "; ");
1929			verbose(env, "invalid size of register spill\n");
1930			return -EACCES;
1931		}
1932
1933		if (state != cur && reg->type == PTR_TO_STACK) {
1934			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
1935			return -EINVAL;
1936		}
1937
1938		if (!env->allow_ptr_leaks) {
1939			bool sanitize = false;
1940
1941			if (state->stack[spi].slot_type[0] == STACK_SPILL &&
1942			    register_is_const(&state->stack[spi].spilled_ptr))
1943				sanitize = true;
1944			for (i = 0; i < BPF_REG_SIZE; i++)
1945				if (state->stack[spi].slot_type[i] == STACK_MISC) {
1946					sanitize = true;
1947					break;
1948				}
1949			if (sanitize) {
1950				int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
1951				int soff = (-spi - 1) * BPF_REG_SIZE;
1952
1953				/* detected reuse of an integer stack slot with a pointer,
1954				 * which means either llvm is reusing the stack slot or
1955				 * an attacker is trying to exploit CVE-2018-3639
1956				 * (speculative store bypass).
1957				 * Have to sanitize that slot with a preemptive
1958				 * store of zero.
1959				 */
1960				if (*poff && *poff != soff) {
1961					/* disallow programs where single insn stores
1962					 * into two different stack slots, since verifier
1963					 * cannot sanitize them
1964					 */
1965					verbose(env,
1966						"insn %d cannot access two stack slots fp%d and fp%d",
1967						insn_idx, *poff, soff);
1968					return -EINVAL;
1969				}
1970				*poff = soff;
1971			}
1972		}
1973		save_register_state(state, spi, reg);
1974	} else {
1975		u8 type = STACK_MISC;
1976
1977		/* regular write of data into stack destroys any spilled ptr */
1978		state->stack[spi].spilled_ptr.type = NOT_INIT;
1979		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
1980		if (state->stack[spi].slot_type[0] == STACK_SPILL)
1981			for (i = 0; i < BPF_REG_SIZE; i++)
1982				state->stack[spi].slot_type[i] = STACK_MISC;
1983
1984		/* only mark the slot as written if all 8 bytes were written,
1985		 * otherwise read propagation may incorrectly stop too soon
1986		 * when stack slots are partially written.
1987		 * This heuristic means that read propagation will be
1988		 * conservative, since it will add reg_live_read marks
1989		 * to stack slots all the way to the first state when a program
1990		 * writes+reads less than 8 bytes.
1991		 */
1992		if (size == BPF_REG_SIZE)
1993			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1994
1995		/* when we zero initialize stack slots mark them as such */
1996		if (reg && register_is_null(reg)) {
1997			/* backtracking doesn't work for STACK_ZERO yet. */
1998			err = mark_chain_precision(env, value_regno);
1999			if (err)
2000				return err;
2001			type = STACK_ZERO;
2002		}
2003
2004		/* Mark slots affected by this stack write. */
2005		for (i = 0; i < size; i++)
2006			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2007				type;
2008	}
2009	return 0;
2010}
2011
2012static int check_stack_read(struct bpf_verifier_env *env,
2013			    struct bpf_func_state *reg_state /* func where register points to */,
2014			    int off, int size, int value_regno)
2015{
2016	struct bpf_verifier_state *vstate = env->cur_state;
2017	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2018	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2019	struct bpf_reg_state *reg;
2020	u8 *stype;
2021
2022	if (reg_state->allocated_stack <= slot) {
2023		verbose(env, "invalid read from stack off %d+0 size %d\n",
2024			off, size);
2025		return -EACCES;
2026	}
2027	stype = reg_state->stack[spi].slot_type;
2028	reg = &reg_state->stack[spi].spilled_ptr;
2029
2030	if (stype[0] == STACK_SPILL) {
2031		if (size != BPF_REG_SIZE) {
2032			if (reg->type != SCALAR_VALUE) {
2033				verbose_linfo(env, env->insn_idx, "; ");
2034				verbose(env, "invalid size of register fill\n");
2035				return -EACCES;
2036			}
2037			if (value_regno >= 0) {
2038				mark_reg_unknown(env, state->regs, value_regno);
2039				state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2040			}
2041			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2042			return 0;
2043		}
2044		for (i = 1; i < BPF_REG_SIZE; i++) {
2045			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2046				verbose(env, "corrupted spill memory\n");
2047				return -EACCES;
2048			}
2049		}
2050
2051		if (value_regno >= 0) {
2052			/* restore register state from stack */
2053			state->regs[value_regno] = *reg;
2054			/* mark reg as written since spilled pointer state likely
2055			 * has its liveness marks cleared by is_state_visited()
2056			 * which resets stack/reg liveness for state transitions
2057			 */
2058			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2059		}
2060		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2061	} else {
2062		int zeros = 0;
2063
2064		for (i = 0; i < size; i++) {
2065			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
2066				continue;
2067			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
2068				zeros++;
2069				continue;
2070			}
2071			verbose(env, "invalid read from stack off %d+%d size %d\n",
2072				off, i, size);
2073			return -EACCES;
2074		}
2075		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2076		if (value_regno >= 0) {
2077			if (zeros == size) {
2078				/* any size read into register is zero extended,
2079				 * so the whole register == const_zero
2080				 */
2081				__mark_reg_const_zero(&state->regs[value_regno]);
2082				/* backtracking doesn't support STACK_ZERO yet,
2083				 * so mark it precise here, so that later
2084				 * backtracking can stop here.
2085				 * Backtracking may not need this if this register
2086				 * doesn't participate in pointer adjustment.
2087				 * Forward propagation of precise flag is not
2088				 * necessary either. This mark is only to stop
2089				 * backtracking. Any register that contributed
2090				 * to const 0 was marked precise before spill.
2091				 */
2092				state->regs[value_regno].precise = true;
2093			} else {
2094				/* have read misc data from the stack */
2095				mark_reg_unknown(env, state->regs, value_regno);
2096			}
2097			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
2098		}
2099	}
2100	return 0;
2101}
2102
2103static int check_stack_access(struct bpf_verifier_env *env,
2104			      const struct bpf_reg_state *reg,
2105			      int off, int size)
2106{
2107	/* Stack accesses must be at a fixed offset, so that we
2108	 * can determine what type of data were returned. See
2109	 * check_stack_read().
2110	 */
2111	if (!tnum_is_const(reg->var_off)) {
2112		char tn_buf[48];
2113
2114		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2115		verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
2116			tn_buf, off, size);
2117		return -EACCES;
2118	}
2119
2120	if (off >= 0 || off < -MAX_BPF_STACK) {
2121		verbose(env, "invalid stack off=%d size=%d\n", off, size);
2122		return -EACCES;
2123	}
2124
2125	return 0;
2126}
2127
2128static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2129				 int off, int size, enum bpf_access_type type)
2130{
2131	struct bpf_reg_state *regs = cur_regs(env);
2132	struct bpf_map *map = regs[regno].map_ptr;
2133	u32 cap = bpf_map_flags_to_cap(map);
2134
2135	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2136		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
2137			map->value_size, off, size);
2138		return -EACCES;
2139	}
2140
2141	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2142		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2143			map->value_size, off, size);
2144		return -EACCES;
2145	}
2146
2147	return 0;
2148}
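/* Example (illustrative): a map created with the BPF_F_RDONLY_PROG flag has
 * BPF_MAP_CAN_WRITE cleared by bpf_map_flags_to_cap(), so any BPF_WRITE into
 * its values is rejected here even though the map can still be updated from
 * the syscall side; BPF_F_WRONLY_PROG similarly clears BPF_MAP_CAN_READ.
 */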
2149
2150/* check read/write into map element returned by bpf_map_lookup_elem() */
2151static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
2152			      int size, bool zero_size_allowed)
2153{
2154	struct bpf_reg_state *regs = cur_regs(env);
2155	struct bpf_map *map = regs[regno].map_ptr;
2156
2157	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2158	    off + size > map->value_size) {
2159		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2160			map->value_size, off, size);
2161		return -EACCES;
2162	}
2163	return 0;
2164}
2165
2166/* check read/write into a map element with possible variable offset */
2167static int check_map_access(struct bpf_verifier_env *env, u32 regno,
2168			    int off, int size, bool zero_size_allowed)
2169{
2170	struct bpf_verifier_state *vstate = env->cur_state;
2171	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2172	struct bpf_reg_state *reg = &state->regs[regno];
2173	int err;
2174
2175	/* We may have adjusted the register to this map value, so we
2176	 * need to try adding each of min_value and max_value to off
2177	 * to make sure our theoretical access will be safe.
2178	 */
2179	if (env->log.level & BPF_LOG_LEVEL)
2180		print_verifier_state(env, state);
2181
2182	/* The minimum value is only important with signed
2183	 * comparisons where we can't assume the floor of a
2184	 * value is 0.  If we are using signed variables for our
2185	 * indexes we need to make sure that whatever we use
2186	 * will have a set floor within our range.
2187	 */
2188	if (reg->smin_value < 0 &&
2189	    (reg->smin_value == S64_MIN ||
2190	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2191	      reg->smin_value + off < 0)) {
2192		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2193			regno);
2194		return -EACCES;
2195	}
2196	err = __check_map_access(env, regno, reg->smin_value + off, size,
2197				 zero_size_allowed);
2198	if (err) {
2199		verbose(env, "R%d min value is outside of the array range\n",
2200			regno);
2201		return err;
2202	}
2203
2204	/* If we haven't set a max value then we need to bail since we can't be
2205	 * sure we won't do bad things.
2206	 * If reg->umax_value + off could overflow, treat that as unbounded too.
2207	 */
2208	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2209		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
2210			regno);
2211		return -EACCES;
2212	}
2213	err = __check_map_access(env, regno, reg->umax_value + off, size,
2214				 zero_size_allowed);
2215	if (err)
2216		verbose(env, "R%d max value is outside of the array range\n",
2217			regno);
2218
2219	if (map_value_has_spin_lock(reg->map_ptr)) {
2220		u32 lock = reg->map_ptr->spin_lock_off;
2221
2222		/* if any part of struct bpf_spin_lock can be touched by
2223		 * load/store reject this program.
2224		 * To check that [x1, x2) overlaps with [y1, y2)
2225		 * it is sufficient to check x1 < y2 && y1 < x2.
2226		 */
2227		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2228		     lock < reg->umax_value + off + size) {
2229			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2230			return -EACCES;
2231		}
2232	}
2233	return err;
2234}
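/* Example (illustrative BPF C; 'my_map', 'key' and 'idx' are made-up names,
 * assuming value_size is 64 bytes and 'idx' is an unsigned 32-bit scalar):
 *
 *   char *p = bpf_map_lookup_elem(&my_map, &key);
 *   if (!p)
 *           return 0;
 *   if (idx > 60)                   // clamps idx's umax_value to 60
 *           return 0;
 *   x = *(u32 *)(p + idx);          // smin+off = 0, umax+off+size = 64 <= 64
 *
 * Without the 'idx > 60' check, check_map_access() rejects the load with
 * "unbounded memory access" or "max value is outside of the array range",
 * depending on how much is known about 'idx'.
 */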
2235
2236#define MAX_PACKET_OFF 0xffff
2237
2238static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
2239				       const struct bpf_call_arg_meta *meta,
2240				       enum bpf_access_type t)
2241{
2242	switch (env->prog->type) {
2243	/* Program types only with direct read access go here! */
2244	case BPF_PROG_TYPE_LWT_IN:
2245	case BPF_PROG_TYPE_LWT_OUT:
2246	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2247	case BPF_PROG_TYPE_SK_REUSEPORT:
2248	case BPF_PROG_TYPE_FLOW_DISSECTOR:
2249	case BPF_PROG_TYPE_CGROUP_SKB:
2250		if (t == BPF_WRITE)
2251			return false;
2252		/* fallthrough */
2253
2254	/* Program types with direct read + write access go here! */
2255	case BPF_PROG_TYPE_SCHED_CLS:
2256	case BPF_PROG_TYPE_SCHED_ACT:
2257	case BPF_PROG_TYPE_XDP:
2258	case BPF_PROG_TYPE_LWT_XMIT:
2259	case BPF_PROG_TYPE_SK_SKB:
2260	case BPF_PROG_TYPE_SK_MSG:
2261		if (meta)
2262			return meta->pkt_access;
2263
2264		env->seen_direct_write = true;
2265		return true;
2266
2267	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2268		if (t == BPF_WRITE)
2269			env->seen_direct_write = true;
2270
2271		return true;
2272
2273	default:
2274		return false;
2275	}
2276}
2277
2278static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
2279				 int off, int size, bool zero_size_allowed)
2280{
2281	struct bpf_reg_state *regs = cur_regs(env);
2282	struct bpf_reg_state *reg = &regs[regno];
2283
2284	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
2285	    (u64)off + size > reg->range) {
2286		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2287			off, size, regno, reg->id, reg->off, reg->range);
2288		return -EACCES;
2289	}
2290	return 0;
2291}
2292
2293static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2294			       int size, bool zero_size_allowed)
2295{
2296	struct bpf_reg_state *regs = cur_regs(env);
2297	struct bpf_reg_state *reg = &regs[regno];
2298	int err;
2299
2300	/* We may have added a variable offset to the packet pointer; but any
2301	 * reg->range we have comes after that.  We are only checking the fixed
2302	 * offset.
2303	 */
2304
2305	/* We don't allow negative numbers, because we aren't tracking enough
2306	 * detail to prove they're safe.
2307	 */
2308	if (reg->smin_value < 0) {
2309		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2310			regno);
2311		return -EACCES;
2312	}
2313	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
2314	if (err) {
2315		verbose(env, "R%d offset is outside of the packet\n", regno);
2316		return err;
2317	}
2318
2319	/* __check_packet_access has made sure "off + size - 1" is within u16.
2320	 * reg->umax_value can't be bigger than MAX_PACKET_OFF, which is 0xffff,
2321	 * otherwise find_good_pkt_pointers would have refused to set the range
2322	 * info, and __check_packet_access would have rejected this pkt access.
2323	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2324	 */
2325	env->prog->aux->max_pkt_offset =
2326		max_t(u32, env->prog->aux->max_pkt_offset,
2327		      off + reg->umax_value + size - 1);
2328
2329	return err;
2330}
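/* Example (illustrative BPF C for a program type with direct packet access):
 *
 *   void *data     = (void *)(long)skb->data;
 *   void *data_end = (void *)(long)skb->data_end;
 *   struct ethhdr *eth = data;
 *
 *   if (data + sizeof(*eth) > data_end)
 *           return TC_ACT_OK;               // comparison sets reg->range
 *   if (eth->h_proto == bpf_htons(ETH_P_IP))
 *           ...                             // off+size is within reg->range
 *
 * find_good_pkt_pointers() records the range proven by the comparison and
 * check_packet_access() then only has to validate the fixed offset against it.
 */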
2331
2332/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
2333static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
2334			    enum bpf_access_type t, enum bpf_reg_type *reg_type)
2335{
2336	struct bpf_insn_access_aux info = {
2337		.reg_type = *reg_type,
2338	};
2339
2340	if (env->ops->is_valid_access &&
2341	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
2342		/* A non-zero info.ctx_field_size indicates that this field is a
2343		 * candidate for later verifier transformation to load the whole
2344		 * field and then apply a mask when accessed with a narrower
2345		 * access than actual ctx access size. A zero info.ctx_field_size
2346		 * will only allow for whole field access and rejects any other
2347		 * type of narrower access.
2348		 */
2349		*reg_type = info.reg_type;
2350
2351		env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
2352		/* remember the offset of last byte accessed in ctx */
2353		if (env->prog->aux->max_ctx_offset < off + size)
2354			env->prog->aux->max_ctx_offset = off + size;
2355		return 0;
2356	}
2357
2358	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
2359	return -EACCES;
2360}
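/* Example (illustrative): a 1-byte load of the 4-byte 'mark' field of
 * struct __sk_buff is a narrow ctx access. Provided the program type's
 * is_valid_access() accepts it, the access is allowed here and the
 * convert_ctx_accesses() pass later rewrites it to load the full field
 * and shift/mask out the requested byte, as described in the comment above.
 */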
2361
2362static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
2363				  int size)
2364{
2365	if (size < 0 || off < 0 ||
2366	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
2367		verbose(env, "invalid access to flow keys off=%d size=%d\n",
2368			off, size);
2369		return -EACCES;
2370	}
2371	return 0;
2372}
2373
2374static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
2375			     u32 regno, int off, int size,
2376			     enum bpf_access_type t)
2377{
2378	struct bpf_reg_state *regs = cur_regs(env);
2379	struct bpf_reg_state *reg = &regs[regno];
2380	struct bpf_insn_access_aux info = {};
2381	bool valid;
2382
2383	if (reg->smin_value < 0) {
2384		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2385			regno);
2386		return -EACCES;
2387	}
2388
2389	switch (reg->type) {
2390	case PTR_TO_SOCK_COMMON:
2391		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
2392		break;
2393	case PTR_TO_SOCKET:
2394		valid = bpf_sock_is_valid_access(off, size, t, &info);
2395		break;
2396	case PTR_TO_TCP_SOCK:
2397		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
2398		break;
2399	case PTR_TO_XDP_SOCK:
2400		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
2401		break;
2402	default:
2403		valid = false;
2404	}
2405
2406
2407	if (valid) {
2408		env->insn_aux_data[insn_idx].ctx_field_size =
2409			info.ctx_field_size;
2410		return 0;
2411	}
2412
2413	verbose(env, "R%d invalid %s access off=%d size=%d\n",
2414		regno, reg_type_str[reg->type], off, size);
2415
2416	return -EACCES;
2417}
2418
2419static bool __is_pointer_value(bool allow_ptr_leaks,
2420			       const struct bpf_reg_state *reg)
2421{
2422	if (allow_ptr_leaks)
2423		return false;
2424
2425	return reg->type != SCALAR_VALUE;
2426}
2427
2428static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2429{
2430	return cur_regs(env) + regno;
2431}
2432
2433static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
2434{
2435	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
2436}
2437
2438static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
2439{
2440	const struct bpf_reg_state *reg = reg_state(env, regno);
2441
2442	return reg->type == PTR_TO_CTX;
2443}
2444
2445static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
2446{
2447	const struct bpf_reg_state *reg = reg_state(env, regno);
2448
2449	return type_is_sk_pointer(reg->type);
2450}
2451
2452static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
2453{
2454	const struct bpf_reg_state *reg = reg_state(env, regno);
2455
2456	return type_is_pkt_pointer(reg->type);
2457}
2458
2459static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
2460{
2461	const struct bpf_reg_state *reg = reg_state(env, regno);
2462
2463	/* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
2464	return reg->type == PTR_TO_FLOW_KEYS;
2465}
2466
2467static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
2468				   const struct bpf_reg_state *reg,
2469				   int off, int size, bool strict)
2470{
2471	struct tnum reg_off;
2472	int ip_align;
2473
2474	/* Byte size accesses are always allowed. */
2475	if (!strict || size == 1)
2476		return 0;
2477
2478	/* For platforms that do not have a Kconfig enabling
2479	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
2480	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
2481	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
2482	 * to this code only in strict mode where we want to emulate
2483	 * the NET_IP_ALIGN==2 checking.  Therefore use an
2484	 * unconditional IP align value of '2'.
2485	 */
2486	ip_align = 2;
2487
2488	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
2489	if (!tnum_is_aligned(reg_off, size)) {
2490		char tn_buf[48];
2491
2492		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2493		verbose(env,
2494			"misaligned packet access off %d+%s+%d+%d size %d\n",
2495			ip_align, tn_buf, reg->off, off, size);
2496		return -EACCES;
2497	}
2498
2499	return 0;
2500}
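/* Example (illustrative, assuming reg->off and var_off are zero): in strict
 * mode a 4-byte load at packet offset 12 is rejected because
 * ip_align(2) + off(12) = 14 is not 4-byte aligned, while the same load at
 * offset 14 (just past the Ethernet header) passes since 2 + 14 = 16 is.
 * Byte-sized loads are always accepted above.
 */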
2501
2502static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
2503				       const struct bpf_reg_state *reg,
2504				       const char *pointer_desc,
2505				       int off, int size, bool strict)
2506{
2507	struct tnum reg_off;
2508
2509	/* Byte size accesses are always allowed. */
2510	if (!strict || size == 1)
2511		return 0;
2512
2513	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
2514	if (!tnum_is_aligned(reg_off, size)) {
2515		char tn_buf[48];
2516
2517		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2518		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
2519			pointer_desc, tn_buf, reg->off, off, size);
2520		return -EACCES;
2521	}
2522
2523	return 0;
2524}
2525
2526static int check_ptr_alignment(struct bpf_verifier_env *env,
2527			       const struct bpf_reg_state *reg, int off,
2528			       int size, bool strict_alignment_once)
2529{
2530	bool strict = env->strict_alignment || strict_alignment_once;
2531	const char *pointer_desc = "";
2532
2533	switch (reg->type) {
2534	case PTR_TO_PACKET:
2535	case PTR_TO_PACKET_META:
2536		/* Special case, because of NET_IP_ALIGN. Given metadata sits
2537		 * right in front, treat it the very same way.
2538		 */
2539		return check_pkt_ptr_alignment(env, reg, off, size, strict);
2540	case PTR_TO_FLOW_KEYS:
2541		pointer_desc = "flow keys ";
2542		break;
2543	case PTR_TO_MAP_VALUE:
2544		pointer_desc = "value ";
2545		break;
2546	case PTR_TO_CTX:
2547		pointer_desc = "context ";
2548		break;
2549	case PTR_TO_STACK:
2550		pointer_desc = "stack ";
2551		/* The stack spill tracking logic in check_stack_write()
2552		 * and check_stack_read() relies on stack accesses being
2553		 * aligned.
2554		 */
2555		strict = true;
2556		break;
2557	case PTR_TO_SOCKET:
2558		pointer_desc = "sock ";
2559		break;
2560	case PTR_TO_SOCK_COMMON:
2561		pointer_desc = "sock_common ";
2562		break;
2563	case PTR_TO_TCP_SOCK:
2564		pointer_desc = "tcp_sock ";
2565		break;
2566	case PTR_TO_XDP_SOCK:
2567		pointer_desc = "xdp_sock ";
2568		break;
2569	default:
2570		break;
2571	}
2572	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
2573					   strict);
2574}
2575
2576static int update_stack_depth(struct bpf_verifier_env *env,
2577			      const struct bpf_func_state *func,
2578			      int off)
2579{
2580	u16 stack = env->subprog_info[func->subprogno].stack_depth;
2581
2582	if (stack >= -off)
2583		return 0;
2584
2585	/* update known max for given subprogram */
2586	env->subprog_info[func->subprogno].stack_depth = -off;
2587	return 0;
2588}
2589
2590/* starting from the main bpf function walk all instructions of the function
2591 * and recursively walk all callees that the given function can call.
2592 * Ignore jump and exit insns.
2593 * Since recursion is prevented by check_cfg() this algorithm
2594 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
2595 */
2596static int check_max_stack_depth(struct bpf_verifier_env *env)
2597{
2598	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
2599	struct bpf_subprog_info *subprog = env->subprog_info;
2600	struct bpf_insn *insn = env->prog->insnsi;
2601	int ret_insn[MAX_CALL_FRAMES];
2602	int ret_prog[MAX_CALL_FRAMES];
2603
2604process_func:
2605	/* round up to 32 bytes, since this is the granularity
2606	 * of the interpreter stack size
2607	 */
2608	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
2609	if (depth > MAX_BPF_STACK) {
2610		verbose(env, "combined stack size of %d calls is %d. Too large\n",
2611			frame + 1, depth);
2612		return -EACCES;
2613	}
2614continue_func:
2615	subprog_end = subprog[idx + 1].start;
2616	for (; i < subprog_end; i++) {
2617		if (insn[i].code != (BPF_JMP | BPF_CALL))
2618			continue;
2619		if (insn[i].src_reg != BPF_PSEUDO_CALL)
2620			continue;
2621		/* remember insn and function to return to */
2622		ret_insn[frame] = i + 1;
2623		ret_prog[frame] = idx;
2624
2625		/* find the callee */
2626		i = i + insn[i].imm + 1;
2627		idx = find_subprog(env, i);
2628		if (idx < 0) {
2629			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
2630				  i);
2631			return -EFAULT;
2632		}
2633		frame++;
2634		if (frame >= MAX_CALL_FRAMES) {
2635			verbose(env, "the call stack of %d frames is too deep !\n",
2636				frame);
2637			return -E2BIG;
2638		}
2639		goto process_func;
2640	}
2641	/* end of for() loop means the last insn of the 'subprog'
2642	 * was reached. Doesn't matter whether it was JA or EXIT
2643	 */
2644	if (frame == 0)
2645		return 0;
2646	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
2647	frame--;
2648	i = ret_insn[frame];
2649	idx = ret_prog[frame];
2650	goto continue_func;
2651}
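/* Example (illustrative): if the main program uses 64 bytes of stack and
 * calls subprog A (224 bytes), which in turn calls subprog B (256 bytes),
 * the walk above sums 64 + 224 + 256 = 544 > MAX_BPF_STACK (512) and the
 * program is rejected, even though each function individually stays within
 * the 512 byte limit.
 */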
2652
2653#ifndef CONFIG_BPF_JIT_ALWAYS_ON
2654static int get_callee_stack_depth(struct bpf_verifier_env *env,
2655				  const struct bpf_insn *insn, int idx)
2656{
2657	int start = idx + insn->imm + 1, subprog;
2658
2659	subprog = find_subprog(env, start);
2660	if (subprog < 0) {
2661		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
2662			  start);
2663		return -EFAULT;
2664	}
2665	return env->subprog_info[subprog].stack_depth;
2666}
2667#endif
2668
2669static int check_ctx_reg(struct bpf_verifier_env *env,
2670			 const struct bpf_reg_state *reg, int regno)
2671{
2672	/* Access to ctx or passing it to a helper is only allowed in
2673	 * its original, unmodified form.
2674	 */
2675
2676	if (reg->off) {
2677		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
2678			regno, reg->off);
2679		return -EACCES;
2680	}
2681
2682	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2683		char tn_buf[48];
2684
2685		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2686		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
2687		return -EACCES;
2688	}
2689
2690	return 0;
2691}
2692
2693static int check_tp_buffer_access(struct bpf_verifier_env *env,
2694				  const struct bpf_reg_state *reg,
2695				  int regno, int off, int size)
2696{
2697	if (off < 0) {
2698		verbose(env,
2699			"R%d invalid tracepoint buffer access: off=%d, size=%d",
2700			regno, off, size);
2701		return -EACCES;
2702	}
2703	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
2704		char tn_buf[48];
2705
2706		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2707		verbose(env,
2708			"R%d invalid variable buffer offset: off=%d, var_off=%s",
2709			regno, off, tn_buf);
2710		return -EACCES;
2711	}
2712	if (off + size > env->prog->aux->max_tp_access)
2713		env->prog->aux->max_tp_access = off + size;
2714
2715	return 0;
2716}
2717
2718
2719/* truncate register to smaller size (in bytes)
2720 * must be called with size < BPF_REG_SIZE
2721 */
2722static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
2723{
2724	u64 mask;
2725
2726	/* clear high bits in bit representation */
2727	reg->var_off = tnum_cast(reg->var_off, size);
2728
2729	/* fix arithmetic bounds */
2730	mask = ((u64)1 << (size * 8)) - 1;
2731	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
2732		reg->umin_value &= mask;
2733		reg->umax_value &= mask;
2734	} else {
2735		reg->umin_value = 0;
2736		reg->umax_value = mask;
2737	}
2738	reg->smin_value = reg->umin_value;
2739	reg->smax_value = reg->umax_value;
2740}
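/* Example (illustrative): coercing a register known to be in the range
 * [0x1234, 0x1256] to size=1 keeps only the low byte. The high bits of
 * umin_value and umax_value agree (both 0x12xx), so the bounds become
 * [0x34, 0x56]; had the high bits differed, the bounds would be reset to
 * the conservative [0, 0xff].
 */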
2741
2742/* check whether memory at (regno + off) is accessible for t = (read | write)
2743 * if t==write, value_regno is a register whose value is stored into memory
2744 * if t==read, value_regno is a register which will receive the value from memory
2745 * if t==write && value_regno==-1, some unknown value is stored into memory
2746 * if t==read && value_regno==-1, don't care what we read from memory
2747 */
2748static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
2749			    int off, int bpf_size, enum bpf_access_type t,
2750			    int value_regno, bool strict_alignment_once)
2751{
2752	struct bpf_reg_state *regs = cur_regs(env);
2753	struct bpf_reg_state *reg = regs + regno;
2754	struct bpf_func_state *state;
2755	int size, err = 0;
2756
2757	size = bpf_size_to_bytes(bpf_size);
2758	if (size < 0)
2759		return size;
2760
2761	/* alignment checks will add in reg->off themselves */
2762	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
2763	if (err)
2764		return err;
2765
2766	/* for access checks, reg->off is just part of off */
2767	off += reg->off;
2768
2769	if (reg->type == PTR_TO_MAP_VALUE) {
2770		if (t == BPF_WRITE && value_regno >= 0 &&
2771		    is_pointer_value(env, value_regno)) {
2772			verbose(env, "R%d leaks addr into map\n", value_regno);
2773			return -EACCES;
2774		}
2775		err = check_map_access_type(env, regno, off, size, t);
2776		if (err)
2777			return err;
2778		err = check_map_access(env, regno, off, size, false);
2779		if (!err && t == BPF_READ && value_regno >= 0)
2780			mark_reg_unknown(env, regs, value_regno);
2781
2782	} else if (reg->type == PTR_TO_CTX) {
2783		enum bpf_reg_type reg_type = SCALAR_VALUE;
2784
2785		if (t == BPF_WRITE && value_regno >= 0 &&
2786		    is_pointer_value(env, value_regno)) {
2787			verbose(env, "R%d leaks addr into ctx\n", value_regno);
2788			return -EACCES;
2789		}
2790
2791		err = check_ctx_reg(env, reg, regno);
2792		if (err < 0)
2793			return err;
2794
2795		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type);
2796		if (!err && t == BPF_READ && value_regno >= 0) {
2797			/* ctx access returns either a scalar, or a
2798			 * PTR_TO_PACKET[_META,_END]. In the latter
2799			 * case, we know the offset is zero.
2800			 */
2801			if (reg_type == SCALAR_VALUE) {
2802				mark_reg_unknown(env, regs, value_regno);
2803			} else {
2804				mark_reg_known_zero(env, regs,
2805						    value_regno);
2806				if (reg_type_may_be_null(reg_type))
2807					regs[value_regno].id = ++env->id_gen;
2808				/* A load of a ctx field could have a different
2809				 * actual load size than the one encoded in the
2810				 * insn. When the dst is PTR, it is for sure not
2811				 * a sub-register.
2812				 */
2813				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
2814			}
2815			regs[value_regno].type = reg_type;
2816		}
2817
2818	} else if (reg->type == PTR_TO_STACK) {
2819		off += reg->var_off.value;
2820		err = check_stack_access(env, reg, off, size);
2821		if (err)
2822			return err;
2823
2824		state = func(env, reg);
2825		err = update_stack_depth(env, state, off);
2826		if (err)
2827			return err;
2828
2829		if (t == BPF_WRITE)
2830			err = check_stack_write(env, state, off, size,
2831						value_regno, insn_idx);
2832		else
2833			err = check_stack_read(env, state, off, size,
2834					       value_regno);
2835	} else if (reg_is_pkt_pointer(reg)) {
2836		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
2837			verbose(env, "cannot write into packet\n");
2838			return -EACCES;
2839		}
2840		if (t == BPF_WRITE && value_regno >= 0 &&
2841		    is_pointer_value(env, value_regno)) {
2842			verbose(env, "R%d leaks addr into packet\n",
2843				value_regno);
2844			return -EACCES;
2845		}
2846		err = check_packet_access(env, regno, off, size, false);
2847		if (!err && t == BPF_READ && value_regno >= 0)
2848			mark_reg_unknown(env, regs, value_regno);
2849	} else if (reg->type == PTR_TO_FLOW_KEYS) {
2850		if (t == BPF_WRITE && value_regno >= 0 &&
2851		    is_pointer_value(env, value_regno)) {
2852			verbose(env, "R%d leaks addr into flow keys\n",
2853				value_regno);
2854			return -EACCES;
2855		}
2856
2857		err = check_flow_keys_access(env, off, size);
2858		if (!err && t == BPF_READ && value_regno >= 0)
2859			mark_reg_unknown(env, regs, value_regno);
2860	} else if (type_is_sk_pointer(reg->type)) {
2861		if (t == BPF_WRITE) {
2862			verbose(env, "R%d cannot write into %s\n",
2863				regno, reg_type_str[reg->type]);
2864			return -EACCES;
2865		}
2866		err = check_sock_access(env, insn_idx, regno, off, size, t);
2867		if (!err && value_regno >= 0)
2868			mark_reg_unknown(env, regs, value_regno);
2869	} else if (reg->type == PTR_TO_TP_BUFFER) {
2870		err = check_tp_buffer_access(env, reg, regno, off, size);
2871		if (!err && t == BPF_READ && value_regno >= 0)
2872			mark_reg_unknown(env, regs, value_regno);
2873	} else {
2874		verbose(env, "R%d invalid mem access '%s'\n", regno,
2875			reg_type_str[reg->type]);
2876		return -EACCES;
2877	}
2878
2879	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
2880	    regs[value_regno].type == SCALAR_VALUE) {
2881		/* b/h/w load zero-extends, mark upper bits as known 0 */
2882		coerce_reg_to_size(&regs[value_regno], size);
2883	}
2884	return err;
2885}
2886
2887static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
2888{
2889	int err;
2890
2891	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
2892	    insn->imm != 0) {
2893		verbose(env, "BPF_XADD uses reserved fields\n");
2894		return -EINVAL;
2895	}
2896
2897	/* check src1 operand */
2898	err = check_reg_arg(env, insn->src_reg, SRC_OP);
2899	if (err)
2900		return err;
2901
2902	/* check src2 operand */
2903	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
2904	if (err)
2905		return err;
2906
2907	if (is_pointer_value(env, insn->src_reg)) {
2908		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
2909		return -EACCES;
2910	}
2911
2912	if (is_ctx_reg(env, insn->dst_reg) ||
2913	    is_pkt_reg(env, insn->dst_reg) ||
2914	    is_flow_key_reg(env, insn->dst_reg) ||
2915	    is_sk_reg(env, insn->dst_reg)) {
2916		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
2917			insn->dst_reg,
2918			reg_type_str[reg_state(env, insn->dst_reg)->type]);
2919		return -EACCES;
2920	}
2921
2922	/* check whether atomic_add can read the memory */
2923	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
2924			       BPF_SIZE(insn->code), BPF_READ, -1, true);
2925	if (err)
2926		return err;
2927
2928	/* check whether atomic_add can write into the same memory */
2929	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
2930				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
2931}
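/* Example (illustrative BPF C; 'my_map' and 'key' are made-up names):
 *
 *   long *cnt = bpf_map_lookup_elem(&my_map, &key);
 *   if (cnt)
 *           __sync_fetch_and_add(cnt, 1);   // emitted as a BPF_XADD | BPF_DW insn
 *
 * The destination must be plain memory such as a map value or the stack;
 * BPF_XADD into ctx, packet, flow keys or socket pointers is rejected above.
 */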
2932
2933static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
2934				  int off, int access_size,
2935				  bool zero_size_allowed)
2936{
2937	struct bpf_reg_state *reg = reg_state(env, regno);
2938
2939	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
2940	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
2941		if (tnum_is_const(reg->var_off)) {
2942			verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
2943				regno, off, access_size);
2944		} else {
2945			char tn_buf[48];
2946
2947			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2948			verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
2949				regno, tn_buf, access_size);
2950		}
2951		return -EACCES;
2952	}
2953	return 0;
2954}
2955
2956/* when register 'regno' is passed into a function that will read 'access_size'
2957 * bytes from that pointer, make sure that it's within the stack boundary
2958 * and all elements of the stack are initialized.
2959 * Unlike most pointer bounds-checking functions, this one doesn't take an
2960 * 'off' argument, so it has to add in reg->off itself.
2961 */
2962static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
2963				int access_size, bool zero_size_allowed,
2964				struct bpf_call_arg_meta *meta)
2965{
2966	struct bpf_reg_state *reg = reg_state(env, regno);
2967	struct bpf_func_state *state = func(env, reg);
2968	int err, min_off, max_off, i, j, slot, spi;
2969
2970	if (reg->type != PTR_TO_STACK) {
2971		/* Allow zero-byte read from NULL, regardless of pointer type */
2972		if (zero_size_allowed && access_size == 0 &&
2973		    register_is_null(reg))
2974			return 0;
2975
2976		verbose(env, "R%d type=%s expected=%s\n", regno,
2977			reg_type_str[reg->type],
2978			reg_type_str[PTR_TO_STACK]);
2979		return -EACCES;
2980	}
2981
2982	if (tnum_is_const(reg->var_off)) {
2983		min_off = max_off = reg->var_off.value + reg->off;
2984		err = __check_stack_boundary(env, regno, min_off, access_size,
2985					     zero_size_allowed);
2986		if (err)
2987			return err;
2988	} else {
2989		/* Variable offset is prohibited for unprivileged mode for
2990		 * simplicity since it requires corresponding support in
2991		 * Spectre masking for stack ALU.
2992		 * See also retrieve_ptr_limit().
2993		 */
2994		if (!env->allow_ptr_leaks) {
2995			char tn_buf[48];
2996
2997			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2998			verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
2999				regno, tn_buf);
3000			return -EACCES;
3001		}
3002		/* Only an initialized buffer on the stack is allowed to be accessed
3003		 * with a variable offset. With an uninitialized buffer it's hard to
3004		 * guarantee that the whole memory is marked as initialized on
3005		 * helper return, since the specific bounds are unknown, which may
3006		 * cause uninitialized stack data to leak.
3007		 */
3007		 */
3008		if (meta && meta->raw_mode)
3009			meta = NULL;
3010
3011		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3012		    reg->smax_value <= -BPF_MAX_VAR_OFF) {
3013			verbose(env, "R%d unbounded indirect variable offset stack access\n",
3014				regno);
3015			return -EACCES;
3016		}
3017		min_off = reg->smin_value + reg->off;
3018		max_off = reg->smax_value + reg->off;
3019		err = __check_stack_boundary(env, regno, min_off, access_size,
3020					     zero_size_allowed);
3021		if (err) {
3022			verbose(env, "R%d min value is outside of stack bound\n",
3023				regno);
3024			return err;
3025		}
3026		err = __check_stack_boundary(env, regno, max_off, access_size,
3027					     zero_size_allowed);
3028		if (err) {
3029			verbose(env, "R%d max value is outside of stack bound\n",
3030				regno);
3031			return err;
3032		}
3033	}
3034
3035	if (meta && meta->raw_mode) {
3036		meta->access_size = access_size;
3037		meta->regno = regno;
3038		return 0;
3039	}
3040
3041	for (i = min_off; i < max_off + access_size; i++) {
3042		u8 *stype;
3043
3044		slot = -i - 1;
3045		spi = slot / BPF_REG_SIZE;
3046		if (state->allocated_stack <= slot)
3047			goto err;
3048		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
3049		if (*stype == STACK_MISC)
3050			goto mark;
3051		if (*stype == STACK_ZERO) {
3052			/* helper can write anything into the stack */
3053			*stype = STACK_MISC;
3054			goto mark;
3055		}
3056		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
3057		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
3058			__mark_reg_unknown(&state->stack[spi].spilled_ptr);
3059			for (j = 0; j < BPF_REG_SIZE; j++)
3060				state->stack[spi].slot_type[j] = STACK_MISC;
3061			goto mark;
3062		}
3063
3064err:
3065		if (tnum_is_const(reg->var_off)) {
3066			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
3067				min_off, i - min_off, access_size);
3068		} else {
3069			char tn_buf[48];
3070
3071			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3072			verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
3073				tn_buf, i - min_off, access_size);
3074		}
3075		return -EACCES;
3076mark:
3077		/* reading any byte out of 8-byte 'spill_slot' will cause
3078		 * the whole slot to be marked as 'read'
3079		 */
3080		mark_reg_read(env, &state->stack[spi].spilled_ptr,
3081			      state->stack[spi].spilled_ptr.parent,
3082			      REG_LIVE_READ64);
3083	}
3084	return update_stack_depth(env, state, min_off);
3085}
3086
3087static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
3088				   int access_size, bool zero_size_allowed,
3089				   struct bpf_call_arg_meta *meta)
3090{
3091	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3092
3093	switch (reg->type) {
3094	case PTR_TO_PACKET:
3095	case PTR_TO_PACKET_META:
3096		return check_packet_access(env, regno, reg->off, access_size,
3097					   zero_size_allowed);
3098	case PTR_TO_MAP_VALUE:
3099		if (check_map_access_type(env, regno, reg->off, access_size,
3100					  meta && meta->raw_mode ? BPF_WRITE :
3101					  BPF_READ))
3102			return -EACCES;
3103		return check_map_access(env, regno, reg->off, access_size,
3104					zero_size_allowed);
3105	default: /* scalar_value|ptr_to_stack or invalid ptr */
3106		return check_stack_boundary(env, regno, access_size,
3107					    zero_size_allowed, meta);
3108	}
3109}
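/* Example (illustrative BPF C; 'my_map' is a made-up name): for
 *
 *   u32 key = 1;
 *   long *val = bpf_map_lookup_elem(&my_map, &key);
 *
 * the &key argument is PTR_TO_STACK, so check_helper_mem_access() falls
 * through to check_stack_boundary(), which verifies that all map->key_size
 * bytes at that stack offset are in bounds and initialized. Leaving 'key'
 * uninitialized is rejected with "invalid indirect read from stack".
 */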
3110
3111/* Implementation details:
3112 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
3113 * Two bpf_map_lookups (even with the same key) will have different reg->id.
3114 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
3115 * value_or_null->value transition, since the verifier only cares about
3116 * the range of access to valid map value pointer and doesn't care about actual
3117 * address of the map element.
3118 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
3119 * reg->id > 0 after value_or_null->value transition. By doing so
3120 * two bpf_map_lookups will be considered two different pointers that
3121 * point to different bpf_spin_locks.
3122 * The verifier allows taking only one bpf_spin_lock at a time to avoid
3123 * deadlocks.
3124 * Since only one bpf_spin_lock is allowed the checks are simpler than
3125 * the reg_is_refcounted() logic. The verifier needs to remember only
3126 * one spin_lock instead of array of acquired_refs.
3127 * cur_state->active_spin_lock remembers which map value element got locked
3128 * and clears it after bpf_spin_unlock.
3129 */
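/* Example (illustrative BPF C; 'struct val', 'my_map' and 'key' are made-up
 * names):
 *
 *   struct val { int cnt; struct bpf_spin_lock lock; };
 *
 *   struct val *v = bpf_map_lookup_elem(&my_map, &key);
 *   if (!v)
 *           return 0;
 *   bpf_spin_lock(&v->lock);        // active_spin_lock = reg->id of 'v'
 *   v->cnt++;
 *   bpf_spin_unlock(&v->lock);      // must see the same reg->id, then cleared
 *
 * Taking a second lock before the unlock, or unlocking through a pointer
 * returned by a different lookup, is rejected by process_spin_lock() below.
 */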
3130static int process_spin_lock(struct bpf_verifier_env *env, int regno,
3131			     bool is_lock)
3132{
3133	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3134	struct bpf_verifier_state *cur = env->cur_state;
3135	bool is_const = tnum_is_const(reg->var_off);
3136	struct bpf_map *map = reg->map_ptr;
3137	u64 val = reg->var_off.value;
3138
3139	if (reg->type != PTR_TO_MAP_VALUE) {
3140		verbose(env, "R%d is not a pointer to map_value\n", regno);
3141		return -EINVAL;
3142	}
3143	if (!is_const) {
3144		verbose(env,
3145			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
3146			regno);
3147		return -EINVAL;
3148	}
3149	if (!map->btf) {
3150		verbose(env,
3151			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
3152			map->name);
3153		return -EINVAL;
3154	}
3155	if (!map_value_has_spin_lock(map)) {
3156		if (map->spin_lock_off == -E2BIG)
3157			verbose(env,
3158				"map '%s' has more than one 'struct bpf_spin_lock'\n",
3159				map->name);
3160		else if (map->spin_lock_off == -ENOENT)
3161			verbose(env,
3162				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
3163				map->name);
3164		else
3165			verbose(env,
3166				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
3167				map->name);
3168		return -EINVAL;
3169	}
3170	if (map->spin_lock_off != val + reg->off) {
3171		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
3172			val + reg->off);
3173		return -EINVAL;
3174	}
3175	if (is_lock) {
3176		if (cur->active_spin_lock) {
3177			verbose(env,
3178				"Locking two bpf_spin_locks is not allowed\n");
3179			return -EINVAL;
3180		}
3181		cur->active_spin_lock = reg->id;
3182	} else {
3183		if (!cur->active_spin_lock) {
3184			verbose(env, "bpf_spin_unlock without taking a lock\n");
3185			return -EINVAL;
3186		}
3187		if (cur->active_spin_lock != reg->id) {
3188			verbose(env, "bpf_spin_unlock of different lock\n");
3189			return -EINVAL;
3190		}
3191		cur->active_spin_lock = 0;
3192	}
3193	return 0;
3194}
3195
3196static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
3197{
3198	return type == ARG_PTR_TO_MEM ||
3199	       type == ARG_PTR_TO_MEM_OR_NULL ||
3200	       type == ARG_PTR_TO_UNINIT_MEM;
3201}
3202
3203static bool arg_type_is_mem_size(enum bpf_arg_type type)
3204{
3205	return type == ARG_CONST_SIZE ||
3206	       type == ARG_CONST_SIZE_OR_ZERO;
3207}
3208
3209static bool arg_type_is_int_ptr(enum bpf_arg_type type)
3210{
3211	return type == ARG_PTR_TO_INT ||
3212	       type == ARG_PTR_TO_LONG;
3213}
3214
3215static int int_ptr_type_to_size(enum bpf_arg_type type)
3216{
3217	if (type == ARG_PTR_TO_INT)
3218		return sizeof(u32);
3219	else if (type == ARG_PTR_TO_LONG)
3220		return sizeof(u64);
3221
3222	return -EINVAL;
3223}
3224
3225static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
3226			  enum bpf_arg_type arg_type,
3227			  struct bpf_call_arg_meta *meta)
3228{
3229	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
3230	enum bpf_reg_type expected_type, type = reg->type;
3231	int err = 0;
3232
3233	if (arg_type == ARG_DONTCARE)
3234		return 0;
3235
3236	err = check_reg_arg(env, regno, SRC_OP);
3237	if (err)
3238		return err;
3239
3240	if (arg_type == ARG_ANYTHING) {
3241		if (is_pointer_value(env, regno)) {
3242			verbose(env, "R%d leaks addr into helper function\n",
3243				regno);
3244			return -EACCES;
3245		}
3246		return 0;
3247	}
3248
3249	if (type_is_pkt_pointer(type) &&
3250	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
3251		verbose(env, "helper access to the packet is not allowed\n");
3252		return -EACCES;
3253	}
3254
3255	if (arg_type == ARG_PTR_TO_MAP_KEY ||
3256	    arg_type == ARG_PTR_TO_MAP_VALUE ||
3257	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
3258	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
3259		expected_type = PTR_TO_STACK;
3260		if (register_is_null(reg) &&
3261		    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
3262			/* final test in check_stack_boundary() */;
3263		else if (!type_is_pkt_pointer(type) &&
3264			 type != PTR_TO_MAP_VALUE &&
3265			 type != expected_type)
3266			goto err_type;
3267	} else if (arg_type == ARG_CONST_SIZE ||
3268		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
3269		expected_type = SCALAR_VALUE;
3270		if (type != expected_type)
3271			goto err_type;
3272	} else if (arg_type == ARG_CONST_MAP_PTR) {
3273		expected_type = CONST_PTR_TO_MAP;
3274		if (type != expected_type)
3275			goto err_type;
3276	} else if (arg_type == ARG_PTR_TO_CTX) {
3277		expected_type = PTR_TO_CTX;
3278		if (type != expected_type)
3279			goto err_type;
3280		err = check_ctx_reg(env, reg, regno);
3281		if (err < 0)
3282			return err;
3283	} else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
3284		expected_type = PTR_TO_SOCK_COMMON;
3285		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
3286		if (!type_is_sk_pointer(type))
3287			goto err_type;
3288		if (reg->ref_obj_id) {
3289			if (meta->ref_obj_id) {
3290				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
3291					regno, reg->ref_obj_id,
3292					meta->ref_obj_id);
3293				return -EFAULT;
3294			}
3295			meta->ref_obj_id = reg->ref_obj_id;
3296		}
3297	} else if (arg_type == ARG_PTR_TO_SOCKET) {
3298		expected_type = PTR_TO_SOCKET;
3299		if (type != expected_type)
3300			goto err_type;
3301	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
3302		if (meta->func_id == BPF_FUNC_spin_lock) {
3303			if (process_spin_lock(env, regno, true))
3304				return -EACCES;
3305		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
3306			if (process_spin_lock(env, regno, false))
3307				return -EACCES;
3308		} else {
3309			verbose(env, "verifier internal error\n");
3310			return -EFAULT;
3311		}
3312	} else if (arg_type_is_mem_ptr(arg_type)) {
3313		expected_type = PTR_TO_STACK;
3314		/* One exception here. In case the function allows NULL to be
3315		 * passed in as an argument, it's a SCALAR_VALUE type. The final
3316		 * test happens during stack boundary checking.
3317		 */
3318		if (register_is_null(reg) &&
3319		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
3320			/* final test in check_stack_boundary() */;
3321		else if (!type_is_pkt_pointer(type) &&
3322			 type != PTR_TO_MAP_VALUE &&
3323			 type != expected_type)
3324			goto err_type;
3325		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
3326	} else if (arg_type_is_int_ptr(arg_type)) {
3327		expected_type = PTR_TO_STACK;
3328		if (!type_is_pkt_pointer(type) &&
3329		    type != PTR_TO_MAP_VALUE &&
3330		    type != expected_type)
3331			goto err_type;
3332	} else {
3333		verbose(env, "unsupported arg_type %d\n", arg_type);
3334		return -EFAULT;
3335	}
3336
3337	if (arg_type == ARG_CONST_MAP_PTR) {
3338		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
3339		meta->map_ptr = reg->map_ptr;
3340	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
3341		/* bpf_map_xxx(..., map_ptr, ..., key) call:
3342		 * check that [key, key + map->key_size) are within
3343		 * stack limits and initialized
3344		 */
3345		if (!meta->map_ptr) {
3346			/* in the function declaration map_ptr must come before
3347			 * map_key, so that it's verified and known before
3348			 * we have to check map_key here. Otherwise it means
3349			 * that the kernel subsystem misconfigured the verifier
3350			 */
3351			verbose(env, "invalid map_ptr to access map->key\n");
3352			return -EACCES;
3353		}
3354		err = check_helper_mem_access(env, regno,
3355					      meta->map_ptr->key_size, false,
3356					      NULL);
3357	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
3358		   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
3359		    !register_is_null(reg)) ||
3360		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
3361		/* bpf_map_xxx(..., map_ptr, ..., value) call:
3362		 * check [value, value + map->value_size) validity
3363		 */
3364		if (!meta->map_ptr) {
3365			/* kernel subsystem misconfigured verifier */
3366			verbose(env, "invalid map_ptr to access map->value\n");
3367			return -EACCES;
3368		}
3369		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
3370		err = check_helper_mem_access(env, regno,
3371					      meta->map_ptr->value_size, false,
3372					      meta);
3373	} else if (arg_type_is_mem_size(arg_type)) {
3374		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
3375
3376		/* remember the mem_size which may be used later
3377		 * to refine return values.
3378		 */
3379		meta->msize_smax_value = reg->smax_value;
3380		meta->msize_umax_value = reg->umax_value;
3381
3382		/* The register is SCALAR_VALUE; the access check
3383		 * happens using its boundaries.
3384		 */
3385		if (!tnum_is_const(reg->var_off))
3386			/* For unprivileged variable accesses, disable raw
3387			 * mode so that the program is required to
3388			 * initialize all the memory that the helper could
3389			 * just partially fill up.
3390			 */
3391			meta = NULL;
3392
3393		if (reg->smin_value < 0) {
3394			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
3395				regno);
3396			return -EACCES;
3397		}
3398
3399		if (reg->umin_value == 0) {
3400			err = check_helper_mem_access(env, regno - 1, 0,
3401						      zero_size_allowed,
3402						      meta);
3403			if (err)
3404				return err;
3405		}
3406
3407		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
3408			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
3409				regno);
3410			return -EACCES;
3411		}
3412		err = check_helper_mem_access(env, regno - 1,
3413					      reg->umax_value,
3414					      zero_size_allowed, meta);
3415		if (!err)
3416			err = mark_chain_precision(env, regno);
3417	} else if (arg_type_is_int_ptr(arg_type)) {
3418		int size = int_ptr_type_to_size(arg_type);
3419
3420		err = check_helper_mem_access(env, regno, size, false, meta);
3421		if (err)
3422			return err;
3423		err = check_ptr_alignment(env, reg, 0, size, true);
3424	}
3425
3426	return err;
3427err_type:
3428	verbose(env, "R%d type=%s expected=%s\n", regno,
3429		reg_type_str[type], reg_type_str[expected_type]);
3430	return -EACCES;
3431}
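/* An illustrative sketch (the values and the map_fd name are placeholders)
 * of what the ARG_CONST_MAP_PTR / ARG_PTR_TO_MAP_KEY checks above expect,
 * assuming a map with a 4-byte key:
 *
 *    BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),     // initialize key on stack
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),    // r2 = fp - 4, PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),         // r1 = CONST_PTR_TO_MAP
 *    BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 *
 * check_helper_mem_access() then verifies that [fp - 4, fp) is within
 * stack bounds and fully initialized (map->key_size bytes).
 */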
3432
3433static int check_map_func_compatibility(struct bpf_verifier_env *env,
3434					struct bpf_map *map, int func_id)
3435{
3436	if (!map)
3437		return 0;
3438
3439	/* We need a two-way check: first from the map's perspective ... */
3440	switch (map->map_type) {
3441	case BPF_MAP_TYPE_PROG_ARRAY:
3442		if (func_id != BPF_FUNC_tail_call)
3443			goto error;
3444		break;
3445	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
3446		if (func_id != BPF_FUNC_perf_event_read &&
3447		    func_id != BPF_FUNC_perf_event_output &&
3448		    func_id != BPF_FUNC_perf_event_read_value)
3449			goto error;
3450		break;
3451	case BPF_MAP_TYPE_STACK_TRACE:
3452		if (func_id != BPF_FUNC_get_stackid)
3453			goto error;
3454		break;
3455	case BPF_MAP_TYPE_CGROUP_ARRAY:
3456		if (func_id != BPF_FUNC_skb_under_cgroup &&
3457		    func_id != BPF_FUNC_current_task_under_cgroup)
3458			goto error;
3459		break;
3460	case BPF_MAP_TYPE_CGROUP_STORAGE:
3461	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
3462		if (func_id != BPF_FUNC_get_local_storage)
3463			goto error;
3464		break;
3465	case BPF_MAP_TYPE_DEVMAP:
3466	case BPF_MAP_TYPE_DEVMAP_HASH:
3467		if (func_id != BPF_FUNC_redirect_map &&
3468		    func_id != BPF_FUNC_map_lookup_elem)
3469			goto error;
3470		break;
3471	/* Restrict bpf side of cpumap and xskmap, open when use-cases
3472	 * appear.
3473	 */
3474	case BPF_MAP_TYPE_CPUMAP:
3475		if (func_id != BPF_FUNC_redirect_map)
3476			goto error;
3477		break;
3478	case BPF_MAP_TYPE_XSKMAP:
3479		if (func_id != BPF_FUNC_redirect_map &&
3480		    func_id != BPF_FUNC_map_lookup_elem)
3481			goto error;
3482		break;
3483	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
3484	case BPF_MAP_TYPE_HASH_OF_MAPS:
3485		if (func_id != BPF_FUNC_map_lookup_elem)
3486			goto error;
3487		break;
3488	case BPF_MAP_TYPE_SOCKMAP:
3489		if (func_id != BPF_FUNC_sk_redirect_map &&
3490		    func_id != BPF_FUNC_sock_map_update &&
3491		    func_id != BPF_FUNC_map_delete_elem &&
3492		    func_id != BPF_FUNC_msg_redirect_map)
3493			goto error;
3494		break;
3495	case BPF_MAP_TYPE_SOCKHASH:
3496		if (func_id != BPF_FUNC_sk_redirect_hash &&
3497		    func_id != BPF_FUNC_sock_hash_update &&
3498		    func_id != BPF_FUNC_map_delete_elem &&
3499		    func_id != BPF_FUNC_msg_redirect_hash)
3500			goto error;
3501		break;
3502	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
3503		if (func_id != BPF_FUNC_sk_select_reuseport)
3504			goto error;
3505		break;
3506	case BPF_MAP_TYPE_QUEUE:
3507	case BPF_MAP_TYPE_STACK:
3508		if (func_id != BPF_FUNC_map_peek_elem &&
3509		    func_id != BPF_FUNC_map_pop_elem &&
3510		    func_id != BPF_FUNC_map_push_elem)
3511			goto error;
3512		break;
3513	case BPF_MAP_TYPE_SK_STORAGE:
3514		if (func_id != BPF_FUNC_sk_storage_get &&
3515		    func_id != BPF_FUNC_sk_storage_delete)
3516			goto error;
3517		break;
3518	default:
3519		break;
3520	}
3521
3522	/* ... and second from the function itself. */
3523	switch (func_id) {
3524	case BPF_FUNC_tail_call:
3525		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
3526			goto error;
3527		if (env->subprog_cnt > 1) {
3528			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
3529			return -EINVAL;
3530		}
3531		break;
3532	case BPF_FUNC_perf_event_read:
3533	case BPF_FUNC_perf_event_output:
3534	case BPF_FUNC_perf_event_read_value:
3535		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
3536			goto error;
3537		break;
3538	case BPF_FUNC_get_stackid:
3539		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
3540			goto error;
3541		break;
3542	case BPF_FUNC_current_task_under_cgroup:
3543	case BPF_FUNC_skb_under_cgroup:
3544		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
3545			goto error;
3546		break;
3547	case BPF_FUNC_redirect_map:
3548		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
3549		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
3550		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
3551		    map->map_type != BPF_MAP_TYPE_XSKMAP)
3552			goto error;
3553		break;
3554	case BPF_FUNC_sk_redirect_map:
3555	case BPF_FUNC_msg_redirect_map:
3556	case BPF_FUNC_sock_map_update:
3557		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
3558			goto error;
3559		break;
3560	case BPF_FUNC_sk_redirect_hash:
3561	case BPF_FUNC_msg_redirect_hash:
3562	case BPF_FUNC_sock_hash_update:
3563		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
3564			goto error;
3565		break;
3566	case BPF_FUNC_get_local_storage:
3567		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
3568		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
3569			goto error;
3570		break;
3571	case BPF_FUNC_sk_select_reuseport:
3572		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
3573			goto error;
3574		break;
3575	case BPF_FUNC_map_peek_elem:
3576	case BPF_FUNC_map_pop_elem:
3577	case BPF_FUNC_map_push_elem:
3578		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
3579		    map->map_type != BPF_MAP_TYPE_STACK)
3580			goto error;
3581		break;
3582	case BPF_FUNC_sk_storage_get:
3583	case BPF_FUNC_sk_storage_delete:
3584		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
3585			goto error;
3586		break;
3587	default:
3588		break;
3589	}
3590
3591	return 0;
3592error:
3593	verbose(env, "cannot pass map_type %d into func %s#%d\n",
3594		map->map_type, func_id_name(func_id), func_id);
3595	return -EINVAL;
3596}
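/* Example of the two-way check above (an illustrative sketch, not an
 * exhaustive list): a BPF_MAP_TYPE_PROG_ARRAY may only be passed to
 * bpf_tail_call(), so calling bpf_map_lookup_elem() on such a map is
 * rejected by the first switch with the "cannot pass map_type ... into
 * func ..." error, and conversely bpf_tail_call() on any other map type
 * is rejected by the second switch.
 */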
3597
3598static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
3599{
3600	int count = 0;
3601
3602	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
3603		count++;
3604	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
3605		count++;
3606	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
3607		count++;
3608	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
3609		count++;
3610	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
3611		count++;
3612
3613	/* We only support one arg being in raw mode at the moment,
3614	 * which is sufficient for the helper functions we have
3615	 * right now.
3616	 */
3617	return count <= 1;
3618}
3619
3620static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
3621				    enum bpf_arg_type arg_next)
3622{
3623	return (arg_type_is_mem_ptr(arg_curr) &&
3624	        !arg_type_is_mem_size(arg_next)) ||
3625	       (!arg_type_is_mem_ptr(arg_curr) &&
3626		arg_type_is_mem_size(arg_next));
3627}
3628
3629static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
3630{
3631	/* bpf_xxx(..., buf, len) call will access 'len'
3632	 * bytes from memory 'buf'. Both arg types need
3633	 * to be paired, so make sure there's no buggy
3634	 * helper function specification.
3635	 */
3636	if (arg_type_is_mem_size(fn->arg1_type) ||
3637	    arg_type_is_mem_ptr(fn->arg5_type)  ||
3638	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
3639	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
3640	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
3641	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
3642		return false;
3643
3644	return true;
3645}
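/* A well-formed mem/size pair, sketched loosely after bpf_probe_read()'s
 * prototype (the name example_proto and the exact argument types here are
 * only for illustration):
 *
 *    static const struct bpf_func_proto example_proto = {
 *            .ret_type  = RET_INTEGER,
 *            .arg1_type = ARG_PTR_TO_UNINIT_MEM, // buf
 *            .arg2_type = ARG_CONST_SIZE,        // len, pairs with arg1
 *            .arg3_type = ARG_ANYTHING,          // source address
 *    };
 *
 * check_arg_pair_ok() rejects prototypes where a size argument does not
 * immediately follow its memory argument, or appears without one.
 */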
3646
3647static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
3648{
3649	int count = 0;
3650
3651	if (arg_type_may_be_refcounted(fn->arg1_type))
3652		count++;
3653	if (arg_type_may_be_refcounted(fn->arg2_type))
3654		count++;
3655	if (arg_type_may_be_refcounted(fn->arg3_type))
3656		count++;
3657	if (arg_type_may_be_refcounted(fn->arg4_type))
3658		count++;
3659	if (arg_type_may_be_refcounted(fn->arg5_type))
3660		count++;
3661
3662	/* A reference acquiring function cannot acquire
3663	 * another refcounted ptr.
3664	 */
3665	if (is_acquire_function(func_id) && count)
3666		return false;
3667
3668	/* We only support one refcounted arg at the moment, which is
3669	 * sufficient for the helper functions we have right now.
3670	 */
3671	return count <= 1;
3672}
3673
3674static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
3675{
3676	return check_raw_mode_ok(fn) &&
3677	       check_arg_pair_ok(fn) &&
3678	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
3679}
3680
3681/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
3682 * are now invalid, so turn them into unknown SCALAR_VALUE.
3683 */
3684static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
3685				     struct bpf_func_state *state)
3686{
3687	struct bpf_reg_state *regs = state->regs, *reg;
3688	int i;
3689
3690	for (i = 0; i < MAX_BPF_REG; i++)
3691		if (reg_is_pkt_pointer_any(&regs[i]))
3692			mark_reg_unknown(env, regs, i);
3693
3694	bpf_for_each_spilled_reg(i, state, reg) {
3695		if (!reg)
3696			continue;
3697		if (reg_is_pkt_pointer_any(reg))
3698			__mark_reg_unknown(reg);
3699	}
3700}
3701
3702static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
3703{
3704	struct bpf_verifier_state *vstate = env->cur_state;
3705	int i;
3706
3707	for (i = 0; i <= vstate->curframe; i++)
3708		__clear_all_pkt_pointers(env, vstate->frame[i]);
3709}
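/* For instance (illustrative): after a helper such as
 * bpf_skb_store_bytes(), for which bpf_helper_changes_pkt_data() is true,
 * a previously verified r2=pkt(id=n,off=8,r=16) becomes an unknown
 * SCALAR_VALUE in every frame, so the program has to redo the
 * data/data_end bounds check before touching packet bytes again.
 */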
3710
3711static void release_reg_references(struct bpf_verifier_env *env,
3712				   struct bpf_func_state *state,
3713				   int ref_obj_id)
3714{
3715	struct bpf_reg_state *regs = state->regs, *reg;
3716	int i;
3717
3718	for (i = 0; i < MAX_BPF_REG; i++)
3719		if (regs[i].ref_obj_id == ref_obj_id)
3720			mark_reg_unknown(env, regs, i);
3721
3722	bpf_for_each_spilled_reg(i, state, reg) {
3723		if (!reg)
3724			continue;
3725		if (reg->ref_obj_id == ref_obj_id)
3726			__mark_reg_unknown(reg);
3727	}
3728}
3729
3730/* The pointer with the specified id has released its reference to kernel
3731 * resources. Identify all copies of the same pointer and clear the reference.
3732 */
3733static int release_reference(struct bpf_verifier_env *env,
3734			     int ref_obj_id)
3735{
3736	struct bpf_verifier_state *vstate = env->cur_state;
3737	int err;
3738	int i;
3739
3740	err = release_reference_state(cur_func(env), ref_obj_id);
3741	if (err)
3742		return err;
3743
3744	for (i = 0; i <= vstate->curframe; i++)
3745		release_reg_references(env, vstate->frame[i], ref_obj_id);
3746
3747	return 0;
3748}
3749
3750static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
3751			   int *insn_idx)
3752{
3753	struct bpf_verifier_state *state = env->cur_state;
3754	struct bpf_func_state *caller, *callee;
3755	int i, err, subprog, target_insn;
3756
3757	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
3758		verbose(env, "the call stack of %d frames is too deep\n",
3759			state->curframe + 2);
3760		return -E2BIG;
3761	}
3762
3763	target_insn = *insn_idx + insn->imm;
3764	subprog = find_subprog(env, target_insn + 1);
3765	if (subprog < 0) {
3766		verbose(env, "verifier bug. No program starts at insn %d\n",
3767			target_insn + 1);
3768		return -EFAULT;
3769	}
3770
3771	caller = state->frame[state->curframe];
3772	if (state->frame[state->curframe + 1]) {
3773		verbose(env, "verifier bug. Frame %d already allocated\n",
3774			state->curframe + 1);
3775		return -EFAULT;
3776	}
3777
3778	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
3779	if (!callee)
3780		return -ENOMEM;
3781	state->frame[state->curframe + 1] = callee;
3782
3783	/* The callee cannot read r0 or r6 - r9 (they start uninitialized)
3784	 * and has to write into its own stack before reading from it.
3785	 * The callee can read/write into the caller's stack.
3786	 */
3787	init_func_state(env, callee,
3788			/* remember the callsite, it will be used by bpf_exit */
3789			*insn_idx /* callsite */,
3790			state->curframe + 1 /* frameno within this callchain */,
3791			subprog /* subprog number within this prog */);
3792
3793	/* Transfer references to the callee */
3794	err = transfer_reference_state(callee, caller);
3795	if (err)
3796		return err;
3797
3798	/* copy r1 - r5 args that callee can access.  The copy includes parent
3799	 * pointers, which connects us up to the liveness chain
3800	 */
3801	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3802		callee->regs[i] = caller->regs[i];
3803
3804	/* after the call registers r0 - r5 were scratched */
3805	for (i = 0; i < CALLER_SAVED_REGS; i++) {
3806		mark_reg_not_init(env, caller->regs, caller_saved[i]);
3807		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
3808	}
3809
3810	/* only increment it after check_reg_arg() finished */
3811	state->curframe++;
3812
3813	/* and go analyze first insn of the callee */
3814	*insn_idx = target_insn;
3815
3816	if (env->log.level & BPF_LOG_LEVEL) {
3817		verbose(env, "caller:\n");
3818		print_verifier_state(env, caller);
3819		verbose(env, "callee:\n");
3820		print_verifier_state(env, callee);
3821	}
3822	return 0;
3823}
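/* Sketch of a bpf-to-bpf call as seen by check_func_call() (the numbers
 * are made up for illustration): insn->imm is relative, so for
 *
 *    insn 10: call pc+5
 *
 * target_insn is 10 + 5 = 15 and the callee subprog must start at insn 16.
 * A new bpf_func_state frame is allocated, r1 - r5 are copied from the
 * caller, and verification continues at the callee's first instruction.
 */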
3824
3825static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
3826{
3827	struct bpf_verifier_state *state = env->cur_state;
3828	struct bpf_func_state *caller, *callee;
3829	struct bpf_reg_state *r0;
3830	int err;
3831
3832	callee = state->frame[state->curframe];
3833	r0 = &callee->regs[BPF_REG_0];
3834	if (r0->type == PTR_TO_STACK) {
3835		/* Technically it's ok to return the caller's stack pointer
3836		 * (or the caller's caller's pointer) back to the caller,
3837		 * since those pointers remain valid. Only the current
3838		 * function's stack pointer becomes invalid once it exits,
3839		 * but let's be conservative.
3840		 */
3841		verbose(env, "cannot return stack pointer to the caller\n");
3842		return -EINVAL;
3843	}
3844
3845	state->curframe--;
3846	caller = state->frame[state->curframe];
3847	/* return to the caller whatever r0 had in the callee */
3848	caller->regs[BPF_REG_0] = *r0;
3849
3850	/* Transfer references to the caller */
3851	err = transfer_reference_state(caller, callee);
3852	if (err)
3853		return err;
3854
3855	*insn_idx = callee->callsite + 1;
3856	if (env->log.level & BPF_LOG_LEVEL) {
3857		verbose(env, "returning from callee:\n");
3858		print_verifier_state(env, callee);
3859		verbose(env, "to caller at %d:\n", *insn_idx);
3860		print_verifier_state(env, caller);
3861	}
3862	/* clear everything in the callee */
3863	free_func_state(callee);
3864	state->frame[state->curframe + 1] = NULL;
3865	return 0;
3866}
3867
3868static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
3869				   int func_id,
3870				   struct bpf_call_arg_meta *meta)
3871{
3872	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
3873
3874	if (ret_type != RET_INTEGER ||
3875	    (func_id != BPF_FUNC_get_stack &&
3876	     func_id != BPF_FUNC_probe_read_str))
3877		return;
3878
3879	ret_reg->smax_value = meta->msize_smax_value;
3880	ret_reg->umax_value = meta->msize_umax_value;
3881	__reg_deduce_bounds(ret_reg);
3882	__reg_bound_offset(ret_reg);
3883}
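/* Worked example (a sketch): for bpf_get_stack(ctx, buf, 64, 0) the size
 * argument is the constant 64, so check_func_arg() recorded
 * msize_smax_value = msize_umax_value = 64, and the return value in R0 is
 * refined here to be at most 64. Only the max bounds are adjusted because
 * the helper may still return a negative error code.
 */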
3884
3885static int
3886record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
3887		int func_id, int insn_idx)
3888{
3889	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
3890	struct bpf_map *map = meta->map_ptr;
3891
3892	if (func_id != BPF_FUNC_tail_call &&
3893	    func_id != BPF_FUNC_map_lookup_elem &&
3894	    func_id != BPF_FUNC_map_update_elem &&
3895	    func_id != BPF_FUNC_map_delete_elem &&
3896	    func_id != BPF_FUNC_map_push_elem &&
3897	    func_id != BPF_FUNC_map_pop_elem &&
3898	    func_id != BPF_FUNC_map_peek_elem)
3899		return 0;
3900
3901	if (map == NULL) {
3902		verbose(env, "kernel subsystem misconfigured verifier\n");
3903		return -EINVAL;
3904	}
3905
3906	/* In case of read-only, some additional restrictions
3907	 * need to be applied in order to prevent altering the
3908	 * state of the map from program side.
3909	 */
3910	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
3911	    (func_id == BPF_FUNC_map_delete_elem ||
3912	     func_id == BPF_FUNC_map_update_elem ||
3913	     func_id == BPF_FUNC_map_push_elem ||
3914	     func_id == BPF_FUNC_map_pop_elem)) {
3915		verbose(env, "write into map forbidden\n");
3916		return -EACCES;
3917	}
3918
3919	if (!BPF_MAP_PTR(aux->map_state))
3920		bpf_map_ptr_store(aux, meta->map_ptr,
3921				  meta->map_ptr->unpriv_array);
3922	else if (BPF_MAP_PTR(aux->map_state) != meta->map_ptr)
3923		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
3924				  meta->map_ptr->unpriv_array);
3925	return 0;
3926}
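/* Illustration: for a map created with BPF_F_RDONLY_PROG, a call like
 * bpf_map_update_elem(&map, &key, &val, 0) (names are placeholders) is
 * rejected above with "write into map forbidden", while lookups remain
 * allowed.
 */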
3927
3928static int check_reference_leak(struct bpf_verifier_env *env)
3929{
3930	struct bpf_func_state *state = cur_func(env);
3931	int i;
3932
3933	for (i = 0; i < state->acquired_refs; i++) {
3934		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
3935			state->refs[i].id, state->refs[i].insn_idx);
3936	}
3937	return state->acquired_refs ? -EINVAL : 0;
3938}
3939
3940static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
3941{
3942	const struct bpf_func_proto *fn = NULL;
3943	struct bpf_reg_state *regs;
3944	struct bpf_call_arg_meta meta;
3945	bool changes_data;
3946	int i, err;
3947
3948	/* find function prototype */
3949	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
3950		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
3951			func_id);
3952		return -EINVAL;
3953	}
3954
3955	if (env->ops->get_func_proto)
3956		fn = env->ops->get_func_proto(func_id, env->prog);
3957	if (!fn) {
3958		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
3959			func_id);
3960		return -EINVAL;
3961	}
3962
3963	/* eBPF programs must be GPL compatible to use GPL-ed functions */
3964	if (!env->prog->gpl_compatible && fn->gpl_only) {
3965		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
3966		return -EINVAL;
3967	}
3968
3969	/* With LD_ABS/IND some JITs save/restore skb from r1. */
3970	changes_data = bpf_helper_changes_pkt_data(fn->func);
3971	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
3972		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
3973			func_id_name(func_id), func_id);
3974		return -EINVAL;
3975	}
3976
3977	memset(&meta, 0, sizeof(meta));
3978	meta.pkt_access = fn->pkt_access;
3979
3980	err = check_func_proto(fn, func_id);
3981	if (err) {
3982		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
3983			func_id_name(func_id), func_id);
3984		return err;
3985	}
3986
3987	meta.func_id = func_id;
3988	/* check args */
3989	err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
3990	if (err)
3991		return err;
3992	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
3993	if (err)
3994		return err;
3995	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
3996	if (err)
3997		return err;
3998	err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
3999	if (err)
4000		return err;
4001	err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
4002	if (err)
4003		return err;
4004
4005	err = record_func_map(env, &meta, func_id, insn_idx);
4006	if (err)
4007		return err;
4008
4009	/* Mark slots with STACK_MISC in case of raw mode; the stack
4010	 * offset is inferred from register state.
4011	 */
4012	for (i = 0; i < meta.access_size; i++) {
4013		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
4014				       BPF_WRITE, -1, false);
4015		if (err)
4016			return err;
4017	}
4018
4019	if (func_id == BPF_FUNC_tail_call) {
4020		err = check_reference_leak(env);
4021		if (err) {
4022			verbose(env, "tail_call would lead to reference leak\n");
4023			return err;
4024		}
4025	} else if (is_release_function(func_id)) {
4026		err = release_reference(env, meta.ref_obj_id);
4027		if (err) {
4028			verbose(env, "func %s#%d reference has not been acquired before\n",
4029				func_id_name(func_id), func_id);
4030			return err;
4031		}
4032	}
4033
4034	regs = cur_regs(env);
4035
4036	/* check that the flags argument in get_local_storage(map, flags) is 0;
4037	 * this is required because get_local_storage() can't return an error.
4038	 */
4039	if (func_id == BPF_FUNC_get_local_storage &&
4040	    !register_is_null(&regs[BPF_REG_2])) {
4041		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
4042		return -EINVAL;
4043	}
4044
4045	/* reset caller saved regs */
4046	for (i = 0; i < CALLER_SAVED_REGS; i++) {
4047		mark_reg_not_init(env, regs, caller_saved[i]);
4048		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
4049	}
4050
4051	/* helper call returns 64-bit value. */
4052	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
4053
4054	/* update return register (already marked as written above) */
4055	if (fn->ret_type == RET_INTEGER) {
4056		/* sets type to SCALAR_VALUE */
4057		mark_reg_unknown(env, regs, BPF_REG_0);
4058	} else if (fn->ret_type == RET_VOID) {
4059		regs[BPF_REG_0].type = NOT_INIT;
4060	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
4061		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
4062		/* There is no offset yet applied, variable or fixed */
4063		mark_reg_known_zero(env, regs, BPF_REG_0);
4064		/* remember map_ptr, so that check_map_access()
4065		 * can check 'value_size' boundary of memory access
4066		 * to map element returned from bpf_map_lookup_elem()
4067		 */
4068		if (meta.map_ptr == NULL) {
4069			verbose(env,
4070				"kernel subsystem misconfigured verifier\n");
4071			return -EINVAL;
4072		}
4073		regs[BPF_REG_0].map_ptr = meta.map_ptr;
4074		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
4075			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
4076			if (map_value_has_spin_lock(meta.map_ptr))
4077				regs[BPF_REG_0].id = ++env->id_gen;
4078		} else {
4079			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
4080			regs[BPF_REG_0].id = ++env->id_gen;
4081		}
4082	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
4083		mark_reg_known_zero(env, regs, BPF_REG_0);
4084		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
4085		regs[BPF_REG_0].id = ++env->id_gen;
4086	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
4087		mark_reg_known_zero(env, regs, BPF_REG_0);
4088		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
4089		regs[BPF_REG_0].id = ++env->id_gen;
4090	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
4091		mark_reg_known_zero(env, regs, BPF_REG_0);
4092		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
4093		regs[BPF_REG_0].id = ++env->id_gen;
4094	} else {
4095		verbose(env, "unknown return type %d of func %s#%d\n",
4096			fn->ret_type, func_id_name(func_id), func_id);
4097		return -EINVAL;
4098	}
4099
4100	if (is_ptr_cast_function(func_id)) {
4101		/* For release_reference() */
4102		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
4103	} else if (is_acquire_function(func_id)) {
4104		int id = acquire_reference_state(env, insn_idx);
4105
4106		if (id < 0)
4107			return id;
4108		/* For mark_ptr_or_null_reg() */
4109		regs[BPF_REG_0].id = id;
4110		/* For release_reference() */
4111		regs[BPF_REG_0].ref_obj_id = id;
4112	}
4113
4114	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
4115
4116	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
4117	if (err)
4118		return err;
4119
4120	if (func_id == BPF_FUNC_get_stack && !env->prog->has_callchain_buf) {
4121		const char *err_str;
4122
4123#ifdef CONFIG_PERF_EVENTS
4124		err = get_callchain_buffers(sysctl_perf_event_max_stack);
4125		err_str = "cannot get callchain buffer for func %s#%d\n";
4126#else
4127		err = -ENOTSUPP;
4128		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
4129#endif
4130		if (err) {
4131			verbose(env, err_str, func_id_name(func_id), func_id);
4132			return err;
4133		}
4134
4135		env->prog->has_callchain_buf = true;
4136	}
4137
4138	if (changes_data)
4139		clear_all_pkt_pointers(env);
4140	return 0;
4141}
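/* Sketch of the R0 marking performed above for an ordinary map lookup:
 * after
 *
 *    BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
 *
 * R0 is PTR_TO_MAP_VALUE_OR_NULL with a fresh id, so the program needs
 * something like
 *
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, <skip>),  // <skip> is a placeholder
 *
 * before dereferencing it; mark_ptr_or_null_reg() later uses that id to
 * downgrade all copies to PTR_TO_MAP_VALUE on the non-NULL branch.
 */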
4142
4143static bool signed_add_overflows(s64 a, s64 b)
4144{
4145	/* Do the add in u64, where overflow is well-defined */
4146	s64 res = (s64)((u64)a + (u64)b);
4147
4148	if (b < 0)
4149		return res > a;
4150	return res < a;
4151}
4152
4153static bool signed_sub_overflows(s64 a, s64 b)
4154{
4155	/* Do the sub in u64, where overflow is well-defined */
4156	s64 res = (s64)((u64)a - (u64)b);
4157
4158	if (b < 0)
4159		return res < a;
4160	return res > a;
4161}
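/* Worked example: signed_add_overflows(S64_MAX, 1) computes res in u64,
 * which wraps to S64_MIN; since b > 0 and res < a it reports an overflow.
 * Analogously, signed_sub_overflows(S64_MIN, 1) wraps to S64_MAX and
 * reports res > a.
 */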
4162
4163static bool check_reg_sane_offset(struct bpf_verifier_env *env,
4164				  const struct bpf_reg_state *reg,
4165				  enum bpf_reg_type type)
4166{
4167	bool known = tnum_is_const(reg->var_off);
4168	s64 val = reg->var_off.value;
4169	s64 smin = reg->smin_value;
4170
4171	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
4172		verbose(env, "math between %s pointer and %lld is not allowed\n",
4173			reg_type_str[type], val);
4174		return false;
4175	}
4176
4177	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
4178		verbose(env, "%s pointer offset %d is not allowed\n",
4179			reg_type_str[type], reg->off);
4180		return false;
4181	}
4182
4183	if (smin == S64_MIN) {
4184		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
4185			reg_type_str[type]);
4186		return false;
4187	}
4188
4189	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
4190		verbose(env, "value %lld makes %s pointer be out of bounds\n",
4191			smin, reg_type_str[type]);
4192		return false;
4193	}
4194
4195	return true;
4196}
4197
4198static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
4199{
4200	return &env->insn_aux_data[env->insn_idx];
4201}
4202
4203static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
4204			      u32 *ptr_limit, u8 opcode, bool off_is_neg)
4205{
4206	bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
4207			    (opcode == BPF_SUB && !off_is_neg);
4208	u32 off;
4209
4210	switch (ptr_reg->type) {
4211	case PTR_TO_STACK:
4212		/* Indirect variable offset stack access is prohibited in
4213		 * unprivileged mode so it's not handled here.
4214		 */
4215		off = ptr_reg->off + ptr_reg->var_off.value;
4216		if (mask_to_left)
4217			*ptr_limit = MAX_BPF_STACK + off;
4218		else
4219			*ptr_limit = -off;
4220		return 0;
4221	case PTR_TO_MAP_VALUE:
4222		if (mask_to_left) {
4223			*ptr_limit = ptr_reg->umax_value + ptr_reg->off;
4224		} else {
4225			off = ptr_reg->smin_value + ptr_reg->off;
4226			*ptr_limit = ptr_reg->map_ptr->value_size - off;
4227		}
4228		return 0;
4229	default:
4230		return -EINVAL;
4231	}
4232}
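/* Worked example (a sketch): for a PTR_TO_STACK register at fp-16 with a
 * known var_off of 0, adding a non-negative scalar gives
 * ptr_limit = -off = 16 (the distance up to the frame pointer), while
 * adding a negative scalar gives ptr_limit = MAX_BPF_STACK + off =
 * 512 - 16 = 496 (the distance down to the lowest stack address). The
 * limit is stored in insn_aux_data and fixup_bpf_calls() later emits the
 * corresponding runtime masking.
 */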
4233
4234static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
4235				    const struct bpf_insn *insn)
4236{
4237	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
4238}
4239
4240static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
4241				       u32 alu_state, u32 alu_limit)
4242{
4243	/* If we arrived here from different branches with different
4244	 * state or limits to sanitize, then this won't work.
4245	 */
4246	if (aux->alu_state &&
4247	    (aux->alu_state != alu_state ||
4248	     aux->alu_limit != alu_limit))
4249		return -EACCES;
4250
4251	/* Corresponding fixup done in fixup_bpf_calls(). */
4252	aux->alu_state = alu_state;
4253	aux->alu_limit = alu_limit;
4254	return 0;
4255}
4256
4257static int sanitize_val_alu(struct bpf_verifier_env *env,
4258			    struct bpf_insn *insn)
4259{
4260	struct bpf_insn_aux_data *aux = cur_aux(env);
4261
4262	if (can_skip_alu_sanitation(env, insn))
4263		return 0;
4264
4265	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
4266}
4267
4268static int sanitize_ptr_alu(struct bpf_verifier_env *env,
4269			    struct bpf_insn *insn,
4270			    const struct bpf_reg_state *ptr_reg,
4271			    struct bpf_reg_state *dst_reg,
4272			    bool off_is_neg)
4273{
4274	struct bpf_verifier_state *vstate = env->cur_state;
4275	struct bpf_insn_aux_data *aux = cur_aux(env);
4276	bool ptr_is_dst_reg = ptr_reg == dst_reg;
4277	u8 opcode = BPF_OP(insn->code);
4278	u32 alu_state, alu_limit;
4279	struct bpf_reg_state tmp;
4280	bool ret;
4281
4282	if (can_skip_alu_sanitation(env, insn))
4283		return 0;
4284
4285	/* We already marked aux for masking from non-speculative
4286	 * paths, thus we got here in the first place. We only care
4287	 * to explore bad access from here.
4288	 */
4289	if (vstate->speculative)
4290		goto do_sim;
4291
4292	alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
4293	alu_state |= ptr_is_dst_reg ?
4294		     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
4295
4296	if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
4297		return 0;
4298	if (update_alu_sanitation_state(aux, alu_state, alu_limit))
4299		return -EACCES;
4300do_sim:
4301	/* Simulate and find potential out-of-bounds access under
4302	 * speculative execution from truncation as a result of
4303	 * masking when off was not within expected range. If off
4304	 * sits in dst, then we temporarily need to move ptr there
4305	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
4306	 * for cases where we use K-based arithmetic in one direction
4307	 * and truncated reg-based in the other in order to explore
4308	 * bad access.
4309	 */
4310	if (!ptr_is_dst_reg) {
4311		tmp = *dst_reg;
4312		*dst_reg = *ptr_reg;
4313	}
4314	ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true);
4315	if (!ptr_is_dst_reg && ret)
4316		*dst_reg = tmp;
4317	return !ret ? -EFAULT : 0;
4318}
4319
4320/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
4321 * Caller should also handle BPF_MOV case separately.
4322 * If we return -EACCES, caller may want to try again treating pointer as a
4323 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
4324 */
4325static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
4326				   struct bpf_insn *insn,
4327				   const struct bpf_reg_state *ptr_reg,
4328				   const struct bpf_reg_state *off_reg)
4329{
4330	struct bpf_verifier_state *vstate = env->cur_state;
4331	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4332	struct bpf_reg_state *regs = state->regs, *dst_reg;
4333	bool known = tnum_is_const(off_reg->var_off);
4334	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
4335	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
4336	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
4337	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
4338	u32 dst = insn->dst_reg, src = insn->src_reg;
4339	u8 opcode = BPF_OP(insn->code);
4340	int ret;
4341
4342	dst_reg = &regs[dst];
4343
4344	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
4345	    smin_val > smax_val || umin_val > umax_val) {
4346		/* Taint dst register if offset had invalid bounds derived from
4347		 * e.g. dead branches.
4348		 */
4349		__mark_reg_unknown(dst_reg);
4350		return 0;
4351	}
4352
4353	if (BPF_CLASS(insn->code) != BPF_ALU64) {
4354		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
4355		verbose(env,
4356			"R%d 32-bit pointer arithmetic prohibited\n",
4357			dst);
4358		return -EACCES;
4359	}
4360
4361	switch (ptr_reg->type) {
4362	case PTR_TO_MAP_VALUE_OR_NULL:
4363		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
4364			dst, reg_type_str[ptr_reg->type]);
4365		return -EACCES;
4366	case CONST_PTR_TO_MAP:
4367	case PTR_TO_PACKET_END:
4368	case PTR_TO_SOCKET:
4369	case PTR_TO_SOCKET_OR_NULL:
4370	case PTR_TO_SOCK_COMMON:
4371	case PTR_TO_SOCK_COMMON_OR_NULL:
4372	case PTR_TO_TCP_SOCK:
4373	case PTR_TO_TCP_SOCK_OR_NULL:
4374	case PTR_TO_XDP_SOCK:
4375		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
4376			dst, reg_type_str[ptr_reg->type]);
4377		return -EACCES;
4378	case PTR_TO_MAP_VALUE:
4379		if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) {
4380			verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n",
4381				off_reg == dst_reg ? dst : src);
4382			return -EACCES;
4383		}
4384		/* fall-through */
4385	default:
4386		break;
4387	}
4388
4389	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
4390	 * The id may be overwritten later if we create a new variable offset.
4391	 */
4392	dst_reg->type = ptr_reg->type;
4393	dst_reg->id = ptr_reg->id;
4394
4395	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
4396	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
4397		return -EINVAL;
4398
4399	switch (opcode) {
4400	case BPF_ADD:
4401		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
4402		if (ret < 0) {
4403			verbose(env, "R%d tried to add from different maps or paths\n", dst);
4404			return ret;
4405		}
4406		/* We can take a fixed offset as long as it doesn't overflow
4407		 * the s32 'off' field
4408		 */
4409		if (known && (ptr_reg->off + smin_val ==
4410			      (s64)(s32)(ptr_reg->off + smin_val))) {
4411			/* pointer += K.  Accumulate it into fixed offset */
4412			dst_reg->smin_value = smin_ptr;
4413			dst_reg->smax_value = smax_ptr;
4414			dst_reg->umin_value = umin_ptr;
4415			dst_reg->umax_value = umax_ptr;
4416			dst_reg->var_off = ptr_reg->var_off;
4417			dst_reg->off = ptr_reg->off + smin_val;
4418			dst_reg->raw = ptr_reg->raw;
4419			break;
4420		}
4421		/* A new variable offset is created.  Note that off_reg->off
4422		 * == 0, since it's a scalar.
4423		 * dst_reg gets the pointer type and since a scalar value was
4424		 * added to the pointer, give it a new 'id' if it's a
4425		 * PTR_TO_PACKET.
4426		 * This creates a new 'base' pointer: off_reg (variable) gets
4427		 * added into the variable offset, and we copy the fixed offset
4428		 * from ptr_reg.
4429		 */
4430		if (signed_add_overflows(smin_ptr, smin_val) ||
4431		    signed_add_overflows(smax_ptr, smax_val)) {
4432			dst_reg->smin_value = S64_MIN;
4433			dst_reg->smax_value = S64_MAX;
4434		} else {
4435			dst_reg->smin_value = smin_ptr + smin_val;
4436			dst_reg->smax_value = smax_ptr + smax_val;
4437		}
4438		if (umin_ptr + umin_val < umin_ptr ||
4439		    umax_ptr + umax_val < umax_ptr) {
4440			dst_reg->umin_value = 0;
4441			dst_reg->umax_value = U64_MAX;
4442		} else {
4443			dst_reg->umin_value = umin_ptr + umin_val;
4444			dst_reg->umax_value = umax_ptr + umax_val;
4445		}
4446		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
4447		dst_reg->off = ptr_reg->off;
4448		dst_reg->raw = ptr_reg->raw;
4449		if (reg_is_pkt_pointer(ptr_reg)) {
4450			dst_reg->id = ++env->id_gen;
4451			/* something was added to pkt_ptr, set range to zero */
4452			dst_reg->raw = 0;
4453		}
4454		break;
4455	case BPF_SUB:
4456		ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
4457		if (ret < 0) {
4458			verbose(env, "R%d tried to sub from different maps or paths\n", dst);
4459			return ret;
4460		}
4461		if (dst_reg == off_reg) {
4462			/* scalar -= pointer.  Creates an unknown scalar */
4463			verbose(env, "R%d tried to subtract pointer from scalar\n",
4464				dst);
4465			return -EACCES;
4466		}
4467		/* We don't allow subtraction from FP, because (according to
4468		 * the test_verifier.c test "invalid fp arithmetic") JITs might
4469		 * not be able to deal with it.
4470		 */
4471		if (ptr_reg->type == PTR_TO_STACK) {
4472			verbose(env, "R%d subtraction from stack pointer prohibited\n",
4473				dst);
4474			return -EACCES;
4475		}
4476		if (known && (ptr_reg->off - smin_val ==
4477			      (s64)(s32)(ptr_reg->off - smin_val))) {
4478			/* pointer -= K.  Subtract it from fixed offset */
4479			dst_reg->smin_value = smin_ptr;
4480			dst_reg->smax_value = smax_ptr;
4481			dst_reg->umin_value = umin_ptr;
4482			dst_reg->umax_value = umax_ptr;
4483			dst_reg->var_off = ptr_reg->var_off;
4484			dst_reg->id = ptr_reg->id;
4485			dst_reg->off = ptr_reg->off - smin_val;
4486			dst_reg->raw = ptr_reg->raw;
4487			break;
4488		}
4489		/* A new variable offset is created.  If the subtrahend is known
4490		 * nonnegative, then any reg->range we had before is still good.
4491		 */
4492		if (signed_sub_overflows(smin_ptr, smax_val) ||
4493		    signed_sub_overflows(smax_ptr, smin_val)) {
4494			/* Overflow possible, we know nothing */
4495			dst_reg->smin_value = S64_MIN;
4496			dst_reg->smax_value = S64_MAX;
4497		} else {
4498			dst_reg->smin_value = smin_ptr - smax_val;
4499			dst_reg->smax_value = smax_ptr - smin_val;
4500		}
4501		if (umin_ptr < umax_val) {
4502			/* Overflow possible, we know nothing */
4503			dst_reg->umin_value = 0;
4504			dst_reg->umax_value = U64_MAX;
4505		} else {
4506			/* Cannot overflow (as long as bounds are consistent) */
4507			dst_reg->umin_value = umin_ptr - umax_val;
4508			dst_reg->umax_value = umax_ptr - umin_val;
4509		}
4510		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
4511		dst_reg->off = ptr_reg->off;
4512		dst_reg->raw = ptr_reg->raw;
4513		if (reg_is_pkt_pointer(ptr_reg)) {
4514			dst_reg->id = ++env->id_gen;
4515			/* pkt_ptr may have moved forward, set range to zero */
4516			if (smin_val < 0)
4517				dst_reg->raw = 0;
4518		}
4519		break;
4520	case BPF_AND:
4521	case BPF_OR:
4522	case BPF_XOR:
4523		/* bitwise ops on pointers are troublesome, prohibit. */
4524		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
4525			dst, bpf_alu_string[opcode >> 4]);
4526		return -EACCES;
4527	default:
4528		/* other operators (e.g. MUL,LSH) produce non-pointer results */
4529		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
4530			dst, bpf_alu_string[opcode >> 4]);
4531		return -EACCES;
4532	}
4533
4534	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
4535		return -EINVAL;
4536
4537	__update_reg_bounds(dst_reg);
4538	__reg_deduce_bounds(dst_reg);
4539	__reg_bound_offset(dst_reg);
4540
4541	/* For unprivileged we require that resulting offset must be in bounds
4542	 * in order to be able to sanitize access later on.
4543	 */
4544	if (!env->allow_ptr_leaks) {
4545		if (dst_reg->type == PTR_TO_MAP_VALUE &&
4546		    check_map_access(env, dst, dst_reg->off, 1, false)) {
4547			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
4548				"prohibited for !root\n", dst);
4549			return -EACCES;
4550		} else if (dst_reg->type == PTR_TO_STACK &&
4551			   check_stack_access(env, dst_reg, dst_reg->off +
4552					      dst_reg->var_off.value, 1)) {
4553			verbose(env, "R%d stack pointer arithmetic goes out of range, "
4554				"prohibited for !root\n", dst);
4555			return -EACCES;
4556		}
4557	}
4558
4559	return 0;
4560}
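/* Worked example (a sketch, values chosen for illustration): with
 * r1 = PTR_TO_MAP_VALUE (off=0) into a 64-byte value and r2 a scalar
 * bounded to [0, 60], "r1 += r2" takes the variable-offset BPF_ADD path
 * above and yields dst_reg with smin/umin = 0, smax/umax = 60 and
 * off = 0; a later 4-byte load through r1 is accepted by
 * check_map_access() only because 60 + 4 <= 64.
 */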
4561
4562/* WARNING: This function does calculations on 64-bit values, but the actual
4563 * execution may occur on 32-bit values. Therefore, things like bitshifts
4564 * need extra checks in the 32-bit case.
4565 */
4566static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
4567				      struct bpf_insn *insn,
4568				      struct bpf_reg_state *dst_reg,
4569				      struct bpf_reg_state src_reg)
4570{
4571	struct bpf_reg_state *regs = cur_regs(env);
4572	u8 opcode = BPF_OP(insn->code);
4573	bool src_known, dst_known;
4574	s64 smin_val, smax_val;
4575	u64 umin_val, umax_val;
4576	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
4577	u32 dst = insn->dst_reg;
4578	int ret;
4579
4580	if (insn_bitness == 32) {
4581		/* Relevant for 32-bit RSH: Information can propagate towards
4582		 * LSB, so it isn't sufficient to only truncate the output to
4583		 * 32 bits.
4584		 */
4585		coerce_reg_to_size(dst_reg, 4);
4586		coerce_reg_to_size(&src_reg, 4);
4587	}
4588
4589	smin_val = src_reg.smin_value;
4590	smax_val = src_reg.smax_value;
4591	umin_val = src_reg.umin_value;
4592	umax_val = src_reg.umax_value;
4593	src_known = tnum_is_const(src_reg.var_off);
4594	dst_known = tnum_is_const(dst_reg->var_off);
4595
4596	if ((src_known && (smin_val != smax_val || umin_val != umax_val)) ||
4597	    smin_val > smax_val || umin_val > umax_val) {
4598		/* Taint dst register if offset had invalid bounds derived from
4599		 * e.g. dead branches.
4600		 */
4601		__mark_reg_unknown(dst_reg);
4602		return 0;
4603	}
4604
4605	if (!src_known &&
4606	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
4607		__mark_reg_unknown(dst_reg);
4608		return 0;
4609	}
4610
4611	switch (opcode) {
4612	case BPF_ADD:
4613		ret = sanitize_val_alu(env, insn);
4614		if (ret < 0) {
4615			verbose(env, "R%d tried to add from different pointers or scalars\n", dst);
4616			return ret;
4617		}
4618		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
4619		    signed_add_overflows(dst_reg->smax_value, smax_val)) {
4620			dst_reg->smin_value = S64_MIN;
4621			dst_reg->smax_value = S64_MAX;
4622		} else {
4623			dst_reg->smin_value += smin_val;
4624			dst_reg->smax_value += smax_val;
4625		}
4626		if (dst_reg->umin_value + umin_val < umin_val ||
4627		    dst_reg->umax_value + umax_val < umax_val) {
4628			dst_reg->umin_value = 0;
4629			dst_reg->umax_value = U64_MAX;
4630		} else {
4631			dst_reg->umin_value += umin_val;
4632			dst_reg->umax_value += umax_val;
4633		}
4634		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
4635		break;
4636	case BPF_SUB:
4637		ret = sanitize_val_alu(env, insn);
4638		if (ret < 0) {
4639			verbose(env, "R%d tried to sub from different pointers or scalars\n", dst);
4640			return ret;
4641		}
4642		if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
4643		    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
4644			/* Overflow possible, we know nothing */
4645			dst_reg->smin_value = S64_MIN;
4646			dst_reg->smax_value = S64_MAX;
4647		} else {
4648			dst_reg->smin_value -= smax_val;
4649			dst_reg->smax_value -= smin_val;
4650		}
4651		if (dst_reg->umin_value < umax_val) {
4652			/* Overflow possible, we know nothing */
4653			dst_reg->umin_value = 0;
4654			dst_reg->umax_value = U64_MAX;
4655		} else {
4656			/* Cannot overflow (as long as bounds are consistent) */
4657			dst_reg->umin_value -= umax_val;
4658			dst_reg->umax_value -= umin_val;
4659		}
4660		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
4661		break;
4662	case BPF_MUL:
4663		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
4664		if (smin_val < 0 || dst_reg->smin_value < 0) {
4665			/* Ain't nobody got time to multiply that sign */
4666			__mark_reg_unbounded(dst_reg);
4667			__update_reg_bounds(dst_reg);
4668			break;
4669		}
4670		/* Both values are positive, so we can work with unsigned and
4671		 * copy the result to signed (unless it exceeds S64_MAX).
4672		 */
4673		if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
4674			/* Potential overflow, we know nothing */
4675			__mark_reg_unbounded(dst_reg);
4676			/* (except what we can learn from the var_off) */
4677			__update_reg_bounds(dst_reg);
4678			break;
4679		}
4680		dst_reg->umin_value *= umin_val;
4681		dst_reg->umax_value *= umax_val;
4682		if (dst_reg->umax_value > S64_MAX) {
4683			/* Overflow possible, we know nothing */
4684			dst_reg->smin_value = S64_MIN;
4685			dst_reg->smax_value = S64_MAX;
4686		} else {
4687			dst_reg->smin_value = dst_reg->umin_value;
4688			dst_reg->smax_value = dst_reg->umax_value;
4689		}
4690		break;
4691	case BPF_AND:
4692		if (src_known && dst_known) {
4693			__mark_reg_known(dst_reg, dst_reg->var_off.value &
4694						  src_reg.var_off.value);
4695			break;
4696		}
4697		/* We get our minimum from the var_off, since that's inherently
4698		 * bitwise.  Our maximum is the minimum of the operands' maxima.
4699		 */
4700		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
4701		dst_reg->umin_value = dst_reg->var_off.value;
4702		dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
4703		if (dst_reg->smin_value < 0 || smin_val < 0) {
4704			/* Lose signed bounds when ANDing negative numbers,
4705			 * ain't nobody got time for that.
4706			 */
4707			dst_reg->smin_value = S64_MIN;
4708			dst_reg->smax_value = S64_MAX;
4709		} else {
4710			/* ANDing two positives gives a positive, so safe to
4711			 * cast result into s64.
4712			 */
4713			dst_reg->smin_value = dst_reg->umin_value;
4714			dst_reg->smax_value = dst_reg->umax_value;
4715		}
4716		/* We may learn something more from the var_off */
4717		__update_reg_bounds(dst_reg);
4718		break;
4719	case BPF_OR:
4720		if (src_known && dst_known) {
4721			__mark_reg_known(dst_reg, dst_reg->var_off.value |
4722						  src_reg.var_off.value);
4723			break;
4724		}
4725		/* We get our maximum from the var_off, and our minimum is the
4726		 * maximum of the operands' minima
4727		 */
4728		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
4729		dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
4730		dst_reg->umax_value = dst_reg->var_off.value |
4731				      dst_reg->var_off.mask;
4732		if (dst_reg->smin_value < 0 || smin_val < 0) {
4733			/* Lose signed bounds when ORing negative numbers,
4734			 * ain't nobody got time for that.
4735			 */
4736			dst_reg->smin_value = S64_MIN;
4737			dst_reg->smax_value = S64_MAX;
4738		} else {
4739			/* ORing two positives gives a positive, so safe to
4740			 * cast result into s64.
4741			 */
4742			dst_reg->smin_value = dst_reg->umin_value;
4743			dst_reg->smax_value = dst_reg->umax_value;
4744		}
4745		/* We may learn something more from the var_off */
4746		__update_reg_bounds(dst_reg);
4747		break;
4748	case BPF_LSH:
4749		if (umax_val >= insn_bitness) {
4750			/* Shifts greater than 31 or 63 are undefined.
4751			 * This includes shifts by a negative number.
4752			 */
4753			mark_reg_unknown(env, regs, insn->dst_reg);
4754			break;
4755		}
4756		/* We lose all sign bit information (except what we can pick
4757		 * up from var_off)
4758		 */
4759		dst_reg->smin_value = S64_MIN;
4760		dst_reg->smax_value = S64_MAX;
4761		/* If we might shift our top bit out, then we know nothing */
4762		if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
4763			dst_reg->umin_value = 0;
4764			dst_reg->umax_value = U64_MAX;
4765		} else {
4766			dst_reg->umin_value <<= umin_val;
4767			dst_reg->umax_value <<= umax_val;
4768		}
4769		dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
4770		/* We may learn something more from the var_off */
4771		__update_reg_bounds(dst_reg);
4772		break;
4773	case BPF_RSH:
4774		if (umax_val >= insn_bitness) {
4775			/* Shifts greater than 31 or 63 are undefined.
4776			 * This includes shifts by a negative number.
4777			 */
4778			mark_reg_unknown(env, regs, insn->dst_reg);
4779			break;
4780		}
4781		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
4782		 * be negative, then either:
4783		 * 1) src_reg might be zero, so the sign bit of the result is
4784		 *    unknown, so we lose our signed bounds
4785		 * 2) it's known negative, thus the unsigned bounds capture the
4786		 *    signed bounds
4787		 * 3) the signed bounds cross zero, so they tell us nothing
4788		 *    about the result
4789		 * If the value in dst_reg is known nonnegative, then again the
4790		 * unsigned bounds capture the signed bounds.
4791		 * Thus, in all cases it suffices to blow away our signed bounds
4792		 * and rely on inferring new ones from the unsigned bounds and
4793		 * var_off of the result.
4794		 */
4795		dst_reg->smin_value = S64_MIN;
4796		dst_reg->smax_value = S64_MAX;
4797		dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
4798		dst_reg->umin_value >>= umax_val;
4799		dst_reg->umax_value >>= umin_val;
4800		/* We may learn something more from the var_off */
4801		__update_reg_bounds(dst_reg);
4802		break;
4803	case BPF_ARSH:
4804		if (umax_val >= insn_bitness) {
4805			/* Shifts greater than 31 or 63 are undefined.
4806			 * This includes shifts by a negative number.
4807			 */
4808			mark_reg_unknown(env, regs, insn->dst_reg);
4809			break;
4810		}
4811
4812		/* Upon reaching here, src_known is true and
4813		 * umax_val is equal to umin_val.
4814		 */
4815		dst_reg->smin_value >>= umin_val;
4816		dst_reg->smax_value >>= umin_val;
4817		dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val);
4818
4819		/* blow away the dst_reg umin_value/umax_value and rely on
4820		 * dst_reg var_off to refine the result.
4821		 */
4822		dst_reg->umin_value = 0;
4823		dst_reg->umax_value = U64_MAX;
4824		__update_reg_bounds(dst_reg);
4825		break;
4826	default:
4827		mark_reg_unknown(env, regs, insn->dst_reg);
4828		break;
4829	}
4830
4831	if (BPF_CLASS(insn->code) != BPF_ALU64) {
4832		/* 32-bit ALU ops are (32,32)->32 */
4833		coerce_reg_to_size(dst_reg, 4);
4834	}
4835
4836	__reg_deduce_bounds(dst_reg);
4837	__reg_bound_offset(dst_reg);
4838	return 0;
4839}
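/* Worked example (a sketch): if r3 is a completely unknown scalar, then
 * after "r3 &= 0xff" the BPF_AND case above computes
 * var_off = (value 0, mask 0xff), hence umin = 0 and umax = 0xff; the
 * signed bounds are first blown away (the old smin may be negative) and
 * then re-derived from var_off and the unsigned bounds. This is the
 * 'var &= const' idiom that the error messages in check_func_arg()
 * suggest for bounding a size argument.
 */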
4840
4841/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
4842 * and var_off.
4843 */
4844static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
4845				   struct bpf_insn *insn)
4846{
4847	struct bpf_verifier_state *vstate = env->cur_state;
4848	struct bpf_func_state *state = vstate->frame[vstate->curframe];
4849	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
4850	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
4851	u8 opcode = BPF_OP(insn->code);
4852	int err;
4853
4854	dst_reg = &regs[insn->dst_reg];
4855	src_reg = NULL;
4856	if (dst_reg->type != SCALAR_VALUE)
4857		ptr_reg = dst_reg;
4858	if (BPF_SRC(insn->code) == BPF_X) {
4859		src_reg = &regs[insn->src_reg];
4860		if (src_reg->type != SCALAR_VALUE) {
4861			if (dst_reg->type != SCALAR_VALUE) {
4862				/* Combining two pointers by any ALU op yields
4863				 * an arbitrary scalar. Disallow all math except
4864				 * pointer subtraction
4865				 */
4866				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
4867					mark_reg_unknown(env, regs, insn->dst_reg);
4868					return 0;
4869				}
4870				verbose(env, "R%d pointer %s pointer prohibited\n",
4871					insn->dst_reg,
4872					bpf_alu_string[opcode >> 4]);
4873				return -EACCES;
4874			} else {
4875				/* scalar += pointer
4876				 * This is legal, but we have to reverse our
4877				 * src/dest handling in computing the range
4878				 */
4879				err = mark_chain_precision(env, insn->dst_reg);
4880				if (err)
4881					return err;
4882				return adjust_ptr_min_max_vals(env, insn,
4883							       src_reg, dst_reg);
4884			}
4885		} else if (ptr_reg) {
4886			/* pointer += scalar */
4887			err = mark_chain_precision(env, insn->src_reg);
4888			if (err)
4889				return err;
4890			return adjust_ptr_min_max_vals(env, insn,
4891						       dst_reg, src_reg);
4892		}
4893	} else {
4894		/* Pretend the src is a reg with a known value, since we only
4895		 * need to be able to read from this state.
4896		 */
4897		off_reg.type = SCALAR_VALUE;
4898		__mark_reg_known(&off_reg, insn->imm);
4899		src_reg = &off_reg;
4900		if (ptr_reg) /* pointer += K */
4901			return adjust_ptr_min_max_vals(env, insn,
4902						       ptr_reg, src_reg);
4903	}
4904
4905	/* Got here implies adding two SCALAR_VALUEs */
4906	if (WARN_ON_ONCE(ptr_reg)) {
4907		print_verifier_state(env, state);
4908		verbose(env, "verifier internal error: unexpected ptr_reg\n");
4909		return -EINVAL;
4910	}
4911	if (WARN_ON(!src_reg)) {
4912		print_verifier_state(env, state);
4913		verbose(env, "verifier internal error: no src_reg\n");
4914		return -EINVAL;
4915	}
4916	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
4917}
4918
4919/* check validity of 32-bit and 64-bit arithmetic operations */
4920static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
4921{
4922	struct bpf_reg_state *regs = cur_regs(env);
4923	u8 opcode = BPF_OP(insn->code);
4924	int err;
4925
4926	if (opcode == BPF_END || opcode == BPF_NEG) {
4927		if (opcode == BPF_NEG) {
4928			if (BPF_SRC(insn->code) != 0 ||
4929			    insn->src_reg != BPF_REG_0 ||
4930			    insn->off != 0 || insn->imm != 0) {
4931				verbose(env, "BPF_NEG uses reserved fields\n");
4932				return -EINVAL;
4933			}
4934		} else {
4935			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
4936			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
4937			    BPF_CLASS(insn->code) == BPF_ALU64) {
4938				verbose(env, "BPF_END uses reserved fields\n");
4939				return -EINVAL;
4940			}
4941		}
4942
4943		/* check src operand */
4944		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
4945		if (err)
4946			return err;
4947
4948		if (is_pointer_value(env, insn->dst_reg)) {
4949			verbose(env, "R%d pointer arithmetic prohibited\n",
4950				insn->dst_reg);
4951			return -EACCES;
4952		}
4953
4954		/* check dest operand */
4955		err = check_reg_arg(env, insn->dst_reg, DST_OP);
4956		if (err)
4957			return err;
4958
4959	} else if (opcode == BPF_MOV) {
4960
4961		if (BPF_SRC(insn->code) == BPF_X) {
4962			if (insn->imm != 0 || insn->off != 0) {
4963				verbose(env, "BPF_MOV uses reserved fields\n");
4964				return -EINVAL;
4965			}
4966
4967			/* check src operand */
4968			err = check_reg_arg(env, insn->src_reg, SRC_OP);
4969			if (err)
4970				return err;
4971		} else {
4972			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
4973				verbose(env, "BPF_MOV uses reserved fields\n");
4974				return -EINVAL;
4975			}
4976		}
4977
4978		/* check dest operand, mark as required later */
4979		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
4980		if (err)
4981			return err;
4982
4983		if (BPF_SRC(insn->code) == BPF_X) {
4984			struct bpf_reg_state *src_reg = regs + insn->src_reg;
4985			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
4986
4987			if (BPF_CLASS(insn->code) == BPF_ALU64) {
4988				/* case: R1 = R2
4989				 * copy register state to dest reg
4990				 */
4991				*dst_reg = *src_reg;
4992				dst_reg->live |= REG_LIVE_WRITTEN;
4993				dst_reg->subreg_def = DEF_NOT_SUBREG;
4994			} else {
4995				/* R1 = (u32) R2 */
4996				if (is_pointer_value(env, insn->src_reg)) {
4997					verbose(env,
4998						"R%d partial copy of pointer\n",
4999						insn->src_reg);
5000					return -EACCES;
5001				} else if (src_reg->type == SCALAR_VALUE) {
5002					*dst_reg = *src_reg;
5003					dst_reg->live |= REG_LIVE_WRITTEN;
5004					dst_reg->subreg_def = env->insn_idx + 1;
5005				} else {
5006					mark_reg_unknown(env, regs,
5007							 insn->dst_reg);
5008				}
5009				coerce_reg_to_size(dst_reg, 4);
5010			}
5011		} else {
5012			/* case: R = imm
5013			 * remember the value we stored into this reg
5014			 */
5015			/* clear any state __mark_reg_known doesn't set */
5016			mark_reg_unknown(env, regs, insn->dst_reg);
5017			regs[insn->dst_reg].type = SCALAR_VALUE;
5018			if (BPF_CLASS(insn->code) == BPF_ALU64) {
5019				__mark_reg_known(regs + insn->dst_reg,
5020						 insn->imm);
5021			} else {
5022				__mark_reg_known(regs + insn->dst_reg,
5023						 (u32)insn->imm);
5024			}
5025		}
5026
5027	} else if (opcode > BPF_END) {
5028		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
5029		return -EINVAL;
5030
5031	} else {	/* all other ALU ops: and, sub, xor, add, ... */
5032
5033		if (BPF_SRC(insn->code) == BPF_X) {
5034			if (insn->imm != 0 || insn->off != 0) {
5035				verbose(env, "BPF_ALU uses reserved fields\n");
5036				return -EINVAL;
5037			}
5038			/* check src1 operand */
5039			err = check_reg_arg(env, insn->src_reg, SRC_OP);
5040			if (err)
5041				return err;
5042		} else {
5043			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
5044				verbose(env, "BPF_ALU uses reserved fields\n");
5045				return -EINVAL;
5046			}
5047		}
5048
5049		/* check src2 operand */
5050		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5051		if (err)
5052			return err;
5053
5054		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
5055		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
5056			verbose(env, "div by zero\n");
5057			return -EINVAL;
5058		}
5059
5060		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
5061		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
5062			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
5063
5064			if (insn->imm < 0 || insn->imm >= size) {
5065				verbose(env, "invalid shift %d\n", insn->imm);
5066				return -EINVAL;
5067			}
5068		}
5069
5070		/* check dest operand */
5071		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
5072		if (err)
5073			return err;
5074
5075		return adjust_reg_min_max_vals(env, insn);
5076	}
5077
5078	return 0;
5079}
5080
5081static void __find_good_pkt_pointers(struct bpf_func_state *state,
5082				     struct bpf_reg_state *dst_reg,
5083				     enum bpf_reg_type type, u16 new_range)
5084{
5085	struct bpf_reg_state *reg;
5086	int i;
5087
5088	for (i = 0; i < MAX_BPF_REG; i++) {
5089		reg = &state->regs[i];
5090		if (reg->type == type && reg->id == dst_reg->id)
5091			/* keep the maximum range already checked */
5092			reg->range = max(reg->range, new_range);
5093	}
5094
5095	bpf_for_each_spilled_reg(i, state, reg) {
5096		if (!reg)
5097			continue;
5098		if (reg->type == type && reg->id == dst_reg->id)
5099			reg->range = max(reg->range, new_range);
5100	}
5101}
5102
5103static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
5104				   struct bpf_reg_state *dst_reg,
5105				   enum bpf_reg_type type,
5106				   bool range_right_open)
5107{
5108	u16 new_range;
5109	int i;
5110
5111	if (dst_reg->off < 0 ||
5112	    (dst_reg->off == 0 && range_right_open))
5113		/* This doesn't give us any range */
5114		return;
5115
5116	if (dst_reg->umax_value > MAX_PACKET_OFF ||
5117	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
5118		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
5119		 * than pkt_end, but that's because it's also less than pkt.
5120		 */
5121		return;
5122
5123	new_range = dst_reg->off;
5124	if (range_right_open)
5125		new_range--;
5126
5127	/* Examples for register markings:
5128	 *
5129	 * pkt_data in dst register:
5130	 *
5131	 *   r2 = r3;
5132	 *   r2 += 8;
5133	 *   if (r2 > pkt_end) goto <handle exception>
5134	 *   <access okay>
5135	 *
5136	 *   r2 = r3;
5137	 *   r2 += 8;
5138	 *   if (r2 < pkt_end) goto <access okay>
5139	 *   <handle exception>
5140	 *
5141	 *   Where:
5142	 *     r2 == dst_reg, pkt_end == src_reg
5143	 *     r2=pkt(id=n,off=8,r=0)
5144	 *     r3=pkt(id=n,off=0,r=0)
5145	 *
5146	 * pkt_data in src register:
5147	 *
5148	 *   r2 = r3;
5149	 *   r2 += 8;
5150	 *   if (pkt_end >= r2) goto <access okay>
5151	 *   <handle exception>
5152	 *
5153	 *   r2 = r3;
5154	 *   r2 += 8;
5155	 *   if (pkt_end <= r2) goto <handle exception>
5156	 *   <access okay>
5157	 *
5158	 *   Where:
5159	 *     pkt_end == dst_reg, r2 == src_reg
5160	 *     r2=pkt(id=n,off=8,r=0)
5161	 *     r3=pkt(id=n,off=0,r=0)
5162	 *
5163	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
5164	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
5165	 * and [r3, r3 + 8-1) respectively is safe to access depending on
5166	 * the check.
5167	 */
5168
5169	/* If our ids match, then we must have the same max_value.  And we
5170	 * don't care about the other reg's fixed offset, since if it's too big
5171	 * the range won't allow anything.
5172	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
5173	 */
5174	for (i = 0; i <= vstate->curframe; i++)
5175		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
5176					 new_range);
5177}
5178
5179/* compute branch direction of the expression "if (reg opcode val) goto target;"
5180 * and return:
5181 *  1 - branch will be taken and "goto target" will be executed
5182 *  0 - branch will not be taken and execution falls through to the next insn
5183 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register value range is [0,10]
5184 */
5185static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
5186			   bool is_jmp32)
5187{
5188	struct bpf_reg_state reg_lo;
5189	s64 sval;
5190
5191	if (__is_pointer_value(false, reg))
5192		return -1;
5193
5194	if (is_jmp32) {
5195		reg_lo = *reg;
5196		reg = &reg_lo;
5197		/* For JMP32, only the low 32 bits are compared; coerce_reg_to_size
5198		 * truncates the high bits and updates umin/umax based on
5199		 * the low bits.
5200		 */
5201		coerce_reg_to_size(reg, 4);
5202		/* smin/smax need special handling. For example, after coercion,
5203		 * if smin_value is 0x00000000ffffffffLL, the value is -1 when
5204		 * used as an operand to JMP32. It is a negative number from s32's
5205		 * point of view, while it is a positive number when seen as
5206		 * s64. The smin/smax are kept as s64; therefore, when used with
5207		 * JMP32, they need to be truncated to s32 and then sign
5208		 * extended back to s64.
5209		 *
5210		 * Also, smin/smax were copied from umin/umax. If umin/umax have
5211		 * different sign bits, then the min/max relationship is not
5212		 * maintained after casting into s32; in this case, set smin/smax
5213		 * to the safest range.
5214		 */
5215		if ((reg->umax_value ^ reg->umin_value) &
5216		    (1ULL << 31)) {
5217			reg->smin_value = S32_MIN;
5218			reg->smax_value = S32_MAX;
5219		}
5220		reg->smin_value = (s64)(s32)reg->smin_value;
5221		reg->smax_value = (s64)(s32)reg->smax_value;
5222
5223		val = (u32)val;
5224		sval = (s64)(s32)val;
5225	} else {
5226		sval = (s64)val;
5227	}
5228
5229	switch (opcode) {
5230	case BPF_JEQ:
5231		if (tnum_is_const(reg->var_off))
5232			return !!tnum_equals_const(reg->var_off, val);
5233		break;
5234	case BPF_JNE:
5235		if (tnum_is_const(reg->var_off))
5236			return !tnum_equals_const(reg->var_off, val);
5237		break;
5238	case BPF_JSET:
5239		if ((~reg->var_off.mask & reg->var_off.value) & val)
5240			return 1;
5241		if (!((reg->var_off.mask | reg->var_off.value) & val))
5242			return 0;
5243		break;
5244	case BPF_JGT:
5245		if (reg->umin_value > val)
5246			return 1;
5247		else if (reg->umax_value <= val)
5248			return 0;
5249		break;
5250	case BPF_JSGT:
5251		if (reg->smin_value > sval)
5252			return 1;
5253		else if (reg->smax_value < sval)
5254			return 0;
5255		break;
5256	case BPF_JLT:
5257		if (reg->umax_value < val)
5258			return 1;
5259		else if (reg->umin_value >= val)
5260			return 0;
5261		break;
5262	case BPF_JSLT:
5263		if (reg->smax_value < sval)
5264			return 1;
5265		else if (reg->smin_value >= sval)
5266			return 0;
5267		break;
5268	case BPF_JGE:
5269		if (reg->umin_value >= val)
5270			return 1;
5271		else if (reg->umax_value < val)
5272			return 0;
5273		break;
5274	case BPF_JSGE:
5275		if (reg->smin_value >= sval)
5276			return 1;
5277		else if (reg->smax_value < sval)
5278			return 0;
5279		break;
5280	case BPF_JLE:
5281		if (reg->umax_value <= val)
5282			return 1;
5283		else if (reg->umin_value > val)
5284			return 0;
5285		break;
5286	case BPF_JSLE:
5287		if (reg->smax_value <= sval)
5288			return 1;
5289		else if (reg->smin_value > sval)
5290			return 0;
5291		break;
5292	}
5293
5294	return -1;
5295}
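/* Illustrative example of the bounds checks above (hypothetical values):
 * for a SCALAR_VALUE reg known to be in [6, 10] (umin_value == 6,
 * umax_value == 10) and is_jmp32 == false:
 *
 *   is_branch_taken(reg, 5, BPF_JGT, false)  returns  1 (6 > 5 always holds)
 *   is_branch_taken(reg, 10, BPF_JGT, false) returns  0 (10 <= 10, never taken)
 *   is_branch_taken(reg, 8, BPF_JGT, false)  returns -1 (could go either way)
 */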
5296
5297/* Generate the min value of the high 32 bits from TNUM info. */
5298static u64 gen_hi_min(struct tnum var)
5299{
5300	return var.value & ~0xffffffffULL;
5301}
5302
5303/* Generate the max value of the high 32 bits from TNUM info. */
5304static u64 gen_hi_max(struct tnum var)
5305{
5306	return (var.value | var.mask) & ~0xffffffffULL;
5307}
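/* For illustration (hypothetical tnum): if var.value == 0x200000005 and
 * var.mask == 0x100000000 (i.e. bit 32 is unknown), then
 * gen_hi_min(var) == 0x200000000 and gen_hi_max(var) == 0x300000000,
 * the smallest and largest values the upper 32 bits can contribute.
 */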
5308
5309/* Return true if VAL is compared with an s64 sign-extended from s32, and
5310 * they have the same signedness.
5311 */
5312static bool cmp_val_with_extended_s64(s64 sval, struct bpf_reg_state *reg)
5313{
5314	return ((s32)sval >= 0 &&
5315		reg->smin_value >= 0 && reg->smax_value <= S32_MAX) ||
5316	       ((s32)sval < 0 &&
5317		reg->smax_value <= 0 && reg->smin_value >= S32_MIN);
5318}
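/* Illustrative cases (hypothetical bounds): sval == 5 with reg bounds
 * [0, 100] returns true (both sides are non-negative s32 values), and
 * sval == -1 with reg bounds [-10, 0] returns true (both non-positive),
 * while sval == 5 with reg bounds [-1, 100] returns false because the
 * sign-extension assumption does not hold for the whole range.
 */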
5319
5320/* Adjusts the register min/max values in the case that the dst_reg is the
5321 * variable register that we are working on, and src_reg is a constant or we're
5322 * simply doing a BPF_K check.
5323 * In JEQ/JNE cases we also adjust the var_off values.
5324 */
5325static void reg_set_min_max(struct bpf_reg_state *true_reg,
5326			    struct bpf_reg_state *false_reg, u64 val,
5327			    u8 opcode, bool is_jmp32)
5328{
5329	s64 sval;
5330
5331	/* If the dst_reg is a pointer, we can't learn anything about its
5332	 * variable offset from the compare (unless src_reg were a pointer into
5333	 * the same object, but we don't bother with that).
5334	 * Since false_reg and true_reg have the same type by construction, we
5335	 * only need to check one of them for pointerness.
5336	 */
5337	if (__is_pointer_value(false, false_reg))
5338		return;
5339
5340	val = is_jmp32 ? (u32)val : val;
5341	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
5342
5343	switch (opcode) {
5344	case BPF_JEQ:
5345	case BPF_JNE:
5346	{
5347		struct bpf_reg_state *reg =
5348			opcode == BPF_JEQ ? true_reg : false_reg;
5349
5350		/* For BPF_JEQ, if this is false we know nothing Jon Snow, but
5351		 * if it is true we know the value for sure. Likewise for
5352		 * BPF_JNE.
5353		 */
5354		if (is_jmp32) {
5355			u64 old_v = reg->var_off.value;
5356			u64 hi_mask = ~0xffffffffULL;
5357
5358			reg->var_off.value = (old_v & hi_mask) | val;
5359			reg->var_off.mask &= hi_mask;
5360		} else {
5361			__mark_reg_known(reg, val);
5362		}
5363		break;
5364	}
5365	case BPF_JSET:
5366		false_reg->var_off = tnum_and(false_reg->var_off,
5367					      tnum_const(~val));
5368		if (is_power_of_2(val))
5369			true_reg->var_off = tnum_or(true_reg->var_off,
5370						    tnum_const(val));
5371		break;
5372	case BPF_JGE:
5373	case BPF_JGT:
5374	{
5375		u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
5376		u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
5377
5378		if (is_jmp32) {
5379			false_umax += gen_hi_max(false_reg->var_off);
5380			true_umin += gen_hi_min(true_reg->var_off);
5381		}
5382		false_reg->umax_value = min(false_reg->umax_value, false_umax);
5383		true_reg->umin_value = max(true_reg->umin_value, true_umin);
5384		break;
5385	}
5386	case BPF_JSGE:
5387	case BPF_JSGT:
5388	{
5389		s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
5390		s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
5391
5392		/* If the full s64 was not sign-extended from s32 then don't
5393		 * deduce further info.
5394		 */
5395		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5396			break;
5397		false_reg->smax_value = min(false_reg->smax_value, false_smax);
5398		true_reg->smin_value = max(true_reg->smin_value, true_smin);
5399		break;
5400	}
5401	case BPF_JLE:
5402	case BPF_JLT:
5403	{
5404		u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
5405		u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
5406
5407		if (is_jmp32) {
5408			false_umin += gen_hi_min(false_reg->var_off);
5409			true_umax += gen_hi_max(true_reg->var_off);
5410		}
5411		false_reg->umin_value = max(false_reg->umin_value, false_umin);
5412		true_reg->umax_value = min(true_reg->umax_value, true_umax);
5413		break;
5414	}
5415	case BPF_JSLE:
5416	case BPF_JSLT:
5417	{
5418		s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
5419		s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
5420
5421		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5422			break;
5423		false_reg->smin_value = max(false_reg->smin_value, false_smin);
5424		true_reg->smax_value = min(true_reg->smax_value, true_smax);
5425		break;
5426	}
5427	default:
5428		break;
5429	}
5430
5431	__reg_deduce_bounds(false_reg);
5432	__reg_deduce_bounds(true_reg);
5433	/* We might have learned some bits from the bounds. */
5434	__reg_bound_offset(false_reg);
5435	__reg_bound_offset(true_reg);
5436	/* Intersecting with the old var_off might have improved our bounds
5437	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
5438	 * then new var_off is (0; 0x7f...fc) which improves our umax.
5439	 */
5440	__update_reg_bounds(false_reg);
5441	__update_reg_bounds(true_reg);
5442}
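/* Worked example (illustrative): for "if r1 > 10 goto ..." with r1 being an
 * unbounded SCALAR_VALUE and a BPF_JGT/BPF_K comparison, the code above sets
 * true_reg->umin_value to 11 (in the taken branch r1 must be > 10) and
 * false_reg->umax_value to 10 (in the fall-through branch r1 <= 10), and the
 * helpers called at the end propagate what was learned into the signed
 * bounds and var_off.
 */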
5443
5444/* Same as above, but for the case that dst_reg holds a constant and src_reg is
5445 * the variable reg.
5446 */
5447static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
5448				struct bpf_reg_state *false_reg, u64 val,
5449				u8 opcode, bool is_jmp32)
5450{
5451	s64 sval;
5452
5453	if (__is_pointer_value(false, false_reg))
5454		return;
5455
5456	val = is_jmp32 ? (u32)val : val;
5457	sval = is_jmp32 ? (s64)(s32)val : (s64)val;
5458
5459	switch (opcode) {
5460	case BPF_JEQ:
5461	case BPF_JNE:
5462	{
5463		struct bpf_reg_state *reg =
5464			opcode == BPF_JEQ ? true_reg : false_reg;
5465
5466		if (is_jmp32) {
5467			u64 old_v = reg->var_off.value;
5468			u64 hi_mask = ~0xffffffffULL;
5469
5470			reg->var_off.value = (old_v & hi_mask) | val;
5471			reg->var_off.mask &= hi_mask;
5472		} else {
5473			__mark_reg_known(reg, val);
5474		}
5475		break;
5476	}
5477	case BPF_JSET:
5478		false_reg->var_off = tnum_and(false_reg->var_off,
5479					      tnum_const(~val));
5480		if (is_power_of_2(val))
5481			true_reg->var_off = tnum_or(true_reg->var_off,
5482						    tnum_const(val));
5483		break;
5484	case BPF_JGE:
5485	case BPF_JGT:
5486	{
5487		u64 false_umin = opcode == BPF_JGT ? val    : val + 1;
5488		u64 true_umax = opcode == BPF_JGT ? val - 1 : val;
5489
5490		if (is_jmp32) {
5491			false_umin += gen_hi_min(false_reg->var_off);
5492			true_umax += gen_hi_max(true_reg->var_off);
5493		}
5494		false_reg->umin_value = max(false_reg->umin_value, false_umin);
5495		true_reg->umax_value = min(true_reg->umax_value, true_umax);
5496		break;
5497	}
5498	case BPF_JSGE:
5499	case BPF_JSGT:
5500	{
5501		s64 false_smin = opcode == BPF_JSGT ? sval    : sval + 1;
5502		s64 true_smax = opcode == BPF_JSGT ? sval - 1 : sval;
5503
5504		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5505			break;
5506		false_reg->smin_value = max(false_reg->smin_value, false_smin);
5507		true_reg->smax_value = min(true_reg->smax_value, true_smax);
5508		break;
5509	}
5510	case BPF_JLE:
5511	case BPF_JLT:
5512	{
5513		u64 false_umax = opcode == BPF_JLT ? val    : val - 1;
5514		u64 true_umin = opcode == BPF_JLT ? val + 1 : val;
5515
5516		if (is_jmp32) {
5517			false_umax += gen_hi_max(false_reg->var_off);
5518			true_umin += gen_hi_min(true_reg->var_off);
5519		}
5520		false_reg->umax_value = min(false_reg->umax_value, false_umax);
5521		true_reg->umin_value = max(true_reg->umin_value, true_umin);
5522		break;
5523	}
5524	case BPF_JSLE:
5525	case BPF_JSLT:
5526	{
5527		s64 false_smax = opcode == BPF_JSLT ? sval    : sval - 1;
5528		s64 true_smin = opcode == BPF_JSLT ? sval + 1 : sval;
5529
5530		if (is_jmp32 && !cmp_val_with_extended_s64(sval, false_reg))
5531			break;
5532		false_reg->smax_value = min(false_reg->smax_value, false_smax);
5533		true_reg->smin_value = max(true_reg->smin_value, true_smin);
5534		break;
5535	}
5536	default:
5537		break;
5538	}
5539
5540	__reg_deduce_bounds(false_reg);
5541	__reg_deduce_bounds(true_reg);
5542	/* We might have learned some bits from the bounds. */
5543	__reg_bound_offset(false_reg);
5544	__reg_bound_offset(true_reg);
5545	/* Intersecting with the old var_off might have improved our bounds
5546	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
5547	 * then new var_off is (0; 0x7f...fc) which improves our umax.
5548	 */
5549	__update_reg_bounds(false_reg);
5550	__update_reg_bounds(true_reg);
5551}
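/* Mirrored example (illustrative): for "if r2 > r1 goto ..." where r2 is a
 * known constant 10 and r1 is the variable src_reg, BPF_JGT in the taken
 * branch implies r1 < 10 (true_reg->umax_value becomes 9), while the
 * fall-through branch implies r1 >= 10 (false_reg->umin_value becomes 10).
 */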
5552
5553/* Regs are known to be equal, so intersect their min/max/var_off */
5554static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
5555				  struct bpf_reg_state *dst_reg)
5556{
5557	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
5558							dst_reg->umin_value);
5559	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
5560							dst_reg->umax_value);
5561	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
5562							dst_reg->smin_value);
5563	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
5564							dst_reg->smax_value);
5565	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
5566							     dst_reg->var_off);
5567	/* We might have learned new bounds from the var_off. */
5568	__update_reg_bounds(src_reg);
5569	__update_reg_bounds(dst_reg);
5570	/* We might have learned something about the sign bit. */
5571	__reg_deduce_bounds(src_reg);
5572	__reg_deduce_bounds(dst_reg);
5573	/* We might have learned some bits from the bounds. */
5574	__reg_bound_offset(src_reg);
5575	__reg_bound_offset(dst_reg);
5576	/* Intersecting with the old var_off might have improved our bounds
5577	 * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
5578	 * then new var_off is (0; 0x7f...fc) which improves our umax.
5579	 */
5580	__update_reg_bounds(src_reg);
5581	__update_reg_bounds(dst_reg);
5582}
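/* Illustrative example: if the old bounds were r1 in [0, 100] and r2 in
 * [50, 200] and a BPF_JEQ branch proves r1 == r2, both registers end up
 * with the intersected range [50, 100] (umin is the max of the umins,
 * umax the min of the umaxes), and likewise for the signed bounds and
 * var_off.
 */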
5583
5584static void reg_combine_min_max(struct bpf_reg_state *true_src,
5585				struct bpf_reg_state *true_dst,
5586				struct bpf_reg_state *false_src,
5587				struct bpf_reg_state *false_dst,
5588				u8 opcode)
5589{
5590	switch (opcode) {
5591	case BPF_JEQ:
5592		__reg_combine_min_max(true_src, true_dst);
5593		break;
5594	case BPF_JNE:
5595		__reg_combine_min_max(false_src, false_dst);
5596		break;
5597	}
5598}
5599
5600static void mark_ptr_or_null_reg(struct bpf_func_state *state,
5601				 struct bpf_reg_state *reg, u32 id,
5602				 bool is_null)
5603{
5604	if (reg_type_may_be_null(reg->type) && reg->id == id) {
5605		/* Old offset (both fixed and variable parts) should
5606		 * have been known-zero, because we don't allow pointer
5607		 * arithmetic on pointers that might be NULL.
5608		 */
5609		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
5610				 !tnum_equals_const(reg->var_off, 0) ||
5611				 reg->off)) {
5612			__mark_reg_known_zero(reg);
5613			reg->off = 0;
5614		}
5615		if (is_null) {
5616			reg->type = SCALAR_VALUE;
5617		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
5618			if (reg->map_ptr->inner_map_meta) {
5619				reg->type = CONST_PTR_TO_MAP;
5620				reg->map_ptr = reg->map_ptr->inner_map_meta;
5621			} else if (reg->map_ptr->map_type ==
5622				   BPF_MAP_TYPE_XSKMAP) {
5623				reg->type = PTR_TO_XDP_SOCK;
5624			} else {
5625				reg->type = PTR_TO_MAP_VALUE;
5626			}
5627		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
5628			reg->type = PTR_TO_SOCKET;
5629		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
5630			reg->type = PTR_TO_SOCK_COMMON;
5631		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
5632			reg->type = PTR_TO_TCP_SOCK;
5633		}
5634		if (is_null) {
5635			/* We don't need id and ref_obj_id from this point
5636			 * onwards anymore, so reset them so that state
5637			 * pruning has a chance to take effect.
5638			 */
5639			reg->id = 0;
5640			reg->ref_obj_id = 0;
5641		} else if (!reg_may_point_to_spin_lock(reg)) {
5642			/* For not-NULL ptr, reg->ref_obj_id will be reset
5643			 * in release_reg_references().
5644			 *
5645			 * reg->id is still used by spin_lock ptr. Other
5646			 * than spin_lock ptr type, reg->id can be reset.
5647			 */
5648			reg->id = 0;
5649		}
5650	}
5651}
5652
5653static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
5654				    bool is_null)
5655{
5656	struct bpf_reg_state *reg;
5657	int i;
5658
5659	for (i = 0; i < MAX_BPF_REG; i++)
5660		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
5661
5662	bpf_for_each_spilled_reg(i, state, reg) {
5663		if (!reg)
5664			continue;
5665		mark_ptr_or_null_reg(state, reg, id, is_null);
5666	}
5667}
5668
5669/* The logic is similar to find_good_pkt_pointers(), both could eventually
5670 * be folded together at some point.
5671 */
5672static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
5673				  bool is_null)
5674{
5675	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5676	struct bpf_reg_state *regs = state->regs;
5677	u32 ref_obj_id = regs[regno].ref_obj_id;
5678	u32 id = regs[regno].id;
5679	int i;
5680
5681	if (ref_obj_id && ref_obj_id == id && is_null)
5682		/* regs[regno] is in the " == NULL" branch.
5683		 * No one could have freed the reference state before
5684		 * doing the NULL check.
5685		 */
5686		WARN_ON_ONCE(release_reference_state(state, id));
5687
5688	for (i = 0; i <= vstate->curframe; i++)
5689		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
5690}
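/* Typical pattern handled here (illustrative):
 *
 *   r0 = bpf_map_lookup_elem(...)   ; r0 is PTR_TO_MAP_VALUE_OR_NULL, id=n
 *   if (r0 == 0) goto <err>         ; in the "== 0" branch r0 (and every
 *                                   ; other reg sharing id=n) becomes SCALAR_VALUE
 *   *(u64 *)(r0 + 0) = 1            ; in the "!= 0" branch r0 is now
 *                                   ; PTR_TO_MAP_VALUE, so the store is allowed
 */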
5691
5692static bool try_match_pkt_pointers(const struct bpf_insn *insn,
5693				   struct bpf_reg_state *dst_reg,
5694				   struct bpf_reg_state *src_reg,
5695				   struct bpf_verifier_state *this_branch,
5696				   struct bpf_verifier_state *other_branch)
5697{
5698	if (BPF_SRC(insn->code) != BPF_X)
5699		return false;
5700
5701	/* Pointers are always 64-bit. */
5702	if (BPF_CLASS(insn->code) == BPF_JMP32)
5703		return false;
5704
5705	switch (BPF_OP(insn->code)) {
5706	case BPF_JGT:
5707		if ((dst_reg->type == PTR_TO_PACKET &&
5708		     src_reg->type == PTR_TO_PACKET_END) ||
5709		    (dst_reg->type == PTR_TO_PACKET_META &&
5710		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
5711			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
5712			find_good_pkt_pointers(this_branch, dst_reg,
5713					       dst_reg->type, false);
5714		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
5715			    src_reg->type == PTR_TO_PACKET) ||
5716			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
5717			    src_reg->type == PTR_TO_PACKET_META)) {
5718			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
5719			find_good_pkt_pointers(other_branch, src_reg,
5720					       src_reg->type, true);
5721		} else {
5722			return false;
5723		}
5724		break;
5725	case BPF_JLT:
5726		if ((dst_reg->type == PTR_TO_PACKET &&
5727		     src_reg->type == PTR_TO_PACKET_END) ||
5728		    (dst_reg->type == PTR_TO_PACKET_META &&
5729		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
5730			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
5731			find_good_pkt_pointers(other_branch, dst_reg,
5732					       dst_reg->type, true);
5733		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
5734			    src_reg->type == PTR_TO_PACKET) ||
5735			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
5736			    src_reg->type == PTR_TO_PACKET_META)) {
5737			/* pkt_end < pkt_data', pkt_data > pkt_meta' */
5738			find_good_pkt_pointers(this_branch, src_reg,
5739					       src_reg->type, false);
5740		} else {
5741			return false;
5742		}
5743		break;
5744	case BPF_JGE:
5745		if ((dst_reg->type == PTR_TO_PACKET &&
5746		     src_reg->type == PTR_TO_PACKET_END) ||
5747		    (dst_reg->type == PTR_TO_PACKET_META &&
5748		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
5749			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
5750			find_good_pkt_pointers(this_branch, dst_reg,
5751					       dst_reg->type, true);
5752		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
5753			    src_reg->type == PTR_TO_PACKET) ||
5754			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
5755			    src_reg->type == PTR_TO_PACKET_META)) {
5756			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
5757			find_good_pkt_pointers(other_branch, src_reg,
5758					       src_reg->type, false);
5759		} else {
5760			return false;
5761		}
5762		break;
5763	case BPF_JLE:
5764		if ((dst_reg->type == PTR_TO_PACKET &&
5765		     src_reg->type == PTR_TO_PACKET_END) ||
5766		    (dst_reg->type == PTR_TO_PACKET_META &&
5767		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
5768			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
5769			find_good_pkt_pointers(other_branch, dst_reg,
5770					       dst_reg->type, false);
5771		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
5772			    src_reg->type == PTR_TO_PACKET) ||
5773			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
5774			    src_reg->type == PTR_TO_PACKET_META)) {
5775			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
5776			find_good_pkt_pointers(this_branch, src_reg,
5777					       src_reg->type, true);
5778		} else {
5779			return false;
5780		}
5781		break;
5782	default:
5783		return false;
5784	}
5785
5786	return true;
5787}
5788
5789static int check_cond_jmp_op(struct bpf_verifier_env *env,
5790			     struct bpf_insn *insn, int *insn_idx)
5791{
5792	struct bpf_verifier_state *this_branch = env->cur_state;
5793	struct bpf_verifier_state *other_branch;
5794	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
5795	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
5796	u8 opcode = BPF_OP(insn->code);
5797	bool is_jmp32;
5798	int pred = -1;
5799	int err;
5800
5801	/* Only conditional jumps are expected to reach here. */
5802	if (opcode == BPF_JA || opcode > BPF_JSLE) {
5803		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
5804		return -EINVAL;
5805	}
5806
5807	if (BPF_SRC(insn->code) == BPF_X) {
5808		if (insn->imm != 0) {
5809			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
5810			return -EINVAL;
5811		}
5812
5813		/* check src1 operand */
5814		err = check_reg_arg(env, insn->src_reg, SRC_OP);
5815		if (err)
5816			return err;
5817
5818		if (is_pointer_value(env, insn->src_reg)) {
5819			verbose(env, "R%d pointer comparison prohibited\n",
5820				insn->src_reg);
5821			return -EACCES;
5822		}
5823		src_reg = &regs[insn->src_reg];
5824	} else {
5825		if (insn->src_reg != BPF_REG_0) {
5826			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
5827			return -EINVAL;
5828		}
5829	}
5830
5831	/* check src2 operand */
5832	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
5833	if (err)
5834		return err;
5835
5836	dst_reg = &regs[insn->dst_reg];
5837	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
5838
5839	if (BPF_SRC(insn->code) == BPF_K)
5840		pred = is_branch_taken(dst_reg, insn->imm,
5841				       opcode, is_jmp32);
5842	else if (src_reg->type == SCALAR_VALUE &&
5843		 tnum_is_const(src_reg->var_off))
5844		pred = is_branch_taken(dst_reg, src_reg->var_off.value,
5845				       opcode, is_jmp32);
5846	if (pred >= 0) {
5847		err = mark_chain_precision(env, insn->dst_reg);
5848		if (BPF_SRC(insn->code) == BPF_X && !err)
5849			err = mark_chain_precision(env, insn->src_reg);
5850		if (err)
5851			return err;
5852	}
5853	if (pred == 1) {
5854		/* only follow the goto, ignore fall-through */
5855		*insn_idx += insn->off;
5856		return 0;
5857	} else if (pred == 0) {
5858		/* only follow fall-through branch, since
5859		 * that's where the program will go
5860		 */
5861		return 0;
5862	}
5863
5864	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
5865				  false);
5866	if (!other_branch)
5867		return -EFAULT;
5868	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
5869
5870	/* detect if we are comparing against a constant value so we can adjust
5871	 * our min/max values for our dst register.
5872	 * this is only legit if both are scalars (or pointers to the same
5873	 * object, I suppose, but we don't support that right now), because
5874	 * otherwise the different base pointers mean the offsets aren't
5875	 * comparable.
5876	 */
5877	if (BPF_SRC(insn->code) == BPF_X) {
5878		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
5879		struct bpf_reg_state lo_reg0 = *dst_reg;
5880		struct bpf_reg_state lo_reg1 = *src_reg;
5881		struct bpf_reg_state *src_lo, *dst_lo;
5882
5883		dst_lo = &lo_reg0;
5884		src_lo = &lo_reg1;
5885		coerce_reg_to_size(dst_lo, 4);
5886		coerce_reg_to_size(src_lo, 4);
5887
5888		if (dst_reg->type == SCALAR_VALUE &&
5889		    src_reg->type == SCALAR_VALUE) {
5890			if (tnum_is_const(src_reg->var_off) ||
5891			    (is_jmp32 && tnum_is_const(src_lo->var_off)))
5892				reg_set_min_max(&other_branch_regs[insn->dst_reg],
5893						dst_reg,
5894						is_jmp32
5895						? src_lo->var_off.value
5896						: src_reg->var_off.value,
5897						opcode, is_jmp32);
5898			else if (tnum_is_const(dst_reg->var_off) ||
5899				 (is_jmp32 && tnum_is_const(dst_lo->var_off)))
5900				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
5901						    src_reg,
5902						    is_jmp32
5903						    ? dst_lo->var_off.value
5904						    : dst_reg->var_off.value,
5905						    opcode, is_jmp32);
5906			else if (!is_jmp32 &&
5907				 (opcode == BPF_JEQ || opcode == BPF_JNE))
5908				/* Comparing for equality, we can combine knowledge */
5909				reg_combine_min_max(&other_branch_regs[insn->src_reg],
5910						    &other_branch_regs[insn->dst_reg],
5911						    src_reg, dst_reg, opcode);
5912		}
5913	} else if (dst_reg->type == SCALAR_VALUE) {
5914		reg_set_min_max(&other_branch_regs[insn->dst_reg],
5915					dst_reg, insn->imm, opcode, is_jmp32);
5916	}
5917
5918	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
5919	 * NOTE: these optimizations below are related with pointer comparison
5920	 *       which will never be JMP32.
5921	 */
5922	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
5923	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
5924	    reg_type_may_be_null(dst_reg->type)) {
5925		/* Mark all identical registers in each branch as either
5926		 * safe or unknown, depending on the R == 0 or R != 0 condition.
5927		 */
5928		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
5929				      opcode == BPF_JNE);
5930		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
5931				      opcode == BPF_JEQ);
5932	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
5933					   this_branch, other_branch) &&
5934		   is_pointer_value(env, insn->dst_reg)) {
5935		verbose(env, "R%d pointer comparison prohibited\n",
5936			insn->dst_reg);
5937		return -EACCES;
5938	}
5939	if (env->log.level & BPF_LOG_LEVEL)
5940		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
5941	return 0;
5942}
5943
5944/* verify BPF_LD_IMM64 instruction */
5945static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
5946{
5947	struct bpf_insn_aux_data *aux = cur_aux(env);
5948	struct bpf_reg_state *regs = cur_regs(env);
5949	struct bpf_map *map;
5950	int err;
5951
5952	if (BPF_SIZE(insn->code) != BPF_DW) {
5953		verbose(env, "invalid BPF_LD_IMM insn\n");
5954		return -EINVAL;
5955	}
5956	if (insn->off != 0) {
5957		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
5958		return -EINVAL;
5959	}
5960
5961	err = check_reg_arg(env, insn->dst_reg, DST_OP);
5962	if (err)
5963		return err;
5964
5965	if (insn->src_reg == 0) {
5966		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
5967
5968		regs[insn->dst_reg].type = SCALAR_VALUE;
5969		__mark_reg_known(&regs[insn->dst_reg], imm);
5970		return 0;
5971	}
5972
5973	map = env->used_maps[aux->map_index];
5974	mark_reg_known_zero(env, regs, insn->dst_reg);
5975	regs[insn->dst_reg].map_ptr = map;
5976
5977	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
5978		regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
5979		regs[insn->dst_reg].off = aux->map_off;
5980		if (map_value_has_spin_lock(map))
5981			regs[insn->dst_reg].id = ++env->id_gen;
5982	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
5983		regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
5984	} else {
5985		verbose(env, "bpf verifier is misconfigured\n");
5986		return -EINVAL;
5987	}
5988
5989	return 0;
5990}
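/* Illustrative encoding note: a BPF_LD_IMM64 with src_reg == 0 spans two
 * instructions; insn->imm holds the low 32 bits and (insn + 1)->imm the
 * high 32 bits. E.g. loading the constant 0x1122334455667788 uses
 * insn->imm == 0x55667788 and (insn + 1)->imm == 0x11223344, and the
 * destination register is then a known SCALAR_VALUE as marked above.
 */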
5991
5992static bool may_access_skb(enum bpf_prog_type type)
5993{
5994	switch (type) {
5995	case BPF_PROG_TYPE_SOCKET_FILTER:
5996	case BPF_PROG_TYPE_SCHED_CLS:
5997	case BPF_PROG_TYPE_SCHED_ACT:
5998		return true;
5999	default:
6000		return false;
6001	}
6002}
6003
6004/* verify safety of LD_ABS|LD_IND instructions:
6005 * - they can only appear in the programs where ctx == skb
6006 * - since they are wrappers of function calls, they scratch R1-R5 registers,
6007 *   preserve R6-R9, and store return value into R0
6008 *
6009 * Implicit input:
6010 *   ctx == skb == R6 == CTX
6011 *
6012 * Explicit input:
6013 *   SRC == any register
6014 *   IMM == 32-bit immediate
6015 *
6016 * Output:
6017 *   R0 - 8/16/32-bit skb data converted to cpu endianness
6018 */
6019static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
6020{
6021	struct bpf_reg_state *regs = cur_regs(env);
6022	u8 mode = BPF_MODE(insn->code);
6023	int i, err;
6024
6025	if (!may_access_skb(env->prog->type)) {
6026		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
6027		return -EINVAL;
6028	}
6029
6030	if (!env->ops->gen_ld_abs) {
6031		verbose(env, "bpf verifier is misconfigured\n");
6032		return -EINVAL;
6033	}
6034
6035	if (env->subprog_cnt > 1) {
6036		/* when a program has an LD_ABS insn, JITs and the interpreter
6037		 * assume that r1 == ctx == skb, which is not the case for
6038		 * callees that can have arbitrary arguments. It's problematic
6039		 * for the main prog as well, since JITs would need to analyze
6040		 * all functions in order to make proper register save/restore
6041		 * decisions in the main prog. Hence disallow LD_ABS with calls.
6042		 */
6043		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
6044		return -EINVAL;
6045	}
6046
6047	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
6048	    BPF_SIZE(insn->code) == BPF_DW ||
6049	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
6050		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
6051		return -EINVAL;
6052	}
6053
6054	/* check whether implicit source operand (register R6) is readable */
6055	err = check_reg_arg(env, BPF_REG_6, SRC_OP);
6056	if (err)
6057		return err;
6058
6059	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
6060	 * gen_ld_abs() may terminate the program at runtime, leading to
6061	 * reference leak.
6062	 */
6063	err = check_reference_leak(env);
6064	if (err) {
6065		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
6066		return err;
6067	}
6068
6069	if (env->cur_state->active_spin_lock) {
6070		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
6071		return -EINVAL;
6072	}
6073
6074	if (regs[BPF_REG_6].type != PTR_TO_CTX) {
6075		verbose(env,
6076			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
6077		return -EINVAL;
6078	}
6079
6080	if (mode == BPF_IND) {
6081		/* check explicit source operand */
6082		err = check_reg_arg(env, insn->src_reg, SRC_OP);
6083		if (err)
6084			return err;
6085	}
6086
6087	/* reset caller saved regs to unreadable */
6088	for (i = 0; i < CALLER_SAVED_REGS; i++) {
6089		mark_reg_not_init(env, regs, caller_saved[i]);
6090		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
6091	}
6092
6093	/* mark destination R0 register as readable, since it contains
6094	 * the value fetched from the packet.
6095	 * Already marked as written above.
6096	 */
6097	mark_reg_unknown(env, regs, BPF_REG_0);
6098	/* ld_abs loads up to 32 bits of skb data. */
6099	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
6100	return 0;
6101}
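/* Illustrative use (hypothetical skb layout): with r6 holding the ctx skb,
 * BPF_LD | BPF_ABS | BPF_H with imm == 12 ("r0 = *(u16 *)skb[12]") would
 * fetch the EtherType of a frame whose data starts at the Ethernet header;
 * per the rules above r1-r5 are clobbered and r0 ends up as an unknown
 * scalar holding at most 32 bits of skb data.
 */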
6102
6103static int check_return_code(struct bpf_verifier_env *env)
6104{
6105	struct tnum enforce_attach_type_range = tnum_unknown;
6106	struct bpf_reg_state *reg;
6107	struct tnum range = tnum_range(0, 1);
6108
6109	switch (env->prog->type) {
6110	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
6111		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
6112		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
6113			range = tnum_range(1, 1);
6114		break;
6115	case BPF_PROG_TYPE_CGROUP_SKB:
6116		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
6117			range = tnum_range(0, 3);
6118			enforce_attach_type_range = tnum_range(2, 3);
6119		}
6120		break;
6121	case BPF_PROG_TYPE_CGROUP_SOCK:
6122	case BPF_PROG_TYPE_SOCK_OPS:
6123	case BPF_PROG_TYPE_CGROUP_DEVICE:
6124	case BPF_PROG_TYPE_CGROUP_SYSCTL:
6125	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
6126		break;
6127	default:
6128		return 0;
6129	}
6130
6131	reg = cur_regs(env) + BPF_REG_0;
6132	if (reg->type != SCALAR_VALUE) {
6133		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
6134			reg_type_str[reg->type]);
6135		return -EINVAL;
6136	}
6137
6138	if (!tnum_in(range, reg->var_off)) {
6139		char tn_buf[48];
6140
6141		verbose(env, "At program exit the register R0 ");
6142		if (!tnum_is_unknown(reg->var_off)) {
6143			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6144			verbose(env, "has value %s", tn_buf);
6145		} else {
6146			verbose(env, "has unknown scalar value");
6147		}
6148		tnum_strn(tn_buf, sizeof(tn_buf), range);
6149		verbose(env, " should have been in %s\n", tn_buf);
6150		return -EINVAL;
6151	}
6152
6153	if (!tnum_is_unknown(enforce_attach_type_range) &&
6154	    tnum_in(enforce_attach_type_range, reg->var_off))
6155		env->prog->enforce_expected_attach_type = 1;
6156	return 0;
6157}
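/* Illustrative outcome (hypothetical program): a BPF_PROG_TYPE_CGROUP_SKB
 * program attached anywhere but egress must make R0 provably sit in [0, 1]
 * at exit, so ending with "r0 = 2; exit" is rejected with the
 * "At program exit ..." message above, while the same exit value is
 * accepted for BPF_CGROUP_INET_EGRESS, where the allowed range is [0, 3].
 */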
6158
6159/* non-recursive DFS pseudo code
6160 * 1  procedure DFS-iterative(G,v):
6161 * 2      label v as discovered
6162 * 3      let S be a stack
6163 * 4      S.push(v)
6164 * 5      while S is not empty
6165 * 6            t <- S.pop()
6166 * 7            if t is what we're looking for:
6167 * 8                return t
6168 * 9            for all edges e in G.adjacentEdges(t) do
6169 * 10               if edge e is already labelled
6170 * 11                   continue with the next edge
6171 * 12               w <- G.adjacentVertex(t,e)
6172 * 13               if vertex w is not discovered and not explored
6173 * 14                   label e as tree-edge
6174 * 15                   label w as discovered
6175 * 16                   S.push(w)
6176 * 17                   continue at 5
6177 * 18               else if vertex w is discovered
6178 * 19                   label e as back-edge
6179 * 20               else
6180 * 21                   // vertex w is explored
6181 * 22                   label e as forward- or cross-edge
6182 * 23           label t as explored
6183 * 24           S.pop()
6184 *
6185 * convention:
6186 * 0x10 - discovered
6187 * 0x11 - discovered and fall-through edge labelled
6188 * 0x12 - discovered and fall-through and branch edges labelled
6189 * 0x20 - explored
6190 */
6191
6192enum {
6193	DISCOVERED = 0x10,
6194	EXPLORED = 0x20,
6195	FALLTHROUGH = 1,
6196	BRANCH = 2,
6197};
6198
6199static u32 state_htab_size(struct bpf_verifier_env *env)
6200{
6201	return env->prog->len;
6202}
6203
6204static struct bpf_verifier_state_list **explored_state(
6205					struct bpf_verifier_env *env,
6206					int idx)
6207{
6208	struct bpf_verifier_state *cur = env->cur_state;
6209	struct bpf_func_state *state = cur->frame[cur->curframe];
6210
6211	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
6212}
6213
6214static void init_explored_state(struct bpf_verifier_env *env, int idx)
6215{
6216	env->insn_aux_data[idx].prune_point = true;
6217}
6218
6219/* t, w, e - match pseudo-code above:
6220 * t - index of current instruction
6221 * w - next instruction
6222 * e - edge
6223 */
6224static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
6225		     bool loop_ok)
6226{
6227	int *insn_stack = env->cfg.insn_stack;
6228	int *insn_state = env->cfg.insn_state;
6229
6230	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
6231		return 0;
6232
6233	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
6234		return 0;
6235
6236	if (w < 0 || w >= env->prog->len) {
6237		verbose_linfo(env, t, "%d: ", t);
6238		verbose(env, "jump out of range from insn %d to %d\n", t, w);
6239		return -EINVAL;
6240	}
6241
6242	if (e == BRANCH)
6243		/* mark branch target for state pruning */
6244		init_explored_state(env, w);
6245
6246	if (insn_state[w] == 0) {
6247		/* tree-edge */
6248		insn_state[t] = DISCOVERED | e;
6249		insn_state[w] = DISCOVERED;
6250		if (env->cfg.cur_stack >= env->prog->len)
6251			return -E2BIG;
6252		insn_stack[env->cfg.cur_stack++] = w;
6253		return 1;
6254	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
6255		if (loop_ok && env->allow_ptr_leaks)
6256			return 0;
6257		verbose_linfo(env, t, "%d: ", t);
6258		verbose_linfo(env, w, "%d: ", w);
6259		verbose(env, "back-edge from insn %d to %d\n", t, w);
6260		return -EINVAL;
6261	} else if (insn_state[w] == EXPLORED) {
6262		/* forward- or cross-edge */
6263		insn_state[t] = DISCOVERED | e;
6264	} else {
6265		verbose(env, "insn state internal bug\n");
6266		return -EFAULT;
6267	}
6268	return 0;
6269}
6270
6271/* non-recursive depth-first-search to detect loops in BPF program
6272 * loop == back-edge in directed graph
6273 */
6274static int check_cfg(struct bpf_verifier_env *env)
6275{
6276	struct bpf_insn *insns = env->prog->insnsi;
6277	int insn_cnt = env->prog->len;
6278	int *insn_stack, *insn_state;
6279	int ret = 0;
6280	int i, t;
6281
6282	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
6283	if (!insn_state)
6284		return -ENOMEM;
6285
6286	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
6287	if (!insn_stack) {
6288		kvfree(insn_state);
6289		return -ENOMEM;
6290	}
6291
6292	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
6293	insn_stack[0] = 0; /* 0 is the first instruction */
6294	env->cfg.cur_stack = 1;
6295
6296peek_stack:
6297	if (env->cfg.cur_stack == 0)
6298		goto check_state;
6299	t = insn_stack[env->cfg.cur_stack - 1];
6300
6301	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
6302	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
6303		u8 opcode = BPF_OP(insns[t].code);
6304
6305		if (opcode == BPF_EXIT) {
6306			goto mark_explored;
6307		} else if (opcode == BPF_CALL) {
6308			ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
6309			if (ret == 1)
6310				goto peek_stack;
6311			else if (ret < 0)
6312				goto err_free;
6313			if (t + 1 < insn_cnt)
6314				init_explored_state(env, t + 1);
6315			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
6316				init_explored_state(env, t);
6317				ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
6318						env, false);
6319				if (ret == 1)
6320					goto peek_stack;
6321				else if (ret < 0)
6322					goto err_free;
6323			}
6324		} else if (opcode == BPF_JA) {
6325			if (BPF_SRC(insns[t].code) != BPF_K) {
6326				ret = -EINVAL;
6327				goto err_free;
6328			}
6329			/* unconditional jump with single edge */
6330			ret = push_insn(t, t + insns[t].off + 1,
6331					FALLTHROUGH, env, true);
6332			if (ret == 1)
6333				goto peek_stack;
6334			else if (ret < 0)
6335				goto err_free;
6336			/* unconditional jmp is not a good pruning point,
6337			 * but it's marked, since backtracking needs
6338			 * to record jmp history in is_state_visited().
6339			 */
6340			init_explored_state(env, t + insns[t].off + 1);
6341			/* tell verifier to check for equivalent states
6342			 * after every call and jump
6343			 */
6344			if (t + 1 < insn_cnt)
6345				init_explored_state(env, t + 1);
6346		} else {
6347			/* conditional jump with two edges */
6348			init_explored_state(env, t);
6349			ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
6350			if (ret == 1)
6351				goto peek_stack;
6352			else if (ret < 0)
6353				goto err_free;
6354
6355			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
6356			if (ret == 1)
6357				goto peek_stack;
6358			else if (ret < 0)
6359				goto err_free;
6360		}
6361	} else {
6362		/* all other non-branch instructions with single
6363		 * fall-through edge
6364		 */
6365		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
6366		if (ret == 1)
6367			goto peek_stack;
6368		else if (ret < 0)
6369			goto err_free;
6370	}
6371
6372mark_explored:
6373	insn_state[t] = EXPLORED;
6374	if (env->cfg.cur_stack-- <= 0) {
6375		verbose(env, "pop stack internal bug\n");
6376		ret = -EFAULT;
6377		goto err_free;
6378	}
6379	goto peek_stack;
6380
6381check_state:
6382	for (i = 0; i < insn_cnt; i++) {
6383		if (insn_state[i] != EXPLORED) {
6384			verbose(env, "unreachable insn %d\n", i);
6385			ret = -EINVAL;
6386			goto err_free;
6387		}
6388	}
6389	ret = 0; /* cfg looks good */
6390
6391err_free:
6392	kvfree(insn_state);
6393	kvfree(insn_stack);
6394	env->cfg.insn_state = env->cfg.insn_stack = NULL;
6395	return ret;
6396}
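/* Illustrative rejection (hypothetical, unprivileged program): the sequence
 *
 *   0: r0 = 0
 *   1: if r0 == 0 goto pc-2   ; target is insn 0
 *   2: exit
 *
 * makes push_insn() see insn 0 in DISCOVERED state again and, without
 * env->allow_ptr_leaks, check_cfg() fails with "back-edge from insn 1 to 0".
 */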
6397
6398/* The minimum supported BTF func info size */
6399#define MIN_BPF_FUNCINFO_SIZE	8
6400#define MAX_FUNCINFO_REC_SIZE	252
6401
6402static int check_btf_func(struct bpf_verifier_env *env,
6403			  const union bpf_attr *attr,
6404			  union bpf_attr __user *uattr)
6405{
6406	u32 i, nfuncs, urec_size, min_size;
6407	u32 krec_size = sizeof(struct bpf_func_info);
6408	struct bpf_func_info *krecord;
6409	const struct btf_type *type;
6410	struct bpf_prog *prog;
6411	const struct btf *btf;
6412	void __user *urecord;
6413	u32 prev_offset = 0;
6414	int ret = 0;
6415
6416	nfuncs = attr->func_info_cnt;
6417	if (!nfuncs)
6418		return 0;
6419
6420	if (nfuncs != env->subprog_cnt) {
6421		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
6422		return -EINVAL;
6423	}
6424
6425	urec_size = attr->func_info_rec_size;
6426	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
6427	    urec_size > MAX_FUNCINFO_REC_SIZE ||
6428	    urec_size % sizeof(u32)) {
6429		verbose(env, "invalid func info rec size %u\n", urec_size);
6430		return -EINVAL;
6431	}
6432
6433	prog = env->prog;
6434	btf = prog->aux->btf;
6435
6436	urecord = u64_to_user_ptr(attr->func_info);
6437	min_size = min_t(u32, krec_size, urec_size);
6438
6439	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
6440	if (!krecord)
6441		return -ENOMEM;
6442
6443	for (i = 0; i < nfuncs; i++) {
6444		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
6445		if (ret) {
6446			if (ret == -E2BIG) {
6447				verbose(env, "nonzero trailing record in func info");
6448				/* set the size the kernel expects so the loader can
6449				 * zero out the rest of the record.
6450				 */
6451				if (put_user(min_size, &uattr->func_info_rec_size))
6452					ret = -EFAULT;
6453			}
6454			goto err_free;
6455		}
6456
6457		if (copy_from_user(&krecord[i], urecord, min_size)) {
6458			ret = -EFAULT;
6459			goto err_free;
6460		}
6461
6462		/* check insn_off */
6463		if (i == 0) {
6464			if (krecord[i].insn_off) {
6465				verbose(env,
6466					"nonzero insn_off %u for the first func info record",
6467					krecord[i].insn_off);
6468				ret = -EINVAL;
6469				goto err_free;
6470			}
6471		} else if (krecord[i].insn_off <= prev_offset) {
6472			verbose(env,
6473				"same or smaller insn offset (%u) than previous func info record (%u)",
6474				krecord[i].insn_off, prev_offset);
6475			ret = -EINVAL;
6476			goto err_free;
6477		}
6478
6479		if (env->subprog_info[i].start != krecord[i].insn_off) {
6480			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
6481			ret = -EINVAL;
6482			goto err_free;
6483		}
6484
6485		/* check type_id */
6486		type = btf_type_by_id(btf, krecord[i].type_id);
6487		if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC) {
6488			verbose(env, "invalid type id %d in func info",
6489				krecord[i].type_id);
6490			ret = -EINVAL;
6491			goto err_free;
6492		}
6493
6494		prev_offset = krecord[i].insn_off;
6495		urecord += urec_size;
6496	}
6497
6498	prog->aux->func_info = krecord;
6499	prog->aux->func_info_cnt = nfuncs;
6500	return 0;
6501
6502err_free:
6503	kvfree(krecord);
6504	return ret;
6505}
6506
6507static void adjust_btf_func(struct bpf_verifier_env *env)
6508{
6509	int i;
6510
6511	if (!env->prog->aux->func_info)
6512		return;
6513
6514	for (i = 0; i < env->subprog_cnt; i++)
6515		env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start;
6516}
6517
6518#define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
6519		sizeof(((struct bpf_line_info *)(0))->line_col))
6520#define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
6521
6522static int check_btf_line(struct bpf_verifier_env *env,
6523			  const union bpf_attr *attr,
6524			  union bpf_attr __user *uattr)
6525{
6526	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
6527	struct bpf_subprog_info *sub;
6528	struct bpf_line_info *linfo;
6529	struct bpf_prog *prog;
6530	const struct btf *btf;
6531	void __user *ulinfo;
6532	int err;
6533
6534	nr_linfo = attr->line_info_cnt;
6535	if (!nr_linfo)
6536		return 0;
6537
6538	rec_size = attr->line_info_rec_size;
6539	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
6540	    rec_size > MAX_LINEINFO_REC_SIZE ||
6541	    rec_size & (sizeof(u32) - 1))
6542		return -EINVAL;
6543
6544	/* Need to zero it in case userspace passes
6545	 * in a smaller bpf_line_info object.
6546	 */
6547	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
6548			 GFP_KERNEL | __GFP_NOWARN);
6549	if (!linfo)
6550		return -ENOMEM;
6551
6552	prog = env->prog;
6553	btf = prog->aux->btf;
6554
6555	s = 0;
6556	sub = env->subprog_info;
6557	ulinfo = u64_to_user_ptr(attr->line_info);
6558	expected_size = sizeof(struct bpf_line_info);
6559	ncopy = min_t(u32, expected_size, rec_size);
6560	for (i = 0; i < nr_linfo; i++) {
6561		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
6562		if (err) {
6563			if (err == -E2BIG) {
6564				verbose(env, "nonzero trailing record in line_info");
6565				if (put_user(expected_size,
6566					     &uattr->line_info_rec_size))
6567					err = -EFAULT;
6568			}
6569			goto err_free;
6570		}
6571
6572		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
6573			err = -EFAULT;
6574			goto err_free;
6575		}
6576
6577		/*
6578		 * Check insn_off to ensure
6579		 * 1) strictly increasing AND
6580		 * 2) bounded by prog->len
6581		 *
6582		 * The linfo[0].insn_off == 0 check logically falls into
6583		 * the later "missing bpf_line_info for func..." case
6584		 * because the first linfo[0].insn_off must also belong
6585		 * to the first sub, and the first sub must have
6586		 * subprog_info[0].start == 0.
6587		 */
6588		if ((i && linfo[i].insn_off <= prev_offset) ||
6589		    linfo[i].insn_off >= prog->len) {
6590			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
6591				i, linfo[i].insn_off, prev_offset,
6592				prog->len);
6593			err = -EINVAL;
6594			goto err_free;
6595		}
6596
6597		if (!prog->insnsi[linfo[i].insn_off].code) {
6598			verbose(env,
6599				"Invalid insn code at line_info[%u].insn_off\n",
6600				i);
6601			err = -EINVAL;
6602			goto err_free;
6603		}
6604
6605		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
6606		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
6607			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
6608			err = -EINVAL;
6609			goto err_free;
6610		}
6611
6612		if (s != env->subprog_cnt) {
6613			if (linfo[i].insn_off == sub[s].start) {
6614				sub[s].linfo_idx = i;
6615				s++;
6616			} else if (sub[s].start < linfo[i].insn_off) {
6617				verbose(env, "missing bpf_line_info for func#%u\n", s);
6618				err = -EINVAL;
6619				goto err_free;
6620			}
6621		}
6622
6623		prev_offset = linfo[i].insn_off;
6624		ulinfo += rec_size;
6625	}
6626
6627	if (s != env->subprog_cnt) {
6628		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
6629			env->subprog_cnt - s, s);
6630		err = -EINVAL;
6631		goto err_free;
6632	}
6633
6634	prog->aux->linfo = linfo;
6635	prog->aux->nr_linfo = nr_linfo;
6636
6637	return 0;
6638
6639err_free:
6640	kvfree(linfo);
6641	return err;
6642}
6643
6644static int check_btf_info(struct bpf_verifier_env *env,
6645			  const union bpf_attr *attr,
6646			  union bpf_attr __user *uattr)
6647{
6648	struct btf *btf;
6649	int err;
6650
6651	if (!attr->func_info_cnt && !attr->line_info_cnt)
6652		return 0;
6653
6654	btf = btf_get_by_fd(attr->prog_btf_fd);
6655	if (IS_ERR(btf))
6656		return PTR_ERR(btf);
6657	env->prog->aux->btf = btf;
6658
6659	err = check_btf_func(env, attr, uattr);
6660	if (err)
6661		return err;
6662
6663	err = check_btf_line(env, attr, uattr);
6664	if (err)
6665		return err;
6666
6667	return 0;
6668}
6669
6670/* check %cur's range satisfies %old's */
6671static bool range_within(struct bpf_reg_state *old,
6672			 struct bpf_reg_state *cur)
6673{
6674	return old->umin_value <= cur->umin_value &&
6675	       old->umax_value >= cur->umax_value &&
6676	       old->smin_value <= cur->smin_value &&
6677	       old->smax_value >= cur->smax_value;
6678}
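/* Illustrative check: an old (already verified) register range of [0, 100]
 * is satisfied by a current range of [10, 50] (range_within() returns true),
 * but not by [10, 200], since the current state could then reach values the
 * old state was never verified against.
 */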
6679
6680/* Maximum number of register states that can exist at once */
6681#define ID_MAP_SIZE	(MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
6682struct idpair {
6683	u32 old;
6684	u32 cur;
6685};
6686
6687/* If in the old state two registers had the same id, then they need to have
6688 * the same id in the new state as well.  But that id could be different from
6689 * the old state, so we need to track the mapping from old to new ids.
6690 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
6691 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
6692 * regs with a different old id could still have new id 9; we don't care about
6693 * that.
6694 * So we look through our idmap to see if this old id has been seen before.  If
6695 * so, we require the new id to match; otherwise, we add the id pair to the map.
6696 */
6697static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap)
6698{
6699	unsigned int i;
6700
6701	for (i = 0; i < ID_MAP_SIZE; i++) {
6702		if (!idmap[i].old) {
6703			/* Reached an empty slot; haven't seen this id before */
6704			idmap[i].old = old_id;
6705			idmap[i].cur = cur_id;
6706			return true;
6707		}
6708		if (idmap[i].old == old_id)
6709			return idmap[i].cur == cur_id;
6710	}
6711	/* We ran out of idmap slots, which should be impossible */
6712	WARN_ON_ONCE(1);
6713	return false;
6714}
6715
6716static void clean_func_state(struct bpf_verifier_env *env,
6717			     struct bpf_func_state *st)
6718{
6719	enum bpf_reg_liveness live;
6720	int i, j;
6721
6722	for (i = 0; i < BPF_REG_FP; i++) {
6723		live = st->regs[i].live;
6724		/* liveness must not touch this register anymore */
6725		st->regs[i].live |= REG_LIVE_DONE;
6726		if (!(live & REG_LIVE_READ))
6727			/* since the register is unused, clear its state
6728			 * to make further comparison simpler
6729			 */
6730			__mark_reg_not_init(&st->regs[i]);
6731	}
6732
6733	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
6734		live = st->stack[i].spilled_ptr.live;
6735		/* liveness must not touch this stack slot anymore */
6736		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
6737		if (!(live & REG_LIVE_READ)) {
6738			__mark_reg_not_init(&st->stack[i].spilled_ptr);
6739			for (j = 0; j < BPF_REG_SIZE; j++)
6740				st->stack[i].slot_type[j] = STACK_INVALID;
6741		}
6742	}
6743}
6744
6745static void clean_verifier_state(struct bpf_verifier_env *env,
6746				 struct bpf_verifier_state *st)
6747{
6748	int i;
6749
6750	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
6751		/* all regs in this state in all frames were already marked */
6752		return;
6753
6754	for (i = 0; i <= st->curframe; i++)
6755		clean_func_state(env, st->frame[i]);
6756}
6757
6758/* the parentage chains form a tree.
6759 * the verifier states are added to state lists at given insn and
6760 * pushed into state stack for future exploration.
6761 * when the verifier reaches a bpf_exit insn, some of the verifier states
6762 * stored in the state lists have their final liveness state already,
6763 * but a lot of states will get revised from liveness point of view when
6764 * the verifier explores other branches.
6765 * Example:
6766 * 1: r0 = 1
6767 * 2: if r1 == 100 goto pc+1
6768 * 3: r0 = 2
6769 * 4: exit
6770 * when the verifier reaches exit insn the register r0 in the state list of
6771 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
6772 * of insn 2 and goes exploring further. At the insn 4 it will walk the
6773 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
6774 *
6775 * Since the verifier pushes the branch states as it sees them while exploring
6776 * the program, the condition of walking the branch instruction for the second
6777 * time means that all states below this branch were already explored and
6778 * their final liveness marks are already propagated.
6779 * Hence when the verifier completes the search of state list in is_state_visited()
6780 * we can call this clean_live_states() function to mark all liveness states
6781 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
6782 * will not be used.
6783 * This function also clears the registers and stack for states that !READ
6784 * to simplify state merging.
6785 *
6786 * Important note here: walking the same branch instruction in the callee
6787 * doesn't mean that the states are DONE. The verifier has to compare
6788 * the callsites as well.
6789 */
6790static void clean_live_states(struct bpf_verifier_env *env, int insn,
6791			      struct bpf_verifier_state *cur)
6792{
6793	struct bpf_verifier_state_list *sl;
6794	int i;
6795
6796	sl = *explored_state(env, insn);
6797	while (sl) {
6798		if (sl->state.branches)
6799			goto next;
6800		if (sl->state.insn_idx != insn ||
6801		    sl->state.curframe != cur->curframe)
6802			goto next;
6803		for (i = 0; i <= cur->curframe; i++)
6804			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
6805				goto next;
6806		clean_verifier_state(env, &sl->state);
6807next:
6808		sl = sl->next;
6809	}
6810}
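
/* Illustrative sketch (hypothetical, not referenced by the verifier): the
 * four-insn example from the comment above, written with the insn macros
 * from <linux/filter.h>.  At insn 2 the state list entry initially sees r0
 * as !REG_LIVE_READ; only when the other branch reaches 'exit' does the
 * read mark propagate up the parentage chain into that entry.
 */
static const struct bpf_insn liveness_example[] __maybe_unused = {
	BPF_MOV64_IMM(BPF_REG_0, 1),			/* 1: r0 = 1                 */
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 100, 1),	/* 2: if r1 == 100 goto pc+1 */
	BPF_MOV64_IMM(BPF_REG_0, 2),			/* 3: r0 = 2                 */
	BPF_EXIT_INSN(),				/* 4: exit (reads r0)        */
};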
6811
6812/* Returns true if (rold safe implies rcur safe) */
6813static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
6814		    struct idpair *idmap)
6815{
6816	bool equal;
6817
6818	if (!(rold->live & REG_LIVE_READ))
6819		/* explored state didn't use this */
6820		return true;
6821
6822	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
6823
6824	if (rold->type == PTR_TO_STACK)
6825		/* two stack pointers are equal only if they're pointing to
6826		 * the same stack frame, since fp-8 in foo != fp-8 in bar
6827		 */
6828		return equal && rold->frameno == rcur->frameno;
6829
6830	if (equal)
6831		return true;
6832
6833	if (rold->type == NOT_INIT)
6834		/* explored state can't have used this */
6835		return true;
6836	if (rcur->type == NOT_INIT)
6837		return false;
6838	switch (rold->type) {
6839	case SCALAR_VALUE:
6840		if (rcur->type == SCALAR_VALUE) {
6841			if (!rold->precise && !rcur->precise)
6842				return true;
6843			/* new val must satisfy old val knowledge */
6844			return range_within(rold, rcur) &&
6845			       tnum_in(rold->var_off, rcur->var_off);
6846		} else {
6847			/* We're trying to use a pointer in place of a scalar.
6848			 * Even if the scalar was unbounded, this could lead to
6849			 * pointer leaks because scalars are allowed to leak
6850			 * while pointers are not. We could make this safe in
6851			 * special cases if root is calling us, but it's
6852			 * probably not worth the hassle.
6853			 */
6854			return false;
6855		}
6856	case PTR_TO_MAP_VALUE:
6857		/* If the new min/max/var_off satisfy the old ones and
6858		 * everything else matches, we are OK.
6859		 * 'id' is not compared, since it's only used for maps with
6860		 * bpf_spin_lock inside map element and in such cases if
6861		 * the rest of the prog is valid for one map element then
6862		 * it's valid for all map elements regardless of the key
6863		 * used in bpf_map_lookup()
6864		 */
6865		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
6866		       range_within(rold, rcur) &&
6867		       tnum_in(rold->var_off, rcur->var_off);
6868	case PTR_TO_MAP_VALUE_OR_NULL:
6869		/* a PTR_TO_MAP_VALUE could be safe to use as a
6870		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
6871		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
6872		 * checked, doing so could have affected others with the same
6873		 * id, and we can't check for that because we lost the id when
6874		 * we converted to a PTR_TO_MAP_VALUE.
6875		 */
6876		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
6877			return false;
6878		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
6879			return false;
6880		/* Check our ids match any regs they're supposed to */
6881		return check_ids(rold->id, rcur->id, idmap);
6882	case PTR_TO_PACKET_META:
6883	case PTR_TO_PACKET:
6884		if (rcur->type != rold->type)
6885			return false;
6886		/* We must have at least as much range as the old ptr
6887		 * did, so that any accesses which were safe before are
6888		 * still safe.  This is true even if old range < old off,
6889		 * since someone could have accessed through (ptr - k), or
6890		 * even done ptr -= k in a register, to get a safe access.
6891		 */
6892		if (rold->range > rcur->range)
6893			return false;
6894		/* If the offsets don't match, we can't trust our alignment;
6895		 * nor can we be sure that we won't fall out of range.
6896		 */
6897		if (rold->off != rcur->off)
6898			return false;
6899		/* id relations must be preserved */
6900		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
6901			return false;
6902		/* new val must satisfy old val knowledge */
6903		return range_within(rold, rcur) &&
6904		       tnum_in(rold->var_off, rcur->var_off);
6905	case PTR_TO_CTX:
6906	case CONST_PTR_TO_MAP:
6907	case PTR_TO_PACKET_END:
6908	case PTR_TO_FLOW_KEYS:
6909	case PTR_TO_SOCKET:
6910	case PTR_TO_SOCKET_OR_NULL:
6911	case PTR_TO_SOCK_COMMON:
6912	case PTR_TO_SOCK_COMMON_OR_NULL:
6913	case PTR_TO_TCP_SOCK:
6914	case PTR_TO_TCP_SOCK_OR_NULL:
6915	case PTR_TO_XDP_SOCK:
6916		/* Only valid matches are exact, which memcmp() above
6917		 * would have accepted
6918		 */
6919	default:
6920		/* Don't know what's going on, just say it's not safe */
6921		return false;
6922	}
6923
6924	/* Shouldn't get here; if we do, say it's not safe */
6925	WARN_ON_ONCE(1);
6926	return false;
6927}
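
/* Illustrative sketch (hypothetical helper, never called): the SCALAR_VALUE
 * rule in regsafe() requires the current range to be contained in the
 * explored one.  An explored r3 in [0, 100] covers a current r3 in [10, 20],
 * so this returns true; a current range of [50, 200] would not be covered.
 */
static bool __maybe_unused scalar_regsafe_example(void)
{
	struct bpf_reg_state rold = {}, rcur = {};

	rold.type = SCALAR_VALUE;
	rold.umin_value = 0;
	rold.umax_value = 100;
	rold.smin_value = 0;
	rold.smax_value = 100;
	rold.var_off = tnum_range(0, 100);

	rcur = rold;
	rcur.umin_value = 10;
	rcur.umax_value = 20;
	rcur.smin_value = 10;
	rcur.smax_value = 20;
	rcur.var_off = tnum_range(10, 20);

	/* same checks regsafe() applies to two precise scalars */
	return range_within(&rold, &rcur) &&
	       tnum_in(rold.var_off, rcur.var_off);
}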
6928
6929static bool stacksafe(struct bpf_func_state *old,
6930		      struct bpf_func_state *cur,
6931		      struct idpair *idmap)
6932{
6933	int i, spi;
6934
6935	/* walk slots of the explored stack and ignore any additional
6936	 * slots in the current stack, since explored(safe) state
6937	 * didn't use them
6938	 */
6939	for (i = 0; i < old->allocated_stack; i++) {
6940		spi = i / BPF_REG_SIZE;
6941
6942		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
6943			i += BPF_REG_SIZE - 1;
6944			/* explored state didn't use this */
6945			continue;
6946		}
6947
6948		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
6949			continue;
6950
6951		/* explored stack has more populated slots than current stack
6952		 * and these slots were used
6953		 */
6954		if (i >= cur->allocated_stack)
6955			return false;
6956
6957		/* if old state was safe with misc data in the stack
6958		 * it will be safe with zero-initialized stack.
6959		 * The opposite is not true
6960		 */
6961		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
6962		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
6963			continue;
6964		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
6965		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
6966			/* Ex: old explored (safe) state has STACK_SPILL in
6967			 * this stack slot, but current has STACK_MISC ->
6968			 * these verifier states are not equivalent,
6969			 * return false to continue verification of this path
6970			 */
6971			return false;
6972		if (i % BPF_REG_SIZE)
6973			continue;
6974		if (old->stack[spi].slot_type[0] != STACK_SPILL)
6975			continue;
6976		if (!regsafe(&old->stack[spi].spilled_ptr,
6977			     &cur->stack[spi].spilled_ptr,
6978			     idmap))
6979			/* when explored and current stack slot are both storing
6980			 * spilled registers, check that the stored pointer types
6981			 * are the same as well.
6982			 * Ex: explored safe path could have stored
6983			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
6984			 * but current path has stored:
6985			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
6986			 * such verifier states are not equivalent.
6987			 * return false to continue verification of this path
6988			 */
6989			return false;
6990	}
6991	return true;
6992}
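
/* Illustrative sketch (hypothetical): stack writes that produce the slot
 * types compared by stacksafe() above.  The 8-byte register spill at fp-8
 * makes that slot STACK_SPILL (its spilled_ptr is then compared with
 * regsafe()), while the 4-byte scalar store at fp-16 only marks those
 * bytes STACK_MISC.
 */
static const struct bpf_insn stack_slot_example[] __maybe_unused = {
	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),	/* spill r1 (ctx) to fp-8 */
	BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 7),		/* misc scalar at fp-16   */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};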
6993
6994static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
6995{
6996	if (old->acquired_refs != cur->acquired_refs)
6997		return false;
6998	return !memcmp(old->refs, cur->refs,
6999		       sizeof(*old->refs) * old->acquired_refs);
7000}
7001
7002/* compare two verifier states
7003 *
7004 * all states stored in state_list are known to be valid, since
7005 * verifier reached 'bpf_exit' instruction through them
7006 *
7007 * this function is called when the verifier explores different branches of
7008 * execution popped from the state stack. If it sees an old state that has
7009 * more strict register state and more strict stack state then this execution
7010 * branch doesn't need to be explored further, since verifier already
7011 * concluded that more strict state leads to valid finish.
7012 *
7013 * Therefore two states are equivalent if register state is more conservative
7014 * and explored stack state is more conservative than the current one.
7015 * Example:
7016 *       explored                   current
7017 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
7018 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
7019 *
7020 * In other words if current stack state (one being explored) has more
7021 * valid slots than old one that already passed validation, it means
7022 * the verifier can stop exploring and conclude that current state is valid too
7023 *
7024 * Similarly with registers. If explored state has register type as invalid
7025 * whereas register type in current state is meaningful, it means that
7026 * the current state will reach 'bpf_exit' instruction safely
7027 */
7028static bool func_states_equal(struct bpf_func_state *old,
7029			      struct bpf_func_state *cur)
7030{
7031	struct idpair *idmap;
7032	bool ret = false;
7033	int i;
7034
7035	idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL);
7036	/* If we failed to allocate the idmap, just say it's not safe */
7037	if (!idmap)
7038		return false;
7039
7040	for (i = 0; i < MAX_BPF_REG; i++) {
7041		if (!regsafe(&old->regs[i], &cur->regs[i], idmap))
7042			goto out_free;
7043	}
7044
7045	if (!stacksafe(old, cur, idmap))
7046		goto out_free;
7047
7048	if (!refsafe(old, cur))
7049		goto out_free;
7050	ret = true;
7051out_free:
7052	kfree(idmap);
7053	return ret;
7054}
7055
7056static bool states_equal(struct bpf_verifier_env *env,
7057			 struct bpf_verifier_state *old,
7058			 struct bpf_verifier_state *cur)
7059{
7060	int i;
7061
7062	if (old->curframe != cur->curframe)
7063		return false;
7064
7065	/* Verification state from speculative execution simulation
7066	 * must never prune a non-speculative execution one.
7067	 */
7068	if (old->speculative && !cur->speculative)
7069		return false;
7070
7071	if (old->active_spin_lock != cur->active_spin_lock)
7072		return false;
7073
7074	/* for states to be equal callsites have to be the same
7075	 * and all frame states need to be equivalent
7076	 */
7077	for (i = 0; i <= old->curframe; i++) {
7078		if (old->frame[i]->callsite != cur->frame[i]->callsite)
7079			return false;
7080		if (!func_states_equal(old->frame[i], cur->frame[i]))
7081			return false;
7082	}
7083	return true;
7084}
7085
7086/* Return 0 if no propagation happened. Return negative error code if error
7087 * happened. Otherwise, return the propagated bit.
7088 */
7089static int propagate_liveness_reg(struct bpf_verifier_env *env,
7090				  struct bpf_reg_state *reg,
7091				  struct bpf_reg_state *parent_reg)
7092{
7093	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
7094	u8 flag = reg->live & REG_LIVE_READ;
7095	int err;
7096
7097	/* When we get here, the read flags of PARENT_REG or REG could be any of
7098	 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
7099	 * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
7100	 */
7101	if (parent_flag == REG_LIVE_READ64 ||
7102	    /* Or if there is no read flag from REG. */
7103	    !flag ||
7104	    /* Or if the read flag from REG is the same as PARENT_REG. */
7105	    parent_flag == flag)
7106		return 0;
7107
7108	err = mark_reg_read(env, reg, parent_reg, flag);
7109	if (err)
7110		return err;
7111
7112	return flag;
7113}
7114
7115/* A write screens off any subsequent reads; but write marks come from the
7116 * straight-line code between a state and its parent.  When we arrive at an
7117 * equivalent state (jump target or such) we didn't arrive by the straight-line
7118 * code, so read marks in the state must propagate to the parent regardless
7119 * of the state's write marks. That's what 'parent == state->parent' comparison
7120 * in mark_reg_read() is for.
7121 */
7122static int propagate_liveness(struct bpf_verifier_env *env,
7123			      const struct bpf_verifier_state *vstate,
7124			      struct bpf_verifier_state *vparent)
7125{
7126	struct bpf_reg_state *state_reg, *parent_reg;
7127	struct bpf_func_state *state, *parent;
7128	int i, frame, err = 0;
7129
7130	if (vparent->curframe != vstate->curframe) {
7131		WARN(1, "propagate_live: parent frame %d current frame %d\n",
7132		     vparent->curframe, vstate->curframe);
7133		return -EFAULT;
7134	}
7135	/* Propagate read liveness of registers... */
7136	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
7137	for (frame = 0; frame <= vstate->curframe; frame++) {
7138		parent = vparent->frame[frame];
7139		state = vstate->frame[frame];
7140		parent_reg = parent->regs;
7141		state_reg = state->regs;
7142		/* We don't need to worry about FP liveness, it's read-only */
7143		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
7144			err = propagate_liveness_reg(env, &state_reg[i],
7145						     &parent_reg[i]);
7146			if (err < 0)
7147				return err;
7148			if (err == REG_LIVE_READ64)
7149				mark_insn_zext(env, &parent_reg[i]);
7150		}
7151
7152		/* Propagate stack slots. */
7153		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
7154			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
7155			parent_reg = &parent->stack[i].spilled_ptr;
7156			state_reg = &state->stack[i].spilled_ptr;
7157			err = propagate_liveness_reg(env, state_reg,
7158						     parent_reg);
7159			if (err < 0)
7160				return err;
7161		}
7162	}
7163	return 0;
7164}
7165
7166/* find precise scalars in the previous equivalent state and
7167 * propagate them into the current state
7168 */
7169static int propagate_precision(struct bpf_verifier_env *env,
7170			       const struct bpf_verifier_state *old)
7171{
7172	struct bpf_reg_state *state_reg;
7173	struct bpf_func_state *state;
7174	int i, err = 0;
7175
7176	state = old->frame[old->curframe];
7177	state_reg = state->regs;
7178	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
7179		if (state_reg->type != SCALAR_VALUE ||
7180		    !state_reg->precise)
7181			continue;
7182		if (env->log.level & BPF_LOG_LEVEL2)
7183			verbose(env, "propagating r%d\n", i);
7184		err = mark_chain_precision(env, i);
7185		if (err < 0)
7186			return err;
7187	}
7188
7189	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
7190		if (state->stack[i].slot_type[0] != STACK_SPILL)
7191			continue;
7192		state_reg = &state->stack[i].spilled_ptr;
7193		if (state_reg->type != SCALAR_VALUE ||
7194		    !state_reg->precise)
7195			continue;
7196		if (env->log.level & BPF_LOG_LEVEL2)
7197			verbose(env, "propagating fp%d\n",
7198				(-i - 1) * BPF_REG_SIZE);
7199		err = mark_chain_precision_stack(env, i);
7200		if (err < 0)
7201			return err;
7202	}
7203	return 0;
7204}
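
/* Illustrative sketch (hypothetical): the kind of scalar that ends up with a
 * precise mark and is therefore propagated by the function above.  r2's
 * concrete value decides how the verifier's analysis proceeds, so it (and
 * whatever produced it) must be tracked precisely; r3's value never matters
 * and staying imprecise keeps state pruning effective.
 */
static const struct bpf_insn precision_example[] __maybe_unused = {
	BPF_MOV64_IMM(BPF_REG_2, 5),			/* r2 = 5 (needs precision)  */
	BPF_MOV64_IMM(BPF_REG_3, 42),			/* r3 = 42 (stays imprecise) */
	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 10, 1),		/* outcome known from r2     */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
	BPF_MOV64_IMM(BPF_REG_0, 1),
	BPF_EXIT_INSN(),
};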
7205
7206static bool states_maybe_looping(struct bpf_verifier_state *old,
7207				 struct bpf_verifier_state *cur)
7208{
7209	struct bpf_func_state *fold, *fcur;
7210	int i, fr = cur->curframe;
7211
7212	if (old->curframe != fr)
7213		return false;
7214
7215	fold = old->frame[fr];
7216	fcur = cur->frame[fr];
7217	for (i = 0; i < MAX_BPF_REG; i++)
7218		if (memcmp(&fold->regs[i], &fcur->regs[i],
7219			   offsetof(struct bpf_reg_state, parent)))
7220			return false;
7221	return true;
7222}
7223
7224
7225static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
7226{
7227	struct bpf_verifier_state_list *new_sl;
7228	struct bpf_verifier_state_list *sl, **pprev;
7229	struct bpf_verifier_state *cur = env->cur_state, *new;
7230	int i, j, err, states_cnt = 0;
7231	bool add_new_state = env->test_state_freq ? true : false;
7232
7233	cur->last_insn_idx = env->prev_insn_idx;
7234	if (!env->insn_aux_data[insn_idx].prune_point)
7235		/* this 'insn_idx' instruction wasn't marked, so we will not
7236		 * be doing state search here
7237		 */
7238		return 0;
7239
7240	/* bpf progs typically have a pruning point every 4 instructions
7241	 * http://vger.kernel.org/bpfconf2019.html#session-1
7242	 * Do not add new state for future pruning if the verifier hasn't seen
7243	 * at least 2 jumps and at least 8 instructions.
7244	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
7245	 * In tests that amounts to up to a 50% reduction in total verifier
7246	 * memory consumption and a 20% verifier time speedup.
7247	 */
7248	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
7249	    env->insn_processed - env->prev_insn_processed >= 8)
7250		add_new_state = true;
7251
7252	pprev = explored_state(env, insn_idx);
7253	sl = *pprev;
7254
7255	clean_live_states(env, insn_idx, cur);
7256
7257	while (sl) {
7258		states_cnt++;
7259		if (sl->state.insn_idx != insn_idx)
7260			goto next;
7261		if (sl->state.branches) {
7262			if (states_maybe_looping(&sl->state, cur) &&
7263			    states_equal(env, &sl->state, cur)) {
7264				verbose_linfo(env, insn_idx, "; ");
7265				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
7266				return -EINVAL;
7267			}
7268			/* if the verifier is processing a loop, avoid adding new state
7269			 * too often, since different loop iterations have distinct
7270			 * states and may not help future pruning.
7271			 * This threshold shouldn't be too low to make sure that
7272			 * a loop with large bound will be rejected quickly.
7273			 * The most abusive loop will be:
7274			 * r1 += 1
7275			 * if r1 < 1000000 goto pc-2
7276			 * 1M insn_processed limit / 100 == 10k peak states.
7277			 * This threshold shouldn't be too high either, since states
7278			 * at the end of the loop are likely to be useful in pruning.
7279			 */
7280			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
7281			    env->insn_processed - env->prev_insn_processed < 100)
7282				add_new_state = false;
7283			goto miss;
7284		}
7285		if (states_equal(env, &sl->state, cur)) {
7286			sl->hit_cnt++;
7287			/* reached equivalent register/stack state,
7288			 * prune the search.
7289			 * Registers read by the continuation are read by us.
7290			 * If we have any write marks in env->cur_state, they
7291			 * will prevent corresponding reads in the continuation
7292			 * from reaching our parent (an explored_state).  Our
7293			 * own state will get the read marks recorded, but
7294			 * they'll be immediately forgotten as we're pruning
7295			 * this state and will pop a new one.
7296			 */
7297			err = propagate_liveness(env, &sl->state, cur);
7298
7299			/* if previous state reached the exit with precision and
7300			 * current state is equivalent to it (except precision marks)
7301			 * the precision needs to be propagated back in
7302			 * the current state.
7303			 */
7304			err = err ? : push_jmp_history(env, cur);
7305			err = err ? : propagate_precision(env, &sl->state);
7306			if (err)
7307				return err;
7308			return 1;
7309		}
7310miss:
7311		/* when a new state is not going to be added, do not increase the miss count.
7312		 * Otherwise several loop iterations will remove the state
7313		 * recorded earlier. The goal of these heuristics is to have
7314		 * states from some iterations of the loop (some in the beginning
7315		 * and some at the end) to help pruning.
7316		 */
7317		if (add_new_state)
7318			sl->miss_cnt++;
7319		/* heuristic to determine whether this state is beneficial
7320		 * to keep checking from the state equivalence point of view.
7321		 * Higher numbers increase max_states_per_insn and verification time,
7322		 * but do not meaningfully decrease insn_processed.
7323		 */
7324		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
7325			/* the state is unlikely to be useful. Remove it to
7326			 * speed up verification
7327			 */
7328			*pprev = sl->next;
7329			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
7330				u32 br = sl->state.branches;
7331
7332				WARN_ONCE(br,
7333					  "BUG live_done but branches_to_explore %d\n",
7334					  br);
7335				free_verifier_state(&sl->state, false);
7336				kfree(sl);
7337				env->peak_states--;
7338			} else {
7339				/* cannot free this state, since parentage chain may
7340			 * walk it later. Add it to the free_list instead to
7341				 * be freed at the end of verification
7342				 */
7343				sl->next = env->free_list;
7344				env->free_list = sl;
7345			}
7346			sl = *pprev;
7347			continue;
7348		}
7349next:
7350		pprev = &sl->next;
7351		sl = *pprev;
7352	}
7353
7354	if (env->max_states_per_insn < states_cnt)
7355		env->max_states_per_insn = states_cnt;
7356
7357	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
7358		return push_jmp_history(env, cur);
7359
7360	if (!add_new_state)
7361		return push_jmp_history(env, cur);
7362
7363	/* There were no equivalent states, remember the current one.
7364	 * Technically the current state is not proven to be safe yet,
7365	 * but it will either reach the outermost bpf_exit (which means it's safe)
7366	 * or it will be rejected. When there are no loops the verifier won't be
7367	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
7368	 * again on the way to bpf_exit.
7369	 * When looping the sl->state.branches will be > 0 and this state
7370	 * will not be considered for equivalence until branches == 0.
7371	 */
7372	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
7373	if (!new_sl)
7374		return -ENOMEM;
7375	env->total_states++;
7376	env->peak_states++;
7377	env->prev_jmps_processed = env->jmps_processed;
7378	env->prev_insn_processed = env->insn_processed;
7379
7380	/* add new state to the head of linked list */
7381	new = &new_sl->state;
7382	err = copy_verifier_state(new, cur);
7383	if (err) {
7384		free_verifier_state(new, false);
7385		kfree(new_sl);
7386		return err;
7387	}
7388	new->insn_idx = insn_idx;
7389	WARN_ONCE(new->branches != 1,
7390		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
7391
7392	cur->parent = new;
7393	cur->first_insn_idx = insn_idx;
7394	clear_jmp_history(cur);
7395	new_sl->next = *explored_state(env, insn_idx);
7396	*explored_state(env, insn_idx) = new_sl;
7397	/* connect new state to parentage chain. Current frame needs all
7398	 * registers connected. Only r6 - r9 of the callers are alive (pushed
7399	 * to the stack implicitly by JITs) so in callers' frames connect just
7400	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
7401	 * the state of the call instruction (with WRITTEN set), and r0 comes
7402	 * from callee with its full parentage chain, anyway.
7403	 */
7404	/* clear write marks in current state: the writes we did are not writes
7405	 * our child did, so they don't screen off its reads from us.
7406	 * (There are no read marks in current state, because reads always mark
7407	 * their parent and current state never has children yet.  Only
7408	 * explored_states can get read marks.)
7409	 */
7410	for (j = 0; j <= cur->curframe; j++) {
7411		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
7412			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
7413		for (i = 0; i < BPF_REG_FP; i++)
7414			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
7415	}
7416
7417	/* all stack frames are accessible from callee, clear them all */
7418	for (j = 0; j <= cur->curframe; j++) {
7419		struct bpf_func_state *frame = cur->frame[j];
7420		struct bpf_func_state *newframe = new->frame[j];
7421
7422		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
7423			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
7424			frame->stack[i].spilled_ptr.parent =
7425						&newframe->stack[i].spilled_ptr;
7426		}
7427	}
7428	return 0;
7429}
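
/* Illustrative sketch (hypothetical): the "most abusive" bounded loop quoted
 * in the pruning-heuristic comment inside is_state_visited().  Every
 * iteration carries a distinct r1 range, so without the jmps/insns
 * thresholds above each prune point would accumulate a new state per
 * iteration.
 */
static const struct bpf_insn bounded_loop_example[] __maybe_unused = {
	BPF_MOV64_IMM(BPF_REG_1, 0),			/* r1 = 0                    */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),		/* r1 += 1                   */
	BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 1000000, -2),	/* if r1 < 1000000 goto pc-2 */
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};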
7430
7431/* Return true if it's OK to have the same insn return a different type. */
7432static bool reg_type_mismatch_ok(enum bpf_reg_type type)
7433{
7434	switch (type) {
7435	case PTR_TO_CTX:
7436	case PTR_TO_SOCKET:
7437	case PTR_TO_SOCKET_OR_NULL:
7438	case PTR_TO_SOCK_COMMON:
7439	case PTR_TO_SOCK_COMMON_OR_NULL:
7440	case PTR_TO_TCP_SOCK:
7441	case PTR_TO_TCP_SOCK_OR_NULL:
7442	case PTR_TO_XDP_SOCK:
7443		return false;
7444	default:
7445		return true;
7446	}
7447}
7448
7449/* If an instruction was previously used with particular pointer types, then we
7450 * need to be careful to avoid cases such as the below, where it may be ok
7451 * for one branch accessing the pointer, but not ok for the other branch:
7452 *
7453 * R1 = sock_ptr
7454 * goto X;
7455 * ...
7456 * R1 = some_other_valid_ptr;
7457 * goto X;
7458 * ...
7459 * R2 = *(u32 *)(R1 + 0);
7460 */
7461static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
7462{
7463	return src != prev && (!reg_type_mismatch_ok(src) ||
7464			       !reg_type_mismatch_ok(prev));
7465}
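
/* Illustrative sketch (hypothetical): a program rejected by do_check() with
 * "same insn cannot be used with different pointers" (loaded, say, as a
 * socket filter, where ctx offset 0 is readable).  Depending on r2, the
 * final load sees r1 as either the context pointer or a stack pointer, and
 * neither type tolerates being mixed with the other at one insn.
 */
static const struct bpf_insn mixed_ptr_example[] __maybe_unused = {
	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),		/* make fp-8 readable           */
	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),	/* r2 = some ctx field          */
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),		/* one path keeps r1 == ctx     */
	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),		/* other path: r1 = fp          */
	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),		/*              r1 = fp - 8     */
	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),	/* same insn, two pointer types */
	BPF_EXIT_INSN(),
};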
7466
7467static int do_check(struct bpf_verifier_env *env)
7468{
7469	struct bpf_verifier_state *state;
7470	struct bpf_insn *insns = env->prog->insnsi;
7471	struct bpf_reg_state *regs;
7472	int insn_cnt = env->prog->len;
7473	bool do_print_state = false;
7474	int prev_insn_idx = -1;
7475
7476	env->prev_linfo = NULL;
7477
7478	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
7479	if (!state)
7480		return -ENOMEM;
7481	state->curframe = 0;
7482	state->speculative = false;
7483	state->branches = 1;
7484	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
7485	if (!state->frame[0]) {
7486		kfree(state);
7487		return -ENOMEM;
7488	}
7489	env->cur_state = state;
7490	init_func_state(env, state->frame[0],
7491			BPF_MAIN_FUNC /* callsite */,
7492			0 /* frameno */,
7493			0 /* subprogno, zero == main subprog */);
7494
7495	for (;;) {
7496		struct bpf_insn *insn;
7497		u8 class;
7498		int err;
7499
7500		env->prev_insn_idx = prev_insn_idx;
7501		if (env->insn_idx >= insn_cnt) {
7502			verbose(env, "invalid insn idx %d insn_cnt %d\n",
7503				env->insn_idx, insn_cnt);
7504			return -EFAULT;
7505		}
7506
7507		insn = &insns[env->insn_idx];
7508		class = BPF_CLASS(insn->code);
7509
7510		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
7511			verbose(env,
7512				"BPF program is too large. Processed %d insn\n",
7513				env->insn_processed);
7514			return -E2BIG;
7515		}
7516
7517		err = is_state_visited(env, env->insn_idx);
7518		if (err < 0)
7519			return err;
7520		if (err == 1) {
7521			/* found equivalent state, can prune the search */
7522			if (env->log.level & BPF_LOG_LEVEL) {
7523				if (do_print_state)
7524					verbose(env, "\nfrom %d to %d%s: safe\n",
7525						env->prev_insn_idx, env->insn_idx,
7526						env->cur_state->speculative ?
7527						" (speculative execution)" : "");
7528				else
7529					verbose(env, "%d: safe\n", env->insn_idx);
7530			}
7531			goto process_bpf_exit;
7532		}
7533
7534		if (signal_pending(current))
7535			return -EAGAIN;
7536
7537		if (need_resched())
7538			cond_resched();
7539
7540		if (env->log.level & BPF_LOG_LEVEL2 ||
7541		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
7542			if (env->log.level & BPF_LOG_LEVEL2)
7543				verbose(env, "%d:", env->insn_idx);
7544			else
7545				verbose(env, "\nfrom %d to %d%s:",
7546					env->prev_insn_idx, env->insn_idx,
7547					env->cur_state->speculative ?
7548					" (speculative execution)" : "");
7549			print_verifier_state(env, state->frame[state->curframe]);
7550			do_print_state = false;
7551		}
7552
7553		if (env->log.level & BPF_LOG_LEVEL) {
7554			const struct bpf_insn_cbs cbs = {
7555				.cb_print	= verbose,
7556				.private_data	= env,
7557			};
7558
7559			verbose_linfo(env, env->insn_idx, "; ");
7560			verbose(env, "%d: ", env->insn_idx);
7561			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
7562		}
7563
7564		if (bpf_prog_is_dev_bound(env->prog->aux)) {
7565			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
7566							   env->prev_insn_idx);
7567			if (err)
7568				return err;
7569		}
7570
7571		regs = cur_regs(env);
7572		env->insn_aux_data[env->insn_idx].seen = true;
7573		prev_insn_idx = env->insn_idx;
7574
7575		if (class == BPF_ALU || class == BPF_ALU64) {
7576			err = check_alu_op(env, insn);
7577			if (err)
7578				return err;
7579
7580		} else if (class == BPF_LDX) {
7581			enum bpf_reg_type *prev_src_type, src_reg_type;
7582
7583			/* check for reserved fields is already done */
7584
7585			/* check src operand */
7586			err = check_reg_arg(env, insn->src_reg, SRC_OP);
7587			if (err)
7588				return err;
7589
7590			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7591			if (err)
7592				return err;
7593
7594			src_reg_type = regs[insn->src_reg].type;
7595
7596			/* check that memory (src_reg + off) is readable,
7597			 * the state of dst_reg will be updated by this func
7598			 */
7599			err = check_mem_access(env, env->insn_idx, insn->src_reg,
7600					       insn->off, BPF_SIZE(insn->code),
7601					       BPF_READ, insn->dst_reg, false);
7602			if (err)
7603				return err;
7604
7605			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
7606
7607			if (*prev_src_type == NOT_INIT) {
7608				/* saw a valid insn
7609				 * dst_reg = *(u32 *)(src_reg + off)
7610				 * save type to validate intersecting paths
7611				 */
7612				*prev_src_type = src_reg_type;
7613
7614			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
7615				/* An abuser program is trying to use the same insn
7616				 * dst_reg = *(u32*) (src_reg + off)
7617				 * with different pointer types:
7618				 * src_reg == ctx in one branch and
7619				 * src_reg == stack|map in some other branch.
7620				 * Reject it.
7621				 */
7622				verbose(env, "same insn cannot be used with different pointers\n");
7623				return -EINVAL;
7624			}
7625
7626		} else if (class == BPF_STX) {
7627			enum bpf_reg_type *prev_dst_type, dst_reg_type;
7628
7629			if (BPF_MODE(insn->code) == BPF_XADD) {
7630				err = check_xadd(env, env->insn_idx, insn);
7631				if (err)
7632					return err;
7633				env->insn_idx++;
7634				continue;
7635			}
7636
7637			/* check src1 operand */
7638			err = check_reg_arg(env, insn->src_reg, SRC_OP);
7639			if (err)
7640				return err;
7641			/* check src2 operand */
7642			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7643			if (err)
7644				return err;
7645
7646			dst_reg_type = regs[insn->dst_reg].type;
7647
7648			/* check that memory (dst_reg + off) is writeable */
7649			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
7650					       insn->off, BPF_SIZE(insn->code),
7651					       BPF_WRITE, insn->src_reg, false);
7652			if (err)
7653				return err;
7654
7655			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
7656
7657			if (*prev_dst_type == NOT_INIT) {
7658				*prev_dst_type = dst_reg_type;
7659			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
7660				verbose(env, "same insn cannot be used with different pointers\n");
7661				return -EINVAL;
7662			}
7663
7664		} else if (class == BPF_ST) {
7665			if (BPF_MODE(insn->code) != BPF_MEM ||
7666			    insn->src_reg != BPF_REG_0) {
7667				verbose(env, "BPF_ST uses reserved fields\n");
7668				return -EINVAL;
7669			}
7670			/* check src operand */
7671			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7672			if (err)
7673				return err;
7674
7675			if (is_ctx_reg(env, insn->dst_reg)) {
7676				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
7677					insn->dst_reg,
7678					reg_type_str[reg_state(env, insn->dst_reg)->type]);
7679				return -EACCES;
7680			}
7681
7682			/* check that memory (dst_reg + off) is writeable */
7683			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
7684					       insn->off, BPF_SIZE(insn->code),
7685					       BPF_WRITE, -1, false);
7686			if (err)
7687				return err;
7688
7689		} else if (class == BPF_JMP || class == BPF_JMP32) {
7690			u8 opcode = BPF_OP(insn->code);
7691
7692			env->jmps_processed++;
7693			if (opcode == BPF_CALL) {
7694				if (BPF_SRC(insn->code) != BPF_K ||
7695				    insn->off != 0 ||
7696				    (insn->src_reg != BPF_REG_0 &&
7697				     insn->src_reg != BPF_PSEUDO_CALL) ||
7698				    insn->dst_reg != BPF_REG_0 ||
7699				    class == BPF_JMP32) {
7700					verbose(env, "BPF_CALL uses reserved fields\n");
7701					return -EINVAL;
7702				}
7703
7704				if (env->cur_state->active_spin_lock &&
7705				    (insn->src_reg == BPF_PSEUDO_CALL ||
7706				     insn->imm != BPF_FUNC_spin_unlock)) {
7707					verbose(env, "function calls are not allowed while holding a lock\n");
7708					return -EINVAL;
7709				}
7710				if (insn->src_reg == BPF_PSEUDO_CALL)
7711					err = check_func_call(env, insn, &env->insn_idx);
7712				else
7713					err = check_helper_call(env, insn->imm, env->insn_idx);
7714				if (err)
7715					return err;
7716
7717			} else if (opcode == BPF_JA) {
7718				if (BPF_SRC(insn->code) != BPF_K ||
7719				    insn->imm != 0 ||
7720				    insn->src_reg != BPF_REG_0 ||
7721				    insn->dst_reg != BPF_REG_0 ||
7722				    class == BPF_JMP32) {
7723					verbose(env, "BPF_JA uses reserved fields\n");
7724					return -EINVAL;
7725				}
7726
7727				env->insn_idx += insn->off + 1;
7728				continue;
7729
7730			} else if (opcode == BPF_EXIT) {
7731				if (BPF_SRC(insn->code) != BPF_K ||
7732				    insn->imm != 0 ||
7733				    insn->src_reg != BPF_REG_0 ||
7734				    insn->dst_reg != BPF_REG_0 ||
7735				    class == BPF_JMP32) {
7736					verbose(env, "BPF_EXIT uses reserved fields\n");
7737					return -EINVAL;
7738				}
7739
7740				if (env->cur_state->active_spin_lock) {
7741					verbose(env, "bpf_spin_unlock is missing\n");
7742					return -EINVAL;
7743				}
7744
7745				if (state->curframe) {
7746					/* exit from nested function */
7747					err = prepare_func_exit(env, &env->insn_idx);
7748					if (err)
7749						return err;
7750					do_print_state = true;
7751					continue;
7752				}
7753
7754				err = check_reference_leak(env);
7755				if (err)
7756					return err;
7757
7758				/* eBPF calling convention is such that R0 is used
7759				 * to return the value from eBPF program.
7760				 * Make sure that it's readable at this time
7761				 * of bpf_exit, which means that program wrote
7762				 * something into it earlier
7763				 */
7764				err = check_reg_arg(env, BPF_REG_0, SRC_OP);
7765				if (err)
7766					return err;
7767
7768				if (is_pointer_value(env, BPF_REG_0)) {
7769					verbose(env, "R0 leaks addr as return value\n");
7770					return -EACCES;
7771				}
7772
7773				err = check_return_code(env);
7774				if (err)
7775					return err;
7776process_bpf_exit:
7777				update_branch_counts(env, env->cur_state);
7778				err = pop_stack(env, &prev_insn_idx,
7779						&env->insn_idx);
7780				if (err < 0) {
7781					if (err != -ENOENT)
7782						return err;
7783					break;
7784				} else {
7785					do_print_state = true;
7786					continue;
7787				}
7788			} else {
7789				err = check_cond_jmp_op(env, insn, &env->insn_idx);
7790				if (err)
7791					return err;
7792			}
7793		} else if (class == BPF_LD) {
7794			u8 mode = BPF_MODE(insn->code);
7795
7796			if (mode == BPF_ABS || mode == BPF_IND) {
7797				err = check_ld_abs(env, insn);
7798				if (err)
7799					return err;
7800
7801			} else if (mode == BPF_IMM) {
7802				err = check_ld_imm(env, insn);
7803				if (err)
7804					return err;
7805
7806				env->insn_idx++;
7807				env->insn_aux_data[env->insn_idx].seen = true;
7808			} else {
7809				verbose(env, "invalid BPF_LD mode\n");
7810				return -EINVAL;
7811			}
7812		} else {
7813			verbose(env, "unknown insn class %d\n", class);
7814			return -EINVAL;
7815		}
7816
7817		env->insn_idx++;
7818	}
7819
7820	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
7821	return 0;
7822}
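
/* Illustrative sketch (hypothetical): the smallest program do_check() walks
 * to completion, for program types without extra return-code restrictions.
 * R0 must be written before BPF_EXIT; otherwise the
 * check_reg_arg(BPF_REG_0, SRC_OP) call above fails with "R0 !read_ok".
 */
static const struct bpf_insn minimal_accepted_prog[] __maybe_unused = {
	BPF_MOV64_IMM(BPF_REG_0, 0),	/* r0 = 0: readable, non-pointer return value */
	BPF_EXIT_INSN(),
};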
7823
7824static int check_map_prealloc(struct bpf_map *map)
7825{
7826	return (map->map_type != BPF_MAP_TYPE_HASH &&
7827		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
7828		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
7829		!(map->map_flags & BPF_F_NO_PREALLOC);
7830}
7831
7832static bool is_tracing_prog_type(enum bpf_prog_type type)
7833{
7834	switch (type) {
7835	case BPF_PROG_TYPE_KPROBE:
7836	case BPF_PROG_TYPE_TRACEPOINT:
7837	case BPF_PROG_TYPE_PERF_EVENT:
7838	case BPF_PROG_TYPE_RAW_TRACEPOINT:
7839		return true;
7840	default:
7841		return false;
7842	}
7843}
7844
7845static int check_map_prog_compatibility(struct bpf_verifier_env *env,
7846					struct bpf_map *map,
7847					struct bpf_prog *prog)
7848
7849{
7850	/* Make sure that BPF_PROG_TYPE_PERF_EVENT programs only use
7851	 * preallocated hash maps, since doing memory allocation
7852	 * in overflow_handler can crash depending on where nmi got
7853	 * triggered.
7854	 */
7855	if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
7856		if (!check_map_prealloc(map)) {
7857			verbose(env, "perf_event programs can only use preallocated hash map\n");
7858			return -EINVAL;
7859		}
7860		if (map->inner_map_meta &&
7861		    !check_map_prealloc(map->inner_map_meta)) {
7862			verbose(env, "perf_event programs can only use preallocated inner hash map\n");
7863			return -EINVAL;
7864		}
7865	}
7866
7867	if ((is_tracing_prog_type(prog->type) ||
7868	     prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
7869	    map_value_has_spin_lock(map)) {
7870		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
7871		return -EINVAL;
7872	}
7873
7874	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
7875	    !bpf_offload_prog_map_match(prog, map)) {
7876		verbose(env, "offload device mismatch between prog and map\n");
7877		return -EINVAL;
7878	}
7879
7880	return 0;
7881}
7882
7883static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
7884{
7885	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
7886		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
7887}
7888
7889/* look for pseudo eBPF instructions that access map FDs and
7890 * replace them with actual map pointers
7891 */
7892static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
7893{
7894	struct bpf_insn *insn = env->prog->insnsi;
7895	int insn_cnt = env->prog->len;
7896	int i, j, err;
7897
7898	err = bpf_prog_calc_tag(env->prog);
7899	if (err)
7900		return err;
7901
7902	for (i = 0; i < insn_cnt; i++, insn++) {
7903		if (BPF_CLASS(insn->code) == BPF_LDX &&
7904		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
7905			verbose(env, "BPF_LDX uses reserved fields\n");
7906			return -EINVAL;
7907		}
7908
7909		if (BPF_CLASS(insn->code) == BPF_STX &&
7910		    ((BPF_MODE(insn->code) != BPF_MEM &&
7911		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
7912			verbose(env, "BPF_STX uses reserved fields\n");
7913			return -EINVAL;
7914		}
7915
7916		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
7917			struct bpf_insn_aux_data *aux;
7918			struct bpf_map *map;
7919			struct fd f;
7920			u64 addr;
7921
7922			if (i == insn_cnt - 1 || insn[1].code != 0 ||
7923			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
7924			    insn[1].off != 0) {
7925				verbose(env, "invalid bpf_ld_imm64 insn\n");
7926				return -EINVAL;
7927			}
7928
7929			if (insn[0].src_reg == 0)
7930				/* valid generic load 64-bit imm */
7931				goto next_insn;
7932
7933			/* In the final convert_pseudo_ld_imm64() step, this is
7934			 * converted into a regular 64-bit imm load insn.
7935			 */
7936			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
7937			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
7938			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
7939			     insn[1].imm != 0)) {
7940				verbose(env,
7941					"unrecognized bpf_ld_imm64 insn\n");
7942				return -EINVAL;
7943			}
7944
7945			f = fdget(insn[0].imm);
7946			map = __bpf_map_get(f);
7947			if (IS_ERR(map)) {
7948				verbose(env, "fd %d is not pointing to valid bpf_map\n",
7949					insn[0].imm);
7950				return PTR_ERR(map);
7951			}
7952
7953			err = check_map_prog_compatibility(env, map, env->prog);
7954			if (err) {
7955				fdput(f);
7956				return err;
7957			}
7958
7959			aux = &env->insn_aux_data[i];
7960			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
7961				addr = (unsigned long)map;
7962			} else {
7963				u32 off = insn[1].imm;
7964
7965				if (off >= BPF_MAX_VAR_OFF) {
7966					verbose(env, "direct value offset of %u is not allowed\n", off);
7967					fdput(f);
7968					return -EINVAL;
7969				}
7970
7971				if (!map->ops->map_direct_value_addr) {
7972					verbose(env, "no direct value access support for this map type\n");
7973					fdput(f);
7974					return -EINVAL;
7975				}
7976
7977				err = map->ops->map_direct_value_addr(map, &addr, off);
7978				if (err) {
7979					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
7980						map->value_size, off);
7981					fdput(f);
7982					return err;
7983				}
7984
7985				aux->map_off = off;
7986				addr += off;
7987			}
7988
7989			insn[0].imm = (u32)addr;
7990			insn[1].imm = addr >> 32;
7991
7992			/* check whether we recorded this map already */
7993			for (j = 0; j < env->used_map_cnt; j++) {
7994				if (env->used_maps[j] == map) {
7995					aux->map_index = j;
7996					fdput(f);
7997					goto next_insn;
7998				}
7999			}
8000
8001			if (env->used_map_cnt >= MAX_USED_MAPS) {
8002				fdput(f);
8003				return -E2BIG;
8004			}
8005
8006			/* hold the map. If the program is rejected by the verifier,
8007			 * the map will be released by release_maps() or it
8008			 * will be used by the valid program until it's unloaded
8009			 * and all maps are released in free_used_maps()
8010			 */
8011			map = bpf_map_inc(map, false);
8012			if (IS_ERR(map)) {
8013				fdput(f);
8014				return PTR_ERR(map);
8015			}
8016
8017			aux->map_index = env->used_map_cnt;
8018			env->used_maps[env->used_map_cnt++] = map;
8019
8020			if (bpf_map_is_cgroup_storage(map) &&
8021			    bpf_cgroup_storage_assign(env->prog, map)) {
8022				verbose(env, "only one cgroup storage of each type is allowed\n");
8023				fdput(f);
8024				return -EBUSY;
8025			}
8026
8027			fdput(f);
8028next_insn:
8029			insn++;
8030			i++;
8031			continue;
8032		}
8033
8034		/* Basic sanity check before we invest more work here. */
8035		if (!bpf_opcode_in_insntable(insn->code)) {
8036			verbose(env, "unknown opcode %02x\n", insn->code);
8037			return -EINVAL;
8038		}
8039	}
8040
8041	/* now all pseudo BPF_LD_IMM64 instructions load valid
8042	 * 'struct bpf_map *' into a register instead of user map_fd.
8043	 * These pointers will be used later by verifier to validate map access.
8044	 */
8045	return 0;
8046}
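
/* Illustrative sketch (hypothetical): the insn pair rewritten by the function
 * above.  Loaders emit BPF_LD | BPF_DW | BPF_IMM with src_reg ==
 * BPF_PSEUDO_MAP_FD and a user-supplied map fd in imm; after the rewrite the
 * two imm fields hold the low/high 32 bits of the kernel 'struct bpf_map *'.
 * The fd value below is made up.
 */
static const struct bpf_insn pseudo_map_fd_example[] __maybe_unused = {
	BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_FD, 4 /* hypothetical map fd */),
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
};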
8047
8048/* drop refcnt of maps used by the rejected program */
8049static void release_maps(struct bpf_verifier_env *env)
8050{
8051	enum bpf_cgroup_storage_type stype;
8052	int i;
8053
8054	for_each_cgroup_storage_type(stype) {
8055		if (!env->prog->aux->cgroup_storage[stype])
8056			continue;
8057		bpf_cgroup_storage_release(env->prog,
8058			env->prog->aux->cgroup_storage[stype]);
8059	}
8060
8061	for (i = 0; i < env->used_map_cnt; i++)
8062		bpf_map_put(env->used_maps[i]);
8063}
8064
8065/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
8066static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
8067{
8068	struct bpf_insn *insn = env->prog->insnsi;
8069	int insn_cnt = env->prog->len;
8070	int i;
8071
8072	for (i = 0; i < insn_cnt; i++, insn++)
8073		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
8074			insn->src_reg = 0;
8075}
8076
8077/* single env->prog->insni[off] instruction was replaced with the range
8078 * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
8079 * [0, off) and [off, end) to new locations, so the patched range stays zero
8080 */
8081static int adjust_insn_aux_data(struct bpf_verifier_env *env,
8082				struct bpf_prog *new_prog, u32 off, u32 cnt)
8083{
8084	struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data;
8085	struct bpf_insn *insn = new_prog->insnsi;
8086	u32 prog_len;
8087	int i;
8088
8089	/* aux info at OFF always needs adjustment, no matter whether the fast path
8090	 * (cnt == 1) is taken or not. There is no guarantee that the INSN at OFF is
8091	 * the original insn of the old prog.
8092	 */
8093	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
8094
8095	if (cnt == 1)
8096		return 0;
8097	prog_len = new_prog->len;
8098	new_data = vzalloc(array_size(prog_len,
8099				      sizeof(struct bpf_insn_aux_data)));
8100	if (!new_data)
8101		return -ENOMEM;
8102	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
8103	memcpy(new_data + off + cnt - 1, old_data + off,
8104	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
8105	for (i = off; i < off + cnt - 1; i++) {
8106		new_data[i].seen = true;
8107		new_data[i].zext_dst = insn_has_def32(env, insn + i);
8108	}
8109	env->insn_aux_data = new_data;
8110	vfree(old_data);
8111	return 0;
8112}
8113
8114static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
8115{
8116	int i;
8117
8118	if (len == 1)
8119		return;
8120	/* NOTE: fake 'exit' subprog should be updated as well. */
8121	for (i = 0; i <= env->subprog_cnt; i++) {
8122		if (env->subprog_info[i].start <= off)
8123			continue;
8124		env->subprog_info[i].start += len - 1;
8125	}
8126}
8127
8128static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
8129					    const struct bpf_insn *patch, u32 len)
8130{
8131	struct bpf_prog *new_prog;
8132
8133	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
8134	if (IS_ERR(new_prog)) {
8135		if (PTR_ERR(new_prog) == -ERANGE)
8136			verbose(env,
8137				"insn %d cannot be patched due to 16-bit range\n",
8138				env->insn_aux_data[off].orig_idx);
8139		return NULL;
8140	}
8141	if (adjust_insn_aux_data(env, new_prog, off, len))
8142		return NULL;
8143	adjust_subprog_starts(env, off, len);
8144	return new_prog;
8145}
8146
8147static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
8148					      u32 off, u32 cnt)
8149{
8150	int i, j;
8151
8152	/* find first prog starting at or after off (first to remove) */
8153	for (i = 0; i < env->subprog_cnt; i++)
8154		if (env->subprog_info[i].start >= off)
8155			break;
8156	/* find first prog starting at or after off + cnt (first to stay) */
8157	for (j = i; j < env->subprog_cnt; j++)
8158		if (env->subprog_info[j].start >= off + cnt)
8159			break;
8160	/* if j doesn't start exactly at off + cnt, we are just removing
8161	 * the front of previous prog
8162	 */
8163	if (env->subprog_info[j].start != off + cnt)
8164		j--;
8165
8166	if (j > i) {
8167		struct bpf_prog_aux *aux = env->prog->aux;
8168		int move;
8169
8170		/* move fake 'exit' subprog as well */
8171		move = env->subprog_cnt + 1 - j;
8172
8173		memmove(env->subprog_info + i,
8174			env->subprog_info + j,
8175			sizeof(*env->subprog_info) * move);
8176		env->subprog_cnt -= j - i;
8177
8178		/* remove func_info */
8179		if (aux->func_info) {
8180			move = aux->func_info_cnt - j;
8181
8182			memmove(aux->func_info + i,
8183				aux->func_info + j,
8184				sizeof(*aux->func_info) * move);
8185			aux->func_info_cnt -= j - i;
8186			/* func_info->insn_off is set after all code rewrites,
8187			 * in adjust_btf_func() - no need to adjust
8188			 */
8189		}
8190	} else {
8191		/* convert i from "first prog to remove" to "first to adjust" */
8192		if (env->subprog_info[i].start == off)
8193			i++;
8194	}
8195
8196	/* update fake 'exit' subprog as well */
8197	for (; i <= env->subprog_cnt; i++)
8198		env->subprog_info[i].start -= cnt;
8199
8200	return 0;
8201}
8202
8203static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
8204				      u32 cnt)
8205{
8206	struct bpf_prog *prog = env->prog;
8207	u32 i, l_off, l_cnt, nr_linfo;
8208	struct bpf_line_info *linfo;
8209
8210	nr_linfo = prog->aux->nr_linfo;
8211	if (!nr_linfo)
8212		return 0;
8213
8214	linfo = prog->aux->linfo;
8215
8216	/* find first line info to remove, count lines to be removed */
8217	for (i = 0; i < nr_linfo; i++)
8218		if (linfo[i].insn_off >= off)
8219			break;
8220
8221	l_off = i;
8222	l_cnt = 0;
8223	for (; i < nr_linfo; i++)
8224		if (linfo[i].insn_off < off + cnt)
8225			l_cnt++;
8226		else
8227			break;
8228
8229	/* If the first live insn doesn't match the first live linfo, it needs to
8230	 * "inherit" the last removed linfo.  prog is already modified, so prog->len == off
8231	 * means no live instructions after (tail of the program was removed).
8232	 */
8233	if (prog->len != off && l_cnt &&
8234	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
8235		l_cnt--;
8236		linfo[--i].insn_off = off + cnt;
8237	}
8238
8239	/* remove the line info entries which refer to the removed instructions */
8240	if (l_cnt) {
8241		memmove(linfo + l_off, linfo + i,
8242			sizeof(*linfo) * (nr_linfo - i));
8243
8244		prog->aux->nr_linfo -= l_cnt;
8245		nr_linfo = prog->aux->nr_linfo;
8246	}
8247
8248	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
8249	for (i = l_off; i < nr_linfo; i++)
8250		linfo[i].insn_off -= cnt;
8251
8252	/* fix up all subprogs (incl. 'exit') which start >= off */
8253	for (i = 0; i <= env->subprog_cnt; i++)
8254		if (env->subprog_info[i].linfo_idx > l_off) {
8255			/* program may have started in the removed region but
8256			 * may not be fully removed
8257			 */
8258			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
8259				env->subprog_info[i].linfo_idx -= l_cnt;
8260			else
8261				env->subprog_info[i].linfo_idx = l_off;
8262		}
8263
8264	return 0;
8265}
8266
8267static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
8268{
8269	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8270	unsigned int orig_prog_len = env->prog->len;
8271	int err;
8272
8273	if (bpf_prog_is_dev_bound(env->prog->aux))
8274		bpf_prog_offload_remove_insns(env, off, cnt);
8275
8276	err = bpf_remove_insns(env->prog, off, cnt);
8277	if (err)
8278		return err;
8279
8280	err = adjust_subprog_starts_after_remove(env, off, cnt);
8281	if (err)
8282		return err;
8283
8284	err = bpf_adj_linfo_after_remove(env, off, cnt);
8285	if (err)
8286		return err;
8287
8288	memmove(aux_data + off,	aux_data + off + cnt,
8289		sizeof(*aux_data) * (orig_prog_len - off - cnt));
8290
8291	return 0;
8292}
8293
8294/* The verifier does more data flow analysis than llvm and will not
8295 * explore branches that are dead at run time. Malicious programs can
8296 * have dead code too. Therefore replace all dead at-run-time code
8297 * with 'ja -1'.
8298 *
8299 * Just nops are not optimal: e.g. if they sat at the end of the
8300 * program and through another bug we managed to jump there, we would
8301 * execute beyond program memory. Returning exception
8302 * code also wouldn't work since we can have subprogs where the dead
8303 * code could be located.
8304 */
8305static void sanitize_dead_code(struct bpf_verifier_env *env)
8306{
8307	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8308	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
8309	struct bpf_insn *insn = env->prog->insnsi;
8310	const int insn_cnt = env->prog->len;
8311	int i;
8312
8313	for (i = 0; i < insn_cnt; i++) {
8314		if (aux_data[i].seen)
8315			continue;
8316		memcpy(insn + i, &trap, sizeof(trap));
8317	}
8318}
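
/* Illustrative sketch (hypothetical): what the pass above leaves behind.
 * The insn after the unconditional exit was never marked 'seen', so it is
 * overwritten with a self-branching 'goto -1' trap instead of a nop.
 */
static const struct bpf_insn sanitized_dead_code_example[] __maybe_unused = {
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_EXIT_INSN(),
	BPF_JMP_IMM(BPF_JA, 0, 0, -1),	/* was dead code, e.g. 'r0 = 1' */
};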
8319
8320static bool insn_is_cond_jump(u8 code)
8321{
8322	u8 op;
8323
8324	if (BPF_CLASS(code) == BPF_JMP32)
8325		return true;
8326
8327	if (BPF_CLASS(code) != BPF_JMP)
8328		return false;
8329
8330	op = BPF_OP(code);
8331	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
8332}
8333
8334static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
8335{
8336	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8337	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
8338	struct bpf_insn *insn = env->prog->insnsi;
8339	const int insn_cnt = env->prog->len;
8340	int i;
8341
8342	for (i = 0; i < insn_cnt; i++, insn++) {
8343		if (!insn_is_cond_jump(insn->code))
8344			continue;
8345
8346		if (!aux_data[i + 1].seen)
8347			ja.off = insn->off;
8348		else if (!aux_data[i + 1 + insn->off].seen)
8349			ja.off = 0;
8350		else
8351			continue;
8352
8353		if (bpf_prog_is_dev_bound(env->prog->aux))
8354			bpf_prog_offload_replace_insn(env, i, &ja);
8355
8356		memcpy(insn, &ja, sizeof(ja));
8357	}
8358}
8359
8360static int opt_remove_dead_code(struct bpf_verifier_env *env)
8361{
8362	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
8363	int insn_cnt = env->prog->len;
8364	int i, err;
8365
8366	for (i = 0; i < insn_cnt; i++) {
8367		int j;
8368
8369		j = 0;
8370		while (i + j < insn_cnt && !aux_data[i + j].seen)
8371			j++;
8372		if (!j)
8373			continue;
8374
8375		err = verifier_remove_insns(env, i, j);
8376		if (err)
8377			return err;
8378		insn_cnt = env->prog->len;
8379	}
8380
8381	return 0;
8382}
8383
8384static int opt_remove_nops(struct bpf_verifier_env *env)
8385{
8386	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
8387	struct bpf_insn *insn = env->prog->insnsi;
8388	int insn_cnt = env->prog->len;
8389	int i, err;
8390
8391	for (i = 0; i < insn_cnt; i++) {
8392		if (memcmp(&insn[i], &ja, sizeof(ja)))
8393			continue;
8394
8395		err = verifier_remove_insns(env, i, 1);
8396		if (err)
8397			return err;
8398		insn_cnt--;
8399		i--;
8400	}
8401
8402	return 0;
8403}
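
/* Illustrative sketch (hypothetical): the only pattern opt_remove_nops()
 * touches is an exact 'goto +0', typically left over after
 * opt_hard_wire_dead_code_branches() rewrote a conditional jump whose
 * taken edge was dead.
 */
static const struct bpf_insn nop_example[] __maybe_unused = {
	BPF_MOV64_IMM(BPF_REG_0, 0),
	BPF_JMP_IMM(BPF_JA, 0, 0, 0),	/* removed by the pass above */
	BPF_EXIT_INSN(),
};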
8404
8405static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
8406					 const union bpf_attr *attr)
8407{
8408	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
8409	struct bpf_insn_aux_data *aux = env->insn_aux_data;
8410	int i, patch_len, delta = 0, len = env->prog->len;
8411	struct bpf_insn *insns = env->prog->insnsi;
8412	struct bpf_prog *new_prog;
8413	bool rnd_hi32;
8414
8415	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
8416	zext_patch[1] = BPF_ZEXT_REG(0);
8417	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
8418	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
8419	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
8420	for (i = 0; i < len; i++) {
8421		int adj_idx = i + delta;
8422		struct bpf_insn insn;
8423
8424		insn = insns[adj_idx];
8425		if (!aux[adj_idx].zext_dst) {
8426			u8 code, class;
8427			u32 imm_rnd;
8428
8429			if (!rnd_hi32)
8430				continue;
8431
8432			code = insn.code;
8433			class = BPF_CLASS(code);
8434			if (insn_no_def(&insn))
8435				continue;
8436
8437			/* NOTE: arg "reg" (the fourth one) is only used for
8438			 *       BPF_STX, which has been ruled out by the check
8439			 *       above, so it is safe to pass NULL here.
8440			 */
8441			if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
8442				if (class == BPF_LD &&
8443				    BPF_MODE(code) == BPF_IMM)
8444					i++;
8445				continue;
8446			}
8447
8448			/* ctx load could be transformed into wider load. */
8449			if (class == BPF_LDX &&
8450			    aux[adj_idx].ptr_type == PTR_TO_CTX)
8451				continue;
8452
8453			imm_rnd = get_random_int();
8454			rnd_hi32_patch[0] = insn;
8455			rnd_hi32_patch[1].imm = imm_rnd;
8456			rnd_hi32_patch[3].dst_reg = insn.dst_reg;
8457			patch = rnd_hi32_patch;
8458			patch_len = 4;
8459			goto apply_patch_buffer;
8460		}
8461
8462		if (!bpf_jit_needs_zext())
8463			continue;
8464
8465		zext_patch[0] = insn;
8466		zext_patch[1].dst_reg = insn.dst_reg;
8467		zext_patch[1].src_reg = insn.dst_reg;
8468		patch = zext_patch;
8469		patch_len = 2;
8470apply_patch_buffer:
8471		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
8472		if (!new_prog)
8473			return -ENOMEM;
8474		env->prog = new_prog;
8475		insns = new_prog->insnsi;
8476		aux = env->insn_aux_data;
8477		delta += patch_len - 1;
8478	}
8479
8480	return 0;
8481}
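
/* Illustrative sketch (hypothetical): the two-insn zext patch built above,
 * for the case where the JIT reports bpf_jit_needs_zext() and the 32-bit
 * liveness analysis found that the upper half of r1 is relied upon later.
 */
static const struct bpf_insn zext_patch_example[] __maybe_unused = {
	BPF_MOV32_IMM(BPF_REG_1, 1),	/* original 32-bit def of r1         */
	BPF_ZEXT_REG(BPF_REG_1),	/* inserted: zero-extend bits 32..63 */
};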
8482
8483/* convert load instructions that access fields of a context type into a
8484 * sequence of instructions that access fields of the underlying structure:
8485 *     struct __sk_buff    -> struct sk_buff
8486 *     struct bpf_sock_ops -> struct sock
8487 */
8488static int convert_ctx_accesses(struct bpf_verifier_env *env)
8489{
8490	const struct bpf_verifier_ops *ops = env->ops;
8491	int i, cnt, size, ctx_field_size, delta = 0;
8492	const int insn_cnt = env->prog->len;
8493	struct bpf_insn insn_buf[16], *insn;
8494	u32 target_size, size_default, off;
8495	struct bpf_prog *new_prog;
8496	enum bpf_access_type type;
8497	bool is_narrower_load;
8498
8499	if (ops->gen_prologue || env->seen_direct_write) {
8500		if (!ops->gen_prologue) {
8501			verbose(env, "bpf verifier is misconfigured\n");
8502			return -EINVAL;
8503		}
8504		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
8505					env->prog);
8506		if (cnt >= ARRAY_SIZE(insn_buf)) {
8507			verbose(env, "bpf verifier is misconfigured\n");
8508			return -EINVAL;
8509		} else if (cnt) {
8510			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
8511			if (!new_prog)
8512				return -ENOMEM;
8513
8514			env->prog = new_prog;
8515			delta += cnt - 1;
8516		}
8517	}
8518
8519	if (bpf_prog_is_dev_bound(env->prog->aux))
8520		return 0;
8521
8522	insn = env->prog->insnsi + delta;
8523
8524	for (i = 0; i < insn_cnt; i++, insn++) {
8525		bpf_convert_ctx_access_t convert_ctx_access;
8526
8527		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
8528		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
8529		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
8530		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
8531			type = BPF_READ;
8532		else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
8533			 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
8534			 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
8535			 insn->code == (BPF_STX | BPF_MEM | BPF_DW))
8536			type = BPF_WRITE;
8537		else
8538			continue;
8539
8540		if (type == BPF_WRITE &&
8541		    env->insn_aux_data[i + delta].sanitize_stack_off) {
8542			struct bpf_insn patch[] = {
8543				/* Sanitize suspicious stack slot with zero.
8544				 * There are no memory dependencies for this store,
8545				 * since it's only using frame pointer and immediate
8546				 * constant of zero
8547				 */
8548				BPF_ST_MEM(BPF_DW, BPF_REG_FP,
8549					   env->insn_aux_data[i + delta].sanitize_stack_off,
8550					   0),
8551				/* the original STX instruction will immediately
8552				 * overwrite the same stack slot with appropriate value
8553				 */
8554				*insn,
8555			};
8556
8557			cnt = ARRAY_SIZE(patch);
8558			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
8559			if (!new_prog)
8560				return -ENOMEM;
8561
8562			delta    += cnt - 1;
8563			env->prog = new_prog;
8564			insn      = new_prog->insnsi + i + delta;
8565			continue;
8566		}
8567
8568		switch (env->insn_aux_data[i + delta].ptr_type) {
8569		case PTR_TO_CTX:
8570			if (!ops->convert_ctx_access)
8571				continue;
8572			convert_ctx_access = ops->convert_ctx_access;
8573			break;
8574		case PTR_TO_SOCKET:
8575		case PTR_TO_SOCK_COMMON:
8576			convert_ctx_access = bpf_sock_convert_ctx_access;
8577			break;
8578		case PTR_TO_TCP_SOCK:
8579			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
8580			break;
8581		case PTR_TO_XDP_SOCK:
8582			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
8583			break;
8584		default:
8585			continue;
8586		}
8587
8588		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
8589		size = BPF_LDST_BYTES(insn);
8590
8591		/* If the read access is a narrower load of the field,
8592		 * convert it to a 4/8-byte load, to minimize program type
8593		 * specific convert_ctx_access changes. If the conversion is
8594		 * successful, we will apply the proper mask to the result.
8595		 */
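		/* Worked example (little-endian, illustrative): a 1-byte read
		 * at byte offset 2 of a 4-byte context field is widened below
		 * to a 4-byte read of the aligned word; the masking code
		 * further down then emits a right shift by 16 followed by an
		 * AND with 0xff to recover the requested byte.
		 */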
8596		is_narrower_load = size < ctx_field_size;
8597		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
8598		off = insn->off;
8599		if (is_narrower_load) {
8600			u8 size_code;
8601
8602			if (type == BPF_WRITE) {
8603				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
8604				return -EINVAL;
8605			}
8606
8607			size_code = BPF_H;
8608			if (ctx_field_size == 4)
8609				size_code = BPF_W;
8610			else if (ctx_field_size == 8)
8611				size_code = BPF_DW;
8612
8613			insn->off = off & ~(size_default - 1);
8614			insn->code = BPF_LDX | BPF_MEM | size_code;
8615		}
8616
8617		target_size = 0;
8618		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
8619					 &target_size);
8620		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
8621		    (ctx_field_size && !target_size)) {
8622			verbose(env, "bpf verifier is misconfigured\n");
8623			return -EINVAL;
8624		}
8625
8626		if (is_narrower_load && size < target_size) {
8627			u8 shift = bpf_ctx_narrow_access_offset(
8628				off, size, size_default) * 8;
8629			if (ctx_field_size <= 4) {
8630				if (shift)
8631					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
8632									insn->dst_reg,
8633									shift);
8634				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
8635								(1 << size * 8) - 1);
8636			} else {
8637				if (shift)
8638					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
8639									insn->dst_reg,
8640									shift);
8641				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
8642								(1ULL << size * 8) - 1);
8643			}
8644		}
8645
8646		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
8647		if (!new_prog)
8648			return -ENOMEM;
8649
8650		delta += cnt - 1;
8651
8652		/* keep walking new program and skip insns we just inserted */
8653		env->prog = new_prog;
8654		insn      = new_prog->insnsi + i + delta;
8655	}
8656
8657	return 0;
8658}
8659
8660static int jit_subprogs(struct bpf_verifier_env *env)
8661{
8662	struct bpf_prog *prog = env->prog, **func, *tmp;
8663	int i, j, subprog_start, subprog_end = 0, len, subprog;
8664	struct bpf_insn *insn;
8665	void *old_bpf_func;
8666	int err;
8667
8668	if (env->subprog_cnt <= 1)
8669		return 0;
8670
8671	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
8672		if (insn->code != (BPF_JMP | BPF_CALL) ||
8673		    insn->src_reg != BPF_PSEUDO_CALL)
8674			continue;
8675		/* Upon error here we cannot fall back to interpreter but
8676		 * need a hard reject of the program. Thus -EFAULT is
8677		 * propagated in any case.
8678		 */
8679		subprog = find_subprog(env, i + insn->imm + 1);
8680		if (subprog < 0) {
8681			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
8682				  i + insn->imm + 1);
8683			return -EFAULT;
8684		}
8685		/* temporarily remember subprog id inside insn instead of
8686		 * aux_data, since next loop will split up all insns into funcs
8687		 */
8688		insn->off = subprog;
8689		/* remember original imm in case JIT fails and fallback
8690		 * to interpreter will be needed
8691		 */
8692		env->insn_aux_data[i].call_imm = insn->imm;
8693		/* point imm to __bpf_call_base+1 from JITs point of view */
8694		insn->imm = 1;
8695	}
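	/* At this point every bpf-to-bpf call carries its callee's subprog
	 * index in insn->off and a placeholder imm of 1.  The loops below
	 * split the program into per-subprog bpf_progs, JIT each of them,
	 * and then rewrite imm to the callee's real offset from
	 * __bpf_call_base before running the final JIT pass.
	 */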
8696
8697	err = bpf_prog_alloc_jited_linfo(prog);
8698	if (err)
8699		goto out_undo_insn;
8700
8701	err = -ENOMEM;
8702	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
8703	if (!func)
8704		goto out_undo_insn;
8705
8706	for (i = 0; i < env->subprog_cnt; i++) {
8707		subprog_start = subprog_end;
8708		subprog_end = env->subprog_info[i + 1].start;
8709
8710		len = subprog_end - subprog_start;
8711		/* BPF_PROG_RUN doesn't call subprogs directly,
8712		 * hence the main prog stats include the runtime of subprogs.
8713		 * subprogs don't have IDs and are not reachable via prog_get_next_id,
8714		 * so func[i]->aux->stats will never be accessed and stays NULL.
8715		 */
8716		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
8717		if (!func[i])
8718			goto out_free;
8719		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
8720		       len * sizeof(struct bpf_insn));
8721		func[i]->type = prog->type;
8722		func[i]->len = len;
8723		if (bpf_prog_calc_tag(func[i]))
8724			goto out_free;
8725		func[i]->is_func = 1;
8726		func[i]->aux->func_idx = i;
8727		/* the btf and func_info will be freed only at prog->aux */
8728		func[i]->aux->btf = prog->aux->btf;
8729		func[i]->aux->func_info = prog->aux->func_info;
8730
8731		/* Use bpf_prog_F_tag to indicate functions in stack traces.
8732		 * Long term we would need debug info to populate the names.
8733		 */
8734		func[i]->aux->name[0] = 'F';
8735		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
8736		func[i]->jit_requested = 1;
8737		func[i]->aux->linfo = prog->aux->linfo;
8738		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
8739		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
8740		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
8741		func[i] = bpf_int_jit_compile(func[i]);
8742		if (!func[i]->jited) {
8743			err = -ENOTSUPP;
8744			goto out_free;
8745		}
8746		cond_resched();
8747	}
8748	/* At this point all bpf functions were successfully JITed.
8749	 * Now populate all bpf_call instructions with correct addresses and
8750	 * run the last pass of the JIT.
8751	 */
8752	for (i = 0; i < env->subprog_cnt; i++) {
8753		insn = func[i]->insnsi;
8754		for (j = 0; j < func[i]->len; j++, insn++) {
8755			if (insn->code != (BPF_JMP | BPF_CALL) ||
8756			    insn->src_reg != BPF_PSEUDO_CALL)
8757				continue;
8758			subprog = insn->off;
8759			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
8760				    __bpf_call_base;
8761		}
8762
8763		/* we use the aux data to keep a list of the start addresses
8764		 * of the JITed images for each function in the program
8765		 *
8766		 * for some architectures, such as powerpc64, the imm field
8767		 * might not be large enough to hold the offset of the start
8768		 * address of the callee's JITed image from __bpf_call_base
8769		 *
8770		 * in such cases, we can look up the start address of a callee
8771		 * by using its subprog id, available from the off field of
8772		 * the call instruction, as an index for this list
8773		 */
8774		func[i]->aux->func = func;
8775		func[i]->aux->func_cnt = env->subprog_cnt;
8776	}
8777	for (i = 0; i < env->subprog_cnt; i++) {
8778		old_bpf_func = func[i]->bpf_func;
8779		tmp = bpf_int_jit_compile(func[i]);
8780		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
8781			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
8782			err = -ENOTSUPP;
8783			goto out_free;
8784		}
8785		cond_resched();
8786	}
8787
8788	/* finally lock the prog and JIT images for all functions and
8789	 * populate kallsyms
8790	 */
8791	for (i = 0; i < env->subprog_cnt; i++) {
8792		bpf_prog_lock_ro(func[i]);
8793		bpf_prog_kallsyms_add(func[i]);
8794	}
8795
8796	/* Last step: make the now-unused interpreter insns in the main
8797	 * prog consistent for later dump requests, so they look the
8798	 * same as if they had only ever been interpreted.
8799	 */
8800	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
8801		if (insn->code != (BPF_JMP | BPF_CALL) ||
8802		    insn->src_reg != BPF_PSEUDO_CALL)
8803			continue;
8804		insn->off = env->insn_aux_data[i].call_imm;
8805		subprog = find_subprog(env, i + insn->off + 1);
8806		insn->imm = subprog;
8807	}
8808
8809	prog->jited = 1;
8810	prog->bpf_func = func[0]->bpf_func;
8811	prog->aux->func = func;
8812	prog->aux->func_cnt = env->subprog_cnt;
8813	bpf_prog_free_unused_jited_linfo(prog);
8814	return 0;
8815out_free:
8816	for (i = 0; i < env->subprog_cnt; i++)
8817		if (func[i])
8818			bpf_jit_free(func[i]);
8819	kfree(func);
8820out_undo_insn:
8821	/* cleanup main prog to be interpreted */
8822	prog->jit_requested = 0;
8823	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
8824		if (insn->code != (BPF_JMP | BPF_CALL) ||
8825		    insn->src_reg != BPF_PSEUDO_CALL)
8826			continue;
8827		insn->off = 0;
8828		insn->imm = env->insn_aux_data[i].call_imm;
8829	}
8830	bpf_prog_free_jited_linfo(prog);
8831	return err;
8832}
8833
8834static int fixup_call_args(struct bpf_verifier_env *env)
8835{
8836#ifndef CONFIG_BPF_JIT_ALWAYS_ON
8837	struct bpf_prog *prog = env->prog;
8838	struct bpf_insn *insn = prog->insnsi;
8839	int i, depth;
8840#endif
8841	int err = 0;
8842
8843	if (env->prog->jit_requested &&
8844	    !bpf_prog_is_dev_bound(env->prog->aux)) {
8845		err = jit_subprogs(env);
8846		if (err == 0)
8847			return 0;
8848		if (err == -EFAULT)
8849			return err;
8850	}
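	/* Interpreter fallback (sketch): if the JIT was not used or bailed out
	 * non-fatally above, the loop below patches every bpf-to-bpf pseudo
	 * call via bpf_patch_call_args() (in the BPF core) using the callee's
	 * stack depth, so the interpreter can dispatch the call directly.
	 */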
8851#ifndef CONFIG_BPF_JIT_ALWAYS_ON
8852	for (i = 0; i < prog->len; i++, insn++) {
8853		if (insn->code != (BPF_JMP | BPF_CALL) ||
8854		    insn->src_reg != BPF_PSEUDO_CALL)
8855			continue;
8856		depth = get_callee_stack_depth(env, insn, i);
8857		if (depth < 0)
8858			return depth;
8859		bpf_patch_call_args(insn, depth);
8860	}
8861	err = 0;
8862#endif
8863	return err;
8864}
8865
8866/* fixup insn->imm field of bpf_call instructions
8867 * and inline eligible helpers as an explicit sequence of BPF instructions
8868 *
8869 * this function is called after the eBPF program has passed verification
8870 */
8871static int fixup_bpf_calls(struct bpf_verifier_env *env)
8872{
8873	struct bpf_prog *prog = env->prog;
8874	struct bpf_insn *insn = prog->insnsi;
8875	const struct bpf_func_proto *fn;
8876	const int insn_cnt = prog->len;
8877	const struct bpf_map_ops *ops;
8878	struct bpf_insn_aux_data *aux;
8879	struct bpf_insn insn_buf[16];
8880	struct bpf_prog *new_prog;
8881	struct bpf_map *map_ptr;
8882	int i, cnt, delta = 0;
8883
8884	for (i = 0; i < insn_cnt; i++, insn++) {
8885		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
8886		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
8887		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
8888		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
8889			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
8890			struct bpf_insn mask_and_div[] = {
8891				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
8892				/* Rx div 0 -> 0 */
8893				BPF_JMP_IMM(BPF_JNE, insn->src_reg, 0, 2),
8894				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
8895				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
8896				*insn,
8897			};
8898			struct bpf_insn mask_and_mod[] = {
8899				BPF_MOV32_REG(insn->src_reg, insn->src_reg),
8900				/* Rx mod 0 -> Rx */
8901				BPF_JMP_IMM(BPF_JEQ, insn->src_reg, 0, 1),
8902				*insn,
8903			};
8904			struct bpf_insn *patchlet;
8905
8906			if (insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
8907			    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
8908				patchlet = mask_and_div + (is64 ? 1 : 0);
8909				cnt = ARRAY_SIZE(mask_and_div) - (is64 ? 1 : 0);
8910			} else {
8911				patchlet = mask_and_mod + (is64 ? 1 : 0);
8912				cnt = ARRAY_SIZE(mask_and_mod) - (is64 ? 1 : 0);
8913			}
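			/* The leading BPF_MOV32_REG zero-extends the divisor
			 * so that the 64-bit BPF_JMP zero test in the patchlet
			 * matches the low 32 bits actually used by the ALU32
			 * operation; for the 64-bit forms it is unnecessary
			 * and skipped via patchlet + 1 above.
			 */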
8914
8915			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
8916			if (!new_prog)
8917				return -ENOMEM;
8918
8919			delta    += cnt - 1;
8920			env->prog = prog = new_prog;
8921			insn      = new_prog->insnsi + i + delta;
8922			continue;
8923		}
8924
8925		if (BPF_CLASS(insn->code) == BPF_LD &&
8926		    (BPF_MODE(insn->code) == BPF_ABS ||
8927		     BPF_MODE(insn->code) == BPF_IND)) {
8928			cnt = env->ops->gen_ld_abs(insn, insn_buf);
8929			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
8930				verbose(env, "bpf verifier is misconfigured\n");
8931				return -EINVAL;
8932			}
8933
8934			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
8935			if (!new_prog)
8936				return -ENOMEM;
8937
8938			delta    += cnt - 1;
8939			env->prog = prog = new_prog;
8940			insn      = new_prog->insnsi + i + delta;
8941			continue;
8942		}
8943
8944		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
8945		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
8946			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
8947			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
8948			struct bpf_insn insn_buf[16];
8949			struct bpf_insn *patch = &insn_buf[0];
8950			bool issrc, isneg;
8951			u32 off_reg;
8952
8953			aux = &env->insn_aux_data[i + delta];
8954			if (!aux->alu_state ||
8955			    aux->alu_state == BPF_ALU_NON_POINTER)
8956				continue;
8957
8958			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
8959			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
8960				BPF_ALU_SANITIZE_SRC;
8961
8962			off_reg = issrc ? insn->src_reg : insn->dst_reg;
8963			if (isneg)
8964				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
8965			*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
8966			*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
8967			*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
8968			*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
8969			*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
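			/* BPF_REG_AX now holds (roughly) an all-ones mask when
			 * off_reg lies in [0, alu_limit - 1] and zero when it
			 * is out of range or negative: the subtraction goes
			 * negative exactly on overflow, the OR also catches a
			 * negative off_reg, and NEG + ARSH 63 smear the sign
			 * bit into a full-width mask.  The AND below therefore
			 * clamps a bad offset to 0, so the CPU cannot
			 * speculate past the verifier's bounds check with it.
			 */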
8970			if (issrc) {
8971				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
8972							 off_reg);
8973				insn->src_reg = BPF_REG_AX;
8974			} else {
8975				*patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
8976							 BPF_REG_AX);
8977			}
8978			if (isneg)
8979				insn->code = insn->code == code_add ?
8980					     code_sub : code_add;
8981			*patch++ = *insn;
8982			if (issrc && isneg)
8983				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
8984			cnt = patch - insn_buf;
8985
8986			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
8987			if (!new_prog)
8988				return -ENOMEM;
8989
8990			delta    += cnt - 1;
8991			env->prog = prog = new_prog;
8992			insn      = new_prog->insnsi + i + delta;
8993			continue;
8994		}
8995
8996		if (insn->code != (BPF_JMP | BPF_CALL))
8997			continue;
8998		if (insn->src_reg == BPF_PSEUDO_CALL)
8999			continue;
9000
9001		if (insn->imm == BPF_FUNC_get_route_realm)
9002			prog->dst_needed = 1;
9003		if (insn->imm == BPF_FUNC_get_prandom_u32)
9004			bpf_user_rnd_init_once();
9005		if (insn->imm == BPF_FUNC_override_return)
9006			prog->kprobe_override = 1;
9007		if (insn->imm == BPF_FUNC_tail_call) {
9008			/* If we tail call into other programs, we
9009			 * cannot make any assumptions since they can
9010			 * be replaced dynamically during runtime in
9011			 * the program array.
9012			 */
9013			prog->cb_access = 1;
9014			env->prog->aux->stack_depth = MAX_BPF_STACK;
9015			env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
9016
9017			/* mark bpf_tail_call as a different opcode to avoid a
9018			 * conditional branch in the interpreter for every normal
9019			 * call and to prevent accidental JITing by a JIT compiler
9020			 * that doesn't support bpf_tail_call yet
9021			 */
9022			insn->imm = 0;
9023			insn->code = BPF_JMP | BPF_TAIL_CALL;
9024
9025			aux = &env->insn_aux_data[i + delta];
9026			if (!bpf_map_ptr_unpriv(aux))
9027				continue;
9028
9029			/* instead of changing every JIT dealing with tail_call
9030			 * emit two extra insns:
9031			 * if (index >= max_entries) goto out;
9032			 * index &= array->index_mask;
9033			 * to avoid out-of-bounds cpu speculation
9034			 */
9035			if (bpf_map_ptr_poisoned(aux)) {
9036				verbose(env, "tail_call abusing map_ptr\n");
9037				return -EINVAL;
9038			}
9039
9040			map_ptr = BPF_MAP_PTR(aux->map_state);
9041			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
9042						  map_ptr->max_entries, 2);
9043			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
9044						    container_of(map_ptr,
9045								 struct bpf_array,
9046								 map)->index_mask);
9047			insn_buf[2] = *insn;
9048			cnt = 3;
9049			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
9050			if (!new_prog)
9051				return -ENOMEM;
9052
9053			delta    += cnt - 1;
9054			env->prog = prog = new_prog;
9055			insn      = new_prog->insnsi + i + delta;
9056			continue;
9057		}
9058
9059		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
9060		 * and other inlining handlers are currently limited to 64 bit
9061		 * only.
9062		 */
9063		if (prog->jit_requested && BITS_PER_LONG == 64 &&
9064		    (insn->imm == BPF_FUNC_map_lookup_elem ||
9065		     insn->imm == BPF_FUNC_map_update_elem ||
9066		     insn->imm == BPF_FUNC_map_delete_elem ||
9067		     insn->imm == BPF_FUNC_map_push_elem   ||
9068		     insn->imm == BPF_FUNC_map_pop_elem    ||
9069		     insn->imm == BPF_FUNC_map_peek_elem)) {
9070			aux = &env->insn_aux_data[i + delta];
9071			if (bpf_map_ptr_poisoned(aux))
9072				goto patch_call_imm;
9073
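			/* The map is known at verification time, so the
			 * generic helper call can be replaced by either a
			 * fully inlined lookup (map_gen_lookup below) or a
			 * direct call into the map's ops, skipping the
			 * run-time helper dispatch.
			 */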
9074			map_ptr = BPF_MAP_PTR(aux->map_state);
9075			ops = map_ptr->ops;
9076			if (insn->imm == BPF_FUNC_map_lookup_elem &&
9077			    ops->map_gen_lookup) {
9078				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
9079				if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
9080					verbose(env, "bpf verifier is misconfigured\n");
9081					return -EINVAL;
9082				}
9083
9084				new_prog = bpf_patch_insn_data(env, i + delta,
9085							       insn_buf, cnt);
9086				if (!new_prog)
9087					return -ENOMEM;
9088
9089				delta    += cnt - 1;
9090				env->prog = prog = new_prog;
9091				insn      = new_prog->insnsi + i + delta;
9092				continue;
9093			}
9094
9095			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
9096				     (void *(*)(struct bpf_map *map, void *key))NULL));
9097			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
9098				     (int (*)(struct bpf_map *map, void *key))NULL));
9099			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
9100				     (int (*)(struct bpf_map *map, void *key, void *value,
9101					      u64 flags))NULL));
9102			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
9103				     (int (*)(struct bpf_map *map, void *value,
9104					      u64 flags))NULL));
9105			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
9106				     (int (*)(struct bpf_map *map, void *value))NULL));
9107			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
9108				     (int (*)(struct bpf_map *map, void *value))NULL));
9109
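			/* The BUILD_BUG_ONs above pin the ops callbacks to the
			 * exact prototypes the BPF calling convention expects,
			 * because the switch below patches the helper call to
			 * jump straight into them with no wrapper in between.
			 */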
9110			switch (insn->imm) {
9111			case BPF_FUNC_map_lookup_elem:
9112				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
9113					    __bpf_call_base;
9114				continue;
9115			case BPF_FUNC_map_update_elem:
9116				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
9117					    __bpf_call_base;
9118				continue;
9119			case BPF_FUNC_map_delete_elem:
9120				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
9121					    __bpf_call_base;
9122				continue;
9123			case BPF_FUNC_map_push_elem:
9124				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
9125					    __bpf_call_base;
9126				continue;
9127			case BPF_FUNC_map_pop_elem:
9128				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
9129					    __bpf_call_base;
9130				continue;
9131			case BPF_FUNC_map_peek_elem:
9132				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
9133					    __bpf_call_base;
9134				continue;
9135			}
9136
9137			goto patch_call_imm;
9138		}
9139
9140patch_call_imm:
9141		fn = env->ops->get_func_proto(insn->imm, env->prog);
9142		/* all functions that have a prototype and that the verifier
9143		 * allowed programs to call must be real in-kernel functions
9144		 */
9145		if (!fn->func) {
9146			verbose(env,
9147				"kernel subsystem misconfigured func %s#%d\n",
9148				func_id_name(insn->imm), insn->imm);
9149			return -EFAULT;
9150		}
9151		insn->imm = fn->func - __bpf_call_base;
9152	}
9153
9154	return 0;
9155}
9156
9157static void free_states(struct bpf_verifier_env *env)
9158{
9159	struct bpf_verifier_state_list *sl, *sln;
9160	int i;
9161
9162	sl = env->free_list;
9163	while (sl) {
9164		sln = sl->next;
9165		free_verifier_state(&sl->state, false);
9166		kfree(sl);
9167		sl = sln;
9168	}
9169
9170	if (!env->explored_states)
9171		return;
9172
9173	for (i = 0; i < state_htab_size(env); i++) {
9174		sl = env->explored_states[i];
9175
9176		while (sl) {
9177			sln = sl->next;
9178			free_verifier_state(&sl->state, false);
9179			kfree(sl);
9180			sl = sln;
9181		}
9182	}
9183
9184	kvfree(env->explored_states);
9185}
9186
9187static void print_verification_stats(struct bpf_verifier_env *env)
9188{
9189	int i;
9190
9191	if (env->log.level & BPF_LOG_STATS) {
9192		verbose(env, "verification time %lld usec\n",
9193			div_u64(env->verification_time, 1000));
9194		verbose(env, "stack depth ");
9195		for (i = 0; i < env->subprog_cnt; i++) {
9196			u32 depth = env->subprog_info[i].stack_depth;
9197
9198			verbose(env, "%d", depth);
9199			if (i + 1 < env->subprog_cnt)
9200				verbose(env, "+");
9201		}
9202		verbose(env, "\n");
9203	}
9204	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
9205		"total_states %d peak_states %d mark_read %d\n",
9206		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
9207		env->max_states_per_insn, env->total_states,
9208		env->peak_states, env->longest_mark_read_walk);
9209}
9210
9211int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
9212	      union bpf_attr __user *uattr)
9213{
9214	u64 start_time = ktime_get_ns();
9215	struct bpf_verifier_env *env;
9216	struct bpf_verifier_log *log;
9217	int i, len, ret = -EINVAL;
9218	bool is_priv;
9219
9220	/* no program is valid */
9221	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
9222		return -EINVAL;
9223
9224	/* 'struct bpf_verifier_env' can be global, but since it's not small,
9225	 * allocate/free it every time bpf_check() is called
9226	 */
9227	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
9228	if (!env)
9229		return -ENOMEM;
9230	log = &env->log;
9231
9232	len = (*prog)->len;
9233	env->insn_aux_data =
9234		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
9235	ret = -ENOMEM;
9236	if (!env->insn_aux_data)
9237		goto err_free_env;
9238	for (i = 0; i < len; i++)
9239		env->insn_aux_data[i].orig_idx = i;
9240	env->prog = *prog;
9241	env->ops = bpf_verifier_ops[env->prog->type];
9242	is_priv = capable(CAP_SYS_ADMIN);
9243
9244	/* grab the mutex to protect few globals used by verifier */
9245	if (!is_priv)
9246		mutex_lock(&bpf_verifier_lock);
9247
9248	if (attr->log_level || attr->log_buf || attr->log_size) {
9249		/* user requested verbose verifier output
9250		 * and supplied buffer to store the verification trace
9251		 */
9252		log->level = attr->log_level;
9253		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
9254		log->len_total = attr->log_size;
9255
9256		ret = -EINVAL;
9257		/* log attributes have to be sane */
9258		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
9259		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
9260			goto err_unlock;
9261	}
9262
9263	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
9264	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
9265		env->strict_alignment = true;
9266	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
9267		env->strict_alignment = false;
9268
9269	env->allow_ptr_leaks = is_priv;
9270
9271	if (is_priv)
9272		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
9273
9274	ret = replace_map_fd_with_map_ptr(env);
9275	if (ret < 0)
9276		goto skip_full_check;
9277
9278	if (bpf_prog_is_dev_bound(env->prog->aux)) {
9279		ret = bpf_prog_offload_verifier_prep(env->prog);
9280		if (ret)
9281			goto skip_full_check;
9282	}
9283
9284	env->explored_states = kvcalloc(state_htab_size(env),
9285				       sizeof(struct bpf_verifier_state_list *),
9286				       GFP_USER);
9287	ret = -ENOMEM;
9288	if (!env->explored_states)
9289		goto skip_full_check;
9290
9291	ret = check_subprogs(env);
9292	if (ret < 0)
9293		goto skip_full_check;
9294
9295	ret = check_btf_info(env, attr, uattr);
9296	if (ret < 0)
9297		goto skip_full_check;
9298
9299	ret = check_cfg(env);
9300	if (ret < 0)
9301		goto skip_full_check;
9302
9303	ret = do_check(env);
9304	if (env->cur_state) {
9305		free_verifier_state(env->cur_state, true);
9306		env->cur_state = NULL;
9307	}
9308
9309	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
9310		ret = bpf_prog_offload_finalize(env);
9311
9312skip_full_check:
9313	while (!pop_stack(env, NULL, NULL));
9314	free_states(env);
9315
9316	if (ret == 0)
9317		ret = check_max_stack_depth(env);
9318
9319	/* instruction rewrites happen after this point */
9320	if (is_priv) {
9321		if (ret == 0)
9322			opt_hard_wire_dead_code_branches(env);
9323		if (ret == 0)
9324			ret = opt_remove_dead_code(env);
9325		if (ret == 0)
9326			ret = opt_remove_nops(env);
9327	} else {
9328		if (ret == 0)
9329			sanitize_dead_code(env);
9330	}
9331
9332	if (ret == 0)
9333		/* program is valid, convert *(u32*)(ctx + off) accesses */
9334		ret = convert_ctx_accesses(env);
9335
9336	if (ret == 0)
9337		ret = fixup_bpf_calls(env);
9338
9339	/* do the 32-bit optimization after insn patching has finished, so that
9340	 * the patched insns are handled correctly.
9341	 */
9342	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
9343		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
9344		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
9345								     : false;
9346	}
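	/* Note: verifier_zext ends up true only when the JIT asked for
	 * explicit zero extension and the pass above succeeded, letting the
	 * JIT skip emitting its own zero extensions for 32-bit subreg writes.
	 */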
9347
9348	if (ret == 0)
9349		ret = fixup_call_args(env);
9350
9351	env->verification_time = ktime_get_ns() - start_time;
9352	print_verification_stats(env);
9353
9354	if (log->level && bpf_verifier_log_full(log))
9355		ret = -ENOSPC;
9356	if (log->level && !log->ubuf) {
9357		ret = -EFAULT;
9358		goto err_release_maps;
9359	}
9360
9361	if (ret == 0 && env->used_map_cnt) {
9362		/* if program passed verifier, update used_maps in bpf_prog_info */
9363		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
9364							  sizeof(env->used_maps[0]),
9365							  GFP_KERNEL);
9366
9367		if (!env->prog->aux->used_maps) {
9368			ret = -ENOMEM;
9369			goto err_release_maps;
9370		}
9371
9372		memcpy(env->prog->aux->used_maps, env->used_maps,
9373		       sizeof(env->used_maps[0]) * env->used_map_cnt);
9374		env->prog->aux->used_map_cnt = env->used_map_cnt;
9375
9376		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
9377		 * bpf_ld_imm64 instructions
9378		 */
9379		convert_pseudo_ld_imm64(env);
9380	}
9381
9382	if (ret == 0)
9383		adjust_btf_func(env);
9384
9385err_release_maps:
9386	if (!env->prog->aux->used_maps)
9387		/* if we didn't copy map pointers into bpf_prog_info, release
9388		 * them now. Otherwise free_used_maps() will release them.
9389		 */
9390		release_maps(env);
9391	*prog = env->prog;
9392err_unlock:
9393	if (!is_priv)
9394		mutex_unlock(&bpf_verifier_lock);
9395	vfree(env->insn_aux_data);
9396err_free_env:
9397	kfree(env);
9398	return ret;
9399}