   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * The back-end-agnostic part of Just-In-Time compiler for eBPF bytecode.
   4 *
   5 * Copyright (c) 2024 Synopsys Inc.
   6 * Author: Shahab Vahedi <shahab@synopsys.com>
   7 */
   8#include <linux/bug.h>
   9#include "bpf_jit.h"
  10
  11/*
  12 * Check for the return value. A pattern used often in this file.
  13 * There must be a "ret" variable of type "int" in the scope.
  14 */
  15#define CHECK_RET(cmd)			\
  16	do {				\
  17		ret = (cmd);		\
  18		if (ret < 0)		\
  19			return ret;	\
  20	} while (0)
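
/*
 * A minimal usage sketch (the same pattern recurs throughout this file,
 * e.g. in handle_prologue()): the enclosing function declares "int ret",
 * and any negative return value is propagated immediately:
 *
 *	int ret;
 *
 *	CHECK_RET(jit_buffer_check(ctx));
 *	...
 *	return 0;
 */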
  21
  22#ifdef ARC_BPF_JIT_DEBUG
  23/* Dumps bytes in /var/log/messages at KERN_INFO level (6). */
  24static void dump_bytes(const u8 *buf, u32 len, const char *header)
  25{
  26	u8 line[64];
  27	size_t i, j;
  28
  29	pr_info("-----------------[ %s ]-----------------\n", header);
  30
  31	for (i = 0, j = 0; i < len; i++) {
  32		/* Last input byte? */
  33		if (i == len - 1) {
  34			j += scnprintf(line + j, 64 - j, "0x%02x", buf[i]);
  35			pr_info("%s\n", line);
  36			break;
  37		}
  38		/* End of line? */
  39		else if (i % 8 == 7) {
  40			j += scnprintf(line + j, 64 - j, "0x%02x", buf[i]);
  41			pr_info("%s\n", line);
  42			j = 0;
  43		} else {
  44			j += scnprintf(line + j, 64 - j, "0x%02x, ", buf[i]);
  45		}
  46	}
  47}
  48#endif /* ARC_BPF_JIT_DEBUG */
  49
  50/********************* JIT context ***********************/
  51
  52/*
  53 * buf:		Translated instructions end up here.
  54 * len:		The length of whole block in bytes.
  55 * index:	The offset at which the _next_ instruction may be put.
  56 */
  57struct jit_buffer {
  58	u8	*buf;
  59	u32	len;
  60	u32	index;
  61};
  62
  63/*
  64 * This is a subset of "struct jit_context" whose information is deemed
  65 * necessary for the next extra pass to come.
  66 *
  67 * bpf_header:	Needed to finally lock the region.
  68 * bpf2insn:	Used to find the translation for instructions of interest.
  69 *
  70 * Things like "jit.buf" and "jit.len" can be retrieved respectively from
  71 * "prog->bpf_func" and "prog->jited_len".
  72 */
  73struct arc_jit_data {
  74	struct bpf_binary_header *bpf_header;
  75	u32                      *bpf2insn;
  76};
  77
  78/*
  79 * The JIT pertinent context that is used by different functions.
  80 *
  81 * prog:		The current eBPF program being handled.
  82 * orig_prog:		The original eBPF program before any possible change.
  83 * jit:			The JIT buffer and its length.
  84 * bpf_header:		The JITed program header. "jit.buf" points inside it.
  85 * emit:		If set, opcodes are written to memory; else, a dry-run.
  86 * do_zext:		If true, 32-bit sub-regs must be zero extended.
  87 * bpf2insn:		Maps BPF insn indices to their counterparts in jit.buf.
  88 * bpf2insn_valid:	Indicates if "bpf2insn" is populated with the mappings.
  89 * jit_data:		A piece of memory to transfer data to the next pass.
  90 * arc_regs_clobbered:	Each set bit means the corresponding ARC reg is clobbered.
  91 * save_blink:		Whether ARC's "blink" register needs to be saved.
  92 * frame_size:		Derived from "prog->aux->stack_depth".
  93 * epilogue_offset:	Used by early "return"s in the code to jump here.
  94 * need_extra_pass:	A forecast if an "extra_pass" will occur.
  95 * is_extra_pass:	Indicates if the current pass is an extra pass.
  96 * user_bpf_prog:	True, if VM opcodes come from a real program.
  97 * blinded:		True if "constant blinding" step returned a new "prog".
  98 * success:		Indicates if the whole JIT went OK.
  99 */
 100struct jit_context {
 101	struct bpf_prog			*prog;
 102	struct bpf_prog			*orig_prog;
 103	struct jit_buffer		jit;
 104	struct bpf_binary_header	*bpf_header;
 105	bool				emit;
 106	bool				do_zext;
 107	u32				*bpf2insn;
 108	bool				bpf2insn_valid;
 109	struct arc_jit_data		*jit_data;
 110	u32				arc_regs_clobbered;
 111	bool				save_blink;
 112	u16				frame_size;
 113	u32				epilogue_offset;
 114	bool				need_extra_pass;
 115	bool				is_extra_pass;
 116	bool				user_bpf_prog;
 117	bool				blinded;
 118	bool				success;
 119};
 120
 121/*
 122 * If we're in ARC_BPF_JIT_DEBUG mode and the debug level is right, dump the
 123 * input BPF stream. "bpf_jit_dump()" is not fully suited for this purpose.
 124 */
 125static void vm_dump(const struct bpf_prog *prog)
 126{
 127#ifdef ARC_BPF_JIT_DEBUG
 128	if (bpf_jit_enable > 1)
 129		dump_bytes((u8 *)prog->insns, 8 * prog->len, " VM  ");
 130#endif
 131}
 132
 133/*
 134 * If the right level of debug is set, dump the bytes. There are 2 variants
 135 * of this function:
 136 *
 137 * 1. Use the standard bpf_jit_dump(), which is meant only for JITed code.
 138 * 2. Use dump_bytes() to match its "vm_dump()" counterpart.
 139 */
 140static void jit_dump(const struct jit_context *ctx)
 141{
 142#ifdef ARC_BPF_JIT_DEBUG
 143	u8 header[8];
 144#endif
 145	const int pass = ctx->is_extra_pass ? 2 : 1;
 146
 147	if (bpf_jit_enable <= 1 || !ctx->prog->jited)
 148		return;
 149
 150#ifdef ARC_BPF_JIT_DEBUG
 151	scnprintf(header, sizeof(header), "JIT:%d", pass);
 152	dump_bytes(ctx->jit.buf, ctx->jit.len, header);
 153	pr_info("\n");
 154#else
 155	bpf_jit_dump(ctx->prog->len, ctx->jit.len, pass, ctx->jit.buf);
 156#endif
 157}
 158
 159/* Initialise the context so there's no garbage. */
 160static int jit_ctx_init(struct jit_context *ctx, struct bpf_prog *prog)
 161{
 162	memset(ctx, 0, sizeof(*ctx));
 163
 164	ctx->orig_prog = prog;
 165
 166	/* If constant blinding was requested but failed, scram. */
 167	ctx->prog = bpf_jit_blind_constants(prog);
 168	if (IS_ERR(ctx->prog))
 169		return PTR_ERR(ctx->prog);
 170	ctx->blinded = (ctx->prog != ctx->orig_prog);
 171
 172	/* If the verifier doesn't zero-extend, then we have to do it. */
 173	ctx->do_zext = !ctx->prog->aux->verifier_zext;
 174
 175	ctx->is_extra_pass = ctx->prog->jited;
 176	ctx->user_bpf_prog = ctx->prog->is_func;
 177
 178	return 0;
 179}
 180
 181/*
 182 * Only after the first iteration of the normal pass (the dry-run)
 183 * are there valid offsets in the ctx->bpf2insn array.
 184 */
 185static inline bool offsets_available(const struct jit_context *ctx)
 186{
 187	return ctx->bpf2insn_valid;
 188}
 189
 190/*
 191 * "*mem" should be freed when there is no "extra pass" to come,
 192 * or the compilation has terminated abruptly. Examples of such
 193 * allocations are ctx->jit_data and ctx->bpf2insn.
 194 */
 195static inline void maybe_free(struct jit_context *ctx, void **mem)
 196{
 197	if (*mem) {
 198		if (!ctx->success || !ctx->need_extra_pass) {
 199			kfree(*mem);
 200			*mem = NULL;
 201		}
 202	}
 203}
 204
 205/*
 206 * Free memories based on the status of the context.
 207 *
 208 * A note about "bpf_header": On successful runs, "bpf_header" is
 209 * not freed, because "jit.buf", a sub-array of it, is returned as
 210 * the "bpf_func". However, "bpf_header" is lost and nothing points
 211 * to it. This should not cause a leakage, because apparently
 212 * "bpf_header" can be revived by "bpf_jit_binary_hdr()". This is
 213 * how "bpf_jit_free()" in "kernel/bpf/core.c" releases the memory.
 214 */
 215static void jit_ctx_cleanup(struct jit_context *ctx)
 216{
 217	if (ctx->blinded) {
 218		/* if all went well, release the orig_prog. */
 219		if (ctx->success)
 220			bpf_jit_prog_release_other(ctx->prog, ctx->orig_prog);
 221		else
 222			bpf_jit_prog_release_other(ctx->orig_prog, ctx->prog);
 223	}
 224
 225	maybe_free(ctx, (void **)&ctx->bpf2insn);
 226	maybe_free(ctx, (void **)&ctx->jit_data);
 227
 228	if (!ctx->bpf2insn)
 229		ctx->bpf2insn_valid = false;
 230
 231	/* Freeing "bpf_header" is enough. "jit.buf" is a sub-array of it. */
 232	if (!ctx->success && ctx->bpf_header) {
 233		bpf_jit_binary_free(ctx->bpf_header);
 234		ctx->bpf_header = NULL;
 235		ctx->jit.buf    = NULL;
 236		ctx->jit.index  = 0;
 237		ctx->jit.len    = 0;
 238	}
 239
 240	ctx->emit = false;
 241	ctx->do_zext = false;
 242}
 243
 244/*
 245 * Analyse the register usage and record the frame size.
 246 * The register usage is determined by consulting the back-end.
 247 */
 248static void analyze_reg_usage(struct jit_context *ctx)
 249{
 250	size_t i;
 251	u32 usage = 0;
 252	const struct bpf_insn *insn = ctx->prog->insnsi;
 253
 254	for (i = 0; i < ctx->prog->len; i++) {
 255		u8 bpf_reg;
 256		bool call;
 257
 258		bpf_reg = insn[i].dst_reg;
 259		call = (insn[i].code == (BPF_JMP | BPF_CALL));
 260		usage |= mask_for_used_regs(bpf_reg, call);
 261	}
 262
 263	ctx->arc_regs_clobbered = usage;
 264	ctx->frame_size = ctx->prog->aux->stack_depth;
 265}
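
/*
 * For illustration (the BPF-to-ARC register mapping and the exact bits are
 * back-end decisions, so the details here are hypothetical): if a program
 * only ever writes BPF r0 and makes no calls, "usage" ends up with only the
 * bits of the ARC registers backing r0, and arc_prologue()/arc_epilogue()
 * save and restore just those. The "call" flag lets mask_for_used_regs()
 * take the calling convention into account as well.
 */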
 266
 267/* Verify that no instruction will be emitted when there is no buffer. */
 268static inline int jit_buffer_check(const struct jit_context *ctx)
 269{
 270	if (ctx->emit) {
 271		if (!ctx->jit.buf) {
  272			pr_err("bpf-jit: inconsistent state; no "
 273			       "buffer to emit instructions.\n");
 274			return -EINVAL;
 275		} else if (ctx->jit.index > ctx->jit.len) {
 276			pr_err("bpf-jit: estimated JIT length is less "
 277			       "than the emitted instructions.\n");
 278			return -EFAULT;
 279		}
 280	}
 281	return 0;
 282}
 283
  284/* On a dry-run (emit=false), "jit.len" grows gradually. */
 285static inline void jit_buffer_update(struct jit_context *ctx, u32 n)
 286{
 287	if (!ctx->emit)
 288		ctx->jit.len += n;
 289	else
 290		ctx->jit.index += n;
 291}
 292
 293/* Based on "emit", determine the address where instructions are emitted. */
 294static inline u8 *effective_jit_buf(const struct jit_context *ctx)
 295{
 296	return ctx->emit ? (ctx->jit.buf + ctx->jit.index) : NULL;
 297}
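
/*
 * A sketch of how the two phases use this (assuming the back-end emitters
 * only measure lengths when handed a NULL buffer):
 *
 *	ctx->emit = false;		// dry-run
 *	buf = effective_jit_buf(ctx);	// NULL
 *	len = arc_prologue(buf, ...);	// nothing is written, only counted
 *	jit_buffer_update(ctx, len);	// "jit.len" grows
 *
 *	ctx->emit = true;		// real compilation
 *	buf = effective_jit_buf(ctx);	// jit.buf + jit.index
 *	len = arc_prologue(buf, ...);	// opcodes land at "buf"
 *	jit_buffer_update(ctx, len);	// "jit.index" advances
 */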
 298
 299/* Prologue based on context variables set by "analyze_reg_usage()". */
 300static int handle_prologue(struct jit_context *ctx)
 301{
 302	int ret;
 303	u8 *buf = effective_jit_buf(ctx);
 304	u32 len = 0;
 305
 306	CHECK_RET(jit_buffer_check(ctx));
 307
 308	len = arc_prologue(buf, ctx->arc_regs_clobbered, ctx->frame_size);
 309	jit_buffer_update(ctx, len);
 310
 311	return 0;
 312}
 313
 314/* The counterpart of "handle_prologue()". */
 315static int handle_epilogue(struct jit_context *ctx)
 316{
 317	int ret;
 318	u8 *buf = effective_jit_buf(ctx);
 319	u32 len = 0;
 320
 321	CHECK_RET(jit_buffer_check(ctx));
 322
 323	len = arc_epilogue(buf, ctx->arc_regs_clobbered, ctx->frame_size);
 324	jit_buffer_update(ctx, len);
 325
 326	return 0;
 327}
 328
 329/* Tell the index of the BPF instruction we are dealing with. */
 330static inline s32 get_index_for_insn(const struct jit_context *ctx,
 331				     const struct bpf_insn *insn)
 332{
 333	return (insn - ctx->prog->insnsi);
 334}
 335
 336/*
 337 * In most cases, the "offset" is read from "insn->off". However,
 338 * if it is an unconditional BPF_JMP32, then it comes from "insn->imm".
 339 *
 340 * (Courtesy of "cpu=v4" support)
 341 */
 342static inline s32 get_offset(const struct bpf_insn *insn)
 343{
 344	if ((BPF_CLASS(insn->code) == BPF_JMP32) &&
 345	    (BPF_OP(insn->code) == BPF_JA))
 346		return insn->imm;
 347	else
 348		return insn->off;
 349}
 350
 351/*
 352 * Determine the index of the BPF instruction we're jumping to.
 353 *
 354 * The "offset" is interpreted as the "number" of BPF instructions
 355 * from the _next_ BPF instruction. e.g.:
 356 *
 357 *  4 means 4 instructions after  the next insn
 358 *  0 means 0 instructions after  the next insn -> fallthrough.
 359 * -1 means 1 instruction  before the next insn -> jmp to current insn.
 360 *
 361 *  Another way to look at this: "offset" is the number of instructions
 362 *  that exist between the current instruction and the target instruction.
 363 *
 364 *  It is worth noting that a "mov r,i64", which is 16-byte long, is
 365 *  treated as two instructions long, therefore "offset" needn't be
 366 *  treated specially for those. Everything is uniform.
 367 */
 368static inline s32 get_target_index_for_insn(const struct jit_context *ctx,
 369					    const struct bpf_insn *insn)
 370{
 371	return (get_index_for_insn(ctx, insn) + 1) + get_offset(insn);
 372}
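
/*
 * A worked example of the arithmetic above: for a jump at BPF index 10
 * with an offset of 4, the target index is (10 + 1) + 4 = 15; with an
 * offset of -1, it is (10 + 1) - 1 = 10, i.e. a jump to the insn itself.
 */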
 373
 374/* Is there an immediate operand encoded in the "insn"? */
 375static inline bool has_imm(const struct bpf_insn *insn)
 376{
 377	return BPF_SRC(insn->code) == BPF_K;
 378}
 379
 380/* Is this the last BPF instruction? */
 381static inline bool is_last_insn(const struct bpf_prog *prog, u32 idx)
 382{
 383	return idx == (prog->len - 1);
 384}
 385
 386/*
 387 * Invocation of this function conditionally signals the need for
 388 * an extra pass. The conditions that must be met are:
 389 *
 390 * 1. The current pass itself shouldn't be an extra pass.
 391 * 2. The stream of bytes being JITed must come from a user program.
 392 */
 393static inline void set_need_for_extra_pass(struct jit_context *ctx)
 394{
 395	if (!ctx->is_extra_pass)
 396		ctx->need_extra_pass = ctx->user_bpf_prog;
 397}
 398
 399/*
 400 * Check if the "size" is valid and then transfer control to
 401 * the back-end for the swap.
 402 */
 403static int handle_swap(u8 *buf, u8 rd, u8 size, u8 endian,
 404		       bool force, bool do_zext, u8 *len)
 405{
 406	/* Sanity check on the size. */
 407	switch (size) {
 408	case 16:
 409	case 32:
 410	case 64:
 411		break;
 412	default:
 413		pr_err("bpf-jit: invalid size for swap.\n");
 414		return -EINVAL;
 415	}
 416
 417	*len = gen_swap(buf, rd, size, endian, force, do_zext);
 418
 419	return 0;
 420}
 421
 422/* Checks if the (instruction) index is in valid range. */
 423static inline bool check_insn_idx_valid(const struct jit_context *ctx,
 424					const s32 idx)
 425{
 426	return (idx >= 0 && idx < ctx->prog->len);
 427}
 428
 429/*
 430 * Decouple the back-end from BPF by converting BPF conditions
 431 * to an internal enum. ARC_CC_* values start from 0 and are used as
 432 * indices into an array. BPF_J* usage must end after this conversion.
 433 */
 434static int bpf_cond_to_arc(const u8 op, u8 *arc_cc)
 435{
 436	switch (op) {
 437	case BPF_JA:
 438		*arc_cc = ARC_CC_AL;
 439		break;
 440	case BPF_JEQ:
 441		*arc_cc = ARC_CC_EQ;
 442		break;
 443	case BPF_JGT:
 444		*arc_cc = ARC_CC_UGT;
 445		break;
 446	case BPF_JGE:
 447		*arc_cc = ARC_CC_UGE;
 448		break;
 449	case BPF_JSET:
 450		*arc_cc = ARC_CC_SET;
 451		break;
 452	case BPF_JNE:
 453		*arc_cc = ARC_CC_NE;
 454		break;
 455	case BPF_JSGT:
 456		*arc_cc = ARC_CC_SGT;
 457		break;
 458	case BPF_JSGE:
 459		*arc_cc = ARC_CC_SGE;
 460		break;
 461	case BPF_JLT:
 462		*arc_cc = ARC_CC_ULT;
 463		break;
 464	case BPF_JLE:
 465		*arc_cc = ARC_CC_ULE;
 466		break;
 467	case BPF_JSLT:
 468		*arc_cc = ARC_CC_SLT;
 469		break;
 470	case BPF_JSLE:
 471		*arc_cc = ARC_CC_SLE;
 472		break;
 473	default:
 474		pr_err("bpf-jit: can't handle condition 0x%02X\n", op);
 475		return -EINVAL;
 476	}
 477	return 0;
 478}
 479
 480/*
 481 * Check a few things for a supposedly "jump" instruction:
 482 *
 483 * 0. "insn" is a "jump" instruction, but not the "call/exit" variant.
 484 * 1. The current "insn" index is in valid range.
 485 * 2. The index of target instruction is in valid range.
 486 */
 487static int check_bpf_jump(const struct jit_context *ctx,
 488			  const struct bpf_insn *insn)
 489{
 490	const u8 class = BPF_CLASS(insn->code);
 491	const u8 op = BPF_OP(insn->code);
 492
 493	/* Must be a jmp(32) instruction that is not a "call/exit". */
 494	if ((class != BPF_JMP && class != BPF_JMP32) ||
 495	    (op == BPF_CALL || op == BPF_EXIT)) {
 496		pr_err("bpf-jit: not a jump instruction.\n");
 497		return -EINVAL;
 498	}
 499
 500	if (!check_insn_idx_valid(ctx, get_index_for_insn(ctx, insn))) {
 501		pr_err("bpf-jit: the bpf jump insn is not in prog.\n");
 502		return -EINVAL;
 503	}
 504
 505	if (!check_insn_idx_valid(ctx, get_target_index_for_insn(ctx, insn))) {
 506		pr_err("bpf-jit: bpf jump label is out of range.\n");
 507		return -EINVAL;
 508	}
 509
 510	return 0;
 511}
 512
 513/*
 514 * Based on input "insn", consult "ctx->bpf2insn" to get the
 515 * related index (offset) of the translation in the JIT stream.
 516 */
 517static u32 get_curr_jit_off(const struct jit_context *ctx,
 518			    const struct bpf_insn *insn)
 519{
 520	const s32 idx = get_index_for_insn(ctx, insn);
 521#ifdef ARC_BPF_JIT_DEBUG
 522	BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, idx));
 523#endif
 524	return ctx->bpf2insn[idx];
 525}
 526
 527/*
 528 * The input "insn" must be a jump instruction.
 529 *
 530 * Based on input "insn", consult "ctx->bpf2insn" to get the
 531 * related JIT index (offset) of "target instruction" that
 532 * "insn" would jump to.
 533 */
 534static u32 get_targ_jit_off(const struct jit_context *ctx,
 535			    const struct bpf_insn *insn)
 536{
 537	const s32 tidx = get_target_index_for_insn(ctx, insn);
 538#ifdef ARC_BPF_JIT_DEBUG
 539	BUG_ON(!offsets_available(ctx) || !check_insn_idx_valid(ctx, tidx));
 540#endif
 541	return ctx->bpf2insn[tidx];
 542}
 543
 544/*
 545 * This function will return 0 for a feasible jump.
 546 *
 547 * Consult the back-end to check if it finds it feasible to emit
 548 * the necessary instructions based on "cond" and the displacement
 549 * between the "from_off" and the "to_off".
 550 */
 551static int feasible_jit_jump(u32 from_off, u32 to_off, u8 cond, bool j32)
 552{
 553	int ret = 0;
 554
 555	if (j32) {
 556		if (!check_jmp_32(from_off, to_off, cond))
 557			ret = -EFAULT;
 558	} else {
 559		if (!check_jmp_64(from_off, to_off, cond))
 560			ret = -EFAULT;
 561	}
 562
 563	if (ret != 0)
 564		pr_err("bpf-jit: the JIT displacement is not OK.\n");
 565
 566	return ret;
 567}
 568
 569/*
 570 * This jump handler performs the following steps:
 571 *
 572 * 1. Compute ARC's internal condition code from BPF's
 573 * 2. Determine the bitness of the operation (32 vs. 64)
 574 * 3. Sanity check on BPF stream
 575 * 4. Sanity check on what is supposed to be JIT's displacement
 576 * 5. And finally, emit the necessary instructions
 577 *
 578 * The last two steps are performed through the back-end.
 579 * The values from steps 1 and 2 are necessary inputs for the back-end.
 580 */
 581static int handle_jumps(const struct jit_context *ctx,
 582			const struct bpf_insn *insn,
 583			u8 *len)
 584{
 585	u8 cond;
 586	int ret = 0;
 587	u8 *buf = effective_jit_buf(ctx);
 588	const bool j32 = (BPF_CLASS(insn->code) == BPF_JMP32);
 589	const u8 rd = insn->dst_reg;
 590	u8 rs = insn->src_reg;
 591	u32 curr_off = 0, targ_off = 0;
 592
 593	*len = 0;
 594
 595	/* Map the BPF condition to internal enum. */
 596	CHECK_RET(bpf_cond_to_arc(BPF_OP(insn->code), &cond));
 597
 598	/* Sanity check on the BPF byte stream. */
 599	CHECK_RET(check_bpf_jump(ctx, insn));
 600
 601	/*
 602	 * Move the immediate into a temporary register _now_ for 2 reasons:
 603	 *
 604	 * 1. "gen_jmp_{32,64}()" deal with operands in registers.
 605	 *
 606	 * 2. The "len" parameter will grow so that the current jit offset
 607	 *    (curr_off) will have increased to a point where the necessary
 608	 *    instructions can be inserted by "gen_jmp_{32,64}()".
 609	 */
 610	if (has_imm(insn) && cond != ARC_CC_AL) {
 611		if (j32) {
 612			*len += mov_r32_i32(BUF(buf, *len), JIT_REG_TMP,
 613					    insn->imm);
 614		} else {
 615			*len += mov_r64_i32(BUF(buf, *len), JIT_REG_TMP,
 616					    insn->imm);
 617		}
 618		rs = JIT_REG_TMP;
 619	}
 620
 621	/* If the offsets are known, check if the branch can occur. */
 622	if (offsets_available(ctx)) {
 623		curr_off = get_curr_jit_off(ctx, insn) + *len;
 624		targ_off = get_targ_jit_off(ctx, insn);
 625
 626		/* Sanity check on the back-end side. */
 627		CHECK_RET(feasible_jit_jump(curr_off, targ_off, cond, j32));
 628	}
 629
 630	if (j32) {
 631		*len += gen_jmp_32(BUF(buf, *len), rd, rs, cond,
 632				   curr_off, targ_off);
 633	} else {
 634		*len += gen_jmp_64(BUF(buf, *len), rd, rs, cond,
 635				   curr_off, targ_off);
 636	}
 637
 638	return ret;
 639}
 640
 641/* Jump to translated epilogue address. */
 642static int handle_jmp_epilogue(struct jit_context *ctx,
 643			       const struct bpf_insn *insn, u8 *len)
 644{
 645	u8 *buf = effective_jit_buf(ctx);
 646	u32 curr_off = 0, epi_off = 0;
 647
 648	/* Check the offset only if the data is available. */
 649	if (offsets_available(ctx)) {
 650		curr_off = get_curr_jit_off(ctx, insn);
 651		epi_off = ctx->epilogue_offset;
 652
 653		if (!check_jmp_64(curr_off, epi_off, ARC_CC_AL)) {
 654			pr_err("bpf-jit: epilogue offset is not valid.\n");
 655			return -EINVAL;
 656		}
 657	}
 658
 659	/* Jump to "epilogue offset" (rd and rs don't matter). */
 660	*len = gen_jmp_64(buf, 0, 0, ARC_CC_AL, curr_off, epi_off);
 661
 662	return 0;
 663}
 664
 665/* Try to get the resolved address and generate the instructions. */
 666static int handle_call(struct jit_context *ctx,
 667		       const struct bpf_insn *insn,
 668		       u8 *len)
 669{
 670	int  ret;
 671	bool in_kernel_func, fixed = false;
 672	u64  addr = 0;
 673	u8  *buf = effective_jit_buf(ctx);
 674
 675	ret = bpf_jit_get_func_addr(ctx->prog, insn, ctx->is_extra_pass,
 676				    &addr, &fixed);
 677	if (ret < 0) {
 678		pr_err("bpf-jit: can't get the address for call.\n");
 679		return ret;
 680	}
 681	in_kernel_func = fixed;
 682
 683	/* No usable address retrieved (yet). */
 684	if (!fixed && !addr)
 685		set_need_for_extra_pass(ctx);
 686
 687	*len = gen_func_call(buf, (ARC_ADDR)addr, in_kernel_func);
 688
 689	if (insn->src_reg != BPF_PSEUDO_CALL) {
 690		/* Assigning ABI's return reg to JIT's return reg. */
 691		*len += arc_to_bpf_return(BUF(buf, *len));
 692	}
 693
 694	return 0;
 695}
 696
 697/*
 698 * Try to generate instructions for loading a 64-bit immediate.
 699 * This sort of instruction is usually associated with the 64-bit
 700 * relocation R_BPF_64_64. Therefore, signal the need for an extra
 701 * pass if the circumstances are right.
 702 */
 703static int handle_ld_imm64(struct jit_context *ctx,
 704			   const struct bpf_insn *insn,
 705			   u8 *len)
 706{
 707	const s32 idx = get_index_for_insn(ctx, insn);
 708	u8 *buf = effective_jit_buf(ctx);
 709
 710	/* We're about to consume 2 VM instructions. */
 711	if (is_last_insn(ctx->prog, idx)) {
 712		pr_err("bpf-jit: need more data for 64-bit immediate.\n");
 713		return -EINVAL;
 714	}
 715
 716	*len = mov_r64_i64(buf, insn->dst_reg, insn->imm, (insn + 1)->imm);
 717
 718	if (bpf_pseudo_func(insn))
 719		set_need_for_extra_pass(ctx);
 720
 721	return 0;
 722}
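
/*
 * As an example of how the two VM instructions are consumed: loading the
 * 64-bit constant 0x1122334455667788 into r1 arrives as a
 * "BPF_LD | BPF_DW | BPF_IMM" insn with imm = 0x55667788, followed by a
 * second insn whose imm = 0x11223344; mov_r64_i64() receives both halves
 * at once.
 */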
 723
 724/*
 725 * Handles one eBPF instruction at a time. To keep this function fast,
 726 * it does not call "jit_buffer_check()"; otherwise, the check would run
 727 * for every instruction. As a result, it should not be invoked directly.
 728 * Only "handle_body()", which has already performed the check, may call
 729 * this function.
 730 *
 731 * If the "ret" value is negative, something has gone wrong. Otherwise,
 732 * it mostly holds the value 0 and rarely 1. A return value of 1 signals
 733 * the loop in "handle_body()" to skip the next instruction, because it
 734 * has been consumed as part of a 64-bit immediate value.
 735 */
 736static int handle_insn(struct jit_context *ctx, u32 idx)
 737{
 738	const struct bpf_insn *insn = &ctx->prog->insnsi[idx];
 739	const u8  code = insn->code;
 740	const u8  dst  = insn->dst_reg;
 741	const u8  src  = insn->src_reg;
 742	const s16 off  = insn->off;
 743	const s32 imm  = insn->imm;
 744	u8 *buf = effective_jit_buf(ctx);
 745	u8  len = 0;
 746	int ret = 0;
 747
 748	switch (code) {
 749	/* dst += src (32-bit) */
 750	case BPF_ALU | BPF_ADD | BPF_X:
 751		len = add_r32(buf, dst, src);
 752		break;
 753	/* dst += imm (32-bit) */
 754	case BPF_ALU | BPF_ADD | BPF_K:
 755		len = add_r32_i32(buf, dst, imm);
 756		break;
 757	/* dst -= src (32-bit) */
 758	case BPF_ALU | BPF_SUB | BPF_X:
 759		len = sub_r32(buf, dst, src);
 760		break;
 761	/* dst -= imm (32-bit) */
 762	case BPF_ALU | BPF_SUB | BPF_K:
 763		len = sub_r32_i32(buf, dst, imm);
 764		break;
 765	/* dst = -dst (32-bit) */
 766	case BPF_ALU | BPF_NEG:
 767		len = neg_r32(buf, dst);
 768		break;
 769	/* dst *= src (32-bit) */
 770	case BPF_ALU | BPF_MUL | BPF_X:
 771		len = mul_r32(buf, dst, src);
 772		break;
 773	/* dst *= imm (32-bit) */
 774	case BPF_ALU | BPF_MUL | BPF_K:
 775		len = mul_r32_i32(buf, dst, imm);
 776		break;
 777	/* dst /= src (32-bit) */
 778	case BPF_ALU | BPF_DIV | BPF_X:
 779		len = div_r32(buf, dst, src, off == 1);
 780		break;
 781	/* dst /= imm (32-bit) */
 782	case BPF_ALU | BPF_DIV | BPF_K:
 783		len = div_r32_i32(buf, dst, imm, off == 1);
 784		break;
 785	/* dst %= src (32-bit) */
 786	case BPF_ALU | BPF_MOD | BPF_X:
 787		len = mod_r32(buf, dst, src, off == 1);
 788		break;
 789	/* dst %= imm (32-bit) */
 790	case BPF_ALU | BPF_MOD | BPF_K:
 791		len = mod_r32_i32(buf, dst, imm, off == 1);
 792		break;
 793	/* dst &= src (32-bit) */
 794	case BPF_ALU | BPF_AND | BPF_X:
 795		len = and_r32(buf, dst, src);
 796		break;
 797	/* dst &= imm (32-bit) */
 798	case BPF_ALU | BPF_AND | BPF_K:
 799		len = and_r32_i32(buf, dst, imm);
 800		break;
 801	/* dst |= src (32-bit) */
 802	case BPF_ALU | BPF_OR | BPF_X:
 803		len = or_r32(buf, dst, src);
 804		break;
 805	/* dst |= imm (32-bit) */
 806	case BPF_ALU | BPF_OR | BPF_K:
 807		len = or_r32_i32(buf, dst, imm);
 808		break;
 809	/* dst ^= src (32-bit) */
 810	case BPF_ALU | BPF_XOR | BPF_X:
 811		len = xor_r32(buf, dst, src);
 812		break;
 813	/* dst ^= imm (32-bit) */
 814	case BPF_ALU | BPF_XOR | BPF_K:
 815		len = xor_r32_i32(buf, dst, imm);
 816		break;
 817	/* dst <<= src (32-bit) */
 818	case BPF_ALU | BPF_LSH | BPF_X:
 819		len = lsh_r32(buf, dst, src);
 820		break;
 821	/* dst <<= imm (32-bit) */
 822	case BPF_ALU | BPF_LSH | BPF_K:
 823		len = lsh_r32_i32(buf, dst, imm);
 824		break;
 825	/* dst >>= src (32-bit) [unsigned] */
 826	case BPF_ALU | BPF_RSH | BPF_X:
 827		len = rsh_r32(buf, dst, src);
 828		break;
 829	/* dst >>= imm (32-bit) [unsigned] */
 830	case BPF_ALU | BPF_RSH | BPF_K:
 831		len = rsh_r32_i32(buf, dst, imm);
 832		break;
 833	/* dst >>= src (32-bit) [signed] */
 834	case BPF_ALU | BPF_ARSH | BPF_X:
 835		len = arsh_r32(buf, dst, src);
 836		break;
 837	/* dst >>= imm (32-bit) [signed] */
 838	case BPF_ALU | BPF_ARSH | BPF_K:
 839		len = arsh_r32_i32(buf, dst, imm);
 840		break;
 841	/* dst = src (32-bit) */
 842	case BPF_ALU | BPF_MOV | BPF_X:
 843		len = mov_r32(buf, dst, src, (u8)off);
 844		break;
 845	/* dst = imm32 (32-bit) */
 846	case BPF_ALU | BPF_MOV | BPF_K:
 847		len = mov_r32_i32(buf, dst, imm);
 848		break;
 849	/* dst = swap(dst) */
 850	case BPF_ALU   | BPF_END | BPF_FROM_LE:
 851	case BPF_ALU   | BPF_END | BPF_FROM_BE:
 852	case BPF_ALU64 | BPF_END | BPF_FROM_LE: {
 853		CHECK_RET(handle_swap(buf, dst, imm, BPF_SRC(code),
 854				      BPF_CLASS(code) == BPF_ALU64,
 855				      ctx->do_zext, &len));
 856		break;
 857	}
 858	/* dst += src (64-bit) */
 859	case BPF_ALU64 | BPF_ADD | BPF_X:
 860		len = add_r64(buf, dst, src);
 861		break;
 862	/* dst += imm32 (64-bit) */
 863	case BPF_ALU64 | BPF_ADD | BPF_K:
 864		len = add_r64_i32(buf, dst, imm);
 865		break;
 866	/* dst -= src (64-bit) */
 867	case BPF_ALU64 | BPF_SUB | BPF_X:
 868		len = sub_r64(buf, dst, src);
 869		break;
 870	/* dst -= imm32 (64-bit) */
 871	case BPF_ALU64 | BPF_SUB | BPF_K:
 872		len = sub_r64_i32(buf, dst, imm);
 873		break;
 874	/* dst = -dst (64-bit) */
 875	case BPF_ALU64 | BPF_NEG:
 876		len = neg_r64(buf, dst);
 877		break;
 878	/* dst *= src (64-bit) */
 879	case BPF_ALU64 | BPF_MUL | BPF_X:
 880		len = mul_r64(buf, dst, src);
 881		break;
 882	/* dst *= imm32 (64-bit) */
 883	case BPF_ALU64 | BPF_MUL | BPF_K:
 884		len = mul_r64_i32(buf, dst, imm);
 885		break;
 886	/* dst &= src (64-bit) */
 887	case BPF_ALU64 | BPF_AND | BPF_X:
 888		len = and_r64(buf, dst, src);
 889		break;
 890	/* dst &= imm32 (64-bit) */
 891	case BPF_ALU64 | BPF_AND | BPF_K:
 892		len = and_r64_i32(buf, dst, imm);
 893		break;
 894	/* dst |= src (64-bit) */
 895	case BPF_ALU64 | BPF_OR | BPF_X:
 896		len = or_r64(buf, dst, src);
 897		break;
 898	/* dst |= imm32 (64-bit) */
 899	case BPF_ALU64 | BPF_OR | BPF_K:
 900		len = or_r64_i32(buf, dst, imm);
 901		break;
 902	/* dst ^= src (64-bit) */
 903	case BPF_ALU64 | BPF_XOR | BPF_X:
 904		len = xor_r64(buf, dst, src);
 905		break;
 906	/* dst ^= imm32 (64-bit) */
 907	case BPF_ALU64 | BPF_XOR | BPF_K:
 908		len = xor_r64_i32(buf, dst, imm);
 909		break;
 910	/* dst <<= src (64-bit) */
 911	case BPF_ALU64 | BPF_LSH | BPF_X:
 912		len = lsh_r64(buf, dst, src);
 913		break;
 914	/* dst <<= imm32 (64-bit) */
 915	case BPF_ALU64 | BPF_LSH | BPF_K:
 916		len = lsh_r64_i32(buf, dst, imm);
 917		break;
 918	/* dst >>= src (64-bit) [unsigned] */
 919	case BPF_ALU64 | BPF_RSH | BPF_X:
 920		len = rsh_r64(buf, dst, src);
 921		break;
 922	/* dst >>= imm32 (64-bit) [unsigned] */
 923	case BPF_ALU64 | BPF_RSH | BPF_K:
 924		len = rsh_r64_i32(buf, dst, imm);
 925		break;
 926	/* dst >>= src (64-bit) [signed] */
 927	case BPF_ALU64 | BPF_ARSH | BPF_X:
 928		len = arsh_r64(buf, dst, src);
 929		break;
 930	/* dst >>= imm32 (64-bit) [signed] */
 931	case BPF_ALU64 | BPF_ARSH | BPF_K:
 932		len = arsh_r64_i32(buf, dst, imm);
 933		break;
 934	/* dst = src (64-bit) */
 935	case BPF_ALU64 | BPF_MOV | BPF_X:
 936		len = mov_r64(buf, dst, src, (u8)off);
 937		break;
 938	/* dst = imm32 (sign extend to 64-bit) */
 939	case BPF_ALU64 | BPF_MOV | BPF_K:
 940		len = mov_r64_i32(buf, dst, imm);
 941		break;
 942	/* dst = imm64 */
 943	case BPF_LD | BPF_DW | BPF_IMM:
 944		CHECK_RET(handle_ld_imm64(ctx, insn, &len));
 945		/* Tell the loop to skip the next instruction. */
 946		ret = 1;
 947		break;
 948	/* dst = *(size *)(src + off) */
 949	case BPF_LDX | BPF_MEM | BPF_W:
 950	case BPF_LDX | BPF_MEM | BPF_H:
 951	case BPF_LDX | BPF_MEM | BPF_B:
 952	case BPF_LDX | BPF_MEM | BPF_DW:
 953		len = load_r(buf, dst, src, off, BPF_SIZE(code), false);
 954		break;
 955	case BPF_LDX | BPF_MEMSX | BPF_W:
 956	case BPF_LDX | BPF_MEMSX | BPF_H:
 957	case BPF_LDX | BPF_MEMSX | BPF_B:
 958		len = load_r(buf, dst, src, off, BPF_SIZE(code), true);
 959		break;
 960	/* *(size *)(dst + off) = src */
 961	case BPF_STX | BPF_MEM | BPF_W:
 962	case BPF_STX | BPF_MEM | BPF_H:
 963	case BPF_STX | BPF_MEM | BPF_B:
 964	case BPF_STX | BPF_MEM | BPF_DW:
 965		len = store_r(buf, src, dst, off, BPF_SIZE(code));
 966		break;
 967	case BPF_ST | BPF_MEM | BPF_W:
 968	case BPF_ST | BPF_MEM | BPF_H:
 969	case BPF_ST | BPF_MEM | BPF_B:
 970	case BPF_ST | BPF_MEM | BPF_DW:
 971		len = store_i(buf, imm, dst, off, BPF_SIZE(code));
 972		break;
 973	case BPF_JMP   | BPF_JA:
 974	case BPF_JMP   | BPF_JEQ  | BPF_X:
 975	case BPF_JMP   | BPF_JEQ  | BPF_K:
 976	case BPF_JMP   | BPF_JNE  | BPF_X:
 977	case BPF_JMP   | BPF_JNE  | BPF_K:
 978	case BPF_JMP   | BPF_JSET | BPF_X:
 979	case BPF_JMP   | BPF_JSET | BPF_K:
 980	case BPF_JMP   | BPF_JGT  | BPF_X:
 981	case BPF_JMP   | BPF_JGT  | BPF_K:
 982	case BPF_JMP   | BPF_JGE  | BPF_X:
 983	case BPF_JMP   | BPF_JGE  | BPF_K:
 984	case BPF_JMP   | BPF_JSGT | BPF_X:
 985	case BPF_JMP   | BPF_JSGT | BPF_K:
 986	case BPF_JMP   | BPF_JSGE | BPF_X:
 987	case BPF_JMP   | BPF_JSGE | BPF_K:
 988	case BPF_JMP   | BPF_JLT  | BPF_X:
 989	case BPF_JMP   | BPF_JLT  | BPF_K:
 990	case BPF_JMP   | BPF_JLE  | BPF_X:
 991	case BPF_JMP   | BPF_JLE  | BPF_K:
 992	case BPF_JMP   | BPF_JSLT | BPF_X:
 993	case BPF_JMP   | BPF_JSLT | BPF_K:
 994	case BPF_JMP   | BPF_JSLE | BPF_X:
 995	case BPF_JMP   | BPF_JSLE | BPF_K:
 996	case BPF_JMP32 | BPF_JA:
 997	case BPF_JMP32 | BPF_JEQ  | BPF_X:
 998	case BPF_JMP32 | BPF_JEQ  | BPF_K:
 999	case BPF_JMP32 | BPF_JNE  | BPF_X:
1000	case BPF_JMP32 | BPF_JNE  | BPF_K:
1001	case BPF_JMP32 | BPF_JSET | BPF_X:
1002	case BPF_JMP32 | BPF_JSET | BPF_K:
1003	case BPF_JMP32 | BPF_JGT  | BPF_X:
1004	case BPF_JMP32 | BPF_JGT  | BPF_K:
1005	case BPF_JMP32 | BPF_JGE  | BPF_X:
1006	case BPF_JMP32 | BPF_JGE  | BPF_K:
1007	case BPF_JMP32 | BPF_JSGT | BPF_X:
1008	case BPF_JMP32 | BPF_JSGT | BPF_K:
1009	case BPF_JMP32 | BPF_JSGE | BPF_X:
1010	case BPF_JMP32 | BPF_JSGE | BPF_K:
1011	case BPF_JMP32 | BPF_JLT  | BPF_X:
1012	case BPF_JMP32 | BPF_JLT  | BPF_K:
1013	case BPF_JMP32 | BPF_JLE  | BPF_X:
1014	case BPF_JMP32 | BPF_JLE  | BPF_K:
1015	case BPF_JMP32 | BPF_JSLT | BPF_X:
1016	case BPF_JMP32 | BPF_JSLT | BPF_K:
1017	case BPF_JMP32 | BPF_JSLE | BPF_X:
1018	case BPF_JMP32 | BPF_JSLE | BPF_K:
1019		CHECK_RET(handle_jumps(ctx, insn, &len));
1020		break;
1021	case BPF_JMP | BPF_CALL:
1022		CHECK_RET(handle_call(ctx, insn, &len));
1023		break;
1024
1025	case BPF_JMP | BPF_EXIT:
1026		/* If this is the last instruction, epilogue will follow. */
1027		if (is_last_insn(ctx->prog, idx))
1028			break;
1029		CHECK_RET(handle_jmp_epilogue(ctx, insn, &len));
1030		break;
1031	default:
1032		pr_err("bpf-jit: can't handle instruction code 0x%02X\n", code);
1033		return -EOPNOTSUPP;
1034	}
1035
1036	if (BPF_CLASS(code) == BPF_ALU) {
1037		/*
1038		 * Skip the "swap" instructions. Even 64-bit swaps are of type
1039		 * BPF_ALU (and not BPF_ALU64). Therefore, for the swaps, one
1040		 * has to look at the "size" of the operations rather than the
1041		 * ALU type. "gen_swap()" specifically takes care of that.
1042		 */
1043		if (BPF_OP(code) != BPF_END && ctx->do_zext)
1044			len += zext(BUF(buf, len), dst);
1045	}
1046
1047	jit_buffer_update(ctx, len);
1048
1049	return ret;
1050}
1051
1052static int handle_body(struct jit_context *ctx)
1053{
1054	int ret;
1055	bool populate_bpf2insn = false;
1056	const struct bpf_prog *prog = ctx->prog;
1057
1058	CHECK_RET(jit_buffer_check(ctx));
1059
1060	/*
1061	 * Record the mapping for the instructions during the dry-run.
1062	 * Doing it this way allows us to have the mapping ready for
1063	 * the jump instructions during the real compilation phase.
1064	 */
1065	if (!ctx->emit)
1066		populate_bpf2insn = true;
1067
1068	for (u32 i = 0; i < prog->len; i++) {
1069		/* During the dry-run, jit.len grows gradually per BPF insn. */
1070		if (populate_bpf2insn)
1071			ctx->bpf2insn[i] = ctx->jit.len;
1072
1073		CHECK_RET(handle_insn(ctx, i));
1074		if (ret > 0) {
1075			/* "ret" is 1 if two (64-bit) chunks were consumed. */
1076			ctx->bpf2insn[i + 1] = ctx->bpf2insn[i];
1077			i++;
1078		}
1079	}
1080
1081	/* If bpf2insn had to be populated, then it is done at this point. */
1082	if (populate_bpf2insn)
1083		ctx->bpf2insn_valid = true;
1084
1085	return 0;
1086}
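
/*
 * To illustrate the mapping built above (the JIT offsets are hypothetical,
 * since the real lengths come from the back-end), a three-insn program
 * could end up with:
 *
 *	[0] r0 = 1	->	bpf2insn[0] = 0
 *	[1] r0 += r1	->	bpf2insn[1] = 8
 *	[2] exit	->	bpf2insn[2] = 12
 *
 * A jump being translated at insn 0 can then look up bpf2insn[2] to learn
 * the JIT offset of its target.
 */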
1087
1088/*
1089 * Initialize the memory with "unimp_s", which is the mnemonic for the
1090 * "unimplemented" instruction and always raises an exception.
1091 *
1092 * The instruction is 2 bytes. If "size" is odd, there is not much
1093 * that can be done about the last byte in "area", because the CPU
1094 * always fetches instructions in units of two bytes. Therefore, the
1095 * byte beyond the last one is going to accompany it during a
1096 * possible fetch. In the most likely case of a little-endian
1097 * system, that beyond-byte will become the major opcode and
1098 * we have no control over its initialisation.
1099 */
1100static void fill_ill_insn(void *area, unsigned int size)
1101{
1102	const u16 unimp_s = 0x79e0;
1103
1104	if (size & 1) {
1105		*((u8 *)area + (size - 1)) = 0xff;
1106		size -= 1;
1107	}
1108
1109	memset16(area, unimp_s, size >> 1);
1110}
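
/*
 * For example, with size = 5 on a little-endian ARC, the area becomes the
 * bytes "e0 79 e0 79 ff": two "unimp_s" encodings followed by the odd
 * trailing byte that cannot hold a complete 2-byte instruction.
 */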
1111
1112/* Piece of memory that can be allocated at the beginning of jit_prepare(). */
1113static int jit_prepare_early_mem_alloc(struct jit_context *ctx)
1114{
1115	ctx->bpf2insn = kcalloc(ctx->prog->len, sizeof(ctx->jit.len),
1116				GFP_KERNEL);
1117
1118	if (!ctx->bpf2insn) {
1119		pr_err("bpf-jit: could not allocate memory for "
1120		       "mapping of the instructions.\n");
1121		return -ENOMEM;
1122	}
1123
1124	return 0;
1125}
1126
1127/*
1128 * Memory allocations that rely on parameters known at the end of
1129 * jit_prepare().
1130 */
1131static int jit_prepare_final_mem_alloc(struct jit_context *ctx)
1132{
1133	const size_t alignment = sizeof(u32);
1134
1135	ctx->bpf_header = bpf_jit_binary_alloc(ctx->jit.len, &ctx->jit.buf,
1136					       alignment, fill_ill_insn);
1137	if (!ctx->bpf_header) {
1138		pr_err("bpf-jit: could not allocate memory for translation.\n");
1139		return -ENOMEM;
1140	}
1141
1142	if (ctx->need_extra_pass) {
1143		ctx->jit_data = kzalloc(sizeof(*ctx->jit_data), GFP_KERNEL);
1144		if (!ctx->jit_data)
1145			return -ENOMEM;
1146	}
1147
1148	return 0;
1149}
1150
1151/*
1152 * The first phase of the translation without actually emitting any
1153 * instruction. It helps in getting a forecast on some aspects, such
1154 * as the length of the whole program or where the epilogue starts.
1155 *
1156 * Whenever the necessary parameters are known, memory is allocated.
1157 */
1158static int jit_prepare(struct jit_context *ctx)
1159{
1160	int ret;
1161
1162	/* Dry run. */
1163	ctx->emit = false;
1164
1165	CHECK_RET(jit_prepare_early_mem_alloc(ctx));
1166
1167	/* Get the length of prologue section after some register analysis. */
1168	analyze_reg_usage(ctx);
1169	CHECK_RET(handle_prologue(ctx));
1170
1171	CHECK_RET(handle_body(ctx));
1172
1173	/* Record at which offset epilogue begins. */
1174	ctx->epilogue_offset = ctx->jit.len;
1175
1176	/* Process the epilogue section now. */
1177	CHECK_RET(handle_epilogue(ctx));
1178
1179	CHECK_RET(jit_prepare_final_mem_alloc(ctx));
1180
1181	return 0;
1182}
1183
1184/*
1185 * jit_compile() is the real compilation phase. jit_prepare() is
1186 * invoked before jit_compile() as a dry-run to make sure everything
1187 * will go OK and allocate the necessary memory.
1188 *
1189 * In the end, jit_compile() checks if it has produced the same number
1190 * of bytes as jit_prepare() estimated.
1191 */
1192static int jit_compile(struct jit_context *ctx)
1193{
1194	int ret;
1195
1196	/* Let there be code. */
1197	ctx->emit = true;
1198
1199	CHECK_RET(handle_prologue(ctx));
1200
1201	CHECK_RET(handle_body(ctx));
1202
1203	CHECK_RET(handle_epilogue(ctx));
1204
1205	if (ctx->jit.index != ctx->jit.len) {
1206		pr_err("bpf-jit: divergence between the phases; "
1207		       "%u vs. %u (bytes).\n",
1208		       ctx->jit.len, ctx->jit.index);
1209		return -EFAULT;
1210	}
1211
1212	return 0;
1213}
1214
1215/*
1216 * Calling this function implies a successful JIT. A successful
1217 * translation is signaled by setting the right parameters:
1218 *
1219 * prog->jited=1, prog->jited_len=..., prog->bpf_func=...
1220 */
1221static int jit_finalize(struct jit_context *ctx)
1222{
1223	struct bpf_prog *prog = ctx->prog;
1224
1225	/* We're going to need this information for the "do_extra_pass()". */
1226	if (ctx->need_extra_pass) {
1227		ctx->jit_data->bpf_header = ctx->bpf_header;
1228		ctx->jit_data->bpf2insn = ctx->bpf2insn;
1229		prog->aux->jit_data = (void *)ctx->jit_data;
1230	} else {
1231		/*
1232		 * If things seem finalised, then mark the JITed memory
1233		 * as R-X and flush it.
1234		 */
1235		if (bpf_jit_binary_lock_ro(ctx->bpf_header)) {
1236			pr_err("bpf-jit: Could not lock the JIT memory.\n");
1237			return -EFAULT;
1238		}
1239		flush_icache_range((unsigned long)ctx->bpf_header,
1240				   (unsigned long)
1241				   BUF(ctx->jit.buf, ctx->jit.len));
1242		prog->aux->jit_data = NULL;
1243		bpf_prog_fill_jited_linfo(prog, ctx->bpf2insn);
1244	}
1245
1246	ctx->success = true;
1247	prog->bpf_func = (void *)ctx->jit.buf;
1248	prog->jited_len = ctx->jit.len;
1249	prog->jited = 1;
1250
1251	jit_ctx_cleanup(ctx);
1252	jit_dump(ctx);
1253
1254	return 0;
1255}
1256
1257/*
1258 * A lenient verification for the existence of JIT context in "prog".
1259 * Apparently the JIT internals, namely jit_subprogs() in bpf/verifier.c,
1260 * may request a second compilation although nothing needs to be done.
1261 */
1262static inline int check_jit_context(const struct bpf_prog *prog)
1263{
1264	if (!prog->aux->jit_data) {
1265		pr_notice("bpf-jit: no jit data for the extra pass.\n");
1266		return 1;
1267	} else {
1268		return 0;
1269	}
1270}
1271
1272/* Reuse the previous pass's data. */
1273static int jit_resume_context(struct jit_context *ctx)
1274{
1275	struct arc_jit_data *jdata =
1276		(struct arc_jit_data *)ctx->prog->aux->jit_data;
1277
1278	if (!jdata) {
1279		pr_err("bpf-jit: no jit data for the extra pass.\n");
1280		return -EINVAL;
1281	}
1282
1283	ctx->jit.buf = (u8 *)ctx->prog->bpf_func;
1284	ctx->jit.len = ctx->prog->jited_len;
1285	ctx->bpf_header = jdata->bpf_header;
1286	ctx->bpf2insn = (u32 *)jdata->bpf2insn;
1287	ctx->bpf2insn_valid = ctx->bpf2insn ? true : false;
1288	ctx->jit_data = jdata;
1289
1290	return 0;
1291}
1292
1293/*
1294 * Patch in the new addresses. The instructions of interest are:
1295 *
1296 * - call
1297 * - ld r64, imm64
1298 *
1299 * For "call"s, it resolves the addresses one more time through
1300 * handle_call().
1301 *
1302 * For 64-bit immediate loads, it just retranslates them, because the BPF
1303 * core in the kernel might have changed the value since the normal pass.
1304 */
1305static int jit_patch_relocations(struct jit_context *ctx)
1306{
1307	const u8 bpf_opc_call = BPF_JMP | BPF_CALL;
1308	const u8 bpf_opc_ldi64 = BPF_LD | BPF_DW | BPF_IMM;
1309	const struct bpf_prog *prog = ctx->prog;
1310	int ret;
1311
1312	ctx->emit = true;
1313	for (u32 i = 0; i < prog->len; i++) {
1314		const struct bpf_insn *insn = &prog->insnsi[i];
1315		u8 dummy;
1316		/*
1317		 * Adjust "ctx.jit.index", so "gen_*()" functions below
1318		 * can use it for their output addresses.
1319		 */
1320		ctx->jit.index = ctx->bpf2insn[i];
1321
1322		if (insn->code == bpf_opc_call) {
1323			CHECK_RET(handle_call(ctx, insn, &dummy));
1324		} else if (insn->code == bpf_opc_ldi64) {
1325			CHECK_RET(handle_ld_imm64(ctx, insn, &dummy));
1326			/* Skip the next instruction. */
1327			++i;
1328		}
1329	}
1330	return 0;
1331}
1332
1333/*
1334 * A normal pass that involves a "dry-run" phase, jit_prepare(),
1335 * to get the necessary data for the real compilation phase,
1336 * jit_compile().
1337 */
1338static struct bpf_prog *do_normal_pass(struct bpf_prog *prog)
1339{
1340	struct jit_context ctx;
1341
1342	/* Bail out if JIT is disabled. */
1343	if (!prog->jit_requested)
1344		return prog;
1345
1346	if (jit_ctx_init(&ctx, prog)) {
1347		jit_ctx_cleanup(&ctx);
1348		return prog;
1349	}
1350
1351	/* Get the lengths and allocate buffer. */
1352	if (jit_prepare(&ctx)) {
1353		jit_ctx_cleanup(&ctx);
1354		return prog;
1355	}
1356
1357	if (jit_compile(&ctx)) {
1358		jit_ctx_cleanup(&ctx);
1359		return prog;
1360	}
1361
1362	if (jit_finalize(&ctx)) {
1363		jit_ctx_cleanup(&ctx);
1364		return prog;
1365	}
1366
1367	return ctx.prog;
1368}
1369
1370/*
1371 * If there are multi-function BPF programs that call each other,
1372 * their translated addresses are not known all at once. Therefore,
1373 * an extra pass is needed to consult the bpf_jit_get_func_addr()
1374 * again to get the newly translated addresses in order to resolve
1375 * the "call"s.
1376 */
1377static struct bpf_prog *do_extra_pass(struct bpf_prog *prog)
1378{
1379	struct jit_context ctx;
1380
1381	/* Skip if there's no context to resume from. */
1382	if (check_jit_context(prog))
1383		return prog;
1384
1385	if (jit_ctx_init(&ctx, prog)) {
1386		jit_ctx_cleanup(&ctx);
1387		return prog;
1388	}
1389
1390	if (jit_resume_context(&ctx)) {
1391		jit_ctx_cleanup(&ctx);
1392		return prog;
1393	}
1394
1395	if (jit_patch_relocations(&ctx)) {
1396		jit_ctx_cleanup(&ctx);
1397		return prog;
1398	}
1399
1400	if (jit_finalize(&ctx)) {
1401		jit_ctx_cleanup(&ctx);
1402		return prog;
1403	}
1404
1405	return ctx.prog;
1406}
1407
1408/*
1409 * This function may be invoked twice for the same stream of BPF
1410 * instructions. The "extra pass" happens when there are
1411 * (re)locations involved whose addresses are not known
1412 * during the first run.
1413 */
1414struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1415{
1416	vm_dump(prog);
1417
1418	/* Was this program already translated? */
1419	if (!prog->jited)
1420		return do_normal_pass(prog);
1421	else
1422		return do_extra_pass(prog);
1423
1424	return prog;
1425}
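
/*
 * Putting it all together, a rough timeline for a program with BPF-to-BPF
 * calls looks like this:
 *
 *	bpf_int_jit_compile(prog)	// prog->jited == 0
 *	  do_normal_pass()
 *	    jit_prepare()		// dry-run: lengths, offsets, allocations
 *	    jit_compile()		// real emission
 *	    jit_finalize()		// keeps jit_data for the extra pass
 *
 *	bpf_int_jit_compile(prog)	// invoked again, e.g. by jit_subprogs()
 *	  do_extra_pass()
 *	    jit_resume_context()
 *	    jit_patch_relocations()	// re-resolve calls and 64-bit immediates
 *	    jit_finalize()		// locks the memory and flushes the icache
 */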