   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * Just-In-Time compiler for eBPF bytecode on MIPS.
   4 * Implementation of JIT functions for 32-bit CPUs.
   5 *
   6 * Copyright (c) 2021 Anyfi Networks AB.
   7 * Author: Johan Almbladh <johan.almbladh@gmail.com>
   8 *
   9 * Based on code and ideas from
  10 * Copyright (c) 2017 Cavium, Inc.
  11 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
  12 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
  13 */
  14
  15#include <linux/math64.h>
  16#include <linux/errno.h>
  17#include <linux/filter.h>
  18#include <linux/bpf.h>
  19#include <asm/cpu-features.h>
  20#include <asm/isa-rev.h>
  21#include <asm/uasm.h>
  22
  23#include "bpf_jit_comp.h"
  24
  25/* MIPS a4-a7 are not available in the o32 ABI */
  26#undef MIPS_R_A4
  27#undef MIPS_R_A5
  28#undef MIPS_R_A6
  29#undef MIPS_R_A7
  30
  31/* Stack is 8-byte aligned in o32 ABI */
  32#define MIPS_STACK_ALIGNMENT 8
  33
  34/*
   35 * The top 16 bytes of a stack frame are reserved for the callee in the o32 ABI.
  36 * This corresponds to stack space for register arguments a0-a3.
  37 */
  38#define JIT_RESERVED_STACK 16
  39
  40/* Temporary 64-bit register used by JIT */
  41#define JIT_REG_TMP MAX_BPF_JIT_REG
  42
  43/*
  44 * Number of prologue bytes to skip when doing a tail call.
  45 * Tail call count (TCC) initialization (8 bytes) always, plus
   46 * the a0-to-R1 context argument copy (4 bytes) if big endian.
  47 */
  48#ifdef __BIG_ENDIAN
  49#define JIT_TCALL_SKIP 12
  50#else
  51#define JIT_TCALL_SKIP 8
  52#endif
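/*
 * The skipped instructions are the first ones emitted by build_prologue()
 * below:
 *
 *   ori  t9, zero, MAX_TAIL_CALL_CNT    # TCC init, 8 bytes
 *   sw   t9, 0(sp)
 *   move a1, a0                         # big endian only, 4 bytes
 */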
  53
  54/* CPU registers holding the callee return value */
  55#define JIT_RETURN_REGS	  \
  56	(BIT(MIPS_R_V0) | \
  57	 BIT(MIPS_R_V1))
  58
   59/* CPU register arguments passed directly to the callee */
  60#define JIT_ARG_REGS      \
  61	(BIT(MIPS_R_A0) | \
  62	 BIT(MIPS_R_A1) | \
  63	 BIT(MIPS_R_A2) | \
  64	 BIT(MIPS_R_A3))
  65
   66/* CPU register arguments passed to the callee on the stack */
  67#define JIT_STACK_REGS    \
  68	(BIT(MIPS_R_T0) | \
  69	 BIT(MIPS_R_T1) | \
  70	 BIT(MIPS_R_T2) | \
  71	 BIT(MIPS_R_T3) | \
  72	 BIT(MIPS_R_T4) | \
  73	 BIT(MIPS_R_T5))
  74
  75/* Caller-saved CPU registers */
  76#define JIT_CALLER_REGS    \
  77	(JIT_RETURN_REGS | \
  78	 JIT_ARG_REGS    | \
  79	 JIT_STACK_REGS)
  80
  81/* Callee-saved CPU registers */
  82#define JIT_CALLEE_REGS   \
  83	(BIT(MIPS_R_S0) | \
  84	 BIT(MIPS_R_S1) | \
  85	 BIT(MIPS_R_S2) | \
  86	 BIT(MIPS_R_S3) | \
  87	 BIT(MIPS_R_S4) | \
  88	 BIT(MIPS_R_S5) | \
  89	 BIT(MIPS_R_S6) | \
  90	 BIT(MIPS_R_S7) | \
  91	 BIT(MIPS_R_GP) | \
  92	 BIT(MIPS_R_FP) | \
  93	 BIT(MIPS_R_RA))
  94
  95/*
  96 * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
  97 *
  98 * 1) Native register pairs are ordered according to CPU endianness, following
  99 *    the MIPS convention for passing 64-bit arguments and return values.
 100 * 2) The eBPF return value, arguments and callee-saved registers are mapped
 101 *    to their native MIPS equivalents.
 102 * 3) Since the 32 highest bits in the eBPF FP register are always zero,
 103 *    only one general-purpose register is actually needed for the mapping.
 104 *    We use the fp register for this purpose, and map the highest bits to
 105 *    the MIPS register r0 (zero).
 106 * 4) We use the MIPS gp and at registers as internal temporary registers
 107 *    for constant blinding. The gp register is callee-saved.
 108 * 5) One 64-bit temporary register is mapped for use when sign-extending
 109 *    immediate operands. MIPS registers t6-t9 are available to the JIT
  110 *    for use as temporaries when implementing complex 64-bit operations.
 111 *
  112 * With this scheme all eBPF registers are mapped to native MIPS
 113 * registers without having to use any stack scratch space. The direct
 114 * register mapping (2) simplifies the handling of function calls.
 115 */
 116static const u8 bpf2mips32[][2] = {
 117	/* Return value from in-kernel function, and exit value from eBPF */
 118	[BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
 119	/* Arguments from eBPF program to in-kernel function */
 120	[BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
 121	[BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
  122	/* Remaining arguments, to be passed on the stack per the o32 ABI */
 123	[BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
 124	[BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
 125	[BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
 126	/* Callee-saved registers that in-kernel function will preserve */
 127	[BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
 128	[BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
 129	[BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
 130	[BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
 131	/* Read-only frame pointer to access the eBPF stack */
 132#ifdef __BIG_ENDIAN
 133	[BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
 134#else
 135	[BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
 136#endif
 137	/* Temporary register for blinding constants */
 138	[BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
 139	/* Temporary register for internal JIT use */
 140	[JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
 141};
 142
 143/* Get low CPU register for a 64-bit eBPF register mapping */
 144static inline u8 lo(const u8 reg[])
 145{
 146#ifdef __BIG_ENDIAN
 147	return reg[0];
 148#else
 149	return reg[1];
 150#endif
 151}
 152
 153/* Get high CPU register for a 64-bit eBPF register mapping */
 154static inline u8 hi(const u8 reg[])
 155{
 156#ifdef __BIG_ENDIAN
 157	return reg[1];
 158#else
 159	return reg[0];
 160#endif
 161}
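/*
 * For example, bpf2mips32[BPF_REG_0] is {v1, v0}, so on big endian lo()
 * yields v1 and hi() yields v0, while on little endian lo() yields v0
 * and hi() yields v1.
 */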
 162
 163/*
  164 * Mark a 64-bit CPU register pair as clobbered; it needs to be
 165 * saved/restored by the program if callee-saved.
 166 */
 167static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
 168{
 169	clobber_reg(ctx, reg[0]);
 170	clobber_reg(ctx, reg[1]);
 171}
 172
 173/* dst = imm (sign-extended) */
 174static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
 175{
 176	emit_mov_i(ctx, lo(dst), imm);
 177	if (imm < 0)
 178		emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
 179	else
 180		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 181	clobber_reg64(ctx, dst);
 182}
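/* e.g. imm == -5 yields lo(dst) = 0xfffffffb and hi(dst) = 0xffffffff */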
 183
  184/* Zero extension, if the verifier does not do it for us */
 185static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
 186{
 187	if (!ctx->program->aux->verifier_zext) {
 188		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 189		clobber_reg(ctx, hi(dst));
 190	}
 191}
 192
 193/* Load delay slot, if ISA mandates it */
 194static void emit_load_delay(struct jit_context *ctx)
 195{
 196	if (!cpu_has_mips_2_3_4_5_r)
 197		emit(ctx, nop);
 198}
 199
 200/* ALU immediate operation (64-bit) */
 201static void emit_alu_i64(struct jit_context *ctx,
 202			 const u8 dst[], s32 imm, u8 op)
 203{
 204	u8 src = MIPS_R_T6;
 205
 206	/*
  207	 * ADD/SUB with all but the most negative imm (S32_MIN) can be
  208	 * handled by inverting the operation and the imm value, saving one insn.
 209	 */
 210	if (imm > S32_MIN && imm < 0)
 211		switch (op) {
 212		case BPF_ADD:
 213			op = BPF_SUB;
 214			imm = -imm;
 215			break;
 216		case BPF_SUB:
 217			op = BPF_ADD;
 218			imm = -imm;
 219			break;
 220		}
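	/* e.g. BPF_ADD with imm == -5 is thus emitted as BPF_SUB with imm == 5 */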
 221
 222	/* Move immediate to temporary register */
 223	emit_mov_i(ctx, src, imm);
 224
 225	switch (op) {
 226	/* dst = dst + imm */
 227	case BPF_ADD:
 228		emit(ctx, addu, lo(dst), lo(dst), src);
 229		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
 230		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
 231		if (imm < 0)
 232			emit(ctx, addiu, hi(dst), hi(dst), -1);
 233		break;
 234	/* dst = dst - imm */
 235	case BPF_SUB:
 236		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
 237		emit(ctx, subu, lo(dst), lo(dst), src);
 238		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
 239		if (imm < 0)
 240			emit(ctx, addiu, hi(dst), hi(dst), 1);
 241		break;
 242	/* dst = dst | imm */
 243	case BPF_OR:
 244		emit(ctx, or, lo(dst), lo(dst), src);
 245		if (imm < 0)
 246			emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
 247		break;
 248	/* dst = dst & imm */
 249	case BPF_AND:
 250		emit(ctx, and, lo(dst), lo(dst), src);
 251		if (imm >= 0)
 252			emit(ctx, move, hi(dst), MIPS_R_ZERO);
 253		break;
 254	/* dst = dst ^ imm */
 255	case BPF_XOR:
 256		emit(ctx, xor, lo(dst), lo(dst), src);
 257		if (imm < 0) {
 258			emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
 259			emit(ctx, addiu, hi(dst), hi(dst), -1);
 260		}
 261		break;
 262	}
 263	clobber_reg64(ctx, dst);
 264}
 265
 266/* ALU register operation (64-bit) */
 267static void emit_alu_r64(struct jit_context *ctx,
 268			 const u8 dst[], const u8 src[], u8 op)
 269{
 270	switch (BPF_OP(op)) {
 271	/* dst = dst + src */
 272	case BPF_ADD:
 273		if (src == dst) {
 274			emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
 275			emit(ctx, addu, lo(dst), lo(dst), lo(dst));
 276		} else {
 277			emit(ctx, addu, lo(dst), lo(dst), lo(src));
 278			emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
 279		}
 280		emit(ctx, addu, hi(dst), hi(dst), hi(src));
 281		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
 282		break;
 283	/* dst = dst - src */
 284	case BPF_SUB:
 285		emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
 286		emit(ctx, subu, lo(dst), lo(dst), lo(src));
 287		emit(ctx, subu, hi(dst), hi(dst), hi(src));
 288		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
 289		break;
 290	/* dst = dst | src */
 291	case BPF_OR:
 292		emit(ctx, or, lo(dst), lo(dst), lo(src));
 293		emit(ctx, or, hi(dst), hi(dst), hi(src));
 294		break;
 295	/* dst = dst & src */
 296	case BPF_AND:
 297		emit(ctx, and, lo(dst), lo(dst), lo(src));
 298		emit(ctx, and, hi(dst), hi(dst), hi(src));
 299		break;
 300	/* dst = dst ^ src */
 301	case BPF_XOR:
 302		emit(ctx, xor, lo(dst), lo(dst), lo(src));
 303		emit(ctx, xor, hi(dst), hi(dst), hi(src));
 304		break;
 305	}
 306	clobber_reg64(ctx, dst);
 307}
 308
  309/* ALU negate (64-bit) */
 310static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
 311{
 312	emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
 313	emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
 314	emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
 315	emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
 316
 317	clobber_reg64(ctx, dst);
 318}
 319
 320/* ALU shift immediate (64-bit) */
 321static void emit_shift_i64(struct jit_context *ctx,
 322			   const u8 dst[], u32 imm, u8 op)
 323{
 324	switch (BPF_OP(op)) {
 325	/* dst = dst << imm */
 326	case BPF_LSH:
 327		if (imm < 32) {
 328			emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
 329			emit(ctx, sll, lo(dst), lo(dst), imm);
 330			emit(ctx, sll, hi(dst), hi(dst), imm);
 331			emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
 332		} else {
 333			emit(ctx, sll, hi(dst), lo(dst), imm - 32);
 334			emit(ctx, move, lo(dst), MIPS_R_ZERO);
 335		}
 336		break;
 337	/* dst = dst >> imm */
 338	case BPF_RSH:
 339		if (imm < 32) {
 340			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
 341			emit(ctx, srl, lo(dst), lo(dst), imm);
 342			emit(ctx, srl, hi(dst), hi(dst), imm);
 343			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
 344		} else {
 345			emit(ctx, srl, lo(dst), hi(dst), imm - 32);
 346			emit(ctx, move, hi(dst), MIPS_R_ZERO);
 347		}
 348		break;
 349	/* dst = dst >> imm (arithmetic) */
 350	case BPF_ARSH:
 351		if (imm < 32) {
 352			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
 353			emit(ctx, srl, lo(dst), lo(dst), imm);
 354			emit(ctx, sra, hi(dst), hi(dst), imm);
 355			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
 356		} else {
 357			emit(ctx, sra, lo(dst), hi(dst), imm - 32);
 358			emit(ctx, sra, hi(dst), hi(dst), 31);
 359		}
 360		break;
 361	}
 362	clobber_reg64(ctx, dst);
 363}
 364
 365/* ALU shift register (64-bit) */
 366static void emit_shift_r64(struct jit_context *ctx,
 367			   const u8 dst[], u8 src, u8 op)
 368{
 369	u8 t1 = MIPS_R_T8;
 370	u8 t2 = MIPS_R_T9;
 371
 372	emit(ctx, andi, t1, src, 32);              /* t1 = src & 32          */
 373	emit(ctx, beqz, t1, 16);                   /* PC += 16 if t1 == 0    */
 374	emit(ctx, nor, t2, src, MIPS_R_ZERO);      /* t2 = ~src (delay slot) */
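	/*
	 * Bit 5 of the shift amount selects between the two arms in each
	 * case below. In the shift < 32 arms, the bits crossing the word
	 * boundary are moved by a shift of (32 - src): a constant shift
	 * by 1 followed by a variable shift by t2, since variable shifts
	 * use only the five low bits of their shift-amount operand
	 * (~src & 31 == 31 - (src & 31)).
	 */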
 375
 376	switch (BPF_OP(op)) {
 377	/* dst = dst << src */
 378	case BPF_LSH:
 379		/* Next: shift >= 32 */
 380		emit(ctx, sllv, hi(dst), lo(dst), src);    /* dh = dl << src */
 381		emit(ctx, move, lo(dst), MIPS_R_ZERO);     /* dl = 0         */
 382		emit(ctx, b, 20);                          /* PC += 20       */
 383		/* +16: shift < 32 */
 384		emit(ctx, srl, t1, lo(dst), 1);            /* t1 = dl >> 1   */
 385		emit(ctx, srlv, t1, t1, t2);               /* t1 = t1 >> t2  */
 386		emit(ctx, sllv, lo(dst), lo(dst), src);    /* dl = dl << src */
 387		emit(ctx, sllv, hi(dst), hi(dst), src);    /* dh = dh << src */
 388		emit(ctx, or, hi(dst), hi(dst), t1);       /* dh = dh | t1   */
 389		break;
 390	/* dst = dst >> src */
 391	case BPF_RSH:
 392		/* Next: shift >= 32 */
 393		emit(ctx, srlv, lo(dst), hi(dst), src);    /* dl = dh >> src */
 394		emit(ctx, move, hi(dst), MIPS_R_ZERO);     /* dh = 0         */
 395		emit(ctx, b, 20);                          /* PC += 20       */
 396		/* +16: shift < 32 */
  397		emit(ctx, sll, t1, hi(dst), 1);            /* t1 = dh << 1   */
 398		emit(ctx, sllv, t1, t1, t2);               /* t1 = t1 << t2  */
 399		emit(ctx, srlv, lo(dst), lo(dst), src);    /* dl = dl >> src */
 400		emit(ctx, srlv, hi(dst), hi(dst), src);    /* dh = dh >> src */
 401		emit(ctx, or, lo(dst), lo(dst), t1);       /* dl = dl | t1   */
 402		break;
 403	/* dst = dst >> src (arithmetic) */
 404	case BPF_ARSH:
 405		/* Next: shift >= 32 */
 406		emit(ctx, srav, lo(dst), hi(dst), src);   /* dl = dh >>a src */
 407		emit(ctx, sra, hi(dst), hi(dst), 31);     /* dh = dh >>a 31  */
 408		emit(ctx, b, 20);                         /* PC += 20        */
 409		/* +16: shift < 32 */
  410		emit(ctx, sll, t1, hi(dst), 1);           /* t1 = dh << 1    */
  411		emit(ctx, sllv, t1, t1, t2);              /* t1 = t1 << t2   */
  412		emit(ctx, srlv, lo(dst), lo(dst), src);   /* dl = dl >> src  */
  413		emit(ctx, srav, hi(dst), hi(dst), src);   /* dh = dh >>a src */
 414		emit(ctx, or, lo(dst), lo(dst), t1);      /* dl = dl | t1    */
 415		break;
 416	}
 417
 418	/* +20: Done */
 419	clobber_reg64(ctx, dst);
 420}
 421
 422/* ALU mul immediate (64x32-bit) */
 423static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
 424{
 425	u8 src = MIPS_R_T6;
 426	u8 tmp = MIPS_R_T9;
 427
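	/*
	 * With imm sign-extended to 64 bits, the low 64 bits of the
	 * product decompose as
	 *
	 *   lo(dst * imm) = lo(dst) * imm                    (mod 2^32)
	 *   hi(dst * imm) = hi(dst) * imm + lo(dst) * hi(imm64) + carry
	 *
	 * where hi(imm64) is -1 for a negative imm (the subu correction
	 * below) and carry is the high word of lo(dst) * imm.
	 */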
 428	switch (imm) {
 429	/* dst = dst * 1 is a no-op */
 430	case 1:
 431		break;
 432	/* dst = dst * -1 */
 433	case -1:
 434		emit_neg_i64(ctx, dst);
 435		break;
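	/* dst = dst * 0 */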
 436	case 0:
 437		emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
 438		emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
 439		break;
 440	/* Full 64x32 multiply */
 441	default:
 442		/* hi(dst) = hi(dst) * src(imm) */
 443		emit_mov_i(ctx, src, imm);
 444		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
 445			emit(ctx, mul, hi(dst), hi(dst), src);
 446		} else {
 447			emit(ctx, multu, hi(dst), src);
 448			emit(ctx, mflo, hi(dst));
 449		}
 450
 451		/* hi(dst) = hi(dst) - lo(dst) */
 452		if (imm < 0)
 453			emit(ctx, subu, hi(dst), hi(dst), lo(dst));
 454
 455		/* tmp = lo(dst) * src(imm) >> 32 */
 456		/* lo(dst) = lo(dst) * src(imm) */
 457		if (cpu_has_mips32r6) {
 458			emit(ctx, muhu, tmp, lo(dst), src);
 459			emit(ctx, mulu, lo(dst), lo(dst), src);
 460		} else {
 461			emit(ctx, multu, lo(dst), src);
 462			emit(ctx, mflo, lo(dst));
 463			emit(ctx, mfhi, tmp);
 464		}
 465
 466		/* hi(dst) += tmp */
 467		emit(ctx, addu, hi(dst), hi(dst), tmp);
 468		clobber_reg64(ctx, dst);
 469		break;
 470	}
 471}
 472
 473/* ALU mul register (64x64-bit) */
 474static void emit_mul_r64(struct jit_context *ctx,
 475			 const u8 dst[], const u8 src[])
 476{
 477	u8 acc = MIPS_R_T8;
 478	u8 tmp = MIPS_R_T9;
 479
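	/*
	 * Low 64 bits of the 64x64 product:
	 *   hi(dst * src) = hi(dst) * lo(src) + lo(dst) * hi(src)
	 *                 + high word of (lo(dst) * lo(src))
	 */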
 480	/* acc = hi(dst) * lo(src) */
 481	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
 482		emit(ctx, mul, acc, hi(dst), lo(src));
 483	} else {
 484		emit(ctx, multu, hi(dst), lo(src));
 485		emit(ctx, mflo, acc);
 486	}
 487
 488	/* tmp = lo(dst) * hi(src) */
 489	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
 490		emit(ctx, mul, tmp, lo(dst), hi(src));
 491	} else {
 492		emit(ctx, multu, lo(dst), hi(src));
 493		emit(ctx, mflo, tmp);
 494	}
 495
 496	/* acc += tmp */
 497	emit(ctx, addu, acc, acc, tmp);
 498
 499	/* tmp = lo(dst) * lo(src) >> 32 */
 500	/* lo(dst) = lo(dst) * lo(src) */
 501	if (cpu_has_mips32r6) {
 502		emit(ctx, muhu, tmp, lo(dst), lo(src));
 503		emit(ctx, mulu, lo(dst), lo(dst), lo(src));
 504	} else {
 505		emit(ctx, multu, lo(dst), lo(src));
 506		emit(ctx, mflo, lo(dst));
 507		emit(ctx, mfhi, tmp);
 508	}
 509
 510	/* hi(dst) = acc + tmp */
 511	emit(ctx, addu, hi(dst), acc, tmp);
 512	clobber_reg64(ctx, dst);
 513}
 514
 515/* Helper function for 64-bit modulo */
 516static u64 jit_mod64(u64 a, u64 b)
 517{
 518	u64 rem;
 519
 520	div64_u64_rem(a, b, &rem);
 521	return rem;
 522}
 523
 524/* ALU div/mod register (64-bit) */
 525static void emit_divmod_r64(struct jit_context *ctx,
 526			    const u8 dst[], const u8 src[], u8 op)
 527{
 528	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
 529	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
 530	const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
 531	int exclude, k;
 532	u32 addr = 0;
 533
 534	/* Push caller-saved registers on stack */
 535	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 536		  0, JIT_RESERVED_STACK);
 537
 538	/* Put 64-bit arguments 1 and 2 in registers a0-a3 */
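	/*
	 * The copies go through t9 so they stay correct when src or dst
	 * overlap the argument registers a0-a3.
	 */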
 539	for (k = 0; k < 2; k++) {
 540		emit(ctx, move, MIPS_R_T9, src[k]);
 541		emit(ctx, move, r1[k], dst[k]);
 542		emit(ctx, move, r2[k], MIPS_R_T9);
 543	}
 544
 545	/* Emit function call */
 546	switch (BPF_OP(op)) {
 547	/* dst = dst / src */
 548	case BPF_DIV:
 549		addr = (u32)&div64_u64;
 550		break;
 551	/* dst = dst % src */
 552	case BPF_MOD:
 553		addr = (u32)&jit_mod64;
 554		break;
 555	}
 556	emit_mov_i(ctx, MIPS_R_T9, addr);
 557	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
 558	emit(ctx, nop); /* Delay slot */
 559
 560	/* Store the 64-bit result in dst */
 561	emit(ctx, move, dst[0], r0[0]);
 562	emit(ctx, move, dst[1], r0[1]);
 563
 564	/* Restore caller-saved registers, excluding the computed result */
 565	exclude = BIT(lo(dst)) | BIT(hi(dst));
 566	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 567		 exclude, JIT_RESERVED_STACK);
 568	emit_load_delay(ctx);
 569
 570	clobber_reg64(ctx, dst);
 571	clobber_reg(ctx, MIPS_R_V0);
 572	clobber_reg(ctx, MIPS_R_V1);
 573	clobber_reg(ctx, MIPS_R_RA);
 574}
 575
 576/* Swap bytes in a register word */
 577static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
 578{
 579	u8 tmp = MIPS_R_T9;
 580
 581	emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
 582	emit(ctx, sll, tmp, tmp, 8);    /* tmp = tmp << 8         */
 583	emit(ctx, srl, dst, src, 8);    /* dst = src >> 8         */
 584	emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
 585	emit(ctx, or,  dst, dst, tmp);  /* dst = dst | tmp        */
 586}
 587
 588/* Swap half words in a register word */
 589static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
 590{
 591	u8 tmp = MIPS_R_T9;
 592
 593	emit(ctx, sll, tmp, src, 16);  /* tmp = src << 16 */
 594	emit(ctx, srl, dst, src, 16);  /* dst = src >> 16 */
 595	emit(ctx, or,  dst, dst, tmp); /* dst = dst | tmp */
 596}
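/*
 * Composing the two swaps byte-reverses a full word, as done in
 * emit_bswap_r64() below: for src = 0x11223344, emit_swap16_r() gives
 * 0x33441122, and a following emit_swap8_r() with mask 0x00ff00ff
 * gives 0x44332211.
 */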
 597
 598/* Swap bytes and truncate a register double word, word or half word */
 599static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
 600{
 601	u8 tmp = MIPS_R_T8;
 602
 603	switch (width) {
 604	/* Swap bytes in a double word */
 605	case 64:
 606		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
 607			emit(ctx, rotr, tmp, hi(dst), 16);
 608			emit(ctx, rotr, hi(dst), lo(dst), 16);
 609			emit(ctx, wsbh, lo(dst), tmp);
 610			emit(ctx, wsbh, hi(dst), hi(dst));
 611		} else {
 612			emit_swap16_r(ctx, tmp, lo(dst));
 613			emit_swap16_r(ctx, lo(dst), hi(dst));
 614			emit(ctx, move, hi(dst), tmp);
 615
 616			emit(ctx, lui, tmp, 0xff);      /* tmp = 0x00ff0000 */
 617			emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
 618			emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
 619			emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
 620		}
 621		break;
 622	/* Swap bytes in a word */
 623	/* Swap bytes in a half word */
 624	case 32:
 625	case 16:
 626		emit_bswap_r(ctx, lo(dst), width);
 627		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 628		break;
 629	}
 630	clobber_reg64(ctx, dst);
 631}
 632
 633/* Truncate a register double word, word or half word */
 634static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
 635{
 636	switch (width) {
 637	case 64:
 638		break;
 639	/* Zero-extend a word */
 640	case 32:
 641		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 642		clobber_reg(ctx, hi(dst));
 643		break;
 644	/* Zero-extend a half word */
 645	case 16:
 646		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 647		emit(ctx, andi, lo(dst), lo(dst), 0xffff);
 648		clobber_reg64(ctx, dst);
 649		break;
 650	}
 651}
 652
 653/* Load operation: dst = *(size*)(src + off) */
 654static void emit_ldx(struct jit_context *ctx,
 655		     const u8 dst[], u8 src, s16 off, u8 size)
 656{
 657	switch (size) {
 658	/* Load a byte */
 659	case BPF_B:
 660		emit(ctx, lbu, lo(dst), off, src);
 661		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 662		break;
 663	/* Load a half word */
 664	case BPF_H:
 665		emit(ctx, lhu, lo(dst), off, src);
 666		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 667		break;
 668	/* Load a word */
 669	case BPF_W:
 670		emit(ctx, lw, lo(dst), off, src);
 671		emit(ctx, move, hi(dst), MIPS_R_ZERO);
 672		break;
 673	/* Load a double word */
 674	case BPF_DW:
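		/*
		 * Load the half that aliases the base register last, so
		 * the base address survives the first load.
		 */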
 675		if (dst[1] == src) {
 676			emit(ctx, lw, dst[0], off + 4, src);
 677			emit(ctx, lw, dst[1], off, src);
 678		} else {
 679			emit(ctx, lw, dst[1], off, src);
 680			emit(ctx, lw, dst[0], off + 4, src);
 681		}
 682		emit_load_delay(ctx);
 683		break;
 684	}
 685	clobber_reg64(ctx, dst);
 686}
 687
 688/* Store operation: *(size *)(dst + off) = src */
 689static void emit_stx(struct jit_context *ctx,
 690		     const u8 dst, const u8 src[], s16 off, u8 size)
 691{
 692	switch (size) {
 693	/* Store a byte */
 694	case BPF_B:
 695		emit(ctx, sb, lo(src), off, dst);
 696		break;
 697	/* Store a half word */
 698	case BPF_H:
 699		emit(ctx, sh, lo(src), off, dst);
 700		break;
 701	/* Store a word */
 702	case BPF_W:
 703		emit(ctx, sw, lo(src), off, dst);
 704		break;
 705	/* Store a double word */
 706	case BPF_DW:
 707		emit(ctx, sw, src[1], off, dst);
 708		emit(ctx, sw, src[0], off + 4, dst);
 709		break;
 710	}
 711}
 712
 713/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
 714static void emit_atomic_r32(struct jit_context *ctx,
 715			    u8 dst, u8 src, s16 off, u8 code)
 716{
 717	u32 exclude = 0;
 718	u32 addr = 0;
 719
 720	/* Push caller-saved registers on stack */
 721	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 722		  0, JIT_RESERVED_STACK);
 723	/*
 724	 * Argument 1: dst+off if xchg, otherwise src, passed in register a0
 725	 * Argument 2: src if xchg, otherwise dst+off, passed in register a1
 726	 */
 727	emit(ctx, move, MIPS_R_T9, dst);
 728	if (code == BPF_XCHG) {
 729		emit(ctx, move, MIPS_R_A1, src);
 730		emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
 731	} else {
 732		emit(ctx, move, MIPS_R_A0, src);
 733		emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
 734	}
 735
 736	/* Emit function call */
 737	switch (code) {
 738	case BPF_ADD:
 739		addr = (u32)&atomic_add;
 740		break;
 741	case BPF_ADD | BPF_FETCH:
 742		addr = (u32)&atomic_fetch_add;
 743		break;
 744	case BPF_SUB:
 745		addr = (u32)&atomic_sub;
 746		break;
 747	case BPF_SUB | BPF_FETCH:
 748		addr = (u32)&atomic_fetch_sub;
 749		break;
 750	case BPF_OR:
 751		addr = (u32)&atomic_or;
 752		break;
 753	case BPF_OR | BPF_FETCH:
 754		addr = (u32)&atomic_fetch_or;
 755		break;
 756	case BPF_AND:
 757		addr = (u32)&atomic_and;
 758		break;
 759	case BPF_AND | BPF_FETCH:
 760		addr = (u32)&atomic_fetch_and;
 761		break;
 762	case BPF_XOR:
 763		addr = (u32)&atomic_xor;
 764		break;
 765	case BPF_XOR | BPF_FETCH:
 766		addr = (u32)&atomic_fetch_xor;
 767		break;
 768	case BPF_XCHG:
 769		addr = (u32)&atomic_xchg;
 770		break;
 771	}
 772	emit_mov_i(ctx, MIPS_R_T9, addr);
 773	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
 774	emit(ctx, nop); /* Delay slot */
 775
 776	/* Update src register with old value, if specified */
 777	if (code & BPF_FETCH) {
 778		emit(ctx, move, src, MIPS_R_V0);
 779		exclude = BIT(src);
 780		clobber_reg(ctx, src);
 781	}
 782
 783	/* Restore caller-saved registers, except any fetched value */
 784	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 785		 exclude, JIT_RESERVED_STACK);
 786	emit_load_delay(ctx);
 787	clobber_reg(ctx, MIPS_R_RA);
 788}
 789
 790/* Helper function for 64-bit atomic exchange */
 791static s64 jit_xchg64(s64 a, atomic64_t *v)
 792{
 793	return atomic64_xchg(v, a);
 794}
 795
 796/* Atomic read-modify-write (64-bit) */
 797static void emit_atomic_r64(struct jit_context *ctx,
 798			    u8 dst, const u8 src[], s16 off, u8 code)
 799{
 800	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
 801	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
 802	u32 exclude = 0;
 803	u32 addr = 0;
 804
 805	/* Push caller-saved registers on stack */
 806	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 807		  0, JIT_RESERVED_STACK);
 808	/*
 809	 * Argument 1: 64-bit src, passed in registers a0-a1
 810	 * Argument 2: 32-bit dst+off, passed in register a2
 811	 */
 812	emit(ctx, move, MIPS_R_T9, dst);
 813	emit(ctx, move, r1[0], src[0]);
 814	emit(ctx, move, r1[1], src[1]);
 815	emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);
 816
 817	/* Emit function call */
 818	switch (code) {
 819	case BPF_ADD:
 820		addr = (u32)&atomic64_add;
 821		break;
 822	case BPF_ADD | BPF_FETCH:
 823		addr = (u32)&atomic64_fetch_add;
 824		break;
 825	case BPF_SUB:
 826		addr = (u32)&atomic64_sub;
 827		break;
 828	case BPF_SUB | BPF_FETCH:
 829		addr = (u32)&atomic64_fetch_sub;
 830		break;
 831	case BPF_OR:
 832		addr = (u32)&atomic64_or;
 833		break;
 834	case BPF_OR | BPF_FETCH:
 835		addr = (u32)&atomic64_fetch_or;
 836		break;
 837	case BPF_AND:
 838		addr = (u32)&atomic64_and;
 839		break;
 840	case BPF_AND | BPF_FETCH:
 841		addr = (u32)&atomic64_fetch_and;
 842		break;
 843	case BPF_XOR:
 844		addr = (u32)&atomic64_xor;
 845		break;
 846	case BPF_XOR | BPF_FETCH:
 847		addr = (u32)&atomic64_fetch_xor;
 848		break;
 849	case BPF_XCHG:
 850		addr = (u32)&jit_xchg64;
 851		break;
 852	}
 853	emit_mov_i(ctx, MIPS_R_T9, addr);
 854	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
 855	emit(ctx, nop); /* Delay slot */
 856
 857	/* Update src register with old value, if specified */
 858	if (code & BPF_FETCH) {
 859		emit(ctx, move, lo(src), lo(r0));
 860		emit(ctx, move, hi(src), hi(r0));
 861		exclude = BIT(src[0]) | BIT(src[1]);
 862		clobber_reg64(ctx, src);
 863	}
 864
 865	/* Restore caller-saved registers, except any fetched value */
 866	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 867		 exclude, JIT_RESERVED_STACK);
 868	emit_load_delay(ctx);
 869	clobber_reg(ctx, MIPS_R_RA);
 870}
 871
 872/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
 873static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
 874{
 875	const u8 *r0 = bpf2mips32[BPF_REG_0];
 876
 877	/* Push caller-saved registers on stack */
 878	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 879		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
 880	/*
 881	 * Argument 1: 32-bit dst+off, passed in register a0
 882	 * Argument 2: 32-bit r0, passed in register a1
 883	 * Argument 3: 32-bit src, passed in register a2
 884	 */
 885	emit(ctx, addiu, MIPS_R_T9, dst, off);
 886	emit(ctx, move, MIPS_R_T8, src);
 887	emit(ctx, move, MIPS_R_A1, lo(r0));
 888	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
 889	emit(ctx, move, MIPS_R_A2, MIPS_R_T8);
 890
 891	/* Emit function call */
 892	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
 893	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
 894	emit(ctx, nop); /* Delay slot */
 895
 896#ifdef __BIG_ENDIAN
 897	emit(ctx, move, lo(r0), MIPS_R_V0);
 898#endif
 899	/* Restore caller-saved registers, except the return value */
 900	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 901		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
 902	emit_load_delay(ctx);
 903	clobber_reg(ctx, MIPS_R_V0);
 904	clobber_reg(ctx, MIPS_R_V1);
 905	clobber_reg(ctx, MIPS_R_RA);
 906}
 907
 908/* Atomic compare-and-exchange (64-bit) */
 909static void emit_cmpxchg_r64(struct jit_context *ctx,
 910			     u8 dst, const u8 src[], s16 off)
 911{
 912	const u8 *r0 = bpf2mips32[BPF_REG_0];
 913	const u8 *r2 = bpf2mips32[BPF_REG_2];
 914
 915	/* Push caller-saved registers on stack */
 916	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 917		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
 918	/*
 919	 * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
 920	 * Argument 2: 64-bit r0, passed in registers a2-a3
 921	 * Argument 3: 64-bit src, passed on stack
 922	 */
 923	push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
 924	emit(ctx, addiu, MIPS_R_T9, dst, off);
 925	emit(ctx, move, r2[0], r0[0]);
 926	emit(ctx, move, r2[1], r0[1]);
 927	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
 928
 929	/* Emit function call */
 930	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
 931	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
 932	emit(ctx, nop); /* Delay slot */
 933
 934	/* Restore caller-saved registers, except the return value */
 935	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
 936		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
 937	emit_load_delay(ctx);
 938	clobber_reg(ctx, MIPS_R_V0);
 939	clobber_reg(ctx, MIPS_R_V1);
 940	clobber_reg(ctx, MIPS_R_RA);
 941}
 942
 943/*
 944 * Conditional movz or an emulated equivalent.
 945 * Note that the rs register may be modified.
 946 */
 947static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
 948{
 949	if (cpu_has_mips_2) {
 950		emit(ctx, movz, rd, rs, rt);           /* rd = rt ? rd : rs  */
 951	} else if (cpu_has_mips32r6) {
 952		if (rs != MIPS_R_ZERO)
  953			emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0  */
  954		emit(ctx, selnez, rd, rd, rt);         /* rd = 0 if rt == 0  */
 955		if (rs != MIPS_R_ZERO)
 956			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
 957	} else {
  958		emit(ctx, bnez, rt, 8);                /* PC += 8 if rt != 0 */
 959		emit(ctx, nop);                        /* +0: delay slot     */
 960		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
 961	}
 962	clobber_reg(ctx, rd);
 963	clobber_reg(ctx, rs);
 964}
 965
 966/*
 967 * Conditional movn or an emulated equivalent.
 968 * Note that the rs register may be modified.
 969 */
 970static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
 971{
 972	if (cpu_has_mips_2) {
 973		emit(ctx, movn, rd, rs, rt);           /* rd = rt ? rs : rd  */
 974	} else if (cpu_has_mips32r6) {
 975		if (rs != MIPS_R_ZERO)
 976			emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0  */
 977		emit(ctx, seleqz, rd, rd, rt);         /* rd = 0 if rt != 0  */
 978		if (rs != MIPS_R_ZERO)
 979			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
 980	} else {
  981		emit(ctx, beqz, rt, 8);                /* PC += 8 if rt == 0 */
 982		emit(ctx, nop);                        /* +0: delay slot     */
 983		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
 984	}
 985	clobber_reg(ctx, rd);
 986	clobber_reg(ctx, rs);
 987}
 988
 989/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
 990static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
 991			   const u8 rs[], s64 imm)
 992{
 993	u8 tmp = MIPS_R_T9;
 994
 995	if (imm < 0) {
 996		emit_mov_i(ctx, rd, imm);                 /* rd = imm        */
 997		emit(ctx, sltu, rd, lo(rs), rd);          /* rd = rsl < rd   */
 998		emit(ctx, sltiu, tmp, hi(rs), -1);        /* tmp = rsh < ~0U */
 999		emit(ctx, or, rd, rd, tmp);               /* rd = rd | tmp   */
1000	} else { /* imm >= 0 */
1001		if (imm > 0x7fff) {
1002			emit_mov_i(ctx, rd, (s32)imm);     /* rd = imm       */
1003			emit(ctx, sltu, rd, lo(rs), rd);   /* rd = rsl < rd  */
1004		} else {
1005			emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
1006		}
 1007		emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh != 0 */
1008	}
1009}
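/*
 * Example: rs = {hi = 0, lo = 5} and imm = 10 sets rd = 1, and the
 * final movn keeps it since hi(rs) == 0; any nonzero hi(rs) forces
 * rd to 0.
 */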
1010
1011/* Emulation of 64-bit sltu rd, rs, rt */
1012static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
1013			  const u8 rs[], const u8 rt[])
1014{
1015	u8 tmp = MIPS_R_T9;
1016
1017	emit(ctx, sltu, rd, lo(rs), lo(rt));           /* rd = rsl < rtl     */
1018	emit(ctx, subu, tmp, hi(rs), hi(rt));          /* tmp = rsh - rth    */
1019	emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp);        /* rd = 0 if tmp != 0 */
1020	emit(ctx, sltu, tmp, hi(rs), hi(rt));          /* tmp = rsh < rth    */
1021	emit(ctx, or, rd, rd, tmp);                    /* rd = rd | tmp      */
1022}
1023
1024/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
1025static void emit_slti_r64(struct jit_context *ctx, u8 rd,
1026			  const u8 rs[], s64 imm)
1027{
1028	u8 t1 = MIPS_R_T8;
1029	u8 t2 = MIPS_R_T9;
1030	u8 cmp;
1031
1032	/*
 1033	 * if ((rs < 0) ^ (imm < 0)) t1 = imm <u rsl
1034	 * else                      t1 = rsl <u imm
1035	 */
1036	emit_mov_i(ctx, rd, (s32)imm);
1037	emit(ctx, sltu, t1, lo(rs), rd);               /* t1 = rsl <u imm   */
1038	emit(ctx, sltu, t2, rd, lo(rs));               /* t2 = imm <u rsl   */
1039	emit(ctx, srl, rd, hi(rs), 31);                /* rd = rsh >> 31    */
1040	if (imm < 0)
1041		emit_movz_r(ctx, t1, t2, rd);          /* t1 = rd ? t1 : t2 */
1042	else
1043		emit_movn_r(ctx, t1, t2, rd);          /* t1 = rd ? t2 : t1 */
1044	/*
1045	 * if ((imm < 0 && rsh != 0xffffffff) ||
1046	 *     (imm >= 0 && rsh != 0))
1047	 *      t1 = 0
1048	 */
1049	if (imm < 0) {
1050		emit(ctx, addiu, rd, hi(rs), 1);       /* rd = rsh + 1 */
1051		cmp = rd;
1052	} else { /* imm >= 0 */
1053		cmp = hi(rs);
1054	}
1055	emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp);        /* t1 = 0 if cmp != 0 */
1056
1057	/*
1058	 * if (imm < 0) rd = rsh < -1
1059	 * else         rd = rsh != 0
1060	 * rd = rd | t1
1061	 */
1062	emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
1063	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1       */
1064}
1065
 1066/* Emulation of 64-bit slt rd, rs, rt */
1067static void emit_slt_r64(struct jit_context *ctx, u8 rd,
1068			 const u8 rs[], const u8 rt[])
1069{
1070	u8 t1 = MIPS_R_T7;
1071	u8 t2 = MIPS_R_T8;
1072	u8 t3 = MIPS_R_T9;
1073
1074	/*
1075	 * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
1076	 * else                     t1 = rsl <u rtl
1077	 * if (rsh == rth)          t1 = 0
1078	 */
1079	emit(ctx, sltu, t1, lo(rs), lo(rt));           /* t1 = rsl <u rtl   */
1080	emit(ctx, sltu, t2, lo(rt), lo(rs));           /* t2 = rtl <u rsl   */
 1081	emit(ctx, xor, t3, hi(rs), hi(rt));            /* t3 = rsh ^ rth    */
1082	emit(ctx, srl, rd, t3, 31);                    /* rd = t3 >> 31     */
1083	emit_movn_r(ctx, t1, t2, rd);                  /* t1 = rd ? t2 : t1 */
1084	emit_movn_r(ctx, t1, MIPS_R_ZERO, t3);         /* t1 = 0 if t3 != 0 */
1085
1086	/* rd = (rsh < rth) | t1 */
1087	emit(ctx, slt, rd, hi(rs), hi(rt));            /* rd = rsh <s rth   */
1088	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1      */
1089}
1090
1091/* Jump immediate (64-bit) */
1092static void emit_jmp_i64(struct jit_context *ctx,
1093			 const u8 dst[], s32 imm, s32 off, u8 op)
1094{
1095	u8 tmp = MIPS_R_T6;
1096
1097	switch (op) {
1098	/* No-op, used internally for branch optimization */
1099	case JIT_JNOP:
1100		break;
1101	/* PC += off if dst == imm */
1102	/* PC += off if dst != imm */
1103	case BPF_JEQ:
1104	case BPF_JNE:
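		/*
		 * Fold the 64-bit comparison into one zero test: tmp is
		 * zero iff lo(dst) == imm and hi(dst) matches the
		 * sign/zero extension of imm.
		 */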
1105		if (imm >= -0x7fff && imm <= 0x8000) {
1106			emit(ctx, addiu, tmp, lo(dst), -imm);
1107		} else if ((u32)imm <= 0xffff) {
1108			emit(ctx, xori, tmp, lo(dst), imm);
1109		} else {       /* Register fallback */
1110			emit_mov_i(ctx, tmp, imm);
1111			emit(ctx, xor, tmp, lo(dst), tmp);
1112		}
1113		if (imm < 0) { /* Compare sign extension */
 1114			emit(ctx, addiu, MIPS_R_T9, hi(dst), 1);
1115			emit(ctx, or, tmp, tmp, MIPS_R_T9);
1116		} else {       /* Compare zero extension */
1117			emit(ctx, or, tmp, tmp, hi(dst));
1118		}
1119		if (op == BPF_JEQ)
1120			emit(ctx, beqz, tmp, off);
1121		else   /* BPF_JNE */
1122			emit(ctx, bnez, tmp, off);
1123		break;
1124	/* PC += off if dst & imm */
1125	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
1126	case BPF_JSET:
1127	case JIT_JNSET:
1128		if ((u32)imm <= 0xffff) {
1129			emit(ctx, andi, tmp, lo(dst), imm);
1130		} else {     /* Register fallback */
1131			emit_mov_i(ctx, tmp, imm);
1132			emit(ctx, and, tmp, lo(dst), tmp);
1133		}
1134		if (imm < 0) /* Sign-extension pulls in high word */
1135			emit(ctx, or, tmp, tmp, hi(dst));
1136		if (op == BPF_JSET)
1137			emit(ctx, bnez, tmp, off);
1138		else   /* JIT_JNSET */
1139			emit(ctx, beqz, tmp, off);
1140		break;
1141	/* PC += off if dst > imm */
1142	case BPF_JGT:
1143		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
1144		emit(ctx, beqz, tmp, off);
1145		break;
1146	/* PC += off if dst >= imm */
1147	case BPF_JGE:
1148		emit_sltiu_r64(ctx, tmp, dst, imm);
1149		emit(ctx, beqz, tmp, off);
1150		break;
1151	/* PC += off if dst < imm */
1152	case BPF_JLT:
1153		emit_sltiu_r64(ctx, tmp, dst, imm);
1154		emit(ctx, bnez, tmp, off);
1155		break;
1156	/* PC += off if dst <= imm */
1157	case BPF_JLE:
1158		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
1159		emit(ctx, bnez, tmp, off);
1160		break;
1161	/* PC += off if dst > imm (signed) */
1162	case BPF_JSGT:
1163		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
1164		emit(ctx, beqz, tmp, off);
1165		break;
1166	/* PC += off if dst >= imm (signed) */
1167	case BPF_JSGE:
1168		emit_slti_r64(ctx, tmp, dst, imm);
1169		emit(ctx, beqz, tmp, off);
1170		break;
1171	/* PC += off if dst < imm (signed) */
1172	case BPF_JSLT:
1173		emit_slti_r64(ctx, tmp, dst, imm);
1174		emit(ctx, bnez, tmp, off);
1175		break;
1176	/* PC += off if dst <= imm (signed) */
1177	case BPF_JSLE:
1178		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
1179		emit(ctx, bnez, tmp, off);
1180		break;
1181	}
1182}
1183
1184/* Jump register (64-bit) */
1185static void emit_jmp_r64(struct jit_context *ctx,
1186			 const u8 dst[], const u8 src[], s32 off, u8 op)
1187{
1188	u8 t1 = MIPS_R_T6;
1189	u8 t2 = MIPS_R_T7;
1190
1191	switch (op) {
1192	/* No-op, used internally for branch optimization */
1193	case JIT_JNOP:
1194		break;
1195	/* PC += off if dst == src */
1196	/* PC += off if dst != src */
1197	case BPF_JEQ:
1198	case BPF_JNE:
1199		emit(ctx, subu, t1, lo(dst), lo(src));
1200		emit(ctx, subu, t2, hi(dst), hi(src));
1201		emit(ctx, or, t1, t1, t2);
1202		if (op == BPF_JEQ)
1203			emit(ctx, beqz, t1, off);
1204		else   /* BPF_JNE */
1205			emit(ctx, bnez, t1, off);
1206		break;
1207	/* PC += off if dst & src */
 1208	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
1209	case BPF_JSET:
1210	case JIT_JNSET:
1211		emit(ctx, and, t1, lo(dst), lo(src));
1212		emit(ctx, and, t2, hi(dst), hi(src));
1213		emit(ctx, or, t1, t1, t2);
1214		if (op == BPF_JSET)
1215			emit(ctx, bnez, t1, off);
1216		else   /* JIT_JNSET */
1217			emit(ctx, beqz, t1, off);
1218		break;
1219	/* PC += off if dst > src */
1220	case BPF_JGT:
1221		emit_sltu_r64(ctx, t1, src, dst);
1222		emit(ctx, bnez, t1, off);
1223		break;
1224	/* PC += off if dst >= src */
1225	case BPF_JGE:
1226		emit_sltu_r64(ctx, t1, dst, src);
1227		emit(ctx, beqz, t1, off);
1228		break;
1229	/* PC += off if dst < src */
1230	case BPF_JLT:
1231		emit_sltu_r64(ctx, t1, dst, src);
1232		emit(ctx, bnez, t1, off);
1233		break;
1234	/* PC += off if dst <= src */
1235	case BPF_JLE:
1236		emit_sltu_r64(ctx, t1, src, dst);
1237		emit(ctx, beqz, t1, off);
1238		break;
1239	/* PC += off if dst > src (signed) */
1240	case BPF_JSGT:
1241		emit_slt_r64(ctx, t1, src, dst);
1242		emit(ctx, bnez, t1, off);
1243		break;
1244	/* PC += off if dst >= src (signed) */
1245	case BPF_JSGE:
1246		emit_slt_r64(ctx, t1, dst, src);
1247		emit(ctx, beqz, t1, off);
1248		break;
1249	/* PC += off if dst < src (signed) */
1250	case BPF_JSLT:
1251		emit_slt_r64(ctx, t1, dst, src);
1252		emit(ctx, bnez, t1, off);
1253		break;
1254	/* PC += off if dst <= src (signed) */
1255	case BPF_JSLE:
1256		emit_slt_r64(ctx, t1, src, dst);
1257		emit(ctx, beqz, t1, off);
1258		break;
1259	}
1260}
1261
1262/* Function call */
1263static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
1264{
1265	bool fixed;
1266	u64 addr;
1267
1268	/* Decode the call address */
1269	if (bpf_jit_get_func_addr(ctx->program, insn, false,
1270				  &addr, &fixed) < 0)
1271		return -1;
1272	if (!fixed)
1273		return -1;
1274
1275	/* Push stack arguments */
1276	push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);
1277
1278	/* Emit function call */
1279	emit_mov_i(ctx, MIPS_R_T9, addr);
1280	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
1281	emit(ctx, nop); /* Delay slot */
1282
1283	clobber_reg(ctx, MIPS_R_RA);
1284	clobber_reg(ctx, MIPS_R_V0);
1285	clobber_reg(ctx, MIPS_R_V1);
1286	return 0;
1287}
1288
1289/* Function tail call */
1290static int emit_tail_call(struct jit_context *ctx)
1291{
1292	u8 ary = lo(bpf2mips32[BPF_REG_2]);
1293	u8 ind = lo(bpf2mips32[BPF_REG_3]);
1294	u8 t1 = MIPS_R_T8;
1295	u8 t2 = MIPS_R_T9;
1296	int off;
1297
1298	/*
1299	 * Tail call:
1300	 * eBPF R1   - function argument (context ptr), passed in a0-a1
1301	 * eBPF R2   - ptr to object with array of function entry points
1302	 * eBPF R3   - array index of function to be called
1303	 * stack[sz] - remaining tail call count, initialized in prologue
1304	 */
1305
1306	/* if (ind >= ary->map.max_entries) goto out */
1307	off = offsetof(struct bpf_array, map.max_entries);
1308	if (off > 0x7fff)
1309		return -1;
 1310	emit(ctx, lw, t1, off, ary);             /* t1 = ary->map.max_entries */
1311	emit_load_delay(ctx);                    /* Load delay slot          */
1312	emit(ctx, sltu, t1, ind, t1);            /* t1 = ind < t1            */
1313	emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0  */
1314						 /* (next insn delay slot)   */
1315	/* if (TCC-- <= 0) goto out */
1316	emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP);  /* t2 = *(SP + size) */
1317	emit_load_delay(ctx);                     /* Load delay slot         */
1318	emit(ctx, blez, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 <= 0 */
1319	emit(ctx, addiu, t2, t2, -1);             /* t2-- (delay slot)       */
1320	emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP);  /* *(SP + size) = t2 */
1321
1322	/* prog = ary->ptrs[ind] */
1323	off = offsetof(struct bpf_array, ptrs);
1324	if (off > 0x7fff)
1325		return -1;
1326	emit(ctx, sll, t1, ind, 2);               /* t1 = ind << 2           */
1327	emit(ctx, addu, t1, t1, ary);             /* t1 += ary               */
1328	emit(ctx, lw, t2, off, t1);               /* t2 = *(t1 + off)        */
1329	emit_load_delay(ctx);                     /* Load delay slot         */
1330
1331	/* if (prog == 0) goto out */
1332	emit(ctx, beqz, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 == 0 */
1333	emit(ctx, nop);                           /* Delay slot              */
1334
 1335	/* func = prog->bpf_func + JIT_TCALL_SKIP (prologue skip offset) */
1336	off = offsetof(struct bpf_prog, bpf_func);
1337	if (off > 0x7fff)
1338		return -1;
1339	emit(ctx, lw, t1, off, t2);                /* t1 = *(t2 + off)       */
1340	emit_load_delay(ctx);                      /* Load delay slot        */
1341	emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP);  /* t1 += skip (8 or 12)   */
1342
1343	/* goto func */
1344	build_epilogue(ctx, t1);
1345	return 0;
1346}
1347
1348/*
1349 * Stack frame layout for a JITed program (stack grows down).
1350 *
1351 * Higher address  : Caller's stack frame       :
1352 *                 :----------------------------:
1353 *                 : 64-bit eBPF args r3-r5     :
1354 *                 :----------------------------:
1355 *                 : Reserved / tail call count :
1356 *                 +============================+  <--- MIPS sp before call
1357 *                 | Callee-saved registers,    |
1358 *                 | including RA and FP        |
1359 *                 +----------------------------+  <--- eBPF FP (MIPS zero,fp)
1360 *                 | Local eBPF variables       |
1361 *                 | allocated by program       |
1362 *                 +----------------------------+
1363 *                 | Reserved for caller-saved  |
1364 *                 | registers                  |
1365 *                 +----------------------------+
1366 *                 | Reserved for 64-bit eBPF   |
1367 *                 | args r3-r5 & args passed   |
1368 *                 | on stack in kernel calls   |
1369 * Lower address   +============================+  <--- MIPS sp
1370 */
1371
1372/* Build program prologue to set up the stack and registers */
1373void build_prologue(struct jit_context *ctx)
1374{
1375	const u8 *r1 = bpf2mips32[BPF_REG_1];
1376	const u8 *fp = bpf2mips32[BPF_REG_FP];
1377	int stack, saved, locals, reserved;
1378
1379	/*
 1380	 * In the unlikely event that the TCC limit is raised to more
 1381	 * than 16 bits, it can no longer be encoded in the ori
 1382	 * instruction below (max 0xffff). It is better to fail to
 1383	 * compile than to degrade gracefully.
1384	 */
1385	BUILD_BUG_ON(MAX_TAIL_CALL_CNT > 0xffff);
1386
1387	/*
 1388	 * The first two instructions initialize TCC in the 16-byte area
 1389	 * reserved for us in the parent's stack frame. On a tail call, the
1390	 * calling function jumps into the prologue after these instructions.
1391	 */
1392	emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
1393	emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);
1394
1395	/*
1396	 * Register eBPF R1 contains the 32-bit context pointer argument.
1397	 * A 32-bit argument is always passed in MIPS register a0, regardless
1398	 * of CPU endianness. Initialize R1 accordingly and zero-extend.
1399	 */
1400#ifdef __BIG_ENDIAN
1401	emit(ctx, move, lo(r1), MIPS_R_A0);
1402#endif
1403
1404	/* === Entry-point for tail calls === */
1405
1406	/* Zero-extend the 32-bit argument */
1407	emit(ctx, move, hi(r1), MIPS_R_ZERO);
1408
1409	/* If the eBPF frame pointer was accessed it must be saved */
1410	if (ctx->accessed & BIT(BPF_REG_FP))
1411		clobber_reg64(ctx, fp);
1412
1413	/* Compute the stack space needed for callee-saved registers */
1414	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
1415	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);
1416
1417	/* Stack space used by eBPF program local data */
1418	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);
1419
1420	/*
1421	 * If we are emitting function calls, reserve extra stack space for
1422	 * caller-saved registers and function arguments passed on the stack.
1423	 * The required space is computed automatically during resource
1424	 * usage discovery (pass 1).
1425	 */
1426	reserved = ctx->stack_used;
1427
1428	/* Allocate the stack frame */
1429	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
1430	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);
1431
1432	/* Store callee-saved registers on stack */
1433	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
1434
1435	/* Initialize the eBPF frame pointer if accessed */
1436	if (ctx->accessed & BIT(BPF_REG_FP))
1437		emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);
1438
1439	ctx->saved_size = saved;
1440	ctx->stack_size = stack;
1441}
1442
1443/* Build the program epilogue to restore the stack and registers */
1444void build_epilogue(struct jit_context *ctx, int dest_reg)
1445{
1446	/* Restore callee-saved registers from stack */
1447	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
1448		 ctx->stack_size - ctx->saved_size);
1449	/*
1450	 * A 32-bit return value is always passed in MIPS register v0,
1451	 * but on big-endian targets the low part of R0 is mapped to v1.
1452	 */
1453#ifdef __BIG_ENDIAN
1454	emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
1455#endif
1456
1457	/* Jump to the return address and adjust the stack pointer */
1458	emit(ctx, jr, dest_reg);
1459	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
1460}
1461
1462/* Build one eBPF instruction */
1463int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
1464{
1465	const u8 *dst = bpf2mips32[insn->dst_reg];
1466	const u8 *src = bpf2mips32[insn->src_reg];
1467	const u8 *res = bpf2mips32[BPF_REG_0];
1468	const u8 *tmp = bpf2mips32[JIT_REG_TMP];
1469	u8 code = insn->code;
1470	s16 off = insn->off;
1471	s32 imm = insn->imm;
1472	s32 val, rel;
1473	u8 alu, jmp;
1474
1475	switch (code) {
1476	/* ALU operations */
1477	/* dst = imm */
1478	case BPF_ALU | BPF_MOV | BPF_K:
1479		emit_mov_i(ctx, lo(dst), imm);
1480		emit_zext_ver(ctx, dst);
1481		break;
1482	/* dst = src */
1483	case BPF_ALU | BPF_MOV | BPF_X:
1484		if (imm == 1) {
1485			/* Special mov32 for zext */
1486			emit_mov_i(ctx, hi(dst), 0);
1487		} else {
1488			emit_mov_r(ctx, lo(dst), lo(src));
1489			emit_zext_ver(ctx, dst);
1490		}
1491		break;
1492	/* dst = -dst */
1493	case BPF_ALU | BPF_NEG:
1494		emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
1495		emit_zext_ver(ctx, dst);
1496		break;
1497	/* dst = dst & imm */
1498	/* dst = dst | imm */
1499	/* dst = dst ^ imm */
1500	/* dst = dst << imm */
1501	/* dst = dst >> imm */
1502	/* dst = dst >> imm (arithmetic) */
1503	/* dst = dst + imm */
1504	/* dst = dst - imm */
1505	/* dst = dst * imm */
1506	/* dst = dst / imm */
1507	/* dst = dst % imm */
1508	case BPF_ALU | BPF_OR | BPF_K:
1509	case BPF_ALU | BPF_AND | BPF_K:
1510	case BPF_ALU | BPF_XOR | BPF_K:
1511	case BPF_ALU | BPF_LSH | BPF_K:
1512	case BPF_ALU | BPF_RSH | BPF_K:
1513	case BPF_ALU | BPF_ARSH | BPF_K:
1514	case BPF_ALU | BPF_ADD | BPF_K:
1515	case BPF_ALU | BPF_SUB | BPF_K:
1516	case BPF_ALU | BPF_MUL | BPF_K:
1517	case BPF_ALU | BPF_DIV | BPF_K:
1518	case BPF_ALU | BPF_MOD | BPF_K:
1519		if (!valid_alu_i(BPF_OP(code), imm)) {
1520			emit_mov_i(ctx, MIPS_R_T6, imm);
1521			emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
1522		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
1523			emit_alu_i(ctx, lo(dst), val, alu);
1524		}
1525		emit_zext_ver(ctx, dst);
1526		break;
1527	/* dst = dst & src */
1528	/* dst = dst | src */
1529	/* dst = dst ^ src */
1530	/* dst = dst << src */
1531	/* dst = dst >> src */
1532	/* dst = dst >> src (arithmetic) */
1533	/* dst = dst + src */
1534	/* dst = dst - src */
1535	/* dst = dst * src */
1536	/* dst = dst / src */
1537	/* dst = dst % src */
1538	case BPF_ALU | BPF_AND | BPF_X:
1539	case BPF_ALU | BPF_OR | BPF_X:
1540	case BPF_ALU | BPF_XOR | BPF_X:
1541	case BPF_ALU | BPF_LSH | BPF_X:
1542	case BPF_ALU | BPF_RSH | BPF_X:
1543	case BPF_ALU | BPF_ARSH | BPF_X:
1544	case BPF_ALU | BPF_ADD | BPF_X:
1545	case BPF_ALU | BPF_SUB | BPF_X:
1546	case BPF_ALU | BPF_MUL | BPF_X:
1547	case BPF_ALU | BPF_DIV | BPF_X:
1548	case BPF_ALU | BPF_MOD | BPF_X:
1549		emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
1550		emit_zext_ver(ctx, dst);
1551		break;
1552	/* dst = imm (64-bit) */
1553	case BPF_ALU64 | BPF_MOV | BPF_K:
1554		emit_mov_se_i64(ctx, dst, imm);
1555		break;
1556	/* dst = src (64-bit) */
1557	case BPF_ALU64 | BPF_MOV | BPF_X:
1558		emit_mov_r(ctx, lo(dst), lo(src));
1559		emit_mov_r(ctx, hi(dst), hi(src));
1560		break;
1561	/* dst = -dst (64-bit) */
1562	case BPF_ALU64 | BPF_NEG:
1563		emit_neg_i64(ctx, dst);
1564		break;
1565	/* dst = dst & imm (64-bit) */
1566	case BPF_ALU64 | BPF_AND | BPF_K:
1567		emit_alu_i64(ctx, dst, imm, BPF_OP(code));
1568		break;
1569	/* dst = dst | imm (64-bit) */
1570	/* dst = dst ^ imm (64-bit) */
1571	/* dst = dst + imm (64-bit) */
1572	/* dst = dst - imm (64-bit) */
1573	case BPF_ALU64 | BPF_OR | BPF_K:
1574	case BPF_ALU64 | BPF_XOR | BPF_K:
1575	case BPF_ALU64 | BPF_ADD | BPF_K:
1576	case BPF_ALU64 | BPF_SUB | BPF_K:
1577		if (imm)
1578			emit_alu_i64(ctx, dst, imm, BPF_OP(code));
1579		break;
1580	/* dst = dst << imm (64-bit) */
1581	/* dst = dst >> imm (64-bit) */
1582	/* dst = dst >> imm (64-bit, arithmetic) */
1583	case BPF_ALU64 | BPF_LSH | BPF_K:
1584	case BPF_ALU64 | BPF_RSH | BPF_K:
1585	case BPF_ALU64 | BPF_ARSH | BPF_K:
1586		if (imm)
1587			emit_shift_i64(ctx, dst, imm, BPF_OP(code));
1588		break;
1589	/* dst = dst * imm (64-bit) */
1590	case BPF_ALU64 | BPF_MUL | BPF_K:
1591		emit_mul_i64(ctx, dst, imm);
1592		break;
1593	/* dst = dst / imm (64-bit) */
1594	/* dst = dst % imm (64-bit) */
1595	case BPF_ALU64 | BPF_DIV | BPF_K:
1596	case BPF_ALU64 | BPF_MOD | BPF_K:
1597		/*
1598		 * Sign-extend the immediate value into a temporary register,
1599		 * and then do the operation on this register.
1600		 */
1601		emit_mov_se_i64(ctx, tmp, imm);
1602		emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
1603		break;
1604	/* dst = dst & src (64-bit) */
1605	/* dst = dst | src (64-bit) */
1606	/* dst = dst ^ src (64-bit) */
1607	/* dst = dst + src (64-bit) */
1608	/* dst = dst - src (64-bit) */
1609	case BPF_ALU64 | BPF_AND | BPF_X:
1610	case BPF_ALU64 | BPF_OR | BPF_X:
1611	case BPF_ALU64 | BPF_XOR | BPF_X:
1612	case BPF_ALU64 | BPF_ADD | BPF_X:
1613	case BPF_ALU64 | BPF_SUB | BPF_X:
1614		emit_alu_r64(ctx, dst, src, BPF_OP(code));
1615		break;
1616	/* dst = dst << src (64-bit) */
1617	/* dst = dst >> src (64-bit) */
1618	/* dst = dst >> src (64-bit, arithmetic) */
1619	case BPF_ALU64 | BPF_LSH | BPF_X:
1620	case BPF_ALU64 | BPF_RSH | BPF_X:
1621	case BPF_ALU64 | BPF_ARSH | BPF_X:
1622		emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
1623		break;
1624	/* dst = dst * src (64-bit) */
1625	case BPF_ALU64 | BPF_MUL | BPF_X:
1626		emit_mul_r64(ctx, dst, src);
1627		break;
1628	/* dst = dst / src (64-bit) */
1629	/* dst = dst % src (64-bit) */
1630	case BPF_ALU64 | BPF_DIV | BPF_X:
1631	case BPF_ALU64 | BPF_MOD | BPF_X:
1632		emit_divmod_r64(ctx, dst, src, BPF_OP(code));
1633		break;
1634	/* dst = htole(dst) */
1635	/* dst = htobe(dst) */
1636	case BPF_ALU | BPF_END | BPF_FROM_LE:
1637	case BPF_ALU | BPF_END | BPF_FROM_BE:
1638		if (BPF_SRC(code) ==
1639#ifdef __BIG_ENDIAN
1640		    BPF_FROM_LE
1641#else
1642		    BPF_FROM_BE
1643#endif
1644		    )
1645			emit_bswap_r64(ctx, dst, imm);
1646		else
1647			emit_trunc_r64(ctx, dst, imm);
1648		break;
1649	/* dst = imm64 */
1650	case BPF_LD | BPF_IMM | BPF_DW:
1651		emit_mov_i(ctx, lo(dst), imm);
1652		emit_mov_i(ctx, hi(dst), insn[1].imm);
1653		return 1;
1654	/* LDX: dst = *(size *)(src + off) */
1655	case BPF_LDX | BPF_MEM | BPF_W:
1656	case BPF_LDX | BPF_MEM | BPF_H:
1657	case BPF_LDX | BPF_MEM | BPF_B:
1658	case BPF_LDX | BPF_MEM | BPF_DW:
1659		emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
1660		break;
1661	/* ST: *(size *)(dst + off) = imm */
1662	case BPF_ST | BPF_MEM | BPF_W:
1663	case BPF_ST | BPF_MEM | BPF_H:
1664	case BPF_ST | BPF_MEM | BPF_B:
1665	case BPF_ST | BPF_MEM | BPF_DW:
1666		switch (BPF_SIZE(code)) {
1667		case BPF_DW:
1668			/* Sign-extend immediate value into temporary reg */
1669			emit_mov_se_i64(ctx, tmp, imm);
1670			break;
1671		case BPF_W:
1672		case BPF_H:
1673		case BPF_B:
1674			emit_mov_i(ctx, lo(tmp), imm);
1675			break;
1676		}
1677		emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
1678		break;
1679	/* STX: *(size *)(dst + off) = src */
1680	case BPF_STX | BPF_MEM | BPF_W:
1681	case BPF_STX | BPF_MEM | BPF_H:
1682	case BPF_STX | BPF_MEM | BPF_B:
1683	case BPF_STX | BPF_MEM | BPF_DW:
1684		emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
1685		break;
1686	/* Speculation barrier */
1687	case BPF_ST | BPF_NOSPEC:
1688		break;
1689	/* Atomics */
1690	case BPF_STX | BPF_ATOMIC | BPF_W:
1691		switch (imm) {
1692		case BPF_ADD:
1693		case BPF_ADD | BPF_FETCH:
1694		case BPF_AND:
1695		case BPF_AND | BPF_FETCH:
1696		case BPF_OR:
1697		case BPF_OR | BPF_FETCH:
1698		case BPF_XOR:
1699		case BPF_XOR | BPF_FETCH:
1700		case BPF_XCHG:
1701			if (cpu_has_llsc)
1702				emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
1703			else /* Non-ll/sc fallback */
1704				emit_atomic_r32(ctx, lo(dst), lo(src),
1705						off, imm);
1706			if (imm & BPF_FETCH)
1707				emit_zext_ver(ctx, src);
1708			break;
1709		case BPF_CMPXCHG:
1710			if (cpu_has_llsc)
1711				emit_cmpxchg_r(ctx, lo(dst), lo(src),
1712					       lo(res), off);
1713			else /* Non-ll/sc fallback */
1714				emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
1715			/* Result zero-extension inserted by verifier */
1716			break;
1717		default:
1718			goto notyet;
1719		}
1720		break;
1721	/* Atomics (64-bit) */
1722	case BPF_STX | BPF_ATOMIC | BPF_DW:
1723		switch (imm) {
1724		case BPF_ADD:
1725		case BPF_ADD | BPF_FETCH:
1726		case BPF_AND:
1727		case BPF_AND | BPF_FETCH:
1728		case BPF_OR:
1729		case BPF_OR | BPF_FETCH:
1730		case BPF_XOR:
1731		case BPF_XOR | BPF_FETCH:
1732		case BPF_XCHG:
1733			emit_atomic_r64(ctx, lo(dst), src, off, imm);
1734			break;
1735		case BPF_CMPXCHG:
1736			emit_cmpxchg_r64(ctx, lo(dst), src, off);
1737			break;
1738		default:
1739			goto notyet;
1740		}
1741		break;
1742	/* PC += off if dst == src */
1743	/* PC += off if dst != src */
1744	/* PC += off if dst & src */
1745	/* PC += off if dst > src */
1746	/* PC += off if dst >= src */
1747	/* PC += off if dst < src */
1748	/* PC += off if dst <= src */
1749	/* PC += off if dst > src (signed) */
1750	/* PC += off if dst >= src (signed) */
1751	/* PC += off if dst < src (signed) */
1752	/* PC += off if dst <= src (signed) */
1753	case BPF_JMP32 | BPF_JEQ | BPF_X:
1754	case BPF_JMP32 | BPF_JNE | BPF_X:
1755	case BPF_JMP32 | BPF_JSET | BPF_X:
1756	case BPF_JMP32 | BPF_JGT | BPF_X:
1757	case BPF_JMP32 | BPF_JGE | BPF_X:
1758	case BPF_JMP32 | BPF_JLT | BPF_X:
1759	case BPF_JMP32 | BPF_JLE | BPF_X:
1760	case BPF_JMP32 | BPF_JSGT | BPF_X:
1761	case BPF_JMP32 | BPF_JSGE | BPF_X:
1762	case BPF_JMP32 | BPF_JSLT | BPF_X:
1763	case BPF_JMP32 | BPF_JSLE | BPF_X:
1764		if (off == 0)
1765			break;
1766		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
1767		emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
1768		if (finish_jmp(ctx, jmp, off) < 0)
1769			goto toofar;
1770		break;
1771	/* PC += off if dst == imm */
1772	/* PC += off if dst != imm */
1773	/* PC += off if dst & imm */
1774	/* PC += off if dst > imm */
1775	/* PC += off if dst >= imm */
1776	/* PC += off if dst < imm */
1777	/* PC += off if dst <= imm */
1778	/* PC += off if dst > imm (signed) */
1779	/* PC += off if dst >= imm (signed) */
1780	/* PC += off if dst < imm (signed) */
1781	/* PC += off if dst <= imm (signed) */
1782	case BPF_JMP32 | BPF_JEQ | BPF_K:
1783	case BPF_JMP32 | BPF_JNE | BPF_K:
1784	case BPF_JMP32 | BPF_JSET | BPF_K:
1785	case BPF_JMP32 | BPF_JGT | BPF_K:
1786	case BPF_JMP32 | BPF_JGE | BPF_K:
1787	case BPF_JMP32 | BPF_JLT | BPF_K:
1788	case BPF_JMP32 | BPF_JLE | BPF_K:
1789	case BPF_JMP32 | BPF_JSGT | BPF_K:
1790	case BPF_JMP32 | BPF_JSGE | BPF_K:
1791	case BPF_JMP32 | BPF_JSLT | BPF_K:
1792	case BPF_JMP32 | BPF_JSLE | BPF_K:
1793		if (off == 0)
1794			break;
1795		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
1796		if (valid_jmp_i(jmp, imm)) {
1797			emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
1798		} else {
1799			/* Move large immediate to register */
1800			emit_mov_i(ctx, MIPS_R_T6, imm);
1801			emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
1802		}
1803		if (finish_jmp(ctx, jmp, off) < 0)
1804			goto toofar;
1805		break;
1806	/* PC += off if dst == src */
1807	/* PC += off if dst != src */
1808	/* PC += off if dst & src */
1809	/* PC += off if dst > src */
1810	/* PC += off if dst >= src */
1811	/* PC += off if dst < src */
1812	/* PC += off if dst <= src */
1813	/* PC += off if dst > src (signed) */
1814	/* PC += off if dst >= src (signed) */
1815	/* PC += off if dst < src (signed) */
1816	/* PC += off if dst <= src (signed) */
1817	case BPF_JMP | BPF_JEQ | BPF_X:
1818	case BPF_JMP | BPF_JNE | BPF_X:
1819	case BPF_JMP | BPF_JSET | BPF_X:
1820	case BPF_JMP | BPF_JGT | BPF_X:
1821	case BPF_JMP | BPF_JGE | BPF_X:
1822	case BPF_JMP | BPF_JLT | BPF_X:
1823	case BPF_JMP | BPF_JLE | BPF_X:
1824	case BPF_JMP | BPF_JSGT | BPF_X:
1825	case BPF_JMP | BPF_JSGE | BPF_X:
1826	case BPF_JMP | BPF_JSLT | BPF_X:
1827	case BPF_JMP | BPF_JSLE | BPF_X:
1828		if (off == 0)
1829			break;
1830		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
1831		emit_jmp_r64(ctx, dst, src, rel, jmp);
1832		if (finish_jmp(ctx, jmp, off) < 0)
1833			goto toofar;
1834		break;
1835	/* PC += off if dst == imm */
1836	/* PC += off if dst != imm */
1837	/* PC += off if dst & imm */
1838	/* PC += off if dst > imm */
1839	/* PC += off if dst >= imm */
1840	/* PC += off if dst < imm */
1841	/* PC += off if dst <= imm */
1842	/* PC += off if dst > imm (signed) */
1843	/* PC += off if dst >= imm (signed) */
1844	/* PC += off if dst < imm (signed) */
1845	/* PC += off if dst <= imm (signed) */
1846	case BPF_JMP | BPF_JEQ | BPF_K:
1847	case BPF_JMP | BPF_JNE | BPF_K:
1848	case BPF_JMP | BPF_JSET | BPF_K:
1849	case BPF_JMP | BPF_JGT | BPF_K:
1850	case BPF_JMP | BPF_JGE | BPF_K:
1851	case BPF_JMP | BPF_JLT | BPF_K:
1852	case BPF_JMP | BPF_JLE | BPF_K:
1853	case BPF_JMP | BPF_JSGT | BPF_K:
1854	case BPF_JMP | BPF_JSGE | BPF_K:
1855	case BPF_JMP | BPF_JSLT | BPF_K:
1856	case BPF_JMP | BPF_JSLE | BPF_K:
1857		if (off == 0)
1858			break;
1859		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
1860		emit_jmp_i64(ctx, dst, imm, rel, jmp);
1861		if (finish_jmp(ctx, jmp, off) < 0)
1862			goto toofar;
1863		break;
1864	/* PC += off */
1865	case BPF_JMP | BPF_JA:
1866		if (off == 0)
1867			break;
1868		if (emit_ja(ctx, off) < 0)
1869			goto toofar;
1870		break;
1871	/* Tail call */
1872	case BPF_JMP | BPF_TAIL_CALL:
1873		if (emit_tail_call(ctx) < 0)
1874			goto invalid;
1875		break;
1876	/* Function call */
1877	case BPF_JMP | BPF_CALL:
1878		if (emit_call(ctx, insn) < 0)
1879			goto invalid;
1880		break;
1881	/* Function return */
1882	case BPF_JMP | BPF_EXIT:
1883		/*
 1884		 * Optimization: when the last instruction is EXIT,
 1885		 * simply fall through to the epilogue.
1886		 */
1887		if (ctx->bpf_index == ctx->program->len - 1)
1888			break;
1889		if (emit_exit(ctx) < 0)
1890			goto toofar;
1891		break;
1892
1893	default:
1894invalid:
1895		pr_err_once("unknown opcode %02x\n", code);
1896		return -EINVAL;
1897notyet:
1898		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
1899		return -EFAULT;
1900toofar:
1901		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
1902			     ctx->bpf_index, code);
1903		return -E2BIG;
1904	}
1905	return 0;
1906}