   1// SPDX-License-Identifier: GPL-2.0
   2/*
   3 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
   4 *
   5 * Author: Wang YanQing (udknight@gmail.com)
   6 * The code is based on code and ideas from:
   7 * Eric Dumazet (eric.dumazet@gmail.com)
   8 * and from:
   9 * Shubham Bansal <illusionist.neo@gmail.com>
  10 */
  11
  12#include <linux/netdevice.h>
  13#include <linux/filter.h>
  14#include <linux/if_vlan.h>
  15#include <asm/cacheflush.h>
  16#include <asm/set_memory.h>
  17#include <asm/nospec-branch.h>
  18#include <linux/bpf.h>
  19
  20/*
  21 * eBPF prog stack layout:
  22 *
  23 *                         high
  24 * original ESP =>        +-----+
  25 *                        |     | callee saved registers
  26 *                        +-----+
  27 *                        | ... | eBPF JIT scratch space
  28 * BPF_FP,IA32_EBP  =>    +-----+
  29 *                        | ... | eBPF prog stack
  30 *                        +-----+
  31 *                        |RSVD | JIT scratchpad
  32 * current ESP =>         +-----+
  33 *                        |     |
  34 *                        | ... | Function call stack
  35 *                        |     |
  36 *                        +-----+
  37 *                          low
  38 *
  39 * The callee saved registers:
  40 *
  41 *                                high
  42 * original ESP =>        +------------------+ \
  43 *                        |        ebp       | |
  44 * current EBP =>         +------------------+ } callee saved registers
  45 *                        |    ebx,esi,edi   | |
  46 *                        +------------------+ /
  47 *                                low
  48 */
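/*
 * For illustration: after the prologue below (push ebp; mov ebp,esp;
 * push edi/esi/ebx; sub esp,STACK_SIZE; sub ebp,SCRATCH_SIZE+12), EBP
 * points at the bottom of the scratch area, so scratch slot 'off' lives
 * at [ebp+off].  The epilogue re-adds SCRATCH_SIZE+12 and then restores
 * ebx/esi/edi from [ebp-12], [ebp-8] and [ebp-4].
 */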
  49
  50static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
  51{
  52	if (len == 1)
  53		*ptr = bytes;
  54	else if (len == 2)
  55		*(u16 *)ptr = bytes;
  56	else {
  57		*(u32 *)ptr = bytes;
  58		barrier();
  59	}
  60	return ptr + len;
  61}
  62
  63#define EMIT(bytes, len) \
  64	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
  65
  66#define EMIT1(b1)		EMIT(b1, 1)
  67#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
  68#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
  69#define EMIT4(b1, b2, b3, b4)   \
  70	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
  71
  72#define EMIT1_off32(b1, off) \
  73	do { EMIT1(b1); EMIT(off, 4); } while (0)
  74#define EMIT2_off32(b1, b2, off) \
  75	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
  76#define EMIT3_off32(b1, b2, b3, off) \
  77	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
  78#define EMIT4_off32(b1, b2, b3, b4, off) \
  79	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
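/*
 * The EMIT helpers pack up to four opcode bytes little-endian into a
 * single u32 and advance 'prog'.  E.g. EMIT2(0x89, 0xE5) stores the two
 * bytes 0x89 0xE5 ("mov ebp,esp"), exactly as the prologue below does.
 */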
  80
  81#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
  82
  83static bool is_imm8(int value)
  84{
  85	return value <= 127 && value >= -128;
  86}
  87
  88static bool is_simm32(s64 value)
  89{
  90	return value == (s64) (s32) value;
  91}
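/*
 * is_imm8() picks between the two displacement forms of a memory
 * operand: an offset that fits in s8 is encoded as [reg+disp8]
 * (ModRM base 0x40), otherwise as [reg+disp32] (base 0x80) -- see the
 * BPF_ST/BPF_STX handling below.  is_simm32() guards the rel32 offset
 * computed for BPF_CALL.
 */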
  92
  93#define STACK_OFFSET(k)	(k)
  94#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */
  95
  96#define IA32_EAX	(0x0)
  97#define IA32_EBX	(0x3)
  98#define IA32_ECX	(0x1)
  99#define IA32_EDX	(0x2)
 100#define IA32_ESI	(0x6)
 101#define IA32_EDI	(0x7)
 102#define IA32_EBP	(0x5)
 103#define IA32_ESP	(0x4)
 104
 105/*
  106 * List of x86 conditional jump opcodes (. + s8)
 107 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 108 */
 109#define IA32_JB  0x72
 110#define IA32_JAE 0x73
 111#define IA32_JE  0x74
 112#define IA32_JNE 0x75
 113#define IA32_JBE 0x76
 114#define IA32_JA  0x77
 115#define IA32_JL  0x7C
 116#define IA32_JGE 0x7D
 117#define IA32_JLE 0x7E
 118#define IA32_JG  0x7F
 119
 120#define COND_JMP_OPCODE_INVALID	(0xFF)
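/*
 * Example of the far-jump rule above: the short "je" 0x74 (. + s8)
 * becomes the two-byte 0x0F 0x84 (. + s32) after adding 0x10 and the
 * extra 0x0F prefix.
 */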
 121
 122/*
 123 * Map eBPF registers to IA32 32bit registers or stack scratch space.
 124 *
 125 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
 126 * 2. We need two 64 bit temp registers to do complex operations on eBPF
 127 *    registers.
  128 * 3. For performance reasons, BPF_REG_AX, used for blinding constants,
  129 *    is mapped to the real hardware register pair IA32_ESI/IA32_EDI.
  130 *
  131 * As eBPF registers are all 64 bit and IA32 has only 32 bit registers,
  132 * each eBPF register is mapped to either two IA32 32 bit registers or two
  133 * 32 bit scratch slots, and the 64 bit value is built from that pair.
 134 *
 135 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
 136 */
 137static const u8 bpf2ia32[][2] = {
 138	/* Return value from in-kernel function, and exit value from eBPF */
 139	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
 140
 141	/* The arguments from eBPF program to in-kernel function */
 142	/* Stored on stack scratch space */
 143	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
 144	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
 145	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
 146	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
 147	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
 148
 149	/* Callee saved registers that in-kernel function will preserve */
 150	/* Stored on stack scratch space */
 151	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
 152	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
 153	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
 154	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
 155
  156	/* Read-only frame pointer to access the stack */
 157	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
 158
 159	/* Temporary register for blinding constants. */
 160	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},
 161
 162	/* Tail call count. Stored on stack scratch space. */
 163	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
 164};
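/*
 * Usage sketch: loading the low half of BPF_REG_1 into EAX is
 *
 *	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 *	      STACK_VAR(bpf2ia32[BPF_REG_1][0]));
 *
 * i.e. "mov eax,dword ptr [ebp+8]" -- the pattern used below for every
 * stack-mapped register.
 */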
 165
 166#define dst_lo	dst[0]
 167#define dst_hi	dst[1]
 168#define src_lo	src[0]
 169#define src_hi	src[1]
 170
 171#define STACK_ALIGNMENT	8
 172/*
 173 * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
 174 * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
 175 * BPF_REG_FP, BPF_REG_AX and Tail call counts.
 176 */
 177#define SCRATCH_SIZE 96
 178
 179/* Total stack size used in JITed code */
 180#define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)
 181
 182#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
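/*
 * Worked example: a program with stack_depth == 12 gets
 * _STACK_SIZE = 12 + 96 = 108, rounded up to STACK_SIZE = 112 so that
 * ESP keeps 8-byte alignment.
 */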
 183
 184/* Get the offset of eBPF REGISTERs stored on scratch space. */
 185#define STACK_VAR(off) (off)
 186
 187/* Encode 'dst_reg' register into IA32 opcode 'byte' */
 188static u8 add_1reg(u8 byte, u32 dst_reg)
 189{
 190	return byte + dst_reg;
 191}
 192
 193/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
 194static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
 195{
 196	return byte + dst_reg + (src_reg << 3);
 197}
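/*
 * These helpers build ModRM bytes and short-form opcodes.  E.g.
 * add_2reg(0xC0, IA32_ECX, IA32_EAX) == 0xC1, so
 * EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX)) encodes "mov ecx,eax",
 * and add_1reg(0x50, IA32_ECX) == 0x51 is "push ecx".
 */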
 198
 199static void jit_fill_hole(void *area, unsigned int size)
 200{
 201	/* Fill whole space with int3 instructions */
 202	memset(area, 0xcc, size);
 203}
 204
 205static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
 206				   u8 **pprog)
 207{
 208	u8 *prog = *pprog;
 209	int cnt = 0;
 210
 211	if (dstk) {
 212		if (val == 0) {
 213			/* xor eax,eax */
 214			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
 215			/* mov dword ptr [ebp+off],eax */
 216			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
 217			      STACK_VAR(dst));
 218		} else {
 219			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
 220				    STACK_VAR(dst), val);
 221		}
 222	} else {
 223		if (val == 0)
 224			EMIT2(0x33, add_2reg(0xC0, dst, dst));
 225		else
 226			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
 227				    val);
 228	}
 229	*pprog = prog;
 230}
 231
  232/* dst = src (4 bytes) */
 233static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
 234				   bool sstk, u8 **pprog)
 235{
 236	u8 *prog = *pprog;
 237	int cnt = 0;
 238	u8 sreg = sstk ? IA32_EAX : src;
 239
 240	if (sstk)
 241		/* mov eax,dword ptr [ebp+off] */
 242		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
 243	if (dstk)
 244		/* mov dword ptr [ebp+off],eax */
 245		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
 246	else
 247		/* mov dst,sreg */
 248		EMIT2(0x89, add_2reg(0xC0, dst, sreg));
 249
 250	*pprog = prog;
 251}
 252
 253/* dst = src */
 254static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
 255				     const u8 src[], bool dstk,
 256				     bool sstk, u8 **pprog,
 257				     const struct bpf_prog_aux *aux)
 258{
 259	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
 260	if (is64)
 261		/* complete 8 byte move */
 262		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
 263	else if (!aux->verifier_zext)
 264		/* zero out high 4 bytes */
 265		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
 266}
 267
  268/* Sign-extended immediate move */
 269static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
 270				     const u32 val, bool dstk, u8 **pprog)
 271{
 272	u32 hi = 0;
 273
 274	if (is64 && (val & (1<<31)))
 275		hi = (u32)~0;
 276	emit_ia32_mov_i(dst_lo, val, dstk, pprog);
 277	emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
 278}
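/*
 * E.g. a BPF_ALU64 "mov r2, -2" arrives with imm32 == 0xFFFFFFFE; bit 31
 * is set, so hi becomes 0xFFFFFFFF and the register pair holds
 * 0xFFFFFFFFFFFFFFFE.
 */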
 279
 280/*
 281 * ALU operation (32 bit)
 282 * dst = dst * src
 283 */
 284static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
 285				   bool sstk, u8 **pprog)
 286{
 287	u8 *prog = *pprog;
 288	int cnt = 0;
 289	u8 sreg = sstk ? IA32_ECX : src;
 290
 291	if (sstk)
 292		/* mov ecx,dword ptr [ebp+off] */
 293		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
 294
 295	if (dstk)
 296		/* mov eax,dword ptr [ebp+off] */
 297		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
 298	else
 299		/* mov eax,dst */
 300		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
  301
  302	/* mul sreg */
  303	EMIT2(0xF7, add_1reg(0xE0, sreg));
 304
 305	if (dstk)
 306		/* mov dword ptr [ebp+off],eax */
 307		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
 308		      STACK_VAR(dst));
 309	else
 310		/* mov dst,eax */
 311		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
 312
 313	*pprog = prog;
 314}
 315
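/*
 * dst = htole16/32/64(dst).  IA32 is little-endian, so converting to LE
 * only needs to truncate: zero-extend the low 16 bits (case 16), clear
 * the high word (case 32), or do nothing at all (case 64).
 */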
 316static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
 317					 bool dstk, u8 **pprog,
 318					 const struct bpf_prog_aux *aux)
 319{
 320	u8 *prog = *pprog;
 321	int cnt = 0;
 322	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 323	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 324
 325	if (dstk && val != 64) {
 326		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 327		      STACK_VAR(dst_lo));
 328		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 329		      STACK_VAR(dst_hi));
 330	}
 331	switch (val) {
 332	case 16:
 333		/*
 334		 * Emit 'movzwl eax,ax' to zero extend 16-bit
 335		 * into 64 bit
 336		 */
 337		EMIT2(0x0F, 0xB7);
 338		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
 339		if (!aux->verifier_zext)
 340			/* xor dreg_hi,dreg_hi */
 341			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 342		break;
 343	case 32:
 344		if (!aux->verifier_zext)
 345			/* xor dreg_hi,dreg_hi */
 346			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 347		break;
 348	case 64:
 349		/* nop */
 350		break;
 351	}
 352
 353	if (dstk && val != 64) {
 354		/* mov dword ptr [ebp+off],dreg_lo */
 355		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 356		      STACK_VAR(dst_lo));
 357		/* mov dword ptr [ebp+off],dreg_hi */
 358		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 359		      STACK_VAR(dst_hi));
 360	}
 361	*pprog = prog;
 362}
 363
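/*
 * dst = htobe16/32/64(dst): byte-swap the low 16 or 32 bits, or, for
 * the 64 bit case, bswap both halves and exchange them through ECX.
 */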
 364static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
 365				       bool dstk, u8 **pprog,
 366				       const struct bpf_prog_aux *aux)
 367{
 368	u8 *prog = *pprog;
 369	int cnt = 0;
 370	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 371	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 372
 373	if (dstk) {
 374		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 375		      STACK_VAR(dst_lo));
 376		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 377		      STACK_VAR(dst_hi));
 378	}
 379	switch (val) {
 380	case 16:
 381		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
 382		EMIT1(0x66);
 383		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
 384
 385		EMIT2(0x0F, 0xB7);
 386		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
 387
 388		if (!aux->verifier_zext)
 389			/* xor dreg_hi,dreg_hi */
 390			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 391		break;
 392	case 32:
 393		/* Emit 'bswap eax' to swap lower 4 bytes */
 394		EMIT1(0x0F);
 395		EMIT1(add_1reg(0xC8, dreg_lo));
 396
 397		if (!aux->verifier_zext)
 398			/* xor dreg_hi,dreg_hi */
 399			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 400		break;
 401	case 64:
 402		/* Emit 'bswap eax' to swap lower 4 bytes */
 403		EMIT1(0x0F);
 404		EMIT1(add_1reg(0xC8, dreg_lo));
 405
 406		/* Emit 'bswap edx' to swap lower 4 bytes */
 407		EMIT1(0x0F);
 408		EMIT1(add_1reg(0xC8, dreg_hi));
 409
 410		/* mov ecx,dreg_hi */
 411		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
 412		/* mov dreg_hi,dreg_lo */
 413		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
 414		/* mov dreg_lo,ecx */
 415		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
 416
 417		break;
 418	}
 419	if (dstk) {
 420		/* mov dword ptr [ebp+off],dreg_lo */
 421		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 422		      STACK_VAR(dst_lo));
 423		/* mov dword ptr [ebp+off],dreg_hi */
 424		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 425		      STACK_VAR(dst_hi));
 426	}
 427	*pprog = prog;
 428}
 429
 430/*
 431 * ALU operation (32 bit)
 432 * dst = dst (div|mod) src
 433 */
 434static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
 435				       bool dstk, bool sstk, u8 **pprog)
 436{
 437	u8 *prog = *pprog;
 438	int cnt = 0;
 439
 440	if (sstk)
 441		/* mov ecx,dword ptr [ebp+off] */
 442		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 443		      STACK_VAR(src));
 444	else if (src != IA32_ECX)
 445		/* mov ecx,src */
 446		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
 447
 448	if (dstk)
 449		/* mov eax,dword ptr [ebp+off] */
 450		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 451		      STACK_VAR(dst));
 452	else
 453		/* mov eax,dst */
 454		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
 455
 456	/* xor edx,edx */
 457	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
 458	/* div ecx */
 459	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
 460
 461	if (op == BPF_MOD) {
 462		if (dstk)
 463			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
 464			      STACK_VAR(dst));
 465		else
 466			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
 467	} else {
 468		if (dstk)
 469			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
 470			      STACK_VAR(dst));
 471		else
 472			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
 473	}
 474	*pprog = prog;
 475}
 476
 477/*
 478 * ALU operation (32 bit)
 479 * dst = dst (shift) src
 480 */
 481static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
 482				     bool dstk, bool sstk, u8 **pprog)
 483{
 484	u8 *prog = *pprog;
 485	int cnt = 0;
 486	u8 dreg = dstk ? IA32_EAX : dst;
 487	u8 b2;
 488
 489	if (dstk)
 490		/* mov eax,dword ptr [ebp+off] */
 491		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
 492
 493	if (sstk)
 494		/* mov ecx,dword ptr [ebp+off] */
 495		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
 496	else if (src != IA32_ECX)
 497		/* mov ecx,src */
 498		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
 499
 500	switch (op) {
 501	case BPF_LSH:
 502		b2 = 0xE0; break;
 503	case BPF_RSH:
 504		b2 = 0xE8; break;
 505	case BPF_ARSH:
 506		b2 = 0xF8; break;
 507	default:
 508		return;
 509	}
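	/*
	 * b2 selects the /r extension of the 0xD3 shift group:
	 * 0xE0 = /4 (shl), 0xE8 = /5 (shr), 0xF8 = /7 (sar), so the
	 * EMIT2() below encodes "shl/shr/sar dreg,cl".
	 */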
 510	EMIT2(0xD3, add_1reg(b2, dreg));
 511
 512	if (dstk)
 513		/* mov dword ptr [ebp+off],dreg */
 514		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
 515	*pprog = prog;
 516}
 517
 518/*
 519 * ALU operation (32 bit)
 520 * dst = dst (op) src
 521 */
 522static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
 523				   const u8 dst, const u8 src, bool dstk,
 524				   bool sstk, u8 **pprog)
 525{
 526	u8 *prog = *pprog;
 527	int cnt = 0;
 528	u8 sreg = sstk ? IA32_EAX : src;
 529	u8 dreg = dstk ? IA32_EDX : dst;
 530
 531	if (sstk)
 532		/* mov eax,dword ptr [ebp+off] */
 533		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
 534
 535	if (dstk)
 536		/* mov eax,dword ptr [ebp+off] */
 537		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
 538
 539	switch (BPF_OP(op)) {
 540	/* dst = dst + src */
 541	case BPF_ADD:
 542		if (hi && is64)
 543			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
 544		else
 545			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
 546		break;
 547	/* dst = dst - src */
 548	case BPF_SUB:
 549		if (hi && is64)
 550			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
 551		else
 552			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
 553		break;
 554	/* dst = dst | src */
 555	case BPF_OR:
 556		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
 557		break;
 558	/* dst = dst & src */
 559	case BPF_AND:
 560		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
 561		break;
 562	/* dst = dst ^ src */
 563	case BPF_XOR:
 564		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
 565		break;
 566	}
 567
 568	if (dstk)
 569		/* mov dword ptr [ebp+off],dreg */
 570		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
 571		      STACK_VAR(dst));
 572	*pprog = prog;
 573}
 574
 575/* ALU operation (64 bit) */
 576static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
 577				     const u8 dst[], const u8 src[],
 578				     bool dstk,  bool sstk,
 579				     u8 **pprog, const struct bpf_prog_aux *aux)
 580{
 581	u8 *prog = *pprog;
 582
 583	emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
 584	if (is64)
 585		emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
 586				&prog);
 587	else if (!aux->verifier_zext)
 588		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
 589	*pprog = prog;
 590}
 591
 592/*
 593 * ALU operation (32 bit)
 594 * dst = dst (op) val
 595 */
 596static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
 597				   const u8 dst, const s32 val, bool dstk,
 598				   u8 **pprog)
 599{
 600	u8 *prog = *pprog;
 601	int cnt = 0;
 602	u8 dreg = dstk ? IA32_EAX : dst;
 603	u8 sreg = IA32_EDX;
 604
 605	if (dstk)
 606		/* mov eax,dword ptr [ebp+off] */
 607		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
 608
 609	if (!is_imm8(val))
 610		/* mov edx,imm32*/
 611		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
 612
 613	switch (op) {
 614	/* dst = dst + val */
 615	case BPF_ADD:
 616		if (hi && is64) {
 617			if (is_imm8(val))
 618				EMIT3(0x83, add_1reg(0xD0, dreg), val);
 619			else
 620				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
 621		} else {
 622			if (is_imm8(val))
 623				EMIT3(0x83, add_1reg(0xC0, dreg), val);
 624			else
 625				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
 626		}
 627		break;
 628	/* dst = dst - val */
 629	case BPF_SUB:
 630		if (hi && is64) {
 631			if (is_imm8(val))
 632				EMIT3(0x83, add_1reg(0xD8, dreg), val);
 633			else
 634				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
 635		} else {
 636			if (is_imm8(val))
 637				EMIT3(0x83, add_1reg(0xE8, dreg), val);
 638			else
 639				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
 640		}
 641		break;
 642	/* dst = dst | val */
 643	case BPF_OR:
 644		if (is_imm8(val))
 645			EMIT3(0x83, add_1reg(0xC8, dreg), val);
 646		else
 647			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
 648		break;
 649	/* dst = dst & val */
 650	case BPF_AND:
 651		if (is_imm8(val))
 652			EMIT3(0x83, add_1reg(0xE0, dreg), val);
 653		else
 654			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
 655		break;
 656	/* dst = dst ^ val */
 657	case BPF_XOR:
 658		if (is_imm8(val))
 659			EMIT3(0x83, add_1reg(0xF0, dreg), val);
 660		else
 661			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
 662		break;
 663	case BPF_NEG:
 664		EMIT2(0xF7, add_1reg(0xD8, dreg));
 665		break;
 666	}
 667
 668	if (dstk)
 669		/* mov dword ptr [ebp+off],dreg */
 670		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
 671		      STACK_VAR(dst));
 672	*pprog = prog;
 673}
 674
 675/* ALU operation (64 bit) */
 676static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
 677				     const u8 dst[], const u32 val,
 678				     bool dstk, u8 **pprog,
 679				     const struct bpf_prog_aux *aux)
 680{
 681	u8 *prog = *pprog;
 682	u32 hi = 0;
 683
 684	if (is64 && (val & (1<<31)))
 685		hi = (u32)~0;
 686
 687	emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
 688	if (is64)
 689		emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
 690	else if (!aux->verifier_zext)
 691		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
 692
 693	*pprog = prog;
 694}
 695
  696/* dst = -dst (64 bit) */
 697static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
 698{
 699	u8 *prog = *pprog;
 700	int cnt = 0;
 701	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 702	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 703
 704	if (dstk) {
 705		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 706		      STACK_VAR(dst_lo));
 707		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 708		      STACK_VAR(dst_hi));
 709	}
 710
 711	/* neg dreg_lo */
 712	EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
 713	/* adc dreg_hi,0x0 */
 714	EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
 715	/* neg dreg_hi */
 716	EMIT2(0xF7, add_1reg(0xD8, dreg_hi));
 717
 718	if (dstk) {
 719		/* mov dword ptr [ebp+off],dreg_lo */
 720		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 721		      STACK_VAR(dst_lo));
 722		/* mov dword ptr [ebp+off],dreg_hi */
 723		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 724		      STACK_VAR(dst_hi));
 725	}
 726	*pprog = prog;
 727}
 728
 729/* dst = dst << src */
 730static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
 731				     bool dstk, bool sstk, u8 **pprog)
 732{
 733	u8 *prog = *pprog;
 734	int cnt = 0;
 735	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 736	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 737
 738	if (dstk) {
 739		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 740		      STACK_VAR(dst_lo));
 741		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 742		      STACK_VAR(dst_hi));
 743	}
 744
 745	if (sstk)
 746		/* mov ecx,dword ptr [ebp+off] */
 747		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 748		      STACK_VAR(src_lo));
 749	else
 750		/* mov ecx,src_lo */
 751		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
 752
 753	/* shld dreg_hi,dreg_lo,cl */
 754	EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
 755	/* shl dreg_lo,cl */
 756	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
 757
 758	/* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */
 759
 760	/* cmp ecx,32 */
 761	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
 762	/* skip the next two instructions (4 bytes) when < 32 */
 763	EMIT2(IA32_JB, 4);
 764
 765	/* mov dreg_hi,dreg_lo */
 766	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
 767	/* xor dreg_lo,dreg_lo */
 768	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
 769
 770	if (dstk) {
 771		/* mov dword ptr [ebp+off],dreg_lo */
 772		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 773		      STACK_VAR(dst_lo));
 774		/* mov dword ptr [ebp+off],dreg_hi */
 775		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 776		      STACK_VAR(dst_hi));
 777	}
 778	/* out: */
 779	*pprog = prog;
 780}
 781
 782/* dst = dst >> src (signed)*/
 783static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
 784				      bool dstk, bool sstk, u8 **pprog)
 785{
 786	u8 *prog = *pprog;
 787	int cnt = 0;
 788	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 789	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 790
 791	if (dstk) {
 792		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 793		      STACK_VAR(dst_lo));
 794		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 795		      STACK_VAR(dst_hi));
 796	}
 797
 798	if (sstk)
 799		/* mov ecx,dword ptr [ebp+off] */
 800		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 801		      STACK_VAR(src_lo));
 802	else
 803		/* mov ecx,src_lo */
 804		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
 805
 806	/* shrd dreg_lo,dreg_hi,cl */
 807	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
 808	/* sar dreg_hi,cl */
 809	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
 810
 811	/* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */
 812
 813	/* cmp ecx,32 */
 814	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
 815	/* skip the next two instructions (5 bytes) when < 32 */
 816	EMIT2(IA32_JB, 5);
 817
 818	/* mov dreg_lo,dreg_hi */
 819	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
 820	/* sar dreg_hi,31 */
 821	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
 822
 823	if (dstk) {
 824		/* mov dword ptr [ebp+off],dreg_lo */
 825		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 826		      STACK_VAR(dst_lo));
 827		/* mov dword ptr [ebp+off],dreg_hi */
 828		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 829		      STACK_VAR(dst_hi));
 830	}
 831	/* out: */
 832	*pprog = prog;
 833}
 834
 835/* dst = dst >> src */
 836static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
 837				     bool sstk, u8 **pprog)
 838{
 839	u8 *prog = *pprog;
 840	int cnt = 0;
 841	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 842	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 843
 844	if (dstk) {
 845		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 846		      STACK_VAR(dst_lo));
 847		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 848		      STACK_VAR(dst_hi));
 849	}
 850
 851	if (sstk)
 852		/* mov ecx,dword ptr [ebp+off] */
 853		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
 854		      STACK_VAR(src_lo));
 855	else
 856		/* mov ecx,src_lo */
 857		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
 858
 859	/* shrd dreg_lo,dreg_hi,cl */
 860	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
 861	/* shr dreg_hi,cl */
 862	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
 863
 864	/* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */
 865
 866	/* cmp ecx,32 */
 867	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
 868	/* skip the next two instructions (4 bytes) when < 32 */
 869	EMIT2(IA32_JB, 4);
 870
 871	/* mov dreg_lo,dreg_hi */
 872	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
 873	/* xor dreg_hi,dreg_hi */
 874	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 875
 876	if (dstk) {
 877		/* mov dword ptr [ebp+off],dreg_lo */
 878		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 879		      STACK_VAR(dst_lo));
 880		/* mov dword ptr [ebp+off],dreg_hi */
 881		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 882		      STACK_VAR(dst_hi));
 883	}
 884	/* out: */
 885	*pprog = prog;
 886}
 887
 888/* dst = dst << val */
 889static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
 890				     bool dstk, u8 **pprog)
 891{
 892	u8 *prog = *pprog;
 893	int cnt = 0;
 894	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 895	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 896
 897	if (dstk) {
 898		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 899		      STACK_VAR(dst_lo));
 900		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 901		      STACK_VAR(dst_hi));
 902	}
 903	/* Do LSH operation */
 904	if (val < 32) {
 905		/* shld dreg_hi,dreg_lo,imm8 */
 906		EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
 907		/* shl dreg_lo,imm8 */
 908		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
 909	} else if (val >= 32 && val < 64) {
 910		u32 value = val - 32;
 911
 912		/* shl dreg_lo,imm8 */
 913		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
 914		/* mov dreg_hi,dreg_lo */
 915		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
 916		/* xor dreg_lo,dreg_lo */
 917		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
 918	} else {
 919		/* xor dreg_lo,dreg_lo */
 920		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
 921		/* xor dreg_hi,dreg_hi */
 922		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 923	}
 924
 925	if (dstk) {
 926		/* mov dword ptr [ebp+off],dreg_lo */
 927		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 928		      STACK_VAR(dst_lo));
 929		/* mov dword ptr [ebp+off],dreg_hi */
 930		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 931		      STACK_VAR(dst_hi));
 932	}
 933	*pprog = prog;
 934}
 935
 936/* dst = dst >> val */
 937static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
 938				     bool dstk, u8 **pprog)
 939{
 940	u8 *prog = *pprog;
 941	int cnt = 0;
 942	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 943	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 944
 945	if (dstk) {
 946		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 947		      STACK_VAR(dst_lo));
 948		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 949		      STACK_VAR(dst_hi));
 950	}
 951
 952	/* Do RSH operation */
 953	if (val < 32) {
 954		/* shrd dreg_lo,dreg_hi,imm8 */
 955		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
 956		/* shr dreg_hi,imm8 */
 957		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
 958	} else if (val >= 32 && val < 64) {
 959		u32 value = val - 32;
 960
 961		/* shr dreg_hi,imm8 */
 962		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
 963		/* mov dreg_lo,dreg_hi */
 964		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
 965		/* xor dreg_hi,dreg_hi */
 966		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 967	} else {
 968		/* xor dreg_lo,dreg_lo */
 969		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
 970		/* xor dreg_hi,dreg_hi */
 971		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
 972	}
 973
 974	if (dstk) {
 975		/* mov dword ptr [ebp+off],dreg_lo */
 976		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
 977		      STACK_VAR(dst_lo));
 978		/* mov dword ptr [ebp+off],dreg_hi */
 979		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
 980		      STACK_VAR(dst_hi));
 981	}
 982	*pprog = prog;
 983}
 984
 985/* dst = dst >> val (signed) */
 986static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
 987				      bool dstk, u8 **pprog)
 988{
 989	u8 *prog = *pprog;
 990	int cnt = 0;
 991	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
 992	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
 993
 994	if (dstk) {
 995		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
 996		      STACK_VAR(dst_lo));
 997		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
 998		      STACK_VAR(dst_hi));
 999	}
1000	/* Do RSH operation */
1001	if (val < 32) {
1002		/* shrd dreg_lo,dreg_hi,imm8 */
1003		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
1004		/* ashr dreg_hi,imm8 */
1005		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1006	} else if (val >= 32 && val < 64) {
1007		u32 value = val - 32;
1008
1009		/* ashr dreg_hi,imm8 */
1010		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1011		/* mov dreg_lo,dreg_hi */
1012		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1013
1014		/* ashr dreg_hi,imm8 */
1015		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1016	} else {
1017		/* ashr dreg_hi,imm8 */
1018		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1019		/* mov dreg_lo,dreg_hi */
1020		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1021	}
1022
1023	if (dstk) {
1024		/* mov dword ptr [ebp+off],dreg_lo */
1025		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1026		      STACK_VAR(dst_lo));
1027		/* mov dword ptr [ebp+off],dreg_hi */
1028		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1029		      STACK_VAR(dst_hi));
1030	}
1031	*pprog = prog;
1032}
1033
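/*
 * 64x64 -> 64 multiply, schoolbook style: with dst = dh:dl and
 * src = sh:sl,
 *
 *	dst * src (mod 2^64) = dl*sl + ((dh*sl + dl*sh) << 32)
 *
 * so the high word is dh*sl + dl*sh + high32(dl*sl), accumulated in ECX
 * below, while the low word low32(dl*sl) is left in EAX.
 */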
1034static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
1035				     bool sstk, u8 **pprog)
1036{
1037	u8 *prog = *pprog;
1038	int cnt = 0;
1039
1040	if (dstk)
1041		/* mov eax,dword ptr [ebp+off] */
1042		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1043		      STACK_VAR(dst_hi));
1044	else
1045		/* mov eax,dst_hi */
1046		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
1047
1048	if (sstk)
1049		/* mul dword ptr [ebp+off] */
1050		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1051	else
1052		/* mul src_lo */
1053		EMIT2(0xF7, add_1reg(0xE0, src_lo));
1054
1055	/* mov ecx,eax */
1056	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1057
1058	if (dstk)
1059		/* mov eax,dword ptr [ebp+off] */
1060		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1061		      STACK_VAR(dst_lo));
1062	else
1063		/* mov eax,dst_lo */
1064		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1065
1066	if (sstk)
1067		/* mul dword ptr [ebp+off] */
1068		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
1069	else
1070		/* mul src_hi */
1071		EMIT2(0xF7, add_1reg(0xE0, src_hi));
1072
 1073	/* add ecx,eax */
1074	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1075
1076	if (dstk)
1077		/* mov eax,dword ptr [ebp+off] */
1078		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1079		      STACK_VAR(dst_lo));
1080	else
1081		/* mov eax,dst_lo */
1082		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1083
1084	if (sstk)
1085		/* mul dword ptr [ebp+off] */
1086		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1087	else
1088		/* mul src_lo */
1089		EMIT2(0xF7, add_1reg(0xE0, src_lo));
1090
1091	/* add ecx,edx */
1092	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1093
1094	if (dstk) {
1095		/* mov dword ptr [ebp+off],eax */
1096		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1097		      STACK_VAR(dst_lo));
1098		/* mov dword ptr [ebp+off],ecx */
1099		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1100		      STACK_VAR(dst_hi));
1101	} else {
1102		/* mov dst_lo,eax */
1103		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1104		/* mov dst_hi,ecx */
1105		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1106	}
1107
1108	*pprog = prog;
1109}
1110
1111static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
1112				     bool dstk, u8 **pprog)
1113{
1114	u8 *prog = *pprog;
1115	int cnt = 0;
1116	u32 hi;
1117
1118	hi = val & (1<<31) ? (u32)~0 : 0;
1119	/* movl eax,imm32 */
1120	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1121	if (dstk)
1122		/* mul dword ptr [ebp+off] */
1123		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
1124	else
1125		/* mul dst_hi */
1126		EMIT2(0xF7, add_1reg(0xE0, dst_hi));
1127
1128	/* mov ecx,eax */
1129	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1130
1131	/* movl eax,imm32 */
1132	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
1133	if (dstk)
1134		/* mul dword ptr [ebp+off] */
1135		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1136	else
1137		/* mul dst_lo */
1138		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1139	/* add ecx,eax */
1140	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1141
1142	/* movl eax,imm32 */
1143	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1144	if (dstk)
1145		/* mul dword ptr [ebp+off] */
1146		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1147	else
1148		/* mul dst_lo */
1149		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1150
1151	/* add ecx,edx */
1152	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1153
1154	if (dstk) {
1155		/* mov dword ptr [ebp+off],eax */
1156		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1157		      STACK_VAR(dst_lo));
1158		/* mov dword ptr [ebp+off],ecx */
1159		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1160		      STACK_VAR(dst_hi));
1161	} else {
 1162		/* mov dst_lo,eax */
 1163		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
 1164		/* mov dst_hi,ecx */
 1165		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1166	}
1167
1168	*pprog = prog;
1169}
1170
1171static int bpf_size_to_x86_bytes(int bpf_size)
1172{
1173	if (bpf_size == BPF_W)
1174		return 4;
1175	else if (bpf_size == BPF_H)
1176		return 2;
1177	else if (bpf_size == BPF_B)
1178		return 1;
1179	else if (bpf_size == BPF_DW)
1180		return 4; /* imm32 */
1181	else
1182		return 0;
1183}
1184
1185struct jit_context {
1186	int cleanup_addr; /* Epilogue code offset */
1187};
1188
1189/* Maximum number of bytes emitted while JITing one eBPF insn */
1190#define BPF_MAX_INSN_SIZE	128
1191#define BPF_INSN_SAFETY		64
1192
1193#define PROLOGUE_SIZE 35
1194
1195/*
 1196 * Emit prologue code for BPF program and check its size.
1197 * bpf_tail_call helper will skip it while jumping into another program.
1198 */
1199static void emit_prologue(u8 **pprog, u32 stack_depth)
1200{
1201	u8 *prog = *pprog;
1202	int cnt = 0;
1203	const u8 *r1 = bpf2ia32[BPF_REG_1];
1204	const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1205	const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1206	const u8 *tcc = bpf2ia32[TCALL_CNT];
1207
1208	/* push ebp */
1209	EMIT1(0x55);
1210	/* mov ebp,esp */
1211	EMIT2(0x89, 0xE5);
1212	/* push edi */
1213	EMIT1(0x57);
1214	/* push esi */
1215	EMIT1(0x56);
1216	/* push ebx */
1217	EMIT1(0x53);
1218
1219	/* sub esp,STACK_SIZE */
1220	EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1221	/* sub ebp,SCRATCH_SIZE+12*/
1222	EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
1223	/* xor ebx,ebx */
1224	EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1225
1226	/* Set up BPF prog stack base register */
1227	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1228	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1229
1230	/* Move BPF_CTX (EAX) to BPF_REG_R1 */
1231	/* mov dword ptr [ebp+off],eax */
1232	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1233	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1234
 1235	/* Initialize tail call count */
1236	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1237	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1238
1239	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
1240	*pprog = prog;
1241}
1242
1243/* Emit epilogue code for BPF program */
1244static void emit_epilogue(u8 **pprog, u32 stack_depth)
1245{
1246	u8 *prog = *pprog;
1247	const u8 *r0 = bpf2ia32[BPF_REG_0];
1248	int cnt = 0;
1249
1250	/* mov eax,dword ptr [ebp+off]*/
1251	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
1252	/* mov edx,dword ptr [ebp+off]*/
1253	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
1254
1255	/* add ebp,SCRATCH_SIZE+12*/
1256	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);
1257
1258	/* mov ebx,dword ptr [ebp-12]*/
1259	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
1260	/* mov esi,dword ptr [ebp-8]*/
1261	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
1262	/* mov edi,dword ptr [ebp-4]*/
1263	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
1264
1265	EMIT1(0xC9); /* leave */
1266	EMIT1(0xC3); /* ret */
1267	*pprog = prog;
1268}
1269
1270/*
1271 * Generate the following code:
1272 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1273 *   if (index >= array->map.max_entries)
1274 *     goto out;
 1275 *   if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
1276 *     goto out;
1277 *   prog = array->ptrs[index];
1278 *   if (prog == NULL)
1279 *     goto out;
1280 *   goto *(prog->bpf_func + prologue_size);
1281 * out:
1282 */
1283static void emit_bpf_tail_call(u8 **pprog)
1284{
1285	u8 *prog = *pprog;
1286	int cnt = 0;
1287	const u8 *r1 = bpf2ia32[BPF_REG_1];
1288	const u8 *r2 = bpf2ia32[BPF_REG_2];
1289	const u8 *r3 = bpf2ia32[BPF_REG_3];
1290	const u8 *tcc = bpf2ia32[TCALL_CNT];
1291	u32 lo, hi;
1292	static int jmp_label1 = -1;
1293
1294	/*
1295	 * if (index >= array->map.max_entries)
1296	 *     goto out;
1297	 */
1298	/* mov eax,dword ptr [ebp+off] */
1299	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1300	/* mov edx,dword ptr [ebp+off] */
1301	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1302
1303	/* cmp dword ptr [eax+off],edx */
1304	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1305	      offsetof(struct bpf_array, map.max_entries));
1306	/* jbe out */
1307	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1308
1309	/*
 1310	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
1311	 *     goto out;
1312	 */
1313	lo = (u32)MAX_TAIL_CALL_CNT;
1314	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1315	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1316	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1317
 1318	/* cmp ebx,hi */
1319	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1320	EMIT2(IA32_JNE, 3);
1321	/* cmp ecx,lo */
1322	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1323
 1324	/* jae out */
1325	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
1326
 1327	/* add ecx,0x1 */
1328	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1329	/* adc ebx,0x0 */
1330	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
1331
 1332	/* mov dword ptr [ebp+off],ecx */
 1333	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
 1334	/* mov dword ptr [ebp+off],ebx */
 1335	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1336
1337	/* prog = array->ptrs[index]; */
1338	/* mov edx, [eax + edx * 4 + offsetof(...)] */
1339	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
1340
1341	/*
1342	 * if (prog == NULL)
1343	 *     goto out;
1344	 */
1345	/* test edx,edx */
1346	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1347	/* je out */
1348	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1349
1350	/* goto *(prog->bpf_func + prologue_size); */
1351	/* mov edx, dword ptr [edx + 32] */
1352	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1353	      offsetof(struct bpf_prog, bpf_func));
1354	/* add edx,prologue_size */
1355	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1356
1357	/* mov eax,dword ptr [ebp+off] */
1358	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1359
1360	/*
1361	 * Now we're ready to jump into next BPF program:
1362	 * eax == ctx (1st arg)
1363	 * edx == prog->bpf_func + prologue_size
1364	 */
1365	RETPOLINE_EDX_BPF_JIT();
1366
1367	if (jmp_label1 == -1)
1368		jmp_label1 = cnt;
1369
1370	/* out: */
1371	*pprog = prog;
1372}
1373
 1374/* Push a 64-bit eBPF register held in stack scratch space onto the stack. */
1375static inline void emit_push_r64(const u8 src[], u8 **pprog)
1376{
1377	u8 *prog = *pprog;
1378	int cnt = 0;
1379
1380	/* mov ecx,dword ptr [ebp+off] */
1381	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
1382	/* push ecx */
1383	EMIT1(0x51);
1384
1385	/* mov ecx,dword ptr [ebp+off] */
1386	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
1387	/* push ecx */
1388	EMIT1(0x51);
1389
1390	*pprog = prog;
1391}
1392
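/*
 * The is_cmp_lo flag serves 64-bit signed comparisons done in two
 * steps: the high words are compared signed, but when they are equal
 * the decision falls to the low words, which must be compared unsigned
 * -- hence BPF_JSGT maps to JG for the high word but JA for the low
 * one.
 */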
1393static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
1394{
1395	u8 jmp_cond;
1396
1397	/* Convert BPF opcode to x86 */
1398	switch (op) {
1399	case BPF_JEQ:
1400		jmp_cond = IA32_JE;
1401		break;
1402	case BPF_JSET:
1403	case BPF_JNE:
1404		jmp_cond = IA32_JNE;
1405		break;
1406	case BPF_JGT:
1407		/* GT is unsigned '>', JA in x86 */
1408		jmp_cond = IA32_JA;
1409		break;
1410	case BPF_JLT:
1411		/* LT is unsigned '<', JB in x86 */
1412		jmp_cond = IA32_JB;
1413		break;
1414	case BPF_JGE:
1415		/* GE is unsigned '>=', JAE in x86 */
1416		jmp_cond = IA32_JAE;
1417		break;
1418	case BPF_JLE:
1419		/* LE is unsigned '<=', JBE in x86 */
1420		jmp_cond = IA32_JBE;
1421		break;
1422	case BPF_JSGT:
1423		if (!is_cmp_lo)
1424			/* Signed '>', GT in x86 */
1425			jmp_cond = IA32_JG;
1426		else
1427			/* GT is unsigned '>', JA in x86 */
1428			jmp_cond = IA32_JA;
1429		break;
1430	case BPF_JSLT:
1431		if (!is_cmp_lo)
1432			/* Signed '<', LT in x86 */
1433			jmp_cond = IA32_JL;
1434		else
1435			/* LT is unsigned '<', JB in x86 */
1436			jmp_cond = IA32_JB;
1437		break;
1438	case BPF_JSGE:
1439		if (!is_cmp_lo)
1440			/* Signed '>=', GE in x86 */
1441			jmp_cond = IA32_JGE;
1442		else
1443			/* GE is unsigned '>=', JAE in x86 */
1444			jmp_cond = IA32_JAE;
1445		break;
1446	case BPF_JSLE:
1447		if (!is_cmp_lo)
1448			/* Signed '<=', LE in x86 */
1449			jmp_cond = IA32_JLE;
1450		else
1451			/* LE is unsigned '<=', JBE in x86 */
1452			jmp_cond = IA32_JBE;
1453		break;
1454	default: /* to silence GCC warning */
1455		jmp_cond = COND_JMP_OPCODE_INVALID;
1456		break;
1457	}
1458
1459	return jmp_cond;
1460}
1461
1462static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1463		  int oldproglen, struct jit_context *ctx)
1464{
1465	struct bpf_insn *insn = bpf_prog->insnsi;
1466	int insn_cnt = bpf_prog->len;
1467	bool seen_exit = false;
1468	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1469	int i, cnt = 0;
1470	int proglen = 0;
1471	u8 *prog = temp;
1472
1473	emit_prologue(&prog, bpf_prog->aux->stack_depth);
1474
1475	for (i = 0; i < insn_cnt; i++, insn++) {
1476		const s32 imm32 = insn->imm;
1477		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1478		const bool dstk = insn->dst_reg != BPF_REG_AX;
1479		const bool sstk = insn->src_reg != BPF_REG_AX;
1480		const u8 code = insn->code;
1481		const u8 *dst = bpf2ia32[insn->dst_reg];
1482		const u8 *src = bpf2ia32[insn->src_reg];
1483		const u8 *r0 = bpf2ia32[BPF_REG_0];
1484		s64 jmp_offset;
1485		u8 jmp_cond;
1486		int ilen;
1487		u8 *func;
1488
1489		switch (code) {
1490		/* ALU operations */
1491		/* dst = src */
1492		case BPF_ALU | BPF_MOV | BPF_K:
1493		case BPF_ALU | BPF_MOV | BPF_X:
1494		case BPF_ALU64 | BPF_MOV | BPF_K:
1495		case BPF_ALU64 | BPF_MOV | BPF_X:
1496			switch (BPF_SRC(code)) {
1497			case BPF_X:
1498				if (imm32 == 1) {
1499					/* Special mov32 for zext. */
1500					emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1501					break;
1502				}
1503				emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
1504						  &prog, bpf_prog->aux);
1505				break;
1506			case BPF_K:
1507				/* Sign-extend immediate value to dst reg */
1508				emit_ia32_mov_i64(is64, dst, imm32,
1509						  dstk, &prog);
1510				break;
1511			}
1512			break;
1513		/* dst = dst + src/imm */
1514		/* dst = dst - src/imm */
1515		/* dst = dst | src/imm */
1516		/* dst = dst & src/imm */
1517		/* dst = dst ^ src/imm */
1518		/* dst = dst * src/imm */
1519		/* dst = dst << src */
1520		/* dst = dst >> src */
1521		case BPF_ALU | BPF_ADD | BPF_K:
1522		case BPF_ALU | BPF_ADD | BPF_X:
1523		case BPF_ALU | BPF_SUB | BPF_K:
1524		case BPF_ALU | BPF_SUB | BPF_X:
1525		case BPF_ALU | BPF_OR | BPF_K:
1526		case BPF_ALU | BPF_OR | BPF_X:
1527		case BPF_ALU | BPF_AND | BPF_K:
1528		case BPF_ALU | BPF_AND | BPF_X:
1529		case BPF_ALU | BPF_XOR | BPF_K:
1530		case BPF_ALU | BPF_XOR | BPF_X:
1531		case BPF_ALU64 | BPF_ADD | BPF_K:
1532		case BPF_ALU64 | BPF_ADD | BPF_X:
1533		case BPF_ALU64 | BPF_SUB | BPF_K:
1534		case BPF_ALU64 | BPF_SUB | BPF_X:
1535		case BPF_ALU64 | BPF_OR | BPF_K:
1536		case BPF_ALU64 | BPF_OR | BPF_X:
1537		case BPF_ALU64 | BPF_AND | BPF_K:
1538		case BPF_ALU64 | BPF_AND | BPF_X:
1539		case BPF_ALU64 | BPF_XOR | BPF_K:
1540		case BPF_ALU64 | BPF_XOR | BPF_X:
1541			switch (BPF_SRC(code)) {
1542			case BPF_X:
1543				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1544						  src, dstk, sstk, &prog,
1545						  bpf_prog->aux);
1546				break;
1547			case BPF_K:
1548				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1549						  imm32, dstk, &prog,
1550						  bpf_prog->aux);
1551				break;
1552			}
1553			break;
1554		case BPF_ALU | BPF_MUL | BPF_K:
1555		case BPF_ALU | BPF_MUL | BPF_X:
1556			switch (BPF_SRC(code)) {
1557			case BPF_X:
1558				emit_ia32_mul_r(dst_lo, src_lo, dstk,
1559						sstk, &prog);
1560				break;
1561			case BPF_K:
1562				/* mov ecx,imm32*/
1563				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1564					    imm32);
1565				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1566						false, &prog);
1567				break;
1568			}
1569			if (!bpf_prog->aux->verifier_zext)
1570				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1571			break;
1572		case BPF_ALU | BPF_LSH | BPF_X:
1573		case BPF_ALU | BPF_RSH | BPF_X:
1574		case BPF_ALU | BPF_ARSH | BPF_K:
1575		case BPF_ALU | BPF_ARSH | BPF_X:
1576			switch (BPF_SRC(code)) {
1577			case BPF_X:
1578				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1579						  dstk, sstk, &prog);
1580				break;
1581			case BPF_K:
1582				/* mov ecx,imm32*/
1583				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1584					    imm32);
1585				emit_ia32_shift_r(BPF_OP(code), dst_lo,
1586						  IA32_ECX, dstk, false,
1587						  &prog);
1588				break;
1589			}
1590			if (!bpf_prog->aux->verifier_zext)
1591				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1592			break;
1593		/* dst = dst / src(imm) */
1594		/* dst = dst % src(imm) */
1595		case BPF_ALU | BPF_DIV | BPF_K:
1596		case BPF_ALU | BPF_DIV | BPF_X:
1597		case BPF_ALU | BPF_MOD | BPF_K:
1598		case BPF_ALU | BPF_MOD | BPF_X:
1599			switch (BPF_SRC(code)) {
1600			case BPF_X:
1601				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1602						    src_lo, dstk, sstk, &prog);
1603				break;
1604			case BPF_K:
1605				/* mov ecx,imm32*/
1606				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1607					    imm32);
1608				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1609						    IA32_ECX, dstk, false,
1610						    &prog);
1611				break;
1612			}
1613			if (!bpf_prog->aux->verifier_zext)
1614				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1615			break;
1616		case BPF_ALU64 | BPF_DIV | BPF_K:
1617		case BPF_ALU64 | BPF_DIV | BPF_X:
1618		case BPF_ALU64 | BPF_MOD | BPF_K:
1619		case BPF_ALU64 | BPF_MOD | BPF_X:
1620			goto notyet;
1621		/* dst = dst >> imm */
1622		/* dst = dst << imm */
1623		case BPF_ALU | BPF_RSH | BPF_K:
1624		case BPF_ALU | BPF_LSH | BPF_K:
1625			if (unlikely(imm32 > 31))
1626				return -EINVAL;
1627			/* mov ecx,imm32*/
1628			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1629			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1630					  false, &prog);
1631			if (!bpf_prog->aux->verifier_zext)
1632				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1633			break;
1634		/* dst = dst << imm */
1635		case BPF_ALU64 | BPF_LSH | BPF_K:
1636			if (unlikely(imm32 > 63))
1637				return -EINVAL;
1638			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1639			break;
1640		/* dst = dst >> imm */
1641		case BPF_ALU64 | BPF_RSH | BPF_K:
1642			if (unlikely(imm32 > 63))
1643				return -EINVAL;
1644			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1645			break;
1646		/* dst = dst << src */
1647		case BPF_ALU64 | BPF_LSH | BPF_X:
1648			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1649			break;
1650		/* dst = dst >> src */
1651		case BPF_ALU64 | BPF_RSH | BPF_X:
1652			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1653			break;
1654		/* dst = dst >> src (signed) */
1655		case BPF_ALU64 | BPF_ARSH | BPF_X:
1656			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1657			break;
1658		/* dst = dst >> imm (signed) */
1659		case BPF_ALU64 | BPF_ARSH | BPF_K:
1660			if (unlikely(imm32 > 63))
1661				return -EINVAL;
1662			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1663			break;
 1664		/* dst = -dst */
1665		case BPF_ALU | BPF_NEG:
1666			emit_ia32_alu_i(is64, false, BPF_OP(code),
1667					dst_lo, 0, dstk, &prog);
1668			if (!bpf_prog->aux->verifier_zext)
1669				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1670			break;
 1671		/* dst = -dst (64 bit) */
1672		case BPF_ALU64 | BPF_NEG:
1673			emit_ia32_neg64(dst, dstk, &prog);
1674			break;
1675		/* dst = dst * src/imm */
1676		case BPF_ALU64 | BPF_MUL | BPF_X:
1677		case BPF_ALU64 | BPF_MUL | BPF_K:
1678			switch (BPF_SRC(code)) {
1679			case BPF_X:
1680				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1681				break;
1682			case BPF_K:
1683				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1684				break;
1685			}
1686			break;
1687		/* dst = htole(dst) */
1688		case BPF_ALU | BPF_END | BPF_FROM_LE:
1689			emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
1690					    bpf_prog->aux);
1691			break;
1692		/* dst = htobe(dst) */
1693		case BPF_ALU | BPF_END | BPF_FROM_BE:
1694			emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
1695					    bpf_prog->aux);
1696			break;
1697		/* dst = imm64 */
1698		case BPF_LD | BPF_IMM | BPF_DW: {
1699			s32 hi, lo = imm32;
1700
1701			hi = insn[1].imm;
1702			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1703			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1704			insn++;
1705			i++;
1706			break;
1707		}
1708		/* ST: *(u8*)(dst_reg + off) = imm */
1709		case BPF_ST | BPF_MEM | BPF_H:
1710		case BPF_ST | BPF_MEM | BPF_B:
1711		case BPF_ST | BPF_MEM | BPF_W:
1712		case BPF_ST | BPF_MEM | BPF_DW:
1713			if (dstk)
1714				/* mov eax,dword ptr [ebp+off] */
1715				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1716				      STACK_VAR(dst_lo));
1717			else
1718				/* mov eax,dst_lo */
1719				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1720
1721			switch (BPF_SIZE(code)) {
1722			case BPF_B:
1723				EMIT(0xC6, 1); break;
1724			case BPF_H:
1725				EMIT2(0x66, 0xC7); break;
1726			case BPF_W:
1727			case BPF_DW:
1728				EMIT(0xC7, 1); break;
1729			}
1730
1731			if (is_imm8(insn->off))
1732				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1733			else
1734				EMIT1_off32(add_1reg(0x80, IA32_EAX),
1735					    insn->off);
1736			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1737
1738			if (BPF_SIZE(code) == BPF_DW) {
1739				u32 hi;
1740
1741				hi = imm32 & (1<<31) ? (u32)~0 : 0;
1742				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1743					    insn->off + 4);
1744				EMIT(hi, 4);
1745			}
1746			break;
1747
1748		/* STX: *(u8*)(dst_reg + off) = src_reg */
1749		case BPF_STX | BPF_MEM | BPF_B:
1750		case BPF_STX | BPF_MEM | BPF_H:
1751		case BPF_STX | BPF_MEM | BPF_W:
1752		case BPF_STX | BPF_MEM | BPF_DW:
1753			if (dstk)
1754				/* mov eax,dword ptr [ebp+off] */
1755				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1756				      STACK_VAR(dst_lo));
1757			else
1758				/* mov eax,dst_lo */
1759				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1760
1761			if (sstk)
1762				/* mov edx,dword ptr [ebp+off] */
1763				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1764				      STACK_VAR(src_lo));
1765			else
1766				/* mov edx,src_lo */
1767				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1768
1769			switch (BPF_SIZE(code)) {
1770			case BPF_B:
1771				EMIT(0x88, 1); break;
1772			case BPF_H:
1773				EMIT2(0x66, 0x89); break;
1774			case BPF_W:
1775			case BPF_DW:
1776				EMIT(0x89, 1); break;
1777			}
1778
1779			if (is_imm8(insn->off))
1780				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1781				      insn->off);
1782			else
1783				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1784					    insn->off);
1785
1786			if (BPF_SIZE(code) == BPF_DW) {
1787				if (sstk)
 1788					/* mov edx,dword ptr [ebp+off] */
1789					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1790							     IA32_EDX),
1791					      STACK_VAR(src_hi));
1792				else
 1793					/* mov edx,src_hi */
1794					EMIT2(0x8B, add_2reg(0xC0, src_hi,
1795							     IA32_EDX));
1796				EMIT1(0x89);
1797				if (is_imm8(insn->off + 4)) {
1798					EMIT2(add_2reg(0x40, IA32_EAX,
1799						       IA32_EDX),
1800					      insn->off + 4);
1801				} else {
1802					EMIT1(add_2reg(0x80, IA32_EAX,
1803						       IA32_EDX));
1804					EMIT(insn->off + 4, 4);
1805				}
1806			}
1807			break;
1808
1809		/* LDX: dst_reg = *(u8*)(src_reg + off) */
1810		case BPF_LDX | BPF_MEM | BPF_B:
1811		case BPF_LDX | BPF_MEM | BPF_H:
1812		case BPF_LDX | BPF_MEM | BPF_W:
1813		case BPF_LDX | BPF_MEM | BPF_DW:
1814			if (sstk)
1815				/* mov eax,dword ptr [ebp+off] */
1816				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1817				      STACK_VAR(src_lo));
1818			else
 1819				/* mov eax,src_lo */
1820				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
1821
1822			switch (BPF_SIZE(code)) {
1823			case BPF_B:
1824				EMIT2(0x0F, 0xB6); break;
1825			case BPF_H:
1826				EMIT2(0x0F, 0xB7); break;
1827			case BPF_W:
1828			case BPF_DW:
1829				EMIT(0x8B, 1); break;
1830			}
1831
1832			if (is_imm8(insn->off))
1833				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1834				      insn->off);
1835			else
1836				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1837					    insn->off);
1838
1839			if (dstk)
1840				/* mov dword ptr [ebp+off],edx */
1841				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1842				      STACK_VAR(dst_lo));
1843			else
1844				/* mov dst_lo,edx */
1845				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
1846			switch (BPF_SIZE(code)) {
1847			case BPF_B:
1848			case BPF_H:
1849			case BPF_W:
1850				if (bpf_prog->aux->verifier_zext)
1851					break;
1852				if (dstk) {
1853					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
1854					      STACK_VAR(dst_hi));
1855					EMIT(0x0, 4);
1856				} else {
1857					/* xor dst_hi,dst_hi */
1858					EMIT2(0x33,
1859					      add_2reg(0xC0, dst_hi, dst_hi));
1860				}
1861				break;
1862			case BPF_DW:
1863				EMIT2_off32(0x8B,
1864					    add_2reg(0x80, IA32_EAX, IA32_EDX),
1865					    insn->off + 4);
1866				if (dstk)
1867					EMIT3(0x89,
1868					      add_2reg(0x40, IA32_EBP,
1869						       IA32_EDX),
1870					      STACK_VAR(dst_hi));
1871				else
1872					EMIT2(0x89,
1873					      add_2reg(0xC0, dst_hi, IA32_EDX));
1874				break;
1875			default:
1876				break;
1877			}
1878			break;
1879		/* call */
1880		case BPF_JMP | BPF_CALL:
1881		{
1882			const u8 *r1 = bpf2ia32[BPF_REG_1];
1883			const u8 *r2 = bpf2ia32[BPF_REG_2];
1884			const u8 *r3 = bpf2ia32[BPF_REG_3];
1885			const u8 *r4 = bpf2ia32[BPF_REG_4];
1886			const u8 *r5 = bpf2ia32[BPF_REG_5];
1887
1888			if (insn->src_reg == BPF_PSEUDO_CALL)
1889				goto notyet;
1890
1891			func = (u8 *) __bpf_call_base + imm32;
1892			jmp_offset = func - (image + addrs[i]);
1893
1894			if (!imm32 || !is_simm32(jmp_offset)) {
1895				pr_err("unsupported BPF func %d addr %p image %p\n",
1896				       imm32, func, image);
1897				return -EINVAL;
1898			}
1899
1900			/* mov eax,dword ptr [ebp+off] */
1901			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1902			      STACK_VAR(r1[0]));
1903			/* mov edx,dword ptr [ebp+off] */
1904			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1905			      STACK_VAR(r1[1]));
1906
1907			emit_push_r64(r5, &prog);
1908			emit_push_r64(r4, &prog);
1909			emit_push_r64(r3, &prog);
1910			emit_push_r64(r2, &prog);
1911
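			/*
			 * The call rel32 ends 9 bytes before addrs[i] (two
			 * 3-byte movs plus the 3-byte add esp follow it),
			 * so the displacement needs a +9 fixup.
			 */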
1912			EMIT1_off32(0xE8, jmp_offset + 9);
1913
1914			/* mov dword ptr [ebp+off],eax */
1915			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1916			      STACK_VAR(r0[0]));
1917			/* mov dword ptr [ebp+off],edx */
1918			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1919			      STACK_VAR(r0[1]));
1920
1921			/* add esp,32 */
1922			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
1923			break;
1924		}
1925		case BPF_JMP | BPF_TAIL_CALL:
1926			emit_bpf_tail_call(&prog);
1927			break;
1928
1929		/* cond jump */
1930		case BPF_JMP | BPF_JEQ | BPF_X:
1931		case BPF_JMP | BPF_JNE | BPF_X:
1932		case BPF_JMP | BPF_JGT | BPF_X:
1933		case BPF_JMP | BPF_JLT | BPF_X:
1934		case BPF_JMP | BPF_JGE | BPF_X:
1935		case BPF_JMP | BPF_JLE | BPF_X:
1936		case BPF_JMP32 | BPF_JEQ | BPF_X:
1937		case BPF_JMP32 | BPF_JNE | BPF_X:
1938		case BPF_JMP32 | BPF_JGT | BPF_X:
1939		case BPF_JMP32 | BPF_JLT | BPF_X:
1940		case BPF_JMP32 | BPF_JGE | BPF_X:
1941		case BPF_JMP32 | BPF_JLE | BPF_X:
1942		case BPF_JMP32 | BPF_JSGT | BPF_X:
1943		case BPF_JMP32 | BPF_JSLE | BPF_X:
1944		case BPF_JMP32 | BPF_JSLT | BPF_X:
1945		case BPF_JMP32 | BPF_JSGE | BPF_X: {
1946			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
1947			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1948			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1949			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
1950			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
1951
1952			if (dstk) {
1953				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1954				      STACK_VAR(dst_lo));
1955				if (is_jmp64)
1956					EMIT3(0x8B,
1957					      add_2reg(0x40, IA32_EBP,
1958						       IA32_EDX),
1959					      STACK_VAR(dst_hi));
1960			}
1961
1962			if (sstk) {
1963				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
1964				      STACK_VAR(src_lo));
1965				if (is_jmp64)
1966					EMIT3(0x8B,
1967					      add_2reg(0x40, IA32_EBP,
1968						       IA32_EBX),
1969					      STACK_VAR(src_hi));
1970			}
1971
1972			if (is_jmp64) {
1973				/* cmp dreg_hi,sreg_hi */
1974				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
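				/* jne +2: high words differ, skip the low cmp */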
1975				EMIT2(IA32_JNE, 2);
1976			}
1977			/* cmp dreg_lo,sreg_lo */
1978			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
1979			goto emit_cond_jmp;
1980		}
1981		case BPF_JMP | BPF_JSGT | BPF_X:
1982		case BPF_JMP | BPF_JSLE | BPF_X:
1983		case BPF_JMP | BPF_JSLT | BPF_X:
1984		case BPF_JMP | BPF_JSGE | BPF_X: {
1985			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1986			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1987			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
1988			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
1989
1990			if (dstk) {
1991				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1992				      STACK_VAR(dst_lo));
1993				EMIT3(0x8B,
1994				      add_2reg(0x40, IA32_EBP,
1995					       IA32_EDX),
1996				      STACK_VAR(dst_hi));
1997			}
1998
1999			if (sstk) {
2000				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2001				      STACK_VAR(src_lo));
2002				EMIT3(0x8B,
2003				      add_2reg(0x40, IA32_EBP,
2004					       IA32_EBX),
2005				      STACK_VAR(src_hi));
2006			}
2007
2008			/* cmp dreg_hi,sreg_hi */
2009			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
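			/* jne +10: high words differ; take the high-word jcc */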
2010			EMIT2(IA32_JNE, 10);
2011			/* cmp dreg_lo,sreg_lo */
2012			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2013			goto emit_cond_jmp_signed;
2014		}
2015		case BPF_JMP | BPF_JSET | BPF_X:
2016		case BPF_JMP32 | BPF_JSET | BPF_X: {
2017			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2018			u8 dreg_lo = IA32_EAX;
2019			u8 dreg_hi = IA32_EDX;
2020			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2021			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2022
2023			if (dstk) {
2024				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2025				      STACK_VAR(dst_lo));
2026				if (is_jmp64)
2027					EMIT3(0x8B,
2028					      add_2reg(0x40, IA32_EBP,
2029						       IA32_EDX),
2030					      STACK_VAR(dst_hi));
2031			} else {
2032				/* mov dreg_lo,dst_lo */
2033				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
2034				if (is_jmp64)
2035					/* mov dreg_hi,dst_hi */
2036					EMIT2(0x89,
2037					      add_2reg(0xC0, dreg_hi, dst_hi));
2038			}
2039
2040			if (sstk) {
2041				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2042				      STACK_VAR(src_lo));
2043				if (is_jmp64)
2044					EMIT3(0x8B,
2045					      add_2reg(0x40, IA32_EBP,
2046						       IA32_EBX),
2047					      STACK_VAR(src_hi));
2048			}
2049			/* and dreg_lo,sreg_lo */
2050			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2051			if (is_jmp64) {
2052				/* and dreg_hi,sreg_hi */
2053				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2054				/* or dreg_lo,dreg_hi */
2055				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2056			}
2057			goto emit_cond_jmp;
2058		}
2059		case BPF_JMP | BPF_JSET | BPF_K:
2060		case BPF_JMP32 | BPF_JSET | BPF_K: {
2061			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2062			u8 dreg_lo = IA32_EAX;
2063			u8 dreg_hi = IA32_EDX;
2064			u8 sreg_lo = IA32_ECX;
2065			u8 sreg_hi = IA32_EBX;
2066			u32 hi;
2067
2068			if (dstk) {
2069				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2070				      STACK_VAR(dst_lo));
2071				if (is_jmp64)
2072					EMIT3(0x8B,
2073					      add_2reg(0x40, IA32_EBP,
2074						       IA32_EDX),
2075					      STACK_VAR(dst_hi));
2076			} else {
2077				/* mov dreg_lo,dst_lo */
2078				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
2079				if (is_jmp64)
2080					/* mov dreg_hi,dst_hi */
2081					EMIT2(0x89,
2082					      add_2reg(0xC0, dreg_hi, dst_hi));
2083			}
2084
2085			/* mov ecx,imm32 */
2086			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
2087
2088			/* and dreg_lo,sreg_lo */
2089			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2090			if (is_jmp64) {
2091				hi = imm32 & (1U << 31) ? (u32)~0 : 0;
2092				/* mov ebx,hi */
2093				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
2094				/* and dreg_hi,sreg_hi */
2095				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2096				/* or dreg_lo,dreg_hi */
2097				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2098			}
2099			goto emit_cond_jmp;
2100		}
2101		case BPF_JMP | BPF_JEQ | BPF_K:
2102		case BPF_JMP | BPF_JNE | BPF_K:
2103		case BPF_JMP | BPF_JGT | BPF_K:
2104		case BPF_JMP | BPF_JLT | BPF_K:
2105		case BPF_JMP | BPF_JGE | BPF_K:
2106		case BPF_JMP | BPF_JLE | BPF_K:
2107		case BPF_JMP32 | BPF_JEQ | BPF_K:
2108		case BPF_JMP32 | BPF_JNE | BPF_K:
2109		case BPF_JMP32 | BPF_JGT | BPF_K:
2110		case BPF_JMP32 | BPF_JLT | BPF_K:
2111		case BPF_JMP32 | BPF_JGE | BPF_K:
2112		case BPF_JMP32 | BPF_JLE | BPF_K:
2113		case BPF_JMP32 | BPF_JSGT | BPF_K:
2114		case BPF_JMP32 | BPF_JSLE | BPF_K:
2115		case BPF_JMP32 | BPF_JSLT | BPF_K:
2116		case BPF_JMP32 | BPF_JSGE | BPF_K: {
2117			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2118			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2119			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2120			u8 sreg_lo = IA32_ECX;
2121			u8 sreg_hi = IA32_EBX;
2122			u32 hi;
2123
2124			if (dstk) {
2125				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2126				      STACK_VAR(dst_lo));
2127				if (is_jmp64)
2128					EMIT3(0x8B,
2129					      add_2reg(0x40, IA32_EBP,
2130						       IA32_EDX),
2131					      STACK_VAR(dst_hi));
2132			}
2133
2134			/* mov ecx,imm32 */
2135			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2136			if (is_jmp64) {
2137				hi = imm32 & (1U << 31) ? (u32)~0 : 0;
2138				/* mov ebx,hi */
2139				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2140				/* cmp dreg_hi,sreg_hi */
2141				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2142				EMIT2(IA32_JNE, 2);
2143			}
2144			/* cmp dreg_lo,sreg_lo */
2145			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2146
2147emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2148			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2149				return -EFAULT;
2150			jmp_offset = addrs[i + insn->off] - addrs[i];
2151			if (is_imm8(jmp_offset)) {
2152				EMIT2(jmp_cond, jmp_offset);
2153			} else if (is_simm32(jmp_offset)) {
2154				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2155			} else {
2156				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2157				return -EFAULT;
2158			}
2159			break;
2160		}
2161		case BPF_JMP | BPF_JSGT | BPF_K:
2162		case BPF_JMP | BPF_JSLE | BPF_K:
2163		case BPF_JMP | BPF_JSLT | BPF_K:
2164		case BPF_JMP | BPF_JSGE | BPF_K: {
2165			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2166			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2167			u8 sreg_lo = IA32_ECX;
2168			u8 sreg_hi = IA32_EBX;
2169			u32 hi;
2170
2171			if (dstk) {
2172				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2173				      STACK_VAR(dst_lo));
2174				EMIT3(0x8B,
2175				      add_2reg(0x40, IA32_EBP,
2176					       IA32_EDX),
2177				      STACK_VAR(dst_hi));
2178			}
2179
2180			/* mov ecx,imm32 */
2181			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2182			hi = imm32 & (1U << 31) ? (u32)~0 : 0;
2183			/* mov ebx,hi */
2184			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2185			/* cmp dreg_hi,sreg_hi */
2186			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2187			EMIT2(IA32_JNE, 10);
2188			/* cmp dreg_lo,sreg_lo */
2189			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2190
2191			/*
2192			 * For simplicity of branch offset computation,
2193			 * let's use fixed jump coding here.
2194			 */
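			/*
			 * Emitted layout (the jne +10 above lands on the
			 * second jcc):
			 *   cmp dreg_lo,sreg_lo
			 *   jcc_low  <target>	; 6 bytes, unsigned condition
			 *   jmp +6		; 2 bytes, skip the next jcc
			 *   jcc_high <target>	; 6 bytes, signed condition
			 */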
2195emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
2196			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
2197			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2198				return -EFAULT;
2199			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
2200			if (is_simm32(jmp_offset)) {
2201				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2202			} else {
2203				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2204				return -EFAULT;
2205			}
2206			EMIT2(0xEB, 6);
2207
2208			/* Check the condition for high 32-bit comparison */
2209			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2210			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2211				return -EFAULT;
2212			jmp_offset = addrs[i + insn->off] - addrs[i];
2213			if (is_simm32(jmp_offset)) {
2214				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2215			} else {
2216				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2217				return -EFAULT;
2218			}
2219			break;
2220		}
2221		case BPF_JMP | BPF_JA:
2222			if (insn->off == -1)
2223				/* -1 jmp instructions will always jump
2224				 * backwards two bytes. Explicitly handling
2225				 * this case avoids wasting too many passes
2226				 * when there are long sequences of replaced
2227				 * dead code.
2228				 */
2229				jmp_offset = -2;
2230			else
2231				jmp_offset = addrs[i + insn->off] - addrs[i];
2232
2233			if (!jmp_offset)
2234				/* Optimize out nop jumps */
2235				break;
2236emit_jmp:
2237			if (is_imm8(jmp_offset)) {
2238				EMIT2(0xEB, jmp_offset);
2239			} else if (is_simm32(jmp_offset)) {
2240				EMIT1_off32(0xE9, jmp_offset);
2241			} else {
2242				pr_err("jmp gen bug %llx\n", jmp_offset);
2243				return -EFAULT;
2244			}
2245			break;
2246		/* STX XADD: lock *(u32 *)(dst + off) += src */
2247		case BPF_STX | BPF_XADD | BPF_W:
2248		/* STX XADD: lock *(u64 *)(dst + off) += src */
2249		case BPF_STX | BPF_XADD | BPF_DW:
2250			goto notyet;
2251		case BPF_JMP | BPF_EXIT:
2252			if (seen_exit) {
2253				jmp_offset = ctx->cleanup_addr - addrs[i];
2254				goto emit_jmp;
2255			}
2256			seen_exit = true;
2257			/* Update cleanup_addr */
2258			ctx->cleanup_addr = proglen;
2259			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2260			break;
2261notyet:
2262			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2263			return -EFAULT;
2264		default:
2265			/*
2266			 * This error is seen if a new instruction was added
2267			 * to the interpreter but not to the JIT, or if there
2268			 * is junk in bpf_prog.
2269			 */
2270			pr_err("bpf_jit: unknown opcode %02x\n", code);
2271			return -EINVAL;
2272		}
2273
2274		ilen = prog - temp;
2275		if (ilen > BPF_MAX_INSN_SIZE) {
2276			pr_err("bpf_jit: fatal insn size error\n");
2277			return -EFAULT;
2278		}
2279
2280		if (image) {
2281			if (unlikely(proglen + ilen > oldproglen)) {
2282				pr_err("bpf_jit: fatal error\n");
2283				return -EFAULT;
2284			}
2285			memcpy(image + proglen, temp, ilen);
2286		}
2287		proglen += ilen;
2288		addrs[i] = proglen;
2289		prog = temp;
2290	}
2291	return proglen;
2292}
2293
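/*
 * Each 64-bit BPF register is modelled as two 32-bit halves here, so
 * let the verifier insert explicit zero-extensions instead of clearing
 * the high half after every 32-bit ALU operation in the JIT.
 */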
2294bool bpf_jit_needs_zext(void)
2295{
2296	return true;
2297}
2298
2299struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2300{
2301	struct bpf_binary_header *header = NULL;
2302	struct bpf_prog *tmp, *orig_prog = prog;
2303	int proglen, oldproglen = 0;
2304	struct jit_context ctx = {};
2305	bool tmp_blinded = false;
2306	u8 *image = NULL;
2307	int *addrs;
2308	int pass;
2309	int i;
2310
2311	if (!prog->jit_requested)
2312		return orig_prog;
2313
2314	tmp = bpf_jit_blind_constants(prog);
2315	/*
2316	 * If blinding was requested and we failed during blinding,
2317	 * we must fall back to the interpreter.
2318	 */
2319	if (IS_ERR(tmp))
2320		return orig_prog;
2321	if (tmp != prog) {
2322		tmp_blinded = true;
2323		prog = tmp;
2324	}
2325
2326	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
2327	if (!addrs) {
2328		prog = orig_prog;
2329		goto out;
2330	}
2331
2332	/*
2333	 * Before the first pass, make a rough estimate of addrs[]:
2334	 * each BPF instruction is translated to less than 64 bytes.
2335	 */
2336	for (proglen = 0, i = 0; i < prog->len; i++) {
2337		proglen += 64;
2338		addrs[i] = proglen;
2339	}
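	/* e.g. a 3-insn prog starts out as addrs[] = { 64, 128, 192 } */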
2340	ctx.cleanup_addr = proglen;
2341
2342	/*
2343	 * The JITed image shrinks with every pass and the loop iterates
2344	 * until the image stops shrinking. Very large BPF programs may
2345	 * converge only on the last pass; in such a case, do one more
2346	 * pass to emit the final image.
2347	 */
2348	for (pass = 0; pass < 20 || image; pass++) {
2349		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2350		if (proglen <= 0) {
2351out_image:
2352			image = NULL;
2353			if (header)
2354				bpf_jit_binary_free(header);
2355			prog = orig_prog;
2356			goto out_addrs;
2357		}
2358		if (image) {
2359			if (proglen != oldproglen) {
2360				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2361				       proglen, oldproglen);
2362				goto out_image;
2363			}
2364			break;
2365		}
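		/* Sizes converged: allocate the image and run one more pass to fill it. */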
2366		if (proglen == oldproglen) {
2367			header = bpf_jit_binary_alloc(proglen, &image,
2368						      1, jit_fill_hole);
2369			if (!header) {
2370				prog = orig_prog;
2371				goto out_addrs;
2372			}
2373		}
2374		oldproglen = proglen;
2375		cond_resched();
2376	}
2377
2378	if (bpf_jit_enable > 1)
2379		bpf_jit_dump(prog->len, proglen, pass + 1, image);
2380
2381	if (image) {
2382		bpf_jit_binary_lock_ro(header);
2383		prog->bpf_func = (void *)image;
2384		prog->jited = 1;
2385		prog->jited_len = proglen;
2386	} else {
2387		prog = orig_prog;
2388	}
2389
2390out_addrs:
2391	kfree(addrs);
2392out:
2393	if (tmp_blinded)
2394		bpf_jit_prog_release_other(prog, prog == orig_prog ?
2395					   tmp : orig_prog);
2396	return prog;
2397}