bpf_jit_comp64.c, v4.17:
 
   1/*
   2 * bpf_jit_comp64.c: eBPF JIT compiler
   3 *
   4 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
   5 *		  IBM Corporation
   6 *
   7 * Based on the powerpc classic BPF JIT compiler by Matt Evans
   8 *
   9 * This program is free software; you can redistribute it and/or
  10 * modify it under the terms of the GNU General Public License
  11 * as published by the Free Software Foundation; version 2
  12 * of the License.
  13 */
  14#include <linux/moduleloader.h>
  15#include <asm/cacheflush.h>
  16#include <linux/netdevice.h>
  17#include <linux/filter.h>
  18#include <linux/if_vlan.h>
  19#include <asm/kprobes.h>
  20#include <linux/bpf.h>
  21
  22#include "bpf_jit64.h"
  23
  24static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
  25{
  26	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
  27}
  28
  29static inline void bpf_flush_icache(void *start, void *end)
  30{
  31	smp_wmb();
  32	flush_icache_range((unsigned long)start, (unsigned long)end);
  33}
  34
  35static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
  36{
  37	return (ctx->seen & (1 << (31 - b2p[i])));
  38}
  39
  40static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
  41{
  42	ctx->seen |= (1 << (31 - b2p[i]));
  43}
  44
  45static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
  46{
  47	/*
  48	 * We only need a stack frame if:
  49	 * - we call other functions (kernel helpers), or
  50	 * - the bpf program uses its stack area
  51	 * The latter condition is deduced from the usage of BPF_REG_FP
  52	 */
  53	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
  54}
  55
  56/*
  57 * When not setting up our own stackframe, the redzone usage is:
  58 *
  59 *		[	prev sp		] <-------------
  60 *		[	  ...       	] 		|
  61 * sp (r1) --->	[    stack pointer	] --------------
  62 *		[   nv gpr save area	] 8*8
  63 *		[    tail_call_cnt	] 8
  64 *		[    local_tmp_var	] 8
  65 *		[   unused red zone	] 208 bytes protected
  66 */
  67static int bpf_jit_stack_local(struct codegen_context *ctx)
  68{
  69	if (bpf_has_stack_frame(ctx))
  70		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
  71	else
  72		return -(BPF_PPC_STACK_SAVE + 16);
  73}
  74
  75static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
  76{
  77	return bpf_jit_stack_local(ctx) + 8;
  78}
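The two helpers above implement the layout pictured in the comment. Below is a minimal sketch of the same arithmetic, not part of the kernel source; the DEMO_* constants are placeholders, since BPF_PPC_STACK_SAVE and STACK_FRAME_MIN_SIZE live in bpf_jit64.h and the asm headers, which are not shown here.

/* Illustration only; the real constants live in bpf_jit64.h / asm headers. */
#define DEMO_STACK_SAVE		64	/* assumed nv gpr save area size */
#define DEMO_FRAME_MIN		32	/* assumed STACK_FRAME_MIN_SIZE  */

static int demo_stack_local(int has_frame, int stack_size)
{
	/*
	 * With a frame, locals sit above the fixed minimum frame; without
	 * one they go in the red zone below r1, underneath the nv gpr save
	 * area: 8 bytes for local_tmp_var, then 8 for tail_call_cnt
	 * (i.e. tail_call_cnt lives at the local slot + 8).
	 */
	return has_frame ? DEMO_FRAME_MIN + stack_size
			 : -(DEMO_STACK_SAVE + 16);
}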
  79
  80static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
  81{
  82	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
  83		return (bpf_has_stack_frame(ctx) ?
  84			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
  85				- (8 * (32 - reg));
  86
  87	pr_err("BPF JIT is asking about unknown registers");
  88	BUG();
  89}
  90
  91static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx)
  92{
  93	/*
  94	 * Load skb->len and skb->data_len
  95	 * r3 points to skb
  96	 */
  97	PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len));
  98	PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len));
  99	/* header_len = len - data_len */
 100	PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]);
 101
 102	/* skb->data pointer */
 103	PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data));
 104}
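The subtraction above mirrors skb_headlen(): only the linear part of the skb is reachable through skb->data, so the JIT caches that length next to the data pointer. A one-line sketch of the equivalent C, for illustration (demo_skb_linear_len() is not a kernel function):

/* Sketch of what the emitted loads compute (cf. skb_headlen() in skbuff.h). */
static unsigned int demo_skb_linear_len(const struct sk_buff *skb)
{
	return skb->len - skb->data_len;	/* bytes reachable via skb->data */
}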
 105
 106static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
 107{
 108	int i;
 109
 110	/*
 111	 * Initialize tail_call_cnt if we do tail calls.
 112	 * Otherwise, put in NOPs so that it can be skipped when we are
 113	 * invoked through a tail call.
 114	 */
 115	if (ctx->seen & SEEN_TAILCALL) {
 116		PPC_LI(b2p[TMP_REG_1], 0);
 117		/* this goes in the redzone */
 118		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
 119	} else {
 120		PPC_NOP();
 121		PPC_NOP();
 122	}
 123
 124#define BPF_TAILCALL_PROLOGUE_SIZE	8
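	/*
	 * Those 8 bytes are the two instructions just emitted (the
	 * tail_call_cnt setup, or the two NOPs standing in for it):
	 * bpf_jit_emit_tail_call() branches to bpf_func plus this size,
	 * so a tail call enters the next program just past this setup
	 * and the running count is not re-initialized.
	 */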
 125
 126	if (bpf_has_stack_frame(ctx)) {
 127		/*
 128		 * We need a stack frame, but we don't necessarily need to
 129		 * save/restore LR unless we call other functions
 130		 */
 131		if (ctx->seen & SEEN_FUNC) {
 132			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
 133			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
 134		}
 135
 136		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
 137	}
 138
 139	/*
 140	 * Back up non-volatile regs -- BPF registers 6-10
 141	 * If we haven't created our own stack frame, we save these
 142	 * in the protected zone below the previous stack frame
 143	 */
 144	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
 145		if (bpf_is_seen_register(ctx, i))
 146			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 147
 148	/*
 149	 * Save additional non-volatile regs if we cache skb
 150	 * Also, setup skb data
 151	 */
 152	if (ctx->seen & SEEN_SKB) {
 153		PPC_BPF_STL(b2p[SKB_HLEN_REG], 1,
 154				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
 155		PPC_BPF_STL(b2p[SKB_DATA_REG], 1,
 156				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
 157		bpf_jit_emit_skb_loads(image, ctx);
 158	}
 159
 160	/* Setup frame pointer to point to the bpf stack area */
 161	if (bpf_is_seen_register(ctx, BPF_REG_FP))
 162		PPC_ADDI(b2p[BPF_REG_FP], 1,
 163				STACK_FRAME_MIN_SIZE + ctx->stack_size);
 164}
 165
 166static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
 167{
 168	int i;
 169
 170	/* Restore NVRs */
 171	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
 172		if (bpf_is_seen_register(ctx, i))
 173			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 174
 175	/* Restore non-volatile registers used for skb cache */
 176	if (ctx->seen & SEEN_SKB) {
 177		PPC_BPF_LL(b2p[SKB_HLEN_REG], 1,
 178				bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG]));
 179		PPC_BPF_LL(b2p[SKB_DATA_REG], 1,
 180				bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG]));
 181	}
 182
 183	/* Tear down our stack frame */
 184	if (bpf_has_stack_frame(ctx)) {
 185		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
 186		if (ctx->seen & SEEN_FUNC) {
 187			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
 188			PPC_MTLR(0);
 189		}
 190	}
 191}
 192
 193static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 194{
 195	bpf_jit_emit_common_epilogue(image, ctx);
 196
 197	/* Move result to r3 */
 198	PPC_MR(3, b2p[BPF_REG_0]);
 199
 200	PPC_BLR();
 201}
 202
 203static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 204{
 205#ifdef PPC64_ELF_ABI_v1
 206	/* func points to the function descriptor */
 207	PPC_LI64(b2p[TMP_REG_2], func);
 208	/* Load actual entry point from function descriptor */
 209	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
 210	/* ... and move it to LR */
 211	PPC_MTLR(b2p[TMP_REG_1]);
 212	/*
 213	 * Load TOC from function descriptor at offset 8.
 214	 * We can clobber r2 since we get called through a
 215	 * function pointer (so caller will save/restore r2)
 216	 * and since we don't use a TOC ourself.
 217	 */
 218	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
 219#else
 220	/* We can clobber r12 */
 221	PPC_FUNC_ADDR(12, func);
 222	PPC_MTLR(12);
 223#endif
 224	PPC_BLRL();
 225}
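On ELFv1 (big-endian ppc64), func above is the address of a function descriptor rather than of the code itself. The struct below is an illustrative rendering of that descriptor, matching the two loads emitted above; demo_func_desc is not a kernel type.

/* Illustration of an ELFv1 function descriptor. */
struct demo_func_desc {
	unsigned long entry;	/* loaded from offset 0 into LR above */
	unsigned long toc;	/* loaded from offset 8 into r2 above */
	unsigned long env;	/* environment pointer, unused from C */
};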
 226
 227static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
 228{
 229	/*
 230	 * By now, the eBPF program has already setup parameters in r3, r4 and r5
 231	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
 232	 * r4/BPF_REG_2 - pointer to bpf_array
 233	 * r5/BPF_REG_3 - index in bpf_array
 234	 */
 235	int b2p_bpf_array = b2p[BPF_REG_2];
 236	int b2p_index = b2p[BPF_REG_3];
 237
 238	/*
 239	 * if (index >= array->map.max_entries)
 240	 *   goto out;
 241	 */
 242	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
 243	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
 244	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
 245	PPC_BCC(COND_GE, out);
 246
 247	/*
 248	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
 249	 *   goto out;
 250	 */
 251	PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
 252	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
 253	PPC_BCC(COND_GT, out);
 254
 255	/*
 256	 * tail_call_cnt++;
 257	 */
 258	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
 259	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
 260
 261	/* prog = array->ptrs[index]; */
 262	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
 263	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
 264	PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
 265
 266	/*
 267	 * if (prog == NULL)
 268	 *   goto out;
 269	 */
 270	PPC_CMPLDI(b2p[TMP_REG_1], 0);
 271	PPC_BCC(COND_EQ, out);
 272
 273	/* goto *(prog->bpf_func + prologue_size); */
 274	PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
 275#ifdef PPC64_ELF_ABI_v1
 276	/* skip past the function descriptor */
 277	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
 278			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
 279#else
 280	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
 281#endif
 282	PPC_MTCTR(b2p[TMP_REG_1]);
 283
 284	/* tear down stack, restore NVRs, ... */
 285	bpf_jit_emit_common_epilogue(image, ctx);
 286
 287	PPC_BCTR();
 288	/* out: */
 289}
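For illustration, here is a C-level rendering of the sequence just emitted; demo_tail_call() is not kernel code, and the real transfer is an mtctr/bctr indirect branch into the target program past its prologue, with the counter kept in the stack slot returned by bpf_jit_stack_tailcallcnt().

static void demo_tail_call(void *ctx, struct bpf_array *array, u32 index,
			   u32 *tail_call_cnt)
{
	struct bpf_prog *prog;

	if (index >= array->map.max_entries)
		return;					/* out: */
	if ((*tail_call_cnt)++ > MAX_TAIL_CALL_CNT)
		return;					/* out: */

	prog = array->ptrs[index];
	if (!prog)
		return;					/* out: */

	/* stands in for the bctr into prog's body, past its prologue */
	((unsigned int (*)(const void *))prog->bpf_func)(ctx);
}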
 290
 291/* Assemble the body code between the prologue & epilogue */
 292static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 293			      struct codegen_context *ctx,
 294			      u32 *addrs)
 295{
 296	const struct bpf_insn *insn = fp->insnsi;
 297	int flen = fp->len;
 298	int i;
 299
 300	/* Start of epilogue code - will only be valid 2nd pass onwards */
 301	u32 exit_addr = addrs[flen];
 302
 303	for (i = 0; i < flen; i++) {
 304		u32 code = insn[i].code;
 305		u32 dst_reg = b2p[insn[i].dst_reg];
 306		u32 src_reg = b2p[insn[i].src_reg];
 307		s16 off = insn[i].off;
 308		s32 imm = insn[i].imm;
 309		u64 imm64;
 310		u8 *func;
 311		u32 true_cond;
 312
 313		/*
 314		 * addrs[] maps a BPF bytecode address into a real offset from
 315		 * the start of the body code.
 316		 */
 317		addrs[i] = ctx->idx * 4;
 318
 319		/*
 320		 * As an optimization, we note down which non-volatile registers
 321		 * are used so that we can only save/restore those in our
 322		 * prologue and epilogue. We do this here regardless of whether
 323		 * the actual BPF instruction uses src/dst registers or not
 324		 * (for instance, BPF_CALL does not use them). The expectation
 325		 * is that those instructions will have src_reg/dst_reg set to
 326		 * 0. Even otherwise, we just lose some prologue/epilogue
 327		 * optimization but everything else should work without
 328		 * any issues.
 329		 */
 330		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
 331			bpf_set_seen_register(ctx, insn[i].dst_reg);
 332		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
 333			bpf_set_seen_register(ctx, insn[i].src_reg);
 334
 335		switch (code) {
 336		/*
 337		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
 338		 */
 339		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
 340		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
 341			PPC_ADD(dst_reg, dst_reg, src_reg);
 342			goto bpf_alu32_trunc;
 343		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
 344		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
 345			PPC_SUB(dst_reg, dst_reg, src_reg);
 346			goto bpf_alu32_trunc;
 347		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
 348		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
 349		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
 350		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
 351			if (BPF_OP(code) == BPF_SUB)
 352				imm = -imm;
 353			if (imm) {
 354				if (imm >= -32768 && imm < 32768)
 355					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
 356				else {
 357					PPC_LI32(b2p[TMP_REG_1], imm);
 358					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
 359				}
 360			}
 361			goto bpf_alu32_trunc;
 362		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
 363		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
 364			if (BPF_CLASS(code) == BPF_ALU)
 365				PPC_MULW(dst_reg, dst_reg, src_reg);
 366			else
 367				PPC_MULD(dst_reg, dst_reg, src_reg);
 368			goto bpf_alu32_trunc;
 369		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
 370		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
 371			if (imm >= -32768 && imm < 32768)
 372				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
 373			else {
 374				PPC_LI32(b2p[TMP_REG_1], imm);
 375				if (BPF_CLASS(code) == BPF_ALU)
 376					PPC_MULW(dst_reg, dst_reg,
 377							b2p[TMP_REG_1]);
 378				else
 379					PPC_MULD(dst_reg, dst_reg,
 380							b2p[TMP_REG_1]);
 381			}
 382			goto bpf_alu32_trunc;
 383		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
 384		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
 385			if (BPF_OP(code) == BPF_MOD) {
 386				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
 387				PPC_MULW(b2p[TMP_REG_1], src_reg,
 388						b2p[TMP_REG_1]);
 389				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
 390			} else
 391				PPC_DIVWU(dst_reg, dst_reg, src_reg);
 392			goto bpf_alu32_trunc;
 393		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
 394		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
 395			if (BPF_OP(code) == BPF_MOD) {
 396				PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
 397				PPC_MULD(b2p[TMP_REG_1], src_reg,
 398						b2p[TMP_REG_1]);
 399				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
 400			} else
 401				PPC_DIVD(dst_reg, dst_reg, src_reg);
 402			break;
 403		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
 404		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
 405		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
 406		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
 407			if (imm == 0)
 408				return -EINVAL;
 409			else if (imm == 1)
 410				goto bpf_alu32_trunc;
 411
 412			PPC_LI32(b2p[TMP_REG_1], imm);
 413			switch (BPF_CLASS(code)) {
 414			case BPF_ALU:
 415				if (BPF_OP(code) == BPF_MOD) {
 416					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
 417							b2p[TMP_REG_1]);
 418					PPC_MULW(b2p[TMP_REG_1],
 419							b2p[TMP_REG_1],
 420							b2p[TMP_REG_2]);
 421					PPC_SUB(dst_reg, dst_reg,
 422							b2p[TMP_REG_1]);
 423				} else
 424					PPC_DIVWU(dst_reg, dst_reg,
 425							b2p[TMP_REG_1]);
 426				break;
 427			case BPF_ALU64:
 428				if (BPF_OP(code) == BPF_MOD) {
 429					PPC_DIVD(b2p[TMP_REG_2], dst_reg,
 430							b2p[TMP_REG_1]);
 431					PPC_MULD(b2p[TMP_REG_1],
 432							b2p[TMP_REG_1],
 433							b2p[TMP_REG_2]);
 434					PPC_SUB(dst_reg, dst_reg,
 435							b2p[TMP_REG_1]);
 436				} else
 437					PPC_DIVD(dst_reg, dst_reg,
 438							b2p[TMP_REG_1]);
 439				break;
 440			}
 441			goto bpf_alu32_trunc;
 442		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
 443		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
 444			PPC_NEG(dst_reg, dst_reg);
 445			goto bpf_alu32_trunc;
 446
 447		/*
 448		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
 449		 */
 450		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
 451		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
 452			PPC_AND(dst_reg, dst_reg, src_reg);
 453			goto bpf_alu32_trunc;
 454		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
 455		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
 456			if (!IMM_H(imm))
 457				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
 458			else {
 459				/* Sign-extended */
 460				PPC_LI32(b2p[TMP_REG_1], imm);
 461				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
 462			}
 463			goto bpf_alu32_trunc;
 464		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
 465		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
 466			PPC_OR(dst_reg, dst_reg, src_reg);
 467			goto bpf_alu32_trunc;
 468		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
 469		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
 470			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 471				/* Sign-extended */
 472				PPC_LI32(b2p[TMP_REG_1], imm);
 473				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
 474			} else {
 475				if (IMM_L(imm))
 476					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
 477				if (IMM_H(imm))
 478					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
 479			}
 480			goto bpf_alu32_trunc;
 481		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
 482		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
 483			PPC_XOR(dst_reg, dst_reg, src_reg);
 484			goto bpf_alu32_trunc;
 485		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
 486		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
 487			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 488				/* Sign-extended */
 489				PPC_LI32(b2p[TMP_REG_1], imm);
 490				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
 491			} else {
 492				if (IMM_L(imm))
 493					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
 494				if (IMM_H(imm))
 495					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
 496			}
 497			goto bpf_alu32_trunc;
 498		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
 499			/* slw clears top 32 bits */
 500			PPC_SLW(dst_reg, dst_reg, src_reg);
 501			break;
 502		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
 503			PPC_SLD(dst_reg, dst_reg, src_reg);
 504			break;
 505		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
 506			/* with imm 0, we still need to clear top 32 bits */
 507			PPC_SLWI(dst_reg, dst_reg, imm);
 508			break;
 509		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
 510			if (imm != 0)
 511				PPC_SLDI(dst_reg, dst_reg, imm);
 512			break;
 513		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
 514			PPC_SRW(dst_reg, dst_reg, src_reg);
 515			break;
 516		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
 517			PPC_SRD(dst_reg, dst_reg, src_reg);
 518			break;
 519		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
 520			PPC_SRWI(dst_reg, dst_reg, imm);
 521			break;
 522		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
 523			if (imm != 0)
 524				PPC_SRDI(dst_reg, dst_reg, imm);
 525			break;
 526		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
 527			PPC_SRAD(dst_reg, dst_reg, src_reg);
 528			break;
 529		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
 530			if (imm != 0)
 531				PPC_SRADI(dst_reg, dst_reg, imm);
 532			break;
 533
 534		/*
 535		 * MOV
 536		 */
 537		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
 538		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
 539			PPC_MR(dst_reg, src_reg);
 540			goto bpf_alu32_trunc;
 541		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
 542		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
 543			PPC_LI32(dst_reg, imm);
 544			if (imm < 0)
 545				goto bpf_alu32_trunc;
 546			break;
 547
 548bpf_alu32_trunc:
 549		/* Truncate to 32-bits */
 550		if (BPF_CLASS(code) == BPF_ALU)
 551			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
 552		break;
 553
 554		/*
 555		 * BPF_FROM_BE/LE
 556		 */
 557		case BPF_ALU | BPF_END | BPF_FROM_LE:
 558		case BPF_ALU | BPF_END | BPF_FROM_BE:
 559#ifdef __BIG_ENDIAN__
 560			if (BPF_SRC(code) == BPF_FROM_BE)
 561				goto emit_clear;
 562#else /* !__BIG_ENDIAN__ */
 563			if (BPF_SRC(code) == BPF_FROM_LE)
 564				goto emit_clear;
 565#endif
 566			switch (imm) {
 567			case 16:
 568				/* Rotate 8 bits left & mask with 0x0000ff00 */
 569				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
 570				/* Rotate 8 bits right & insert LSB to reg */
 571				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
 572				/* Move result back to dst_reg */
 573				PPC_MR(dst_reg, b2p[TMP_REG_1]);
 574				break;
 575			case 32:
 576				/*
 577				 * Rotate word left by 8 bits:
 578				 * 2 bytes are already in their final position
 579				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
 580				 */
 581				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
 582				/* Rotate 24 bits and insert byte 1 */
 583				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
 584				/* Rotate 24 bits and insert byte 3 */
 585				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
 586				PPC_MR(dst_reg, b2p[TMP_REG_1]);
 587				break;
 588			case 64:
 589				/*
 590				 * Way easier and faster(?) to store the value
 591				 * into stack and then use ldbrx
 592				 *
 593				 * ctx->seen will be reliable in pass2, but
 594				 * the instructions generated will remain the
 595				 * same across all passes
 596				 */
 597				PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx));
 598				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
 599				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
 600				break;
 601			}
 602			break;
 603
 604emit_clear:
 605			switch (imm) {
 606			case 16:
 607				/* zero-extend 16 bits into 64 bits */
 608				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
 609				break;
 610			case 32:
 611				/* zero-extend 32 bits into 64 bits */
 612				PPC_RLDICL(dst_reg, dst_reg, 0, 32);
 613				break;
 614			case 64:
 615				/* nop */
 616				break;
 617			}
 618			break;
 619
 620		/*
 621		 * BPF_ST(X)
 622		 */
 623		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
 624		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
 625			if (BPF_CLASS(code) == BPF_ST) {
 626				PPC_LI(b2p[TMP_REG_1], imm);
 627				src_reg = b2p[TMP_REG_1];
 628			}
 629			PPC_STB(src_reg, dst_reg, off);
 630			break;
 631		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
 632		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
 633			if (BPF_CLASS(code) == BPF_ST) {
 634				PPC_LI(b2p[TMP_REG_1], imm);
 635				src_reg = b2p[TMP_REG_1];
 636			}
 637			PPC_STH(src_reg, dst_reg, off);
 638			break;
 639		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
 640		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
 641			if (BPF_CLASS(code) == BPF_ST) {
 642				PPC_LI32(b2p[TMP_REG_1], imm);
 643				src_reg = b2p[TMP_REG_1];
 644			}
 645			PPC_STW(src_reg, dst_reg, off);
 646			break;
 647		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
 648		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
 649			if (BPF_CLASS(code) == BPF_ST) {
 650				PPC_LI32(b2p[TMP_REG_1], imm);
 651				src_reg = b2p[TMP_REG_1];
 652			}
 653			PPC_STD(src_reg, dst_reg, off);
 654			break;
 655
 656		/*
 657		 * BPF_STX XADD (atomic_add)
 658		 */
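		/*
		 * Both widths below open-code an atomic add with a
		 * load-reserve/store-conditional pair (lwarx/stwcx. or
		 * ldarx/stdcx.): if the alignment check fails the program
		 * returns 0 through the exit path, and if the reservation
		 * is lost the sequence is retried once before bailing out
		 * the same way.
		 */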
 659		/* *(u32 *)(dst + off) += src */
 660		case BPF_STX | BPF_XADD | BPF_W:
 661			/* Get EA into TMP_REG_1 */
 662			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
 663			/* error if EA is not word-aligned */
 664			PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03);
 665			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12);
 666			PPC_LI(b2p[BPF_REG_0], 0);
 667			PPC_JMP(exit_addr);
 668			/* load value from memory into TMP_REG_2 */
 669			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
 670			/* add value from src_reg into this */
 671			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
 672			/* store result back */
 673			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
 674			/* we're done if this succeeded */
 675			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4));
 676			/* otherwise, let's try once more */
 677			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
 678			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
 679			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
 680			/* exit if the store was not successful */
 681			PPC_LI(b2p[BPF_REG_0], 0);
 682			PPC_BCC(COND_NE, exit_addr);
 683			break;
 684		/* *(u64 *)(dst + off) += src */
 685		case BPF_STX | BPF_XADD | BPF_DW:
 686			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
 687			/* error if EA is not doubleword-aligned */
 688			PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07);
 689			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4));
 690			PPC_LI(b2p[BPF_REG_0], 0);
 691			PPC_JMP(exit_addr);
 692			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
 693			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
 694			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
 695			PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4));
 696			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
 697			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
 698			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
 699			PPC_LI(b2p[BPF_REG_0], 0);
 700			PPC_BCC(COND_NE, exit_addr);
 701			break;
 702
 703		/*
 704		 * BPF_LDX
 705		 */
 706		/* dst = *(u8 *)(ul) (src + off) */
 707		case BPF_LDX | BPF_MEM | BPF_B:
 708			PPC_LBZ(dst_reg, src_reg, off);
 709			break;
 710		/* dst = *(u16 *)(ul) (src + off) */
 711		case BPF_LDX | BPF_MEM | BPF_H:
 712			PPC_LHZ(dst_reg, src_reg, off);
 713			break;
 714		/* dst = *(u32 *)(ul) (src + off) */
 715		case BPF_LDX | BPF_MEM | BPF_W:
 716			PPC_LWZ(dst_reg, src_reg, off);
 717			break;
 718		/* dst = *(u64 *)(ul) (src + off) */
 719		case BPF_LDX | BPF_MEM | BPF_DW:
 720			PPC_LD(dst_reg, src_reg, off);
 721			break;
 722
 723		/*
 724		 * Doubleword load
 725		 * 16 byte instruction that uses two 'struct bpf_insn'
 726		 */
 727		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
 728			imm64 = ((u64)(u32) insn[i].imm) |
 729				    (((u64)(u32) insn[i+1].imm) << 32);
 730			/* Adjust for two bpf instructions */
 731			addrs[++i] = ctx->idx * 4;
 732			PPC_LI64(dst_reg, imm64);
 733			break;
 734
 735		/*
 736		 * Return/Exit
 737		 */
 738		case BPF_JMP | BPF_EXIT:
 739			/*
 740			 * If this isn't the very last instruction, branch to
 741			 * the epilogue. If we _are_ the last instruction,
 742			 * we'll just fall through to the epilogue.
 743			 */
 744			if (i != flen - 1)
 745				PPC_JMP(exit_addr);
 746			/* else fall through to the epilogue */
 747			break;
 748
 749		/*
 750		 * Call kernel helper
 751		 */
 752		case BPF_JMP | BPF_CALL:
 753			ctx->seen |= SEEN_FUNC;
 754			func = (u8 *) __bpf_call_base + imm;
 755
 756			/* Save skb pointer if we need to re-cache skb data */
 757			if ((ctx->seen & SEEN_SKB) &&
 758			    bpf_helper_changes_pkt_data(func))
 759				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 760
 761			bpf_jit_emit_func_call(image, ctx, (u64)func);
 762
 763			/* move return value from r3 to BPF_REG_0 */
 764			PPC_MR(b2p[BPF_REG_0], 3);
 765
 766			/* refresh skb cache */
 767			if ((ctx->seen & SEEN_SKB) &&
 768			    bpf_helper_changes_pkt_data(func)) {
 769				/* reload skb pointer to r3 */
 770				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
 771				bpf_jit_emit_skb_loads(image, ctx);
 772			}
 773			break;
 774
 775		/*
 776		 * Jumps and branches
 777		 */
 778		case BPF_JMP | BPF_JA:
 779			PPC_JMP(addrs[i + 1 + off]);
 780			break;
 781
 782		case BPF_JMP | BPF_JGT | BPF_K:
 783		case BPF_JMP | BPF_JGT | BPF_X:
 784		case BPF_JMP | BPF_JSGT | BPF_K:
 785		case BPF_JMP | BPF_JSGT | BPF_X:
 786			true_cond = COND_GT;
 787			goto cond_branch;
 788		case BPF_JMP | BPF_JLT | BPF_K:
 789		case BPF_JMP | BPF_JLT | BPF_X:
 790		case BPF_JMP | BPF_JSLT | BPF_K:
 791		case BPF_JMP | BPF_JSLT | BPF_X:
 792			true_cond = COND_LT;
 793			goto cond_branch;
 794		case BPF_JMP | BPF_JGE | BPF_K:
 795		case BPF_JMP | BPF_JGE | BPF_X:
 796		case BPF_JMP | BPF_JSGE | BPF_K:
 797		case BPF_JMP | BPF_JSGE | BPF_X:
 798			true_cond = COND_GE;
 799			goto cond_branch;
 800		case BPF_JMP | BPF_JLE | BPF_K:
 801		case BPF_JMP | BPF_JLE | BPF_X:
 802		case BPF_JMP | BPF_JSLE | BPF_K:
 803		case BPF_JMP | BPF_JSLE | BPF_X:
 804			true_cond = COND_LE;
 805			goto cond_branch;
 806		case BPF_JMP | BPF_JEQ | BPF_K:
 807		case BPF_JMP | BPF_JEQ | BPF_X:
 808			true_cond = COND_EQ;
 809			goto cond_branch;
 810		case BPF_JMP | BPF_JNE | BPF_K:
 811		case BPF_JMP | BPF_JNE | BPF_X:
 812			true_cond = COND_NE;
 813			goto cond_branch;
 814		case BPF_JMP | BPF_JSET | BPF_K:
 815		case BPF_JMP | BPF_JSET | BPF_X:
 816			true_cond = COND_NE;
 817			/* Fall through */
 818
 819cond_branch:
 820			switch (code) {
 821			case BPF_JMP | BPF_JGT | BPF_X:
 822			case BPF_JMP | BPF_JLT | BPF_X:
 823			case BPF_JMP | BPF_JGE | BPF_X:
 824			case BPF_JMP | BPF_JLE | BPF_X:
 825			case BPF_JMP | BPF_JEQ | BPF_X:
 826			case BPF_JMP | BPF_JNE | BPF_X:
 827				/* unsigned comparison */
 828				PPC_CMPLD(dst_reg, src_reg);
 829				break;
 830			case BPF_JMP | BPF_JSGT | BPF_X:
 831			case BPF_JMP | BPF_JSLT | BPF_X:
 832			case BPF_JMP | BPF_JSGE | BPF_X:
 833			case BPF_JMP | BPF_JSLE | BPF_X:
 834				/* signed comparison */
 835				PPC_CMPD(dst_reg, src_reg);
 836				break;
 837			case BPF_JMP | BPF_JSET | BPF_X:
 838				PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg);
 839				break;
 840			case BPF_JMP | BPF_JNE | BPF_K:
 841			case BPF_JMP | BPF_JEQ | BPF_K:
 842			case BPF_JMP | BPF_JGT | BPF_K:
 843			case BPF_JMP | BPF_JLT | BPF_K:
 844			case BPF_JMP | BPF_JGE | BPF_K:
 845			case BPF_JMP | BPF_JLE | BPF_K:
 846				/*
 847				 * Need sign-extended load, so only positive
 848				 * values can be used as imm in cmpldi
 849				 */
 850				if (imm >= 0 && imm < 32768)
 851					PPC_CMPLDI(dst_reg, imm);
 852				else {
 853					/* sign-extending load */
 854					PPC_LI32(b2p[TMP_REG_1], imm);
 855					/* ... but unsigned comparison */
 856					PPC_CMPLD(dst_reg, b2p[TMP_REG_1]);
 857				}
 858				break;
 859			case BPF_JMP | BPF_JSGT | BPF_K:
 860			case BPF_JMP | BPF_JSLT | BPF_K:
 861			case BPF_JMP | BPF_JSGE | BPF_K:
 862			case BPF_JMP | BPF_JSLE | BPF_K:
 863				/*
 864				 * signed comparison, so any 16-bit value
 865				 * can be used in cmpdi
 866				 */
 867				if (imm >= -32768 && imm < 32768)
 868					PPC_CMPDI(dst_reg, imm);
 869				else {
 870					PPC_LI32(b2p[TMP_REG_1], imm);
 871					PPC_CMPD(dst_reg, b2p[TMP_REG_1]);
 872				}
 873				break;
 874			case BPF_JMP | BPF_JSET | BPF_K:
 875				/* andi does not sign-extend the immediate */
 876				if (imm >= 0 && imm < 32768)
 877					/* PPC_ANDI is _only/always_ dot-form */
 878					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
 879				else {
 880					PPC_LI32(b2p[TMP_REG_1], imm);
 881					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
 882						    b2p[TMP_REG_1]);
 883				}
 884				break;
 885			}
 886			PPC_BCC(true_cond, addrs[i + 1 + off]);
 887			break;
 888
 889		/*
 890		 * Loads from packet header/data
 891		 * Assume 32-bit input value in imm and X (src_reg)
 892		 */
 893
 894		/* Absolute loads */
 895		case BPF_LD | BPF_W | BPF_ABS:
 896			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word);
 897			goto common_load_abs;
 898		case BPF_LD | BPF_H | BPF_ABS:
 899			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half);
 900			goto common_load_abs;
 901		case BPF_LD | BPF_B | BPF_ABS:
 902			func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte);
 903common_load_abs:
 904			/*
 905			 * Load from [imm]
 906			 * Load into r4, which can just be passed onto
 907			 *  skb load helpers as the second parameter
 908			 */
 909			PPC_LI32(4, imm);
 910			goto common_load;
 911
 912		/* Indirect loads */
 913		case BPF_LD | BPF_W | BPF_IND:
 914			func = (u8 *)sk_load_word;
 915			goto common_load_ind;
 916		case BPF_LD | BPF_H | BPF_IND:
 917			func = (u8 *)sk_load_half;
 918			goto common_load_ind;
 919		case BPF_LD | BPF_B | BPF_IND:
 920			func = (u8 *)sk_load_byte;
 921common_load_ind:
 922			/*
 923			 * Load from [src_reg + imm]
 924			 * Treat src_reg as a 32-bit value
 925			 */
 926			PPC_EXTSW(4, src_reg);
 927			if (imm) {
 928				if (imm >= -32768 && imm < 32768)
 929					PPC_ADDI(4, 4, IMM_L(imm));
 930				else {
 931					PPC_LI32(b2p[TMP_REG_1], imm);
 932					PPC_ADD(4, 4, b2p[TMP_REG_1]);
 933				}
 934			}
 935
 936common_load:
 937			ctx->seen |= SEEN_SKB;
 938			ctx->seen |= SEEN_FUNC;
 939			bpf_jit_emit_func_call(image, ctx, (u64)func);
 940
 941			/*
 942			 * Helper returns 'lt' condition on error, and an
 943			 * appropriate return value in BPF_REG_0
 944			 */
 945			PPC_BCC(COND_LT, exit_addr);
 946			break;
 947
 948		/*
 949		 * Tail call
 950		 */
 951		case BPF_JMP | BPF_TAIL_CALL:
 952			ctx->seen |= SEEN_TAILCALL;
 953			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
 954			break;
 955
 956		default:
 957			/*
 958			 * The filter contains something cruel & unusual.
 959			 * We don't handle it, but also there shouldn't be
 960			 * anything missing from our list.
 961			 */
 962			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
 963					code, i);
 964			return -ENOTSUPP;
 965		}
 966	}
 967
 968	/* Set end-of-body-code address for exit. */
 969	addrs[i] = ctx->idx * 4;
 970
 971	return 0;
 972}
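The addrs[] array filled in above is what makes branch resolution work: the scouting pass only records offsets, and (as the exit_addr comment notes) they are only valid from the second pass onwards. A minimal sketch of the target calculation used by the jump cases, e.g. PPC_JMP(addrs[i + 1 + off]); demo_branch_target() is illustrative only.

/* Illustration only: BPF branch offsets are relative to the next insn. */
static u32 demo_branch_target(const u32 *addrs, int i, s16 off)
{
	return addrs[i + 1 + off];	/* byte offset from start of body */
}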
 973
 974struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 975{
 976	u32 proglen;
 977	u32 alloclen;
 978	u8 *image = NULL;
 979	u32 *code_base;
 980	u32 *addrs;
 981	struct codegen_context cgctx;
 982	int pass;
 983	int flen;
 984	struct bpf_binary_header *bpf_hdr;
 985	struct bpf_prog *org_fp = fp;
 986	struct bpf_prog *tmp_fp;
 987	bool bpf_blinded = false;
 988
 989	if (!fp->jit_requested)
 990		return org_fp;
 991
 992	tmp_fp = bpf_jit_blind_constants(org_fp);
 993	if (IS_ERR(tmp_fp))
 994		return org_fp;
 995
 996	if (tmp_fp != org_fp) {
 997		bpf_blinded = true;
 998		fp = tmp_fp;
 999	}
1000
1001	flen = fp->len;
1002	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
1003	if (addrs == NULL) {
1004		fp = org_fp;
1005		goto out;
1006	}
1007
1008	memset(&cgctx, 0, sizeof(struct codegen_context));
1009
1010	/* Make sure that the stack is quadword aligned. */
1011	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
1012
1013	/* Scouting faux-generate pass 0 */
1014	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
1015		/* We hit something illegal or unsupported. */
1016		fp = org_fp;
1017		goto out;
1018	}
1019
1020	/*
1021	 * Pretend to build prologue, given the features we've seen.  This will
1022	 * update ctgtx.idx as it pretends to output instructions, then we can
1023	 * calculate total size from idx.
1024	 */
1025	bpf_jit_build_prologue(0, &cgctx);
1026	bpf_jit_build_epilogue(0, &cgctx);
1027
1028	proglen = cgctx.idx * 4;
1029	alloclen = proglen + FUNCTION_DESCR_SIZE;
1030
1031	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
1032			bpf_jit_fill_ill_insns);
1033	if (!bpf_hdr) {
1034		fp = org_fp;
1035		goto out;
1036	}
1037
1038	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
1039
1040	/* Code generation passes 1-2 */
1041	for (pass = 1; pass < 3; pass++) {
1042		/* Now build the prologue, body code & epilogue for real. */
1043		cgctx.idx = 0;
1044		bpf_jit_build_prologue(code_base, &cgctx);
1045		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
1046		bpf_jit_build_epilogue(code_base, &cgctx);
1047
1048		if (bpf_jit_enable > 1)
1049			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
1050				proglen - (cgctx.idx * 4), cgctx.seen);
1051	}
1052
1053	if (bpf_jit_enable > 1)
1054		/*
1055		 * Note that we output the base address of the code_base
1056		 * rather than image, since opcodes are in code_base.
1057		 */
1058		bpf_jit_dump(flen, proglen, pass, code_base);
1059
1060#ifdef PPC64_ELF_ABI_v1
1061	/* Function descriptor nastiness: Address + TOC */
1062	((u64 *)image)[0] = (u64)code_base;
1063	((u64 *)image)[1] = local_paca->kernel_toc;
1064#endif
1065
1066	fp->bpf_func = (void *)image;
1067	fp->jited = 1;
1068	fp->jited_len = alloclen;
1069
1070	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
1071
1072out:
1073	kfree(addrs);
1074
1075	if (bpf_blinded)
1076		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
1077
1078	return fp;
1079}
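For illustration, the two stores in the ELFv1 block above give the finished image this shape: a 16-byte function descriptor at image, followed by the generated code at code_base, with fp->bpf_func pointing at image so an ELFv1 caller lands at code_base with r2 set to the kernel TOC. demo_write_descriptor() below just restates those stores and is not kernel code.

static void demo_write_descriptor(u64 *image, u32 *code_base, u64 kernel_toc)
{
	image[0] = (u64)code_base;	/* entry point */
	image[1] = kernel_toc;		/* TOC for r2  */
}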
1080
1081/* Overriding bpf_jit_free() as we don't set images read-only. */
1082void bpf_jit_free(struct bpf_prog *fp)
1083{
1084	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
1085	struct bpf_binary_header *bpf_hdr = (void *)addr;
1086
1087	if (fp->jited)
1088		bpf_jit_binary_free(bpf_hdr);
1089
1090	bpf_prog_unlock_free(fp);
1091}
bpf_jit_comp64.c, v5.4:
   1// SPDX-License-Identifier: GPL-2.0-only
   2/*
   3 * bpf_jit_comp64.c: eBPF JIT compiler
   4 *
   5 * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
   6 *		  IBM Corporation
   7 *
   8 * Based on the powerpc classic BPF JIT compiler by Matt Evans
   9 */
  10#include <linux/moduleloader.h>
  11#include <asm/cacheflush.h>
  12#include <asm/asm-compat.h>
  13#include <linux/netdevice.h>
  14#include <linux/filter.h>
  15#include <linux/if_vlan.h>
  16#include <asm/kprobes.h>
  17#include <linux/bpf.h>
  18
  19#include "bpf_jit64.h"
  20
  21static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
  22{
  23	memset32(area, BREAKPOINT_INSTRUCTION, size/4);
  24}
  25
  26static inline void bpf_flush_icache(void *start, void *end)
  27{
  28	smp_wmb();
  29	flush_icache_range((unsigned long)start, (unsigned long)end);
  30}
  31
  32static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
  33{
  34	return (ctx->seen & (1 << (31 - b2p[i])));
  35}
  36
  37static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
  38{
  39	ctx->seen |= (1 << (31 - b2p[i]));
  40}
  41
  42static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
  43{
  44	/*
  45	 * We only need a stack frame if:
  46	 * - we call other functions (kernel helpers), or
  47	 * - the bpf program uses its stack area
  48	 * The latter condition is deduced from the usage of BPF_REG_FP
  49	 */
  50	return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
  51}
  52
  53/*
  54 * When not setting up our own stackframe, the redzone usage is:
  55 *
  56 *		[	prev sp		] <-------------
  57 *		[	  ...       	] 		|
  58 * sp (r1) --->	[    stack pointer	] --------------
  59 *		[   nv gpr save area	] 6*8
  60 *		[    tail_call_cnt	] 8
  61 *		[    local_tmp_var	] 8
  62 *		[   unused red zone	] 208 bytes protected
  63 */
  64static int bpf_jit_stack_local(struct codegen_context *ctx)
  65{
  66	if (bpf_has_stack_frame(ctx))
  67		return STACK_FRAME_MIN_SIZE + ctx->stack_size;
  68	else
  69		return -(BPF_PPC_STACK_SAVE + 16);
  70}
  71
  72static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
  73{
  74	return bpf_jit_stack_local(ctx) + 8;
  75}
  76
  77static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg)
  78{
  79	if (reg >= BPF_PPC_NVR_MIN && reg < 32)
  80		return (bpf_has_stack_frame(ctx) ?
  81			(BPF_PPC_STACKFRAME + ctx->stack_size) : 0)
  82				- (8 * (32 - reg));
  83
  84	pr_err("BPF JIT is asking about unknown registers");
  85	BUG();
  86}
  87
  88static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx)
  89{
  90	int i;
  91
  92	/*
  93	 * Initialize tail_call_cnt if we do tail calls.
  94	 * Otherwise, put in NOPs so that it can be skipped when we are
  95	 * invoked through a tail call.
  96	 */
  97	if (ctx->seen & SEEN_TAILCALL) {
  98		PPC_LI(b2p[TMP_REG_1], 0);
  99		/* this goes in the redzone */
 100		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
 101	} else {
 102		PPC_NOP();
 103		PPC_NOP();
 104	}
 105
 106#define BPF_TAILCALL_PROLOGUE_SIZE	8
 107
 108	if (bpf_has_stack_frame(ctx)) {
 109		/*
 110		 * We need a stack frame, but we don't necessarily need to
 111		 * save/restore LR unless we call other functions
 112		 */
 113		if (ctx->seen & SEEN_FUNC) {
 114			EMIT(PPC_INST_MFLR | __PPC_RT(R0));
 115			PPC_BPF_STL(0, 1, PPC_LR_STKOFF);
 116		}
 117
 118		PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size));
 119	}
 120
 121	/*
 122	 * Back up non-volatile regs -- BPF registers 6-10
 123	 * If we haven't created our own stack frame, we save these
 124	 * in the protected zone below the previous stack frame
 125	 */
 126	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
 127		if (bpf_is_seen_register(ctx, i))
 128			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 129
 130	/* Setup frame pointer to point to the bpf stack area */
 131	if (bpf_is_seen_register(ctx, BPF_REG_FP))
 132		PPC_ADDI(b2p[BPF_REG_FP], 1,
 133				STACK_FRAME_MIN_SIZE + ctx->stack_size);
 134}
 135
 136static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
 137{
 138	int i;
 139
 140	/* Restore NVRs */
 141	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
 142		if (bpf_is_seen_register(ctx, i))
 143			PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
 144
 145	/* Tear down our stack frame */
 146	if (bpf_has_stack_frame(ctx)) {
 147		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
 148		if (ctx->seen & SEEN_FUNC) {
 149			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
 150			PPC_MTLR(0);
 151		}
 152	}
 153}
 154
 155static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 156{
 157	bpf_jit_emit_common_epilogue(image, ctx);
 158
 159	/* Move result to r3 */
 160	PPC_MR(3, b2p[BPF_REG_0]);
 161
 162	PPC_BLR();
 163}
 164
 165static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
 166				       u64 func)
 167{
 168#ifdef PPC64_ELF_ABI_v1
 169	/* func points to the function descriptor */
 170	PPC_LI64(b2p[TMP_REG_2], func);
 171	/* Load actual entry point from function descriptor */
 172	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
 173	/* ... and move it to LR */
 174	PPC_MTLR(b2p[TMP_REG_1]);
 175	/*
 176	 * Load TOC from function descriptor at offset 8.
 177	 * We can clobber r2 since we get called through a
 178	 * function pointer (so caller will save/restore r2)
 179	 * and since we don't use a TOC ourself.
 180	 */
 181	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
 182#else
 183	/* We can clobber r12 */
 184	PPC_FUNC_ADDR(12, func);
 185	PPC_MTLR(12);
 186#endif
 187	PPC_BLRL();
 188}
 189
 190static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
 191				       u64 func)
 192{
 193	unsigned int i, ctx_idx = ctx->idx;
 194
 195	/* Load function address into r12 */
 196	PPC_LI64(12, func);
 197
 198	/* For bpf-to-bpf function calls, the callee's address is unknown
 199	 * until the last extra pass. As seen above, we use PPC_LI64() to
 200	 * load the callee's address, but this may optimize the number of
 201	 * instructions required based on the nature of the address.
 202	 *
 203	 * Since we don't want the number of instructions emitted to change,
 204	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
 205	 * we always have a five-instruction sequence, which is the maximum
 206	 * that PPC_LI64() can emit.
 207	 */
 208	for (i = ctx->idx - ctx_idx; i < 5; i++)
 209		PPC_NOP();
 210
 211#ifdef PPC64_ELF_ABI_v1
 212	/*
 213	 * Load TOC from function descriptor at offset 8.
 214	 * We can clobber r2 since we get called through a
 215	 * function pointer (so caller will save/restore r2)
 216	 * and since we don't use a TOC ourself.
 217	 */
 218	PPC_BPF_LL(2, 12, 8);
 219	/* Load actual entry point from function descriptor */
 220	PPC_BPF_LL(12, 12, 0);
 221#endif
 222
 223	PPC_MTLR(12);
 224	PPC_BLRL();
 225}
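The NOP padding above keeps the call sequence a fixed five instructions across passes. Below is a minimal sketch of the same idiom in isolation; demo_emit_fixed_len() is illustrative only and assumes the image/ctx locals that the EMIT()-based PPC_* macros expect.

static void demo_emit_fixed_len(u32 *image, struct codegen_context *ctx, u64 addr)
{
	unsigned int start = ctx->idx;

	PPC_LI64(12, addr);		/* emits 1 to 5 instructions */
	while (ctx->idx - start < 5)
		PPC_NOP();		/* pad to exactly 5 slots    */
}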
 226
 227static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
 228{
 229	/*
 230	 * By now, the eBPF program has already setup parameters in r3, r4 and r5
 231	 * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program
 232	 * r4/BPF_REG_2 - pointer to bpf_array
 233	 * r5/BPF_REG_3 - index in bpf_array
 234	 */
 235	int b2p_bpf_array = b2p[BPF_REG_2];
 236	int b2p_index = b2p[BPF_REG_3];
 237
 238	/*
 239	 * if (index >= array->map.max_entries)
 240	 *   goto out;
 241	 */
 242	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
 243	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
 244	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
 245	PPC_BCC(COND_GE, out);
 246
 247	/*
 248	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
 249	 *   goto out;
 250	 */
 251	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
 252	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
 253	PPC_BCC(COND_GT, out);
 254
 255	/*
 256	 * tail_call_cnt++;
 257	 */
 258	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
 259	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
 260
 261	/* prog = array->ptrs[index]; */
 262	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
 263	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
 264	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
 265
 266	/*
 267	 * if (prog == NULL)
 268	 *   goto out;
 269	 */
 270	PPC_CMPLDI(b2p[TMP_REG_1], 0);
 271	PPC_BCC(COND_EQ, out);
 272
 273	/* goto *(prog->bpf_func + prologue_size); */
 274	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
 275#ifdef PPC64_ELF_ABI_v1
 276	/* skip past the function descriptor */
 277	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
 278			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
 279#else
 280	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
 281#endif
 282	PPC_MTCTR(b2p[TMP_REG_1]);
 283
 284	/* tear down stack, restore NVRs, ... */
 285	bpf_jit_emit_common_epilogue(image, ctx);
 286
 287	PPC_BCTR();
 288	/* out: */
 289}
 290
 291/* Assemble the body code between the prologue & epilogue */
 292static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 293			      struct codegen_context *ctx,
 294			      u32 *addrs, bool extra_pass)
 295{
 296	const struct bpf_insn *insn = fp->insnsi;
 297	int flen = fp->len;
 298	int i, ret;
 299
 300	/* Start of epilogue code - will only be valid 2nd pass onwards */
 301	u32 exit_addr = addrs[flen];
 302
 303	for (i = 0; i < flen; i++) {
 304		u32 code = insn[i].code;
 305		u32 dst_reg = b2p[insn[i].dst_reg];
 306		u32 src_reg = b2p[insn[i].src_reg];
 307		s16 off = insn[i].off;
 308		s32 imm = insn[i].imm;
 309		bool func_addr_fixed;
 310		u64 func_addr;
 311		u64 imm64;
 312		u32 true_cond;
 313		u32 tmp_idx;
 314
 315		/*
 316		 * addrs[] maps a BPF bytecode address into a real offset from
 317		 * the start of the body code.
 318		 */
 319		addrs[i] = ctx->idx * 4;
 320
 321		/*
 322		 * As an optimization, we note down which non-volatile registers
 323		 * are used so that we can only save/restore those in our
 324		 * prologue and epilogue. We do this here regardless of whether
 325		 * the actual BPF instruction uses src/dst registers or not
 326		 * (for instance, BPF_CALL does not use them). The expectation
 327		 * is that those instructions will have src_reg/dst_reg set to
 328		 * 0. Even otherwise, we just lose some prologue/epilogue
 329		 * optimization but everything else should work without
 330		 * any issues.
 331		 */
 332		if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
 333			bpf_set_seen_register(ctx, insn[i].dst_reg);
 334		if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
 335			bpf_set_seen_register(ctx, insn[i].src_reg);
 336
 337		switch (code) {
 338		/*
 339		 * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG
 340		 */
 341		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
 342		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
 343			PPC_ADD(dst_reg, dst_reg, src_reg);
 344			goto bpf_alu32_trunc;
 345		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
 346		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
 347			PPC_SUB(dst_reg, dst_reg, src_reg);
 348			goto bpf_alu32_trunc;
 349		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
 350		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
 351		case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */
 352		case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */
 353			if (BPF_OP(code) == BPF_SUB)
 354				imm = -imm;
 355			if (imm) {
 356				if (imm >= -32768 && imm < 32768)
 357					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
 358				else {
 359					PPC_LI32(b2p[TMP_REG_1], imm);
 360					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
 361				}
 362			}
 363			goto bpf_alu32_trunc;
 364		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
 365		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
 366			if (BPF_CLASS(code) == BPF_ALU)
 367				PPC_MULW(dst_reg, dst_reg, src_reg);
 368			else
 369				PPC_MULD(dst_reg, dst_reg, src_reg);
 370			goto bpf_alu32_trunc;
 371		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
 372		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
 373			if (imm >= -32768 && imm < 32768)
 374				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
 375			else {
 376				PPC_LI32(b2p[TMP_REG_1], imm);
 377				if (BPF_CLASS(code) == BPF_ALU)
 378					PPC_MULW(dst_reg, dst_reg,
 379							b2p[TMP_REG_1]);
 380				else
 381					PPC_MULD(dst_reg, dst_reg,
 382							b2p[TMP_REG_1]);
 383			}
 384			goto bpf_alu32_trunc;
 385		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
 386		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
 387			if (BPF_OP(code) == BPF_MOD) {
 388				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
 389				PPC_MULW(b2p[TMP_REG_1], src_reg,
 390						b2p[TMP_REG_1]);
 391				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
 392			} else
 393				PPC_DIVWU(dst_reg, dst_reg, src_reg);
 394			goto bpf_alu32_trunc;
 395		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
 396		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
 397			if (BPF_OP(code) == BPF_MOD) {
 398				PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg);
 399				PPC_MULD(b2p[TMP_REG_1], src_reg,
 400						b2p[TMP_REG_1]);
 401				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
 402			} else
 403				PPC_DIVDU(dst_reg, dst_reg, src_reg);
 404			break;
 405		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
 406		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
 407		case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
 408		case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
 409			if (imm == 0)
 410				return -EINVAL;
 411			else if (imm == 1)
 412				goto bpf_alu32_trunc;
 413
 414			PPC_LI32(b2p[TMP_REG_1], imm);
 415			switch (BPF_CLASS(code)) {
 416			case BPF_ALU:
 417				if (BPF_OP(code) == BPF_MOD) {
 418					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
 419							b2p[TMP_REG_1]);
 420					PPC_MULW(b2p[TMP_REG_1],
 421							b2p[TMP_REG_1],
 422							b2p[TMP_REG_2]);
 423					PPC_SUB(dst_reg, dst_reg,
 424							b2p[TMP_REG_1]);
 425				} else
 426					PPC_DIVWU(dst_reg, dst_reg,
 427							b2p[TMP_REG_1]);
 428				break;
 429			case BPF_ALU64:
 430				if (BPF_OP(code) == BPF_MOD) {
 431					PPC_DIVDU(b2p[TMP_REG_2], dst_reg,
 432							b2p[TMP_REG_1]);
 433					PPC_MULD(b2p[TMP_REG_1],
 434							b2p[TMP_REG_1],
 435							b2p[TMP_REG_2]);
 436					PPC_SUB(dst_reg, dst_reg,
 437							b2p[TMP_REG_1]);
 438				} else
 439					PPC_DIVDU(dst_reg, dst_reg,
 440							b2p[TMP_REG_1]);
 441				break;
 442			}
 443			goto bpf_alu32_trunc;
 444		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
 445		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
 446			PPC_NEG(dst_reg, dst_reg);
 447			goto bpf_alu32_trunc;
 448
 449		/*
 450		 * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH
 451		 */
 452		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
 453		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
 454			PPC_AND(dst_reg, dst_reg, src_reg);
 455			goto bpf_alu32_trunc;
 456		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
 457		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
 458			if (!IMM_H(imm))
 459				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
 460			else {
 461				/* Sign-extended */
 462				PPC_LI32(b2p[TMP_REG_1], imm);
 463				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
 464			}
 465			goto bpf_alu32_trunc;
 466		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
 467		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
 468			PPC_OR(dst_reg, dst_reg, src_reg);
 469			goto bpf_alu32_trunc;
 470		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
 471		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
 472			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 473				/* Sign-extended */
 474				PPC_LI32(b2p[TMP_REG_1], imm);
 475				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
 476			} else {
 477				if (IMM_L(imm))
 478					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
 479				if (IMM_H(imm))
 480					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
 481			}
 482			goto bpf_alu32_trunc;
 483		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
 484		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
 485			PPC_XOR(dst_reg, dst_reg, src_reg);
 486			goto bpf_alu32_trunc;
 487		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
 488		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
 489			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 490				/* Sign-extended */
 491				PPC_LI32(b2p[TMP_REG_1], imm);
 492				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
 493			} else {
 494				if (IMM_L(imm))
 495					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
 496				if (IMM_H(imm))
 497					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
 498			}
 499			goto bpf_alu32_trunc;
 500		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
 501			/* slw clears top 32 bits */
 502			PPC_SLW(dst_reg, dst_reg, src_reg);
 503			/* skip zero extension move, but set address map. */
 504			if (insn_is_zext(&insn[i + 1]))
 505				addrs[++i] = ctx->idx * 4;
 506			break;
 507		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
 508			PPC_SLD(dst_reg, dst_reg, src_reg);
 509			break;
 510		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
 511			/* with imm 0, we still need to clear top 32 bits */
 512			PPC_SLWI(dst_reg, dst_reg, imm);
 513			if (insn_is_zext(&insn[i + 1]))
 514				addrs[++i] = ctx->idx * 4;
 515			break;
 516		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
 517			if (imm != 0)
 518				PPC_SLDI(dst_reg, dst_reg, imm);
 519			break;
 520		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
 521			PPC_SRW(dst_reg, dst_reg, src_reg);
 522			if (insn_is_zext(&insn[i + 1]))
 523				addrs[++i] = ctx->idx * 4;
 524			break;
 525		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
 526			PPC_SRD(dst_reg, dst_reg, src_reg);
 527			break;
 528		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
 529			PPC_SRWI(dst_reg, dst_reg, imm);
 530			if (insn_is_zext(&insn[i + 1]))
 531				addrs[++i] = ctx->idx * 4;
 532			break;
 533		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
 534			if (imm != 0)
 535				PPC_SRDI(dst_reg, dst_reg, imm);
 536			break;
 537		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
 538			PPC_SRAW(dst_reg, dst_reg, src_reg);
 539			goto bpf_alu32_trunc;
 540		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
 541			PPC_SRAD(dst_reg, dst_reg, src_reg);
 542			break;
 543		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
 544			PPC_SRAWI(dst_reg, dst_reg, imm);
 545			goto bpf_alu32_trunc;
 546		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
 547			if (imm != 0)
 548				PPC_SRADI(dst_reg, dst_reg, imm);
 549			break;
 550
 551		/*
 552		 * MOV
 553		 */
 554		case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */
 555		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
 556			if (imm == 1) {
 557				/* special mov32 for zext */
 558				PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
 559				break;
 560			}
 561			PPC_MR(dst_reg, src_reg);
 562			goto bpf_alu32_trunc;
 563		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
 564		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
 565			PPC_LI32(dst_reg, imm);
 566			if (imm < 0)
 567				goto bpf_alu32_trunc;
 568			else if (insn_is_zext(&insn[i + 1]))
 569				addrs[++i] = ctx->idx * 4;
 570			break;
 571
 572bpf_alu32_trunc:
 573		/* Truncate to 32-bits */
 574		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
 575			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
 576		break;
 577
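		/*
		 * Note on the insn_is_zext()/verifier_zext checks in the
		 * 32-bit cases above: when the verifier has already inserted
		 * explicit zero-extension instructions (fp->aux->verifier_zext),
		 * the JIT skips its own rlwinm truncation and elides the
		 * verifier's zext mov, only recording its address in addrs[].
		 */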
 578		/*
 579		 * BPF_FROM_BE/LE
 580		 */
 581		case BPF_ALU | BPF_END | BPF_FROM_LE:
 582		case BPF_ALU | BPF_END | BPF_FROM_BE:
 583#ifdef __BIG_ENDIAN__
 584			if (BPF_SRC(code) == BPF_FROM_BE)
 585				goto emit_clear;
 586#else /* !__BIG_ENDIAN__ */
 587			if (BPF_SRC(code) == BPF_FROM_LE)
 588				goto emit_clear;
 589#endif
 590			switch (imm) {
 591			case 16:
 592				/* Rotate 8 bits left & mask with 0x0000ff00 */
 593				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
 594				/* Rotate 8 bits right & insert LSB to reg */
 595				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
 596				/* Move result back to dst_reg */
 597				PPC_MR(dst_reg, b2p[TMP_REG_1]);
 598				break;
 599			case 32:
 600				/*
 601				 * Rotate word left by 8 bits:
 602				 * 2 bytes are already in their final position
 603				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
 604				 */
 605				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
 606				/* Rotate 24 bits and insert byte 1 */
 607				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
 608				/* Rotate 24 bits and insert byte 3 */
 609				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
 610				PPC_MR(dst_reg, b2p[TMP_REG_1]);
 611				break;
 612			case 64:
 613				/*
 614				 * Way easier and faster(?) to store the value
 615				 * into stack and then use ldbrx
 616				 *
 617				 * ctx->seen will be reliable in pass2, but
 618				 * the instructions generated will remain the
 619				 * same across all passes
 620				 */
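				/*
				 * i.e. spill dst to the JIT stack scratch slot
				 * and read it back with a byte-reversed load,
				 * doing the full 64-bit swap in one instruction.
				 */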
 621				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
 622				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
 623				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
 624				break;
 625			}
 626			break;
 627
 628emit_clear:
 629			switch (imm) {
 630			case 16:
 631				/* zero-extend 16 bits into 64 bits */
 632				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
 633				if (insn_is_zext(&insn[i + 1]))
 634					addrs[++i] = ctx->idx * 4;
 635				break;
 636			case 32:
 637				if (!fp->aux->verifier_zext)
 638					/* zero-extend 32 bits into 64 bits */
 639					PPC_RLDICL(dst_reg, dst_reg, 0, 32);
 640				break;
 641			case 64:
 642				/* nop */
 643				break;
 644			}
 645			break;
 646
 647		/*
 648		 * BPF_ST(X)
 649		 */
 650		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
 651		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
 652			if (BPF_CLASS(code) == BPF_ST) {
 653				PPC_LI(b2p[TMP_REG_1], imm);
 654				src_reg = b2p[TMP_REG_1];
 655			}
 656			PPC_STB(src_reg, dst_reg, off);
 657			break;
  658		case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
  659		case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
 660			if (BPF_CLASS(code) == BPF_ST) {
 661				PPC_LI(b2p[TMP_REG_1], imm);
 662				src_reg = b2p[TMP_REG_1];
 663			}
 664			PPC_STH(src_reg, dst_reg, off);
 665			break;
 666		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
 667		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
 668			if (BPF_CLASS(code) == BPF_ST) {
 669				PPC_LI32(b2p[TMP_REG_1], imm);
 670				src_reg = b2p[TMP_REG_1];
 671			}
 672			PPC_STW(src_reg, dst_reg, off);
 673			break;
  674		case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
 675		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
 676			if (BPF_CLASS(code) == BPF_ST) {
 677				PPC_LI32(b2p[TMP_REG_1], imm);
 678				src_reg = b2p[TMP_REG_1];
 679			}
 680			PPC_BPF_STL(src_reg, dst_reg, off);
 681			break;
 682
 683		/*
 684		 * BPF_STX XADD (atomic_add)
 685		 */
 686		/* *(u32 *)(dst + off) += src */
 687		case BPF_STX | BPF_XADD | BPF_W:
 688			/* Get EA into TMP_REG_1 */
 689			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
 690			tmp_idx = ctx->idx * 4;
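			/*
			 * Standard larx/stcx. retry loop: tmp_idx records the
			 * offset of the lwarx so the conditional branch below
			 * can loop back until the stwcx. succeeds, i.e. the
			 * reservation was not lost to another CPU.
			 */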
 691			/* load value from memory into TMP_REG_2 */
 692			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
 693			/* add value from src_reg into this */
 694			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
 695			/* store result back */
 696			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
 697			/* we're done if this succeeded */
 698			PPC_BCC_SHORT(COND_NE, tmp_idx);
 699			break;
 700		/* *(u64 *)(dst + off) += src */
 701		case BPF_STX | BPF_XADD | BPF_DW:
 702			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
 703			tmp_idx = ctx->idx * 4;
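			/*
			 * Same reservation/retry pattern as above, using the
			 * doubleword forms ldarx/stdcx.
			 */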
 704			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
 705			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
 706			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
 707			PPC_BCC_SHORT(COND_NE, tmp_idx);
 708			break;
 709
 710		/*
 711		 * BPF_LDX
 712		 */
 713		/* dst = *(u8 *)(ul) (src + off) */
 714		case BPF_LDX | BPF_MEM | BPF_B:
 715			PPC_LBZ(dst_reg, src_reg, off);
 716			if (insn_is_zext(&insn[i + 1]))
 717				addrs[++i] = ctx->idx * 4;
 718			break;
 719		/* dst = *(u16 *)(ul) (src + off) */
 720		case BPF_LDX | BPF_MEM | BPF_H:
 721			PPC_LHZ(dst_reg, src_reg, off);
 722			if (insn_is_zext(&insn[i + 1]))
 723				addrs[++i] = ctx->idx * 4;
 724			break;
 725		/* dst = *(u32 *)(ul) (src + off) */
 726		case BPF_LDX | BPF_MEM | BPF_W:
 727			PPC_LWZ(dst_reg, src_reg, off);
 728			if (insn_is_zext(&insn[i + 1]))
 729				addrs[++i] = ctx->idx * 4;
 730			break;
 731		/* dst = *(u64 *)(ul) (src + off) */
 732		case BPF_LDX | BPF_MEM | BPF_DW:
 733			PPC_BPF_LL(dst_reg, src_reg, off);
 734			break;
 735
 736		/*
 737		 * Doubleword load
 738		 * 16 byte instruction that uses two 'struct bpf_insn'
 739		 */
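		/*
		 * insn[i].imm carries the low 32 bits and insn[i + 1].imm the
		 * high 32 bits; addrs[++i] below maps the second (otherwise
		 * empty) slot to the same JITed offset.
		 */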
 740		case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
 741			imm64 = ((u64)(u32) insn[i].imm) |
 742				    (((u64)(u32) insn[i+1].imm) << 32);
 743			/* Adjust for two bpf instructions */
 744			addrs[++i] = ctx->idx * 4;
 745			PPC_LI64(dst_reg, imm64);
 746			break;
 747
 748		/*
 749		 * Return/Exit
 750		 */
 751		case BPF_JMP | BPF_EXIT:
 752			/*
 753			 * If this isn't the very last instruction, branch to
 754			 * the epilogue. If we _are_ the last instruction,
 755			 * we'll just fall through to the epilogue.
 756			 */
 757			if (i != flen - 1)
 758				PPC_JMP(exit_addr);
 759			/* else fall through to the epilogue */
 760			break;
 761
 762		/*
 763		 * Call kernel helper or bpf function
 764		 */
 765		case BPF_JMP | BPF_CALL:
 766			ctx->seen |= SEEN_FUNC;
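			/*
			 * A fixed address (typically a kernel helper) is known
			 * at JIT time and goes through the helper-call path;
			 * otherwise this is a bpf-to-bpf (subprog) call emitted
			 * as a relative call whose target is patched later in
			 * bpf_jit_fixup_subprog_calls() during the extra pass.
			 */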
 767
 768			ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass,
 769						    &func_addr, &func_addr_fixed);
 770			if (ret < 0)
 771				return ret;
 772
 773			if (func_addr_fixed)
 774				bpf_jit_emit_func_call_hlp(image, ctx, func_addr);
 775			else
 776				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
 777			/* move return value from r3 to BPF_REG_0 */
 778			PPC_MR(b2p[BPF_REG_0], 3);
 779			break;
 780
 781		/*
 782		 * Jumps and branches
 783		 */
 784		case BPF_JMP | BPF_JA:
 785			PPC_JMP(addrs[i + 1 + off]);
 786			break;
 787
 788		case BPF_JMP | BPF_JGT | BPF_K:
 789		case BPF_JMP | BPF_JGT | BPF_X:
 790		case BPF_JMP | BPF_JSGT | BPF_K:
 791		case BPF_JMP | BPF_JSGT | BPF_X:
 792		case BPF_JMP32 | BPF_JGT | BPF_K:
 793		case BPF_JMP32 | BPF_JGT | BPF_X:
 794		case BPF_JMP32 | BPF_JSGT | BPF_K:
 795		case BPF_JMP32 | BPF_JSGT | BPF_X:
 796			true_cond = COND_GT;
 797			goto cond_branch;
 798		case BPF_JMP | BPF_JLT | BPF_K:
 799		case BPF_JMP | BPF_JLT | BPF_X:
 800		case BPF_JMP | BPF_JSLT | BPF_K:
 801		case BPF_JMP | BPF_JSLT | BPF_X:
 802		case BPF_JMP32 | BPF_JLT | BPF_K:
 803		case BPF_JMP32 | BPF_JLT | BPF_X:
 804		case BPF_JMP32 | BPF_JSLT | BPF_K:
 805		case BPF_JMP32 | BPF_JSLT | BPF_X:
 806			true_cond = COND_LT;
 807			goto cond_branch;
 808		case BPF_JMP | BPF_JGE | BPF_K:
 809		case BPF_JMP | BPF_JGE | BPF_X:
 810		case BPF_JMP | BPF_JSGE | BPF_K:
 811		case BPF_JMP | BPF_JSGE | BPF_X:
 812		case BPF_JMP32 | BPF_JGE | BPF_K:
 813		case BPF_JMP32 | BPF_JGE | BPF_X:
 814		case BPF_JMP32 | BPF_JSGE | BPF_K:
 815		case BPF_JMP32 | BPF_JSGE | BPF_X:
 816			true_cond = COND_GE;
 817			goto cond_branch;
 818		case BPF_JMP | BPF_JLE | BPF_K:
 819		case BPF_JMP | BPF_JLE | BPF_X:
 820		case BPF_JMP | BPF_JSLE | BPF_K:
 821		case BPF_JMP | BPF_JSLE | BPF_X:
 822		case BPF_JMP32 | BPF_JLE | BPF_K:
 823		case BPF_JMP32 | BPF_JLE | BPF_X:
 824		case BPF_JMP32 | BPF_JSLE | BPF_K:
 825		case BPF_JMP32 | BPF_JSLE | BPF_X:
 826			true_cond = COND_LE;
 827			goto cond_branch;
 828		case BPF_JMP | BPF_JEQ | BPF_K:
 829		case BPF_JMP | BPF_JEQ | BPF_X:
 830		case BPF_JMP32 | BPF_JEQ | BPF_K:
 831		case BPF_JMP32 | BPF_JEQ | BPF_X:
 832			true_cond = COND_EQ;
 833			goto cond_branch;
 834		case BPF_JMP | BPF_JNE | BPF_K:
 835		case BPF_JMP | BPF_JNE | BPF_X:
 836		case BPF_JMP32 | BPF_JNE | BPF_K:
 837		case BPF_JMP32 | BPF_JNE | BPF_X:
 838			true_cond = COND_NE;
 839			goto cond_branch;
 840		case BPF_JMP | BPF_JSET | BPF_K:
 841		case BPF_JMP | BPF_JSET | BPF_X:
 842		case BPF_JMP32 | BPF_JSET | BPF_K:
 843		case BPF_JMP32 | BPF_JSET | BPF_X:
 844			true_cond = COND_NE;
 845			/* Fall through */
 846
 847cond_branch:
 848			switch (code) {
 849			case BPF_JMP | BPF_JGT | BPF_X:
 850			case BPF_JMP | BPF_JLT | BPF_X:
 851			case BPF_JMP | BPF_JGE | BPF_X:
 852			case BPF_JMP | BPF_JLE | BPF_X:
 853			case BPF_JMP | BPF_JEQ | BPF_X:
 854			case BPF_JMP | BPF_JNE | BPF_X:
 855			case BPF_JMP32 | BPF_JGT | BPF_X:
 856			case BPF_JMP32 | BPF_JLT | BPF_X:
 857			case BPF_JMP32 | BPF_JGE | BPF_X:
 858			case BPF_JMP32 | BPF_JLE | BPF_X:
 859			case BPF_JMP32 | BPF_JEQ | BPF_X:
 860			case BPF_JMP32 | BPF_JNE | BPF_X:
 861				/* unsigned comparison */
 862				if (BPF_CLASS(code) == BPF_JMP32)
 863					PPC_CMPLW(dst_reg, src_reg);
 864				else
 865					PPC_CMPLD(dst_reg, src_reg);
 866				break;
 867			case BPF_JMP | BPF_JSGT | BPF_X:
 868			case BPF_JMP | BPF_JSLT | BPF_X:
 869			case BPF_JMP | BPF_JSGE | BPF_X:
 870			case BPF_JMP | BPF_JSLE | BPF_X:
 871			case BPF_JMP32 | BPF_JSGT | BPF_X:
 872			case BPF_JMP32 | BPF_JSLT | BPF_X:
 873			case BPF_JMP32 | BPF_JSGE | BPF_X:
 874			case BPF_JMP32 | BPF_JSLE | BPF_X:
 875				/* signed comparison */
 876				if (BPF_CLASS(code) == BPF_JMP32)
 877					PPC_CMPW(dst_reg, src_reg);
 878				else
 879					PPC_CMPD(dst_reg, src_reg);
 880				break;
 881			case BPF_JMP | BPF_JSET | BPF_X:
 882			case BPF_JMP32 | BPF_JSET | BPF_X:
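				/*
				 * JSET means "branch if (dst & src) != 0".
				 * For the JMP32 form, the AND result is masked
				 * down to the low 32 bits before CR0 is set.
				 */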
 883				if (BPF_CLASS(code) == BPF_JMP) {
 884					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
 885						    src_reg);
 886				} else {
 887					int tmp_reg = b2p[TMP_REG_1];
 888
 889					PPC_AND(tmp_reg, dst_reg, src_reg);
 890					PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
 891						       31);
 892				}
 893				break;
 894			case BPF_JMP | BPF_JNE | BPF_K:
 895			case BPF_JMP | BPF_JEQ | BPF_K:
 896			case BPF_JMP | BPF_JGT | BPF_K:
 897			case BPF_JMP | BPF_JLT | BPF_K:
 898			case BPF_JMP | BPF_JGE | BPF_K:
 899			case BPF_JMP | BPF_JLE | BPF_K:
 900			case BPF_JMP32 | BPF_JNE | BPF_K:
 901			case BPF_JMP32 | BPF_JEQ | BPF_K:
 902			case BPF_JMP32 | BPF_JGT | BPF_K:
 903			case BPF_JMP32 | BPF_JLT | BPF_K:
 904			case BPF_JMP32 | BPF_JGE | BPF_K:
 905			case BPF_JMP32 | BPF_JLE | BPF_K:
 906			{
 907				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
 908
 909				/*
 910				 * Need sign-extended load, so only positive
 911				 * values can be used as imm in cmpldi
 912				 */
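				/*
				 * e.g. imm = -1 must compare as
				 * 0xffffffffffffffff for the 64-bit form, which
				 * cannot be encoded in cmpldi's 16-bit field,
				 * so it is loaded sign-extended via PPC_LI32
				 * and compared register-to-register below.
				 */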
 913				if (imm >= 0 && imm < 32768) {
 914					if (is_jmp32)
 915						PPC_CMPLWI(dst_reg, imm);
 916					else
 917						PPC_CMPLDI(dst_reg, imm);
 918				} else {
 919					/* sign-extending load */
 920					PPC_LI32(b2p[TMP_REG_1], imm);
 921					/* ... but unsigned comparison */
 922					if (is_jmp32)
 923						PPC_CMPLW(dst_reg,
 924							  b2p[TMP_REG_1]);
 925					else
 926						PPC_CMPLD(dst_reg,
 927							  b2p[TMP_REG_1]);
 928				}
 929				break;
 930			}
 931			case BPF_JMP | BPF_JSGT | BPF_K:
 932			case BPF_JMP | BPF_JSLT | BPF_K:
 933			case BPF_JMP | BPF_JSGE | BPF_K:
 934			case BPF_JMP | BPF_JSLE | BPF_K:
 935			case BPF_JMP32 | BPF_JSGT | BPF_K:
 936			case BPF_JMP32 | BPF_JSLT | BPF_K:
 937			case BPF_JMP32 | BPF_JSGE | BPF_K:
 938			case BPF_JMP32 | BPF_JSLE | BPF_K:
 939			{
 940				bool is_jmp32 = BPF_CLASS(code) == BPF_JMP32;
 941
 942				/*
 943				 * signed comparison, so any 16-bit value
 944				 * can be used in cmpdi
 945				 */
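				/*
				 * cmpdi/cmpwi sign-extend their immediate, so
				 * the full [-32768, 32767] range is encodable
				 * directly; anything else goes through a
				 * register as in the unsigned case above.
				 */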
 946				if (imm >= -32768 && imm < 32768) {
 947					if (is_jmp32)
 948						PPC_CMPWI(dst_reg, imm);
 949					else
 950						PPC_CMPDI(dst_reg, imm);
 951				} else {
 952					PPC_LI32(b2p[TMP_REG_1], imm);
 953					if (is_jmp32)
 954						PPC_CMPW(dst_reg,
 955							 b2p[TMP_REG_1]);
 956					else
 957						PPC_CMPD(dst_reg,
 958							 b2p[TMP_REG_1]);
 959				}
 960				break;
 961			}
 962			case BPF_JMP | BPF_JSET | BPF_K:
 963			case BPF_JMP32 | BPF_JSET | BPF_K:
 964				/* andi does not sign-extend the immediate */
 965				if (imm >= 0 && imm < 32768)
 966					/* PPC_ANDI is _only/always_ dot-form */
 967					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
 968				else {
 969					int tmp_reg = b2p[TMP_REG_1];
 970
 971					PPC_LI32(tmp_reg, imm);
 972					if (BPF_CLASS(code) == BPF_JMP) {
 973						PPC_AND_DOT(tmp_reg, dst_reg,
 974							    tmp_reg);
 975					} else {
 976						PPC_AND(tmp_reg, dst_reg,
 977							tmp_reg);
 978						PPC_RLWINM_DOT(tmp_reg, tmp_reg,
 979							       0, 0, 31);
 980					}
 981				}
 982				break;
 983			}
 984			PPC_BCC(true_cond, addrs[i + 1 + off]);
 985			break;
 986
 987		/*
 988		 * Tail call
 989		 */
 990		case BPF_JMP | BPF_TAIL_CALL:
 991			ctx->seen |= SEEN_TAILCALL;
 992			bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]);
 993			break;
 994
 995		default:
 996			/*
 997			 * The filter contains something cruel & unusual.
 998			 * We don't handle it, but also there shouldn't be
 999			 * anything missing from our list.
1000			 */
1001			pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n",
1002					code, i);
1003			return -ENOTSUPP;
1004		}
1005	}
1006
1007	/* Set end-of-body-code address for exit. */
1008	addrs[i] = ctx->idx * 4;
1009
1010	return 0;
1011}
1012
1013/* Fix the branch target addresses for subprog calls */
1014static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
1015				       struct codegen_context *ctx, u32 *addrs)
1016{
1017	const struct bpf_insn *insn = fp->insnsi;
1018	bool func_addr_fixed;
1019	u64 func_addr;
1020	u32 tmp_idx;
1021	int i, ret;
1022
1023	for (i = 0; i < fp->len; i++) {
1024		/*
1025		 * During the extra pass, only the branch target addresses for
1026		 * the subprog calls need to be fixed. All other instructions
 1027		 * can be left untouched.
1028		 *
1029		 * The JITed image length does not change because we already
 1030		 * ensure that the JITed instruction sequences for these calls
1031		 * are of fixed length by padding them with NOPs.
1032		 */
1033		if (insn[i].code == (BPF_JMP | BPF_CALL) &&
1034		    insn[i].src_reg == BPF_PSEUDO_CALL) {
1035			ret = bpf_jit_get_func_addr(fp, &insn[i], true,
1036						    &func_addr,
1037						    &func_addr_fixed);
1038			if (ret < 0)
1039				return ret;
1040
1041			/*
1042			 * Save ctx->idx as this would currently point to the
1043			 * end of the JITed image and set it to the offset of
1044			 * the instruction sequence corresponding to the
1045			 * subprog call temporarily.
1046			 */
1047			tmp_idx = ctx->idx;
1048			ctx->idx = addrs[i] / 4;
1049			bpf_jit_emit_func_call_rel(image, ctx, func_addr);
1050
1051			/*
1052			 * Restore ctx->idx here. This is safe as the length
1053			 * of the JITed sequence remains unchanged.
1054			 */
1055			ctx->idx = tmp_idx;
1056		}
1057	}
1058
1059	return 0;
1060}
1061
1062struct powerpc64_jit_data {
1063	struct bpf_binary_header *header;
1064	u32 *addrs;
1065	u8 *image;
1066	u32 proglen;
1067	struct codegen_context ctx;
1068};
1069
1070bool bpf_jit_needs_zext(void)
1071{
1072	return true;
1073}
1074
1075struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
1076{
1077	u32 proglen;
1078	u32 alloclen;
1079	u8 *image = NULL;
1080	u32 *code_base;
1081	u32 *addrs;
1082	struct powerpc64_jit_data *jit_data;
1083	struct codegen_context cgctx;
1084	int pass;
1085	int flen;
1086	struct bpf_binary_header *bpf_hdr;
1087	struct bpf_prog *org_fp = fp;
1088	struct bpf_prog *tmp_fp;
1089	bool bpf_blinded = false;
1090	bool extra_pass = false;
1091
1092	if (!fp->jit_requested)
1093		return org_fp;
1094
1095	tmp_fp = bpf_jit_blind_constants(org_fp);
1096	if (IS_ERR(tmp_fp))
1097		return org_fp;
1098
1099	if (tmp_fp != org_fp) {
1100		bpf_blinded = true;
1101		fp = tmp_fp;
1102	}
1103
1104	jit_data = fp->aux->jit_data;
1105	if (!jit_data) {
1106		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1107		if (!jit_data) {
1108			fp = org_fp;
1109			goto out;
1110		}
1111		fp->aux->jit_data = jit_data;
1112	}
1113
1114	flen = fp->len;
1115	addrs = jit_data->addrs;
1116	if (addrs) {
1117		cgctx = jit_data->ctx;
1118		image = jit_data->image;
1119		bpf_hdr = jit_data->header;
1120		proglen = jit_data->proglen;
1121		alloclen = proglen + FUNCTION_DESCR_SIZE;
1122		extra_pass = true;
1123		goto skip_init_ctx;
1124	}
1125
1126	addrs = kcalloc(flen + 1, sizeof(*addrs), GFP_KERNEL);
1127	if (addrs == NULL) {
1128		fp = org_fp;
1129		goto out_addrs;
1130	}
1131
1132	memset(&cgctx, 0, sizeof(struct codegen_context));
1133
1134	/* Make sure that the stack is quadword aligned. */
1135	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
1136
1137	/* Scouting faux-generate pass 0 */
1138	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
1139		/* We hit something illegal or unsupported. */
1140		fp = org_fp;
1141		goto out_addrs;
1142	}
1143
1144	/*
1145	 * If we have seen a tail call, we need a second pass.
1146	 * This is because bpf_jit_emit_common_epilogue() is called
1147	 * from bpf_jit_emit_tail_call() with a not yet stable ctx->seen.
1148	 */
1149	if (cgctx.seen & SEEN_TAILCALL) {
1150		cgctx.idx = 0;
1151		if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
1152			fp = org_fp;
1153			goto out_addrs;
1154		}
1155	}
1156
1157	/*
1158	 * Pretend to build prologue, given the features we've seen.  This will
 1159	 * update cgctx.idx as it pretends to output instructions, then we can
1160	 * calculate total size from idx.
1161	 */
1162	bpf_jit_build_prologue(0, &cgctx);
1163	bpf_jit_build_epilogue(0, &cgctx);
1164
1165	proglen = cgctx.idx * 4;
1166	alloclen = proglen + FUNCTION_DESCR_SIZE;
1167
1168	bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4,
1169			bpf_jit_fill_ill_insns);
1170	if (!bpf_hdr) {
1171		fp = org_fp;
1172		goto out_addrs;
1173	}
1174
1175skip_init_ctx:
1176	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
1177
1178	if (extra_pass) {
1179		/*
1180		 * Do not touch the prologue and epilogue as they will remain
1181		 * unchanged. Only fix the branch target address for subprog
1182		 * calls in the body.
1183		 *
 1184		 * This does not change the offsets and lengths of the subprog
 1185		 * call instruction sequences, and hence the size of the JITed
 1186		 * image stays the same.
1187		 */
1188		bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs);
1189
1190		/* There is no need to perform the usual passes. */
1191		goto skip_codegen_passes;
1192	}
1193
1194	/* Code generation passes 1-2 */
1195	for (pass = 1; pass < 3; pass++) {
1196		/* Now build the prologue, body code & epilogue for real. */
1197		cgctx.idx = 0;
1198		bpf_jit_build_prologue(code_base, &cgctx);
1199		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
1200		bpf_jit_build_epilogue(code_base, &cgctx);
1201
1202		if (bpf_jit_enable > 1)
1203			pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass,
1204				proglen - (cgctx.idx * 4), cgctx.seen);
1205	}
1206
1207skip_codegen_passes:
1208	if (bpf_jit_enable > 1)
1209		/*
 1210		 * Note that we dump code_base rather than image, since the
 1211		 * opcodes are in code_base.
1212		 */
1213		bpf_jit_dump(flen, proglen, pass, code_base);
1214
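	/*
	 * Under the ELFv1 ABI, fp->bpf_func points at a two-doubleword
	 * function descriptor {entry address, TOC pointer} at the start of
	 * image, while the instructions themselves begin at code_base.
	 */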
1215#ifdef PPC64_ELF_ABI_v1
1216	/* Function descriptor nastiness: Address + TOC */
1217	((u64 *)image)[0] = (u64)code_base;
1218	((u64 *)image)[1] = local_paca->kernel_toc;
1219#endif
1220
1221	fp->bpf_func = (void *)image;
1222	fp->jited = 1;
1223	fp->jited_len = alloclen;
1224
1225	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
1226	if (!fp->is_func || extra_pass) {
1227		bpf_prog_fill_jited_linfo(fp, addrs);
1228out_addrs:
1229		kfree(addrs);
1230		kfree(jit_data);
1231		fp->aux->jit_data = NULL;
1232	} else {
1233		jit_data->addrs = addrs;
1234		jit_data->ctx = cgctx;
1235		jit_data->proglen = proglen;
1236		jit_data->image = image;
1237		jit_data->header = bpf_hdr;
1238	}
1239
1240out:
1241	if (bpf_blinded)
1242		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
1243
1244	return fp;
1245}
1246
1247/* Overriding bpf_jit_free() as we don't set images read-only. */
1248void bpf_jit_free(struct bpf_prog *fp)
1249{
1250	unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
1251	struct bpf_binary_header *bpf_hdr = (void *)addr;
1252
1253	if (fp->jited)
1254		bpf_jit_binary_free(bpf_hdr);
1255
1256	bpf_prog_unlock_free(fp);
1257}