// SPDX-License-Identifier: GPL-2.0
/*
 * BPF JIT compiler for PA-RISC (32-bit)
 *
 * Copyright (c) 2023 Helge Deller <deller@gmx.de>
 *
 * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and
 * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan.
 */

#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/libgcc.h>
#include "bpf_jit.h"

/*
 * Stack layout during BPF program execution (note: stack grows up):
 *
 *                     high
 *   HPPA32 sp =>  +----------+ <= HPPA32 fp
 *                 | saved sp |
 *                 | saved rp |
 *                 |   ...    | HPPA32 callee-saved registers
 *                 | curr args|
 *                 | local var|
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS)
 *                 |  lo(R9)  |
 *                 |  hi(R9)  |
 *                 |  lo(FP)  | JIT scratch space for BPF registers
 *                 |  hi(FP)  |
 *                 |   ...    |
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS
 *                 |          |        - 4 * BPF_JIT_SCRATCH_REGS)
 *                 |          |
 *                 |   ...    | BPF program stack
 *                 |          |
 *                 |   ...    | Function call stack
 *                 |          |
 *                 +----------+
 *                     low
 */

enum {
	/* Stack layout - these are offsets from top of JIT scratch space. */
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	BPF_R0_TEMP_HI,
	BPF_R0_TEMP_LO,
	BPF_JIT_SCRATCH_REGS,
};

/*
 * Number of stack slots reserved below sp: the callee-saved registers
 * r3-r18 plus the 8-word frame marker area (which holds the saved sp
 * and rp).
 */
#define NR_SAVED_REGISTERS	(18 - 3 + 1 + 8)

/* Offset from fp for BPF registers stored on stack. */
#define STACK_OFFSET(k)	(- (NR_SAVED_REGISTERS + k + 1))
#define STACK_ALIGN	FRAME_SIZE
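
/*
 * A worked example of the scratch-slot arithmetic above, assuming
 * REG_SIZE == 4 (one 32-bit word): with NR_SAVED_REGISTERS == 24, the slot
 * for hi(R8) is STACK_OFFSET(BPF_R8_HI) == -(24 + 0 + 1) == -25, i.e. the
 * word at sp - 0x64; each following enum value lives one word further down.
 */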

#define EXIT_PTR_LOAD(reg)	hppa_ldw(-0x08, HPPA_REG_SP, reg)
#define EXIT_PTR_STORE(reg)	hppa_stw(reg, -0x08, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, reg, nop)

#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
#define TMP_REG_R0	(MAX_BPF_JIT_REG + 2)

static const s8 regmap[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF. */
	[BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1},		/* HI/LOW */

	/* Arguments from eBPF program to in-kernel function. */
	[BPF_REG_1] = {HPPA_R(3), HPPA_R(4)},
	[BPF_REG_2] = {HPPA_R(5), HPPA_R(6)},
	[BPF_REG_3] = {HPPA_R(7), HPPA_R(8)},
	[BPF_REG_4] = {HPPA_R(9), HPPA_R(10)},
	[BPF_REG_5] = {HPPA_R(11), HPPA_R(12)},

	[BPF_REG_6] = {HPPA_R(13), HPPA_R(14)},
	[BPF_REG_7] = {HPPA_R(15), HPPA_R(16)},
	/*
	 * Callee-saved registers that in-kernel function will preserve.
	 * Stored on the stack.
	 */
	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},

	/* Read-only frame pointer to access BPF stack. Not needed. */
	[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},

	/* Temporary register for blinding constants. Stored on the stack. */
	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
	/*
	 * Temporary registers used by the JIT to operate on registers stored
	 * on the stack. Save t0 and t1 to be used as temporaries in generated
	 * code.
	 */
	[TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2},
	[TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4},

	/* temporary space for BPF_R0 during libgcc and millicode calls */
	[TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)},
};

static s8 hi(const s8 *r)
{
	return r[0];
}

static s8 lo(const s8 *r)
{
	return r[1];
}

static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && (rs == rd))
		return;
	REG_SET_SEEN(ctx, rs);
	emit(hppa_copy(rs, rd), ctx);
}

static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, r1);
	REG_SET_SEEN(ctx, r2);
	REG_SET_SEEN(ctx, r3);
	if (OPTIMIZE_HPPA && (r1 == r2)) {
		emit(hppa_copy(HPPA_REG_ZERO, r3), ctx);
	} else {
		emit(hppa_xor(r1, r2, r3), ctx);
	}
}

static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx)
{
	u32 lower = im11(imm);

	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
		emit(hppa_ldi(imm, rd), ctx);
		return;
	}
	emit(hppa_ldil(imm, rd), ctx);
	if (OPTIMIZE_HPPA && (lower == 0))
		return;
	emit(hppa_ldo(lower, rd, rd), ctx);
}
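
/*
 * For example, a constant that does not fit into 14 bits, say 0x12345678,
 * is built with the usual PA-RISC two-instruction pair, roughly:
 *
 *	ldil	L%0x12345678, rd	; upper 21 bits
 *	ldo	R%0x12345678(rd), rd	; add lower 11 bits
 */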

static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx)
{
	/* Emit immediate into lower bits. */
	REG_SET_SEEN(ctx, lo(rd));
	emit_imm(lo(rd), imm, ctx);

	/* Sign-extend into upper bits. */
	REG_SET_SEEN(ctx, hi(rd));
	if (imm >= 0)
		emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	else
		emit(hppa_ldi(-1, hi(rd)), ctx);
}

static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
		       struct hppa_jit_context *ctx)
{
	emit_imm(hi(rd), imm_hi, ctx);
	emit_imm(lo(rd), imm_lo, ctx);
}

static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
{
	const s8 *r0 = regmap[BPF_REG_0];
	int i;

	if (is_tail_call) {
		/*
		 * goto *(t0 + 4);
		 * Skips first instruction of prologue which initializes tail
		 * call counter. Assumes t0 contains address of target program,
		 * see emit_bpf_tail_call.
		 */
		emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
		emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
		/* in delay slot: */
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);

		return;
	}

	/* load epilogue function pointer and jump to it. */
	/* exit point is either directly below, or the outermost TCC exit function */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/* NOTE: we are 32-bit and big-endian, so return lower 32-bit value */
	emit_hppa_copy(lo(r0), HPPA_REG_RET0, ctx);

	/* Restore callee-saved registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_ldw(-REG_SIZE * (8 + (i-3)), HPPA_REG_SP, HPPA_R(i)), ctx);
	}

	/* load original return pointer (stored by outermost TCC function) */
	emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx);
	emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
	/* in delay slot: */
	emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx);
}
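
/*
 * Both exits above rely on PA-RISC branch delay slots: the instruction
 * emitted directly after bv still executes before control transfers, which
 * is why the TCC copy (tail-call path) and the stack pointer restore
 * (normal path) are placed after their branch.
 */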

static bool is_stacked(s8 reg)
{
	return reg < 0;
}

static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp,
		u16 offset_sp, struct hppa_jit_context *ctx)
{
	if (is_stacked(hi(reg))) {
		emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx);
		emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx);
		reg = tmp;
	}
	REG_SET_SEEN(ctx, hi(reg));
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}

static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp,
			       struct hppa_jit_context *ctx)
{
	return bpf_get_reg64_offset(reg, tmp, 0, ctx);
}

static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp,
		bool must_load, struct hppa_jit_context *ctx)
{
	if (!OPTIMIZE_HPPA)
		return bpf_get_reg64(reg, tmp, ctx);

	if (is_stacked(hi(reg))) {
		if (must_load)
			emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx);
		reg = tmp;
	}
	REG_SET_SEEN(ctx, hi(reg));
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}


static void bpf_put_reg64(const s8 *reg, const s8 *src,
			  struct hppa_jit_context *ctx)
{
	if (is_stacked(hi(reg))) {
		emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
	}
}
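
/*
 * Typical usage of the get/put helpers above, as an illustrative sketch:
 *
 *	const s8 *rd = bpf_get_reg64(dst, regmap[TMP_REG_1], ctx);
 *	emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
 *	bpf_put_reg64(dst, rd, ctx);
 *
 * For BPF registers mapped to hard registers the get/put steps emit
 * nothing; for stacked registers they emit the ldw/stw pairs shown above.
 */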

static void bpf_save_R0(struct hppa_jit_context *ctx)
{
	bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
}

static void bpf_restore_R0(struct hppa_jit_context *ctx)
{
	bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
}


static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp,
			       struct hppa_jit_context *ctx)
{
	if (is_stacked(lo(reg))) {
		emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx);
		reg = tmp;
	}
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}

static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp,
		struct hppa_jit_context *ctx)
{
	if (!OPTIMIZE_HPPA)
		return bpf_get_reg32(reg, tmp, ctx);

	if (is_stacked(hi(reg))) {
		reg = tmp;
	}
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}

static void bpf_put_reg32(const s8 *reg, const s8 *src,
			  struct hppa_jit_context *ctx)
{
	if (is_stacked(lo(reg))) {
		REG_SET_SEEN(ctx, lo(src));
		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
		if (1 && !ctx->prog->aux->verifier_zext) {
			REG_SET_SEEN(ctx, hi(reg));
			emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
		}
	} else if (1 && !ctx->prog->aux->verifier_zext) {
		REG_SET_SEEN(ctx, hi(reg));
		emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
	}
}

/* extern hppa millicode functions */
extern void $$mulI(void);
extern void $$divU(void);
extern void $$remU(void);

static void emit_call_millicode(void *func, const s8 arg0,
		const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u32 func_addr;

	emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
	emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);

	/* millicode overwrites HPPA_REG_RET0/1, so temporarily save R0 on the stack */
	if (arg0 != HPPA_REG_RET1)
		bpf_save_R0(ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
	/* skip the following be_l instruction if divisor is zero. */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx);
	}
	/* Note: millicode functions use r31 as return pointer instead of rp */
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	emit(hppa_nop(), ctx); /* this nop is needed here for delay slot */

	/* Note: millicode functions return result in RET1, not RET0 */
	emit_hppa_copy(HPPA_REG_RET1, arg0, ctx);

	/* restore HPPA_REG_RET0/1 from the stack */
	if (arg0 != HPPA_REG_RET1)
		bpf_restore_R0(ctx);
}
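
/*
 * The millicode routines ($$mulI, $$divU, $$remU) follow their own PA-RISC
 * convention rather than the C ABI: arguments are passed in ARG0/ARG1, the
 * result comes back in RET1, and r31 serves as the return pointer instead
 * of rp, which is why the call is emitted as an ldil/be,l pair.
 */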

static void emit_call_libgcc_ll(void *func, const s8 *arg0,
		const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u32 func_addr;

	emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx);
	emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx);
	emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx);

	/* libgcc overwrites HPPA_REG_RET0/_RET1, so keep copy of R0 on stack */
	if (hi(arg0) != HPPA_REG_RET0)
		bpf_save_R0(ctx);

	/* prepare stack */
	emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
	/* skip the following be_l instruction if divisor is 0 (and set default values) */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx);
	}
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore stack */
	emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx);

	/* restore HPPA_REG_RET0/_RET1 */
	if (hi(arg0) != HPPA_REG_RET0)
		bpf_restore_R0(ctx);
}

static void emit_jump(s32 paoff, bool force_far,
			       struct hppa_jit_context *ctx)
{
	unsigned long pc, addr;

	/* Note: allocate 2 instructions for jumps if force_far is set. */
	if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) {
		/* use BL,short branch followed by nop() */
		emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx);
		if (force_far)
			emit(hppa_nop(), ctx);
		return;
	}

	pc = (uintptr_t) &ctx->insns[ctx->ninsns];
	addr = pc + (paoff * HPPA_INSN_SIZE);
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); // be,l,n addr(sr4,r31), %sr0, %r31
}
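
/*
 * A jump therefore lowers to one of two shapes, roughly:
 *
 *	bl	target, %r0		; near, +/-17-bit displacement
 * or
 *	ldil	L%target, %r31		; far: build the address, then
 *	be,l	R%target(%sr4, %r31)	; branch external
 *
 * With force_far set, the near form is padded with a nop so that both
 * shapes occupy two slots and the emitted length is displacement-independent.
 */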

static void emit_alu_i64(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (0 && op == BPF_MOV)
		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
	else
		rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm32(rd, imm, ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		if (imm >= 0)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		if (imm < 0)
			emit_imm(hi(rd), -1, ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		if (imm < 0) {
			emit_imm(HPPA_REG_T0, -1, ctx);
			emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx);
		}
		break;
	case BPF_LSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(lo(rd), hi(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else {
			emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx);
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		}
		break;
	case BPF_RSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_shr(hi(rd), imm, lo(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_shr(hi(rd), imm, hi(rd)), ctx);
		}
		break;
	case BPF_ARSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx);
		}
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg64(dst, rd, ctx);
}
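
/*
 * Shift example: a 64-bit left shift by 40 has no native form, so with
 * imm reduced to 40 - 32 = 8 the code above emits, in effect:
 *
 *	hi = lo << 8;	(zdep)
 *	lo = 0;		(copy from r0)
 *
 * while shifts below 32 first move the bits that cross the word boundary
 * via shd/shrpw and then shift the remaining word.
 */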

static void emit_alu_i32(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm(lo(rd), imm, ctx);
		break;
	case BPF_ADD:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		break;
	case BPF_LSH:
		if (imm != 0)
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		break;
	case BPF_RSH:
		if (imm != 0)
			emit(hppa_shr(lo(rd), imm, lo(rd)), ctx);
		break;
	case BPF_ARSH:
		if (imm != 0)
			emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx);
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg32(dst, rd, ctx);
}

static void emit_alu_r64(const s8 *dst, const s8 *src,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd;
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);

	if (op == BPF_MOV)
		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
	else
		rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP src */
	switch (op) {
	case BPF_MOV:
		emit_hppa_copy(lo(rs), lo(rd), ctx);
		emit_hppa_copy(hi(rs), hi(rd), ctx);
		break;
	case BPF_ADD:
		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_SUB:
		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_AND:
		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_OR:
		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_XOR:
		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
		emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx);
		break;
	case BPF_MUL:
		emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx);
		break;
	case BPF_DIV:
		emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx);
		break;
	case BPF_MOD:
		emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx);
		break;
	case BPF_LSH:
		emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx);
		break;
	case BPF_RSH:
		emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx);
		break;
	case BPF_ARSH:
		emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx);
		break;
	case BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);
		emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx);
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg64(dst, rd, ctx);
}

static void emit_alu_r32(const s8 *dst, const s8 *src,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd;
	const s8 *rs = bpf_get_reg32(src, tmp2, ctx);

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP src */
	switch (op) {
	case BPF_MOV:
		emit_hppa_copy(lo(rs), lo(rd), ctx);
		break;
	case BPF_ADD:
		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_AND:
		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_OR:
		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
		break;
	case BPF_MUL:
		emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_DIV:
		emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_MOD:
		emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_LSH:
		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
		emit(hppa_mtsar(HPPA_REG_T0), ctx);
		emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_RSH:
		emit(hppa_mtsar(lo(rs)), ctx);
		emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_ARSH: /* sign extending arithmetic shift right */
		// emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx);
		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
		emit(hppa_mtsar(HPPA_REG_T0), ctx);
		emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);  // sub r0,rd,rd
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg32(dst, rd, ctx);
}
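
/*
 * PA-RISC has no shift instruction that takes the amount from a general
 * register, so the variable-shift cases above go through the Shift Amount
 * Register: load SAR with mtsar (the deposit/extract based left and
 * arithmetic-right forms need 31 - n, the logical right form takes n
 * directly), then issue a single SAR-driven depw/shrpw/extrw.
 */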

static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	int e, s = ctx->ninsns;
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx);
	const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx);

	/*
	 * NO_JUMP skips over the rest of the instructions and the
	 * emit_jump, meaning the BPF branch is not taken.
	 * JUMP skips directly to the emit_jump, meaning
	 * the BPF branch is taken.
	 *
	 * The fallthrough case results in the BPF branch being taken.
	 */
#define NO_JUMP(idx)	(2 + (idx) - 1)
#define JUMP(idx)	(0 + (idx) - 1)

	switch (op) {
	case BPF_JEQ:
		emit(hppa_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(hi(rs1), hi(rs2), JUMP(1)), ctx);
		emit(hppa_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSET:
		emit(hppa_and(hi(rs1), hi(rs2), HPPA_REG_T0), ctx);
		emit(hppa_and(lo(rs1), lo(rs2), HPPA_REG_T1), ctx);
		emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx);
		emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx);
		break;
	default:
		WARN_ON(1);
	}

#undef NO_JUMP
#undef JUMP

	e = ctx->ninsns;
	/* Adjust for extra insns. */
	paoff -= (e - s);
	emit_jump(paoff, true, ctx);
	return 0;
}
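
/*
 * Example: BPF_JGT decomposes into at most three native branches on the
 * 32-bit halves (unsigned; the signed variants differ only in the
 * high-word tests):
 *
 *	if (hi(rs1) >  hi(rs2)) goto taken;
 *	if (hi(rs1) <  hi(rs2)) goto not_taken;
 *	if (lo(rs1) <= lo(rs2)) goto not_taken;
 *   taken:
 *	emit_jump(...)
 *   not_taken:
 */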

static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx)
{
	int e, s;
	bool far = false;
	int off;

	if (op == BPF_JSET) {
		/*
		 * BPF_JSET is a special case: it has no inverse so we always
		 * treat it as a far branch.
		 */
		emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
		paoff -= 1; /* reduce offset due to hppa_and() above */
		rd = HPPA_REG_T0;
		rs = HPPA_REG_ZERO;
		op = BPF_JNE;
	}

	s = ctx->ninsns;

	if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12)) {
		op = invert_bpf_cond(op);
		far = true;
	}

	/*
	 * For a far branch, the condition is negated and we jump over the
	 * branch itself, and the two instructions from emit_jump.
	 * For a near branch, just use paoff.
	 */
	off = far ? (HPPA_BRANCH_DISPLACEMENT - 1) : paoff - HPPA_BRANCH_DISPLACEMENT;

	switch (op) {
	/* IF (dst COND src) JUMP off */
	case BPF_JEQ:
		emit(hppa_beq(rd, rs, off), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(rd, rs, off), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(rd, rs, off), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgeu(rd, rs, off), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bleu(rd, rs, off), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(rd, rs, off), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(rd, rs, off), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(rd, rs, off), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bge(rd, rs, off), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_ble(rd, rs, off), ctx);
		break;
	default:
		WARN_ON(1);
	}

	if (far) {
		e = ctx->ninsns;
		/* Adjust for extra insns. */
		paoff -= (e - s);
		emit_jump(paoff, true, ctx);
	}
	return 0;
}
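
/*
 * Far-branch example: a conditional like "jeq rd, rs, +5000" exceeds the
 * 12-bit displacement, so it is emitted with the condition inverted,
 * jumping over an absolute jump (roughly):
 *
 *	hppa_bne(rd, rs, over)	; inverted test
 *	hppa_ldil(target, r31)	; two-insn absolute jump
 *	hppa_be_l(target, r31)	; from emit_jump()
 *   over:
 */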

static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	int e, s = ctx->ninsns;
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx);
	const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx);

	e = ctx->ninsns;
	/* Adjust for extra insns. */
	paoff -= (e - s);

	if (emit_bcc(op, lo(rs1), lo(rs2), paoff, ctx))
		return -1;

	return 0;
}

static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *r0 = regmap[BPF_REG_0];
	const s8 *reg;
	const int offset_sp = 2 * STACK_ALIGN;

	/* prepare stack */
	emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* pass R1 & R2 in registers, R3-R5 on the stack. */
	reg = bpf_get_reg64_offset(regmap[BPF_REG_5], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x48, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x44, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_4], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x40, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x3c, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_3], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x38, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x34, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_2], tmp, offset_sp, ctx);
	emit_hppa_copy(hi(reg), HPPA_REG_ARG3, ctx);
	emit_hppa_copy(lo(reg), HPPA_REG_ARG2, ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_1], tmp, offset_sp, ctx);
	emit_hppa_copy(hi(reg), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(reg), HPPA_REG_ARG0, ctx);

	/* backup TCC */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);

	/*
	 * Use ldil() to load absolute address. Don't use emit_imm as the
	 * number of emitted instructions should not depend on the value of
	 * addr.
	 */
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	/* set return address in delay slot */
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore TCC */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);

	/* restore stack */
	emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* set return value. */
	emit_hppa_copy(HPPA_REG_RET0, hi(r0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(r0), ctx);
}
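
/*
 * The fixed-length ldil/be,l call sequence matters beyond reach: the JIT
 * translates the program in several passes, and branch offsets can only
 * converge if the size of a call site does not change once the helper
 * address becomes known in a later pass.
 */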

static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
{
	/*
	 * R1 -> &ctx
	 * R2 -> &array
	 * R3 -> index
	 */
	int off;
	const s8 *arr_reg = regmap[BPF_REG_2];
	const s8 *idx_reg = regmap[BPF_REG_3];
	struct bpf_array bpfa;
	struct bpf_prog bpfp;

	/* get address of TCC main exit function for error case into rp */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);

	/* max_entries = array->map.max_entries; */
	off = offsetof(struct bpf_array, map.max_entries);
	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
	emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx);

	/*
	 * if (index >= max_entries)
	 *   goto out;
	 */
	emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * if (--tcc < 0)
	 *   goto out;
	 */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *   goto out;
	 */
	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4);
	emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx);
	off = offsetof(struct bpf_array, ptrs);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * tcc = temp_tcc;
	 * goto *(prog->bpf_func + 4);
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4);
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	/* Epilogue jumps to *(t0 + 4). */
	__build_epilogue(true, ctx);
	return 0;
}
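
/*
 * Net effect of the sequence above, in pseudo-C:
 *
 *	if (index >= array->map.max_entries || --tcc < 0 ||
 *	    !(prog = array->ptrs[index]))
 *		goto out;
 *	goto *(prog->bpf_func + 4);	// +4 skips the TCC-init insn
 */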

static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
			 struct hppa_jit_context *ctx, const u8 size)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 srcreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		srcreg = lo(rs);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		srcreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rs)), ctx);
		off = im11(off);
	}

	/* LDX: dst = *(size *)(src + off) */
	switch (size) {
	case BPF_B:
		emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_H:
		emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_W:
		emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_DW:
		emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx);
		emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx);
		break;
	}

	bpf_put_reg64(dst, rd, ctx);
	return 0;
}
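
/*
 * Large-offset example: for off = 0x12345 the 14-bit displacement is
 * exceeded, so the load is emitted roughly as
 *
 *	addil	L%0x12345, base		; left part + base -> %r1
 *	ldw	R%0x12345(%r1), dst	; 11-bit right part
 *
 * which is why srcreg is forced to HPPA_REG_R1 in that branch.
 */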

static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
			  struct hppa_jit_context *ctx, const u8 size,
			  const u8 mode)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 dstreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		dstreg = lo(rd);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		dstreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rd)), ctx);
		off = im11(off);
	}

	/* ST: *(size *)(dst + off) = imm */
	switch (size) {
	case BPF_B:
		emit(hppa_stb(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_H:
		emit(hppa_sth(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_W:
		emit(hppa_stw(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_DW:
		emit(hppa_stw(hi(rs), off + 0, dstreg), ctx);
		emit(hppa_stw(lo(rs), off + 4, dstreg), ctx);
		break;
	}

	return 0;
}

static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
}
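
/*
 * Both byte-swap helpers work entirely in registers: emit_rev16 swaps the
 * two low bytes with an extract/deposit sequence, while emit_rev32 (below)
 * reverses all four bytes using two shift-pair (shrpw) steps plus one byte
 * deposit, avoiding any memory round trip.
 */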

static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx);
	emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx);
	emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx);
}

static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx)
{
	const s8 *rd;
	const s8 *tmp1 = regmap[TMP_REG_1];

	rd = bpf_get_reg64(dst, tmp1, ctx);
	emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	bpf_put_reg64(dst, rd, ctx);
}

int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, paoff, i = insn - ctx->prog->insnsi;
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	const s8 *dst = regmap[insn->dst_reg];
	const s8 *src = regmap[insn->src_reg];
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	if (0) printk("CLASS %03d  CODE %#02x ALU64:%d BPF_SIZE %#02x  "
		"BPF_CODE %#02x  src_reg %d  dst_reg %d\n",
		BPF_CLASS(code), code, (code & BPF_ALU64) ? 1:0, BPF_SIZE(code),
		BPF_OP(code), insn->src_reg, insn->dst_reg);

	switch (code) {
	/* dst = src */
	case BPF_ALU64 | BPF_MOV | BPF_X:

	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:

	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:

	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:

	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:

	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_K:

	case BPF_ALU64 | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:

	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r64(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU64 | BPF_NEG:
		emit_alu_r64(dst, tmp2, ctx, BPF_OP(code));
		break;

	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_alu_i64(dst, imm, ctx, BPF_OP(code));
		break;

	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext. */
			emit_zext64(dst, ctx);
			break;
		}
		fallthrough;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_X:

	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:

	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_DIV | BPF_K:

	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:

	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r32(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
		/*
		 * mul,div,mod are handled in the BPF_X case.
		 */
		emit_alu_i32(dst, imm, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		/*
		 * src is ignored---choose tmp2 as a dummy register since it
		 * is not on the stack.
		 */
		emit_alu_r32(dst, tmp2, ctx, BPF_OP(code));
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx);
			fallthrough;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Do nothing. */
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}

	case BPF_ALU | BPF_END | BPF_FROM_LE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			emit_rev16(lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 32:
			emit_rev32(lo(rd), lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Swap upper and lower halves, then each half. */
			emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx);
			emit_rev32(lo(rd), hi(rd), ctx);
			emit_rev32(HPPA_REG_T0, lo(rd), ctx);
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}
	/* JUMP off */
	case BPF_JMP | BPF_JA:
		paoff = hppa_offset(i, off, ctx);
		emit_jump(paoff, false, ctx);
		break;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		int ret;
		u64 addr;

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		emit_call(fixed, addr, ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		REG_SET_SEEN_ALL(ctx);
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_K:

	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_K:

	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_K:

	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_K:

	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_K:

	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_K:

	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_K:

	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_K:

	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_K:

	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_K:

	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		if (BPF_SRC(code) == BPF_K) {
			s = ctx->ninsns;
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
			e = ctx->ninsns;
			paoff -= (e - s);
		}
		if (is64)
			emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code));
		else
			emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code));
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;
		/* load epilogue function pointer and jump to it. */
		emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
		emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u32 upper = insn1.imm;
		u32 lower = imm;
		const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);

		if (0 && bpf_pseudo_func(insn)) {
			WARN_ON(upper); /* we are 32-bit! */
			upper = 0;
			lower = (uintptr_t) dereference_function_descriptor(lower);
		}

		emit_imm64(rd, upper, lower, ctx);
		bpf_put_reg64(dst, rd, ctx);
		return 1;	/* tell the caller we consumed the next insn as well */
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code)))
			return -1;
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:

	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		if (BPF_CLASS(code) == BPF_ST) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}

		if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
				   BPF_MODE(code)))
			return -1;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		pr_info_once(
			"bpf-jit: not supported: atomic operation %02x ***\n",
			insn->imm);
		return -EFAULT;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *dst, *reg;
	int stack_adjust = 0;
	int i;
	unsigned long addr;
	int bpf_stack_adjust;

	/*
	 * stack on hppa grows up, so if tail calls are used we need to
	 * allocate the maximum stack size: a tail-called program skips
	 * this prologue and keeps running in the caller's frame.
	 */
	if (REG_ALL_SEEN(ctx))
		bpf_stack_adjust = MAX_BPF_STACK;
	else
		bpf_stack_adjust = ctx->prog->aux->stack_depth;
	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);

	/* make space for callee-saved registers. */
	stack_adjust += NR_SAVED_REGISTERS * REG_SIZE;
	/* make space for BPF registers on stack. */
	stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE;
	/* make space for BPF stack. */
	stack_adjust += bpf_stack_adjust;
	/* round up for stack alignment. */
	stack_adjust = round_up(stack_adjust, STACK_ALIGN);

	/*
	 * The first instruction sets the tail-call-counter (TCC) register.
	 * This instruction is skipped by tail calls.
	 * Use a temporary register instead of a caller-saved register initially.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);

	/*
	 * skip all initializations when called as BPF TAIL call.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
	emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx);

	/* set up hppa stack frame. */
	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);			// copy sp,r1 (=prev_sp)
	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);	// ldo stack_adjust(sp),sp (increase stack)
	emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);	// stw prev_sp,-0x04(sp)
	emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx);		// stw rp,-0x14(sp)

	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
	REG_FORCE_SEEN(ctx, HPPA_REG_T2);
	REG_FORCE_SEEN(ctx, HPPA_REG_T3);
	REG_FORCE_SEEN(ctx, HPPA_REG_T4);
	REG_FORCE_SEEN(ctx, HPPA_REG_T5);

	/* save callee-saved registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx);	// stw ri,-save_area(sp)
	}

	/*
	 * now really set the tail call counter (TCC) register.
	 */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);

	/*
	 * save epilogue function pointer for outer TCC call chain.
	 * The main TCC call stores the final RP on stack.
	 */
	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
	/* skip first two instructions of exit function, which jump to exit */
	addr += 2 * HPPA_INSN_SIZE;
	emit(hppa_ldil(addr, HPPA_REG_T2), ctx);
	emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx);
	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);

	/* load R1 & R2 from registers, R3-R5 from stack. */
	/* use HPPA_REG_R1 which holds the old stack pointer value */
	dst = regmap[BPF_REG_5];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_4];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_3];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_2];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_1];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	/* Set up BPF frame pointer. */
	dst = regmap[BPF_REG_FP];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS),
				HPPA_REG_SP, lo(reg)), ctx);
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	emit(hppa_nop(), ctx);
}

void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}