Linux Audio

Check our new training course

Loading...
Note: File does not exist in v6.8.
   1// SPDX-License-Identifier: GPL-2.0
   2/* BPF JIT compiler for RV64G
   3 *
   4 * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
   5 *
   6 */
   7
   8#include <linux/bpf.h>
   9#include <linux/filter.h>
  10#include <asm/cacheflush.h>
  11
/* RV64 integer register numbers, by ABI name.  These values double as
 * bit indices in rv_jit_context::flags (see the RV_CTX_F_* enum).
 */
enum {
	RV_REG_ZERO =	0,	/* The constant value 0 */
	RV_REG_RA =	1,	/* Return address */
	RV_REG_SP =	2,	/* Stack pointer */
	RV_REG_GP =	3,	/* Global pointer */
	RV_REG_TP =	4,	/* Thread pointer */
	RV_REG_T0 =	5,	/* Temporaries */
	RV_REG_T1 =	6,
	RV_REG_T2 =	7,
	RV_REG_FP =	8,	/* Frame pointer (s0) */
	RV_REG_S1 =	9,	/* Saved registers */
	RV_REG_A0 =	10,	/* Function argument/return values */
	RV_REG_A1 =	11,	/* Function arguments */
	RV_REG_A2 =	12,
	RV_REG_A3 =	13,
	RV_REG_A4 =	14,
	RV_REG_A5 =	15,
	RV_REG_A6 =	16,
	RV_REG_A7 =	17,
	RV_REG_S2 =	18,	/* Saved registers */
	RV_REG_S3 =	19,
	RV_REG_S4 =	20,
	RV_REG_S5 =	21,
	RV_REG_S6 =	22,
	RV_REG_S7 =	23,
	RV_REG_S8 =	24,
	RV_REG_S9 =	25,
	RV_REG_S10 =	26,
	RV_REG_S11 =	27,
	RV_REG_T3 =	28,	/* Temporaries */
	RV_REG_T4 =	29,
	RV_REG_T5 =	30,
	RV_REG_T6 =	31,
};
  46
/* Tail-call count register.  It lives in a6 (caller-saved), so when the
 * program makes helper calls it is shadowed in s6 across them.
 */
#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if the program makes calls */

/* Mapping from BPF registers to RV64 registers.  BPF_REG_0 deliberately
 * maps to a5 rather than a0 (a0 holds BPF_REG_1/ctx); the epilogue moves
 * a5 into a0 before returning.
 */
static const int regmap[] = {
	[BPF_REG_0] =	RV_REG_A5,
	[BPF_REG_1] =	RV_REG_A0,
	[BPF_REG_2] =	RV_REG_A1,
	[BPF_REG_3] =	RV_REG_A2,
	[BPF_REG_4] =	RV_REG_A3,
	[BPF_REG_5] =	RV_REG_A4,
	[BPF_REG_6] =	RV_REG_S1,
	[BPF_REG_7] =	RV_REG_S2,
	[BPF_REG_8] =	RV_REG_S3,
	[BPF_REG_9] =	RV_REG_S4,
	[BPF_REG_FP] =	RV_REG_S5,
	[BPF_REG_AX] =	RV_REG_T0,
};
  64
/* Bit positions in rv_jit_context::flags that record which callee-saved
 * registers (and ra) the program actually uses, so the prologue/epilogue
 * only spill/restore what is needed.  Each flag is intentionally equal to
 * the register's number, letting bpf_to_rv_reg()/seen_reg() use the
 * register number directly as the bit index.  Bit 0 (x0/zero, which never
 * needs tracking) is reused for "tail call seen".
 */
enum {
	RV_CTX_F_SEEN_TAIL_CALL =	0,
	RV_CTX_F_SEEN_CALL =		RV_REG_RA,
	RV_CTX_F_SEEN_S1 =		RV_REG_S1,
	RV_CTX_F_SEEN_S2 =		RV_REG_S2,
	RV_CTX_F_SEEN_S3 =		RV_REG_S3,
	RV_CTX_F_SEEN_S4 =		RV_REG_S4,
	RV_CTX_F_SEEN_S5 =		RV_REG_S5,
	RV_CTX_F_SEEN_S6 =		RV_REG_S6,
};
  75
/* Per-compilation state for one BPF program. */
struct rv_jit_context {
	struct bpf_prog *prog;	/* program being JITed */
	u32 *insns;		/* RV insns; NULL during the sizing pass */
	int ninsns;		/* number of RV insns emitted so far */
	int epilogue_offset;	/* RV insn index where the epilogue starts */
	int *offset;		/* BPF insn index -> RV insn index */
	unsigned long flags;	/* RV_CTX_F_* seen-register bitmap */
	int stack_size;		/* frame size, in bytes */
};

/* JIT state kept across passes (attached to prog->aux->jit_data). */
struct rv_jit_data {
	struct bpf_binary_header *header;	/* executable image allocation */
	u8 *image;				/* start of the JITed code */
	struct rv_jit_context ctx;
};
  91
  92static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
  93{
  94	u8 reg = regmap[bpf_reg];
  95
  96	switch (reg) {
  97	case RV_CTX_F_SEEN_S1:
  98	case RV_CTX_F_SEEN_S2:
  99	case RV_CTX_F_SEEN_S3:
 100	case RV_CTX_F_SEEN_S4:
 101	case RV_CTX_F_SEEN_S5:
 102	case RV_CTX_F_SEEN_S6:
 103		__set_bit(reg, &ctx->flags);
 104	}
 105	return reg;
 106};
 107
 108static bool seen_reg(int reg, struct rv_jit_context *ctx)
 109{
 110	switch (reg) {
 111	case RV_CTX_F_SEEN_CALL:
 112	case RV_CTX_F_SEEN_S1:
 113	case RV_CTX_F_SEEN_S2:
 114	case RV_CTX_F_SEEN_S3:
 115	case RV_CTX_F_SEEN_S4:
 116	case RV_CTX_F_SEEN_S5:
 117	case RV_CTX_F_SEEN_S6:
 118		return test_bit(reg, &ctx->flags);
 119	}
 120	return false;
 121}
 122
/* Record that the program contains a helper/function call. */
static void mark_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

/* Does the program contain a call? */
static bool seen_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

/* Record that the program contains a tail call. */
static void mark_tail_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}

/* Does the program contain a tail call? */
static bool seen_tail_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}
 142
 143static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
 144{
 145	mark_tail_call(ctx);
 146
 147	if (seen_call(ctx)) {
 148		__set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
 149		return RV_REG_S6;
 150	}
 151	return RV_REG_A6;
 152}
 153
/* Append one RV instruction to the image.  During the sizing pass
 * ctx->insns is NULL, so only the instruction count advances.
 */
static void emit(const u32 insn, struct rv_jit_context *ctx)
{
	if (ctx->insns)
		ctx->insns[ctx->ninsns] = insn;

	ctx->ninsns++;
}
 161
/* Raw RISC-V instruction-format encoders.  Field layouts follow the
 * RISC-V base ISA formats (R/I/S/SB/U/UJ).  For the branch (SB) and
 * jump (UJ) forms the immediate argument holds the byte offset already
 * shifted right by one, since bit 0 of a branch target is always zero.
 */

/* R-type: funct7 | rs2 | rs1 | funct3 | rd | opcode */
static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode)
{
	return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
		(rd << 7) | opcode;
}

/* I-type: imm[11:0] | rs1 | funct3 | rd | opcode */
static u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode)
{
	return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) |
		opcode;
}

/* S-type (stores): the 12-bit immediate is split imm[11:5] / imm[4:0]. */
static u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
{
	u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f;

	return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
		(imm4_0 << 7) | opcode;
}

/* SB-type (branches): imm12_1 holds offset>>1; the encoding scatters the
 * offset bits as insn[31]=off[12], insn[30:25]=off[10:5],
 * insn[11:8]=off[4:1], insn[7]=off[11].
 */
static u32 rv_sb_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode)
{
	u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4);
	u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10);

	return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) |
		(imm4_1 << 7) | opcode;
}

/* U-type (lui/auipc): imm[31:12] | rd | opcode */
static u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode)
{
	return (imm31_12 << 12) | (rd << 7) | opcode;
}

/* UJ-type (jal): imm20_1 holds offset>>1; the encoding scatters the
 * offset bits as insn[31]=off[20], insn[30:21]=off[10:1],
 * insn[20]=off[11], insn[19:12]=off[19:12].
 */
static u32 rv_uj_insn(u32 imm20_1, u8 rd, u8 opcode)
{
	u32 imm;

	imm = (imm20_1 & 0x80000) |  ((imm20_1 & 0x3ff) << 9) |
	      ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11);

	return (imm << 12) | (rd << 7) | opcode;
}

/* AMO (RV64A): funct5 plus acquire/release bits packed into funct7. */
static u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1,
		       u8 funct3, u8 rd, u8 opcode)
{
	u8 funct7 = (funct5 << 2) | (aq << 1) | rl;

	return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode);
}
 213
/* ALU, multiply/divide and shift instruction builders.  addiw/addi take
 * a 12-bit immediate; the rest are register-register.  The *w variants
 * are the RV64 32-bit "word" forms (opcode 0x3b/0x1b), which operate on
 * the low 32 bits and sign-extend the result.
 */

static u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b);
}

static u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 0, rd, 0x13);
}

static u32 rv_addw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b);
}

static u32 rv_add(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 0, rd, 0x33);
}

static u32 rv_subw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b);
}

static u32 rv_sub(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33);
}

static u32 rv_and(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 7, rd, 0x33);
}

static u32 rv_or(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 6, rd, 0x33);
}

static u32 rv_xor(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 4, rd, 0x33);
}

/* M-extension ops: funct7 == 1. */
static u32 rv_mulw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b);
}

static u32 rv_mul(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(1, rs2, rs1, 0, rd, 0x33);
}

static u32 rv_divuw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b);
}

static u32 rv_divu(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(1, rs2, rs1, 5, rd, 0x33);
}

static u32 rv_remuw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b);
}

static u32 rv_remu(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(1, rs2, rs1, 7, rd, 0x33);
}

static u32 rv_sllw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b);
}

static u32 rv_sll(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 1, rd, 0x33);
}

static u32 rv_srlw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b);
}

static u32 rv_srl(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0, rs2, rs1, 5, rd, 0x33);
}

/* Arithmetic right shifts: funct7 bit 5 (0x20) selects sra vs srl. */
static u32 rv_sraw(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b);
}

static u32 rv_sra(u8 rd, u8 rs1, u8 rs2)
{
	return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33);
}
 318
/* Upper-immediate and immediate-operand instruction builders. */

static u32 rv_lui(u8 rd, u32 imm31_12)
{
	return rv_u_insn(imm31_12, rd, 0x37);
}

static u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 1, rd, 0x13);
}

static u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 7, rd, 0x13);
}

static u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 6, rd, 0x13);
}

static u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 4, rd, 0x13);
}

static u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b);
}

static u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b);
}

static u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 5, rd, 0x13);
}

/* 0x400 sets imm bit 10 (insn bit 30), selecting the arithmetic shift. */
static u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b);
}

static u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13);
}
 368
/* Jump and conditional-branch builders.  Branch/jump immediates are the
 * byte offset shifted right by one (see rv_sb_insn()/rv_uj_insn()).
 */

static u32 rv_jal(u8 rd, u32 imm20_1)
{
	return rv_uj_insn(imm20_1, rd, 0x6f);
}

static u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0)
{
	return rv_i_insn(imm11_0, rs1, 0, rd, 0x67);
}

static u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1)
{
	return rv_sb_insn(imm12_1, rs2, rs1, 0, 0x63);
}

static u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1)
{
	return rv_sb_insn(imm12_1, rs2, rs1, 6, 0x63);
}

static u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1)
{
	return rv_sb_insn(imm12_1, rs2, rs1, 7, 0x63);
}

static u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1)
{
	return rv_sb_insn(imm12_1, rs2, rs1, 1, 0x63);
}

static u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1)
{
	return rv_sb_insn(imm12_1, rs2, rs1, 4, 0x63);
}

static u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1)
{
	return rv_sb_insn(imm12_1, rs2, rs1, 5, 0x63);
}
 408
/* Store, load and atomic builders.  Stores take (base, offset, source);
 * loads take (dest, offset, base).  funct3 encodes the access width.
 */

static u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2)
{
	return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23);
}

static u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2)
{
	return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23);
}

static u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2)
{
	return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23);
}

static u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2)
{
	return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23);
}

/* Zero-extending loads (lbu/lhu/lwu) and the 64-bit ld. */
static u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1)
{
	return rv_i_insn(imm11_0, rs1, 4, rd, 0x03);
}

static u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1)
{
	return rv_i_insn(imm11_0, rs1, 5, rd, 0x03);
}

static u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1)
{
	return rv_i_insn(imm11_0, rs1, 6, rd, 0x03);
}

static u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1)
{
	return rv_i_insn(imm11_0, rs1, 3, rd, 0x03);
}

/* Atomic add (RV64A), 32- and 64-bit variants. */
static u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
{
	return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f);
}

static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
{
	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
 458
/* Does @val fit a signed N-bit immediate?  The 13- and 21-bit variants
 * correspond to branch and jal ranges, whose low bit is implicit.
 */

static bool is_12b_int(s64 val)
{
	return -(1 << 11) <= val && val < (1 << 11);
}

static bool is_13b_int(s64 val)
{
	return -(1 << 12) <= val && val < (1 << 12);
}

static bool is_21b_int(s64 val)
{
	return -(1L << 20) <= val && val < (1L << 20);
}

static bool is_32b_int(s64 val)
{
	return -(1L << 31) <= val && val < (1L << 31);
}
 478
 479static int is_12b_check(int off, int insn)
 480{
 481	if (!is_12b_int(off)) {
 482		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
 483		       insn, (int)off);
 484		return -1;
 485	}
 486	return 0;
 487}
 488
 489static int is_13b_check(int off, int insn)
 490{
 491	if (!is_13b_int(off)) {
 492		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
 493		       insn, (int)off);
 494		return -1;
 495	}
 496	return 0;
 497}
 498
 499static int is_21b_check(int off, int insn)
 500{
 501	if (!is_21b_int(off)) {
 502		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
 503		       insn, (int)off);
 504		return -1;
 505	}
 506	return 0;
 507}
 508
/* Materialize the 64-bit constant @val into @rd.
 *
 * 32-bit values use the classic lui+addiw pair.  Larger values are built
 * recursively: emit the upper part, shift it left, then or-in (via addi)
 * the low 12 bits.  __ffs strips trailing zero bits of the upper part so
 * they are folded into a single larger shift instead of extra insns.
 */
static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
{
	/* Note that the immediate from the add is sign-extended,
	 * which means that we need to compensate this by adding 2^12,
	 * when the 12th bit is set. A simpler way of doing this, and
	 * getting rid of the check, is to just add 2**11 before the
	 * shift. The "Loading a 32-Bit constant" example from the
	 * "Computer Organization and Design, RISC-V edition" book by
	 * Patterson/Hennessy highlights this fact.
	 *
	 * This also means that we need to process LSB to MSB.
	 */
	s64 upper = (val + (1 << 11)) >> 12, lower = val & 0xfff;
	int shift;

	if (is_32b_int(val)) {
		if (upper)
			emit(rv_lui(rd, upper), ctx);

		if (!upper) {
			/* No upper part: a single addi from x0 suffices. */
			emit(rv_addi(rd, RV_REG_ZERO, lower), ctx);
			return;
		}

		emit(rv_addiw(rd, rd, lower), ctx);
		return;
	}

	/* > 32 bits: recurse on the upper part, minus trailing zeros. */
	shift = __ffs(upper);
	upper >>= shift;
	shift += 12;

	emit_imm(rd, upper, ctx);

	emit(rv_slli(rd, rd, shift), ctx);
	if (lower)
		emit(rv_addi(rd, rd, lower), ctx);
}
 547
 548static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx)
 549{
 550	int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to];
 551
 552	return (to - from) << 2;
 553}
 554
 555static int epilogue_offset(struct rv_jit_context *ctx)
 556{
 557	int to = ctx->epilogue_offset, from = ctx->ninsns;
 558
 559	return (to - from) << 2;
 560}
 561
/* Emit the function epilogue: restore ra/fp and any callee-saved
 * registers the program used (per ctx->flags), pop the frame, move the
 * BPF return value (a5) into a0, and jump through @reg -- ra for a
 * normal return, t3 for a tail call (see emit_bpf_tail_call()).
 *
 * NOTE(review): the restore offsets must mirror the prologue's store
 * layout (not visible in this chunk) -- keep the two in sync.
 */
static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
{
	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;

	if (seen_reg(RV_REG_RA, ctx)) {
		emit(rv_ld(RV_REG_RA, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}
	/* fp is always saved, unconditionally. */
	emit(rv_ld(RV_REG_FP, store_offset, RV_REG_SP), ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit(rv_ld(RV_REG_S1, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit(rv_ld(RV_REG_S2, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit(rv_ld(RV_REG_S3, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit(rv_ld(RV_REG_S4, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit(rv_ld(RV_REG_S5, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit(rv_ld(RV_REG_S6, store_offset, RV_REG_SP), ctx);
		store_offset -= 8;
	}

	emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
	/* Set return value. */
	emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
	emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
}
 602
/* Zero-extend the low 32 bits of @reg by shifting out the upper half. */
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
{
	emit(rv_slli(reg, reg, 32), ctx);
	emit(rv_srli(reg, reg, 32), ctx);
}
 608
/* Emit the BPF tail-call sequence.  All "goto out" branches target the
 * first instruction after this sequence; tc_ninsn is the RV length
 * reserved for this BPF insn, so (tc_ninsn - emitted-so-far) is the
 * remaining distance.  Branch immediates are in 2-byte units (>> 1).
 */
static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
{
	int tc_ninsn, off, start_insn = ctx->ninsns;
	u8 tcc = rv_tail_call_reg(ctx);

	/* a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
	tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
		   ctx->offset[0];
	emit_zext_32(RV_REG_A2, ctx);

	off = offsetof(struct bpf_array, map.max_entries);
	if (is_12b_check(off, insn))
		return -1;
	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
	if (is_13b_check(off, insn))
		return -1;
	emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx);

	/* if (--TCC < 0)
	 *     goto out;
	 */
	emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
	if (is_13b_check(off, insn))
		return -1;
	emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx);

	/* prog = array->ptrs[index];
	 * if (!prog)
	 *     goto out;
	 */
	emit(rv_slli(RV_REG_T2, RV_REG_A2, 3), ctx);	/* index * 8 */
	emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_A1), ctx);
	off = offsetof(struct bpf_array, ptrs);
	if (is_12b_check(off, insn))
		return -1;
	emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
	if (is_13b_check(off, insn))
		return -1;
	emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx);

	/* goto *(prog->bpf_func + 4);
	 * The +4 skips the target program's prologue insn that
	 * (re)initializes TCC; the decremented count is carried in T1.
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	if (is_12b_check(off, insn))
		return -1;
	emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
	emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx);
	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
	__build_epilogue(RV_REG_T3, ctx);
	return 0;
}
 668
/* Resolve the RV destination/source registers for @insn, marking any
 * callee-saved registers as used (via bpf_to_rv_reg()).
 *
 * NOTE(review): the "if" below tests bit overlap (code & MASK), not
 * equality, so it matches many more opcodes than the BPF_X/LDX/STX
 * forms it names.  That only over-approximates which source registers
 * get marked as seen (extra spills at worst) -- confirm intended.
 */
static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
		      struct rv_jit_context *ctx)
{
	u8 code = insn->code;

	switch (code) {
	/* These instructions have no destination register. */
	case BPF_JMP | BPF_JA:
	case BPF_JMP | BPF_CALL:
	case BPF_JMP | BPF_EXIT:
	case BPF_JMP | BPF_TAIL_CALL:
		break;
	default:
		*rd = bpf_to_rv_reg(insn->dst_reg, ctx);
	}

	if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
	    code & BPF_LDX || code & BPF_STX)
		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}
 689
 690static int rv_offset_check(int *rvoff, s16 off, int insn,
 691			   struct rv_jit_context *ctx)
 692{
 693	*rvoff = rv_offset(insn + off, insn, ctx);
 694	return is_13b_check(*rvoff, insn);
 695}
 696
/* For 32-bit (BPF_JMP32/BPF_ALU) comparisons the operands are widened
 * into the temporaries t1/t2 so the originals stay intact; the callers
 * then compare the redirected *rd/*rs instead.
 */

/* Zero-extended copies of both operands into t2 (rd) and t1 (rs). */
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
	emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
	emit_zext_32(RV_REG_T2, ctx);
	emit(rv_addi(RV_REG_T1, *rs, 0), ctx);
	emit_zext_32(RV_REG_T1, ctx);
	*rd = RV_REG_T2;
	*rs = RV_REG_T1;
}

/* Sign-extended copies of both operands (addiw sign-extends on RV64). */
static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
	emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
	emit(rv_addiw(RV_REG_T1, *rs, 0), ctx);
	*rd = RV_REG_T2;
	*rs = RV_REG_T1;
}

/* Zero-extend a copy of rd into t2, and zero-extend t1 in place
 * (t1 already holds an immediate loaded by the caller).
 */
static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
{
	emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
	emit_zext_32(RV_REG_T2, ctx);
	emit_zext_32(RV_REG_T1, ctx);
	*rd = RV_REG_T2;
}

/* Sign-extended copy of rd into t2. */
static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
{
	emit(rv_addiw(RV_REG_T2, *rd, 0), ctx);
	*rd = RV_REG_T2;
}
 728
 729static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 730		     bool extra_pass)
 731{
 732	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
 733		    BPF_CLASS(insn->code) == BPF_JMP;
 734	struct bpf_prog_aux *aux = ctx->prog->aux;
 735	int rvoff, i = insn - ctx->prog->insnsi;
 736	u8 rd = -1, rs = -1, code = insn->code;
 737	s16 off = insn->off;
 738	s32 imm = insn->imm;
 739
 740	init_regs(&rd, &rs, insn, ctx);
 741
 742	switch (code) {
 743	/* dst = src */
 744	case BPF_ALU | BPF_MOV | BPF_X:
 745	case BPF_ALU64 | BPF_MOV | BPF_X:
 746		if (imm == 1) {
 747			/* Special mov32 for zext */
 748			emit_zext_32(rd, ctx);
 749			break;
 750		}
 751		emit(is64 ? rv_addi(rd, rs, 0) : rv_addiw(rd, rs, 0), ctx);
 752		if (!is64 && !aux->verifier_zext)
 753			emit_zext_32(rd, ctx);
 754		break;
 755
 756	/* dst = dst OP src */
 757	case BPF_ALU | BPF_ADD | BPF_X:
 758	case BPF_ALU64 | BPF_ADD | BPF_X:
 759		emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx);
 760		if (!is64 && !aux->verifier_zext)
 761			emit_zext_32(rd, ctx);
 762		break;
 763	case BPF_ALU | BPF_SUB | BPF_X:
 764	case BPF_ALU64 | BPF_SUB | BPF_X:
 765		emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx);
 766		if (!is64 && !aux->verifier_zext)
 767			emit_zext_32(rd, ctx);
 768		break;
 769	case BPF_ALU | BPF_AND | BPF_X:
 770	case BPF_ALU64 | BPF_AND | BPF_X:
 771		emit(rv_and(rd, rd, rs), ctx);
 772		if (!is64 && !aux->verifier_zext)
 773			emit_zext_32(rd, ctx);
 774		break;
 775	case BPF_ALU | BPF_OR | BPF_X:
 776	case BPF_ALU64 | BPF_OR | BPF_X:
 777		emit(rv_or(rd, rd, rs), ctx);
 778		if (!is64 && !aux->verifier_zext)
 779			emit_zext_32(rd, ctx);
 780		break;
 781	case BPF_ALU | BPF_XOR | BPF_X:
 782	case BPF_ALU64 | BPF_XOR | BPF_X:
 783		emit(rv_xor(rd, rd, rs), ctx);
 784		if (!is64 && !aux->verifier_zext)
 785			emit_zext_32(rd, ctx);
 786		break;
 787	case BPF_ALU | BPF_MUL | BPF_X:
 788	case BPF_ALU64 | BPF_MUL | BPF_X:
 789		emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
 790		if (!is64 && !aux->verifier_zext)
 791			emit_zext_32(rd, ctx);
 792		break;
 793	case BPF_ALU | BPF_DIV | BPF_X:
 794	case BPF_ALU64 | BPF_DIV | BPF_X:
 795		emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
 796		if (!is64 && !aux->verifier_zext)
 797			emit_zext_32(rd, ctx);
 798		break;
 799	case BPF_ALU | BPF_MOD | BPF_X:
 800	case BPF_ALU64 | BPF_MOD | BPF_X:
 801		emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
 802		if (!is64 && !aux->verifier_zext)
 803			emit_zext_32(rd, ctx);
 804		break;
 805	case BPF_ALU | BPF_LSH | BPF_X:
 806	case BPF_ALU64 | BPF_LSH | BPF_X:
 807		emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
 808		if (!is64)
 809			emit_zext_32(rd, ctx);
 810		break;
 811	case BPF_ALU | BPF_RSH | BPF_X:
 812	case BPF_ALU64 | BPF_RSH | BPF_X:
 813		emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
 814		if (!is64 && !aux->verifier_zext)
 815			emit_zext_32(rd, ctx);
 816		break;
 817	case BPF_ALU | BPF_ARSH | BPF_X:
 818	case BPF_ALU64 | BPF_ARSH | BPF_X:
 819		emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
 820		if (!is64 && !aux->verifier_zext)
 821			emit_zext_32(rd, ctx);
 822		break;
 823
 824	/* dst = -dst */
 825	case BPF_ALU | BPF_NEG:
 826	case BPF_ALU64 | BPF_NEG:
 827		emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) :
 828		     rv_subw(rd, RV_REG_ZERO, rd), ctx);
 829		if (!is64 && !aux->verifier_zext)
 830			emit_zext_32(rd, ctx);
 831		break;
 832
 833	/* dst = BSWAP##imm(dst) */
 834	case BPF_ALU | BPF_END | BPF_FROM_LE:
 835	{
 836		int shift = 64 - imm;
 837
 838		emit(rv_slli(rd, rd, shift), ctx);
 839		emit(rv_srli(rd, rd, shift), ctx);
 840		break;
 841	}
 842	case BPF_ALU | BPF_END | BPF_FROM_BE:
 843		emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
 844
 845		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 846		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 847		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 848		emit(rv_srli(rd, rd, 8), ctx);
 849		if (imm == 16)
 850			goto out_be;
 851
 852		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 853		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 854		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 855		emit(rv_srli(rd, rd, 8), ctx);
 856
 857		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 858		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 859		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 860		emit(rv_srli(rd, rd, 8), ctx);
 861		if (imm == 32)
 862			goto out_be;
 863
 864		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 865		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 866		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 867		emit(rv_srli(rd, rd, 8), ctx);
 868
 869		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 870		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 871		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 872		emit(rv_srli(rd, rd, 8), ctx);
 873
 874		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 875		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 876		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 877		emit(rv_srli(rd, rd, 8), ctx);
 878
 879		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 880		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 881		emit(rv_slli(RV_REG_T2, RV_REG_T2, 8), ctx);
 882		emit(rv_srli(rd, rd, 8), ctx);
 883out_be:
 884		emit(rv_andi(RV_REG_T1, rd, 0xff), ctx);
 885		emit(rv_add(RV_REG_T2, RV_REG_T2, RV_REG_T1), ctx);
 886
 887		emit(rv_addi(rd, RV_REG_T2, 0), ctx);
 888		break;
 889
 890	/* dst = imm */
 891	case BPF_ALU | BPF_MOV | BPF_K:
 892	case BPF_ALU64 | BPF_MOV | BPF_K:
 893		emit_imm(rd, imm, ctx);
 894		if (!is64 && !aux->verifier_zext)
 895			emit_zext_32(rd, ctx);
 896		break;
 897
 898	/* dst = dst OP imm */
 899	case BPF_ALU | BPF_ADD | BPF_K:
 900	case BPF_ALU64 | BPF_ADD | BPF_K:
 901		if (is_12b_int(imm)) {
 902			emit(is64 ? rv_addi(rd, rd, imm) :
 903			     rv_addiw(rd, rd, imm), ctx);
 904		} else {
 905			emit_imm(RV_REG_T1, imm, ctx);
 906			emit(is64 ? rv_add(rd, rd, RV_REG_T1) :
 907			     rv_addw(rd, rd, RV_REG_T1), ctx);
 908		}
 909		if (!is64 && !aux->verifier_zext)
 910			emit_zext_32(rd, ctx);
 911		break;
 912	case BPF_ALU | BPF_SUB | BPF_K:
 913	case BPF_ALU64 | BPF_SUB | BPF_K:
 914		if (is_12b_int(-imm)) {
 915			emit(is64 ? rv_addi(rd, rd, -imm) :
 916			     rv_addiw(rd, rd, -imm), ctx);
 917		} else {
 918			emit_imm(RV_REG_T1, imm, ctx);
 919			emit(is64 ? rv_sub(rd, rd, RV_REG_T1) :
 920			     rv_subw(rd, rd, RV_REG_T1), ctx);
 921		}
 922		if (!is64 && !aux->verifier_zext)
 923			emit_zext_32(rd, ctx);
 924		break;
 925	case BPF_ALU | BPF_AND | BPF_K:
 926	case BPF_ALU64 | BPF_AND | BPF_K:
 927		if (is_12b_int(imm)) {
 928			emit(rv_andi(rd, rd, imm), ctx);
 929		} else {
 930			emit_imm(RV_REG_T1, imm, ctx);
 931			emit(rv_and(rd, rd, RV_REG_T1), ctx);
 932		}
 933		if (!is64 && !aux->verifier_zext)
 934			emit_zext_32(rd, ctx);
 935		break;
 936	case BPF_ALU | BPF_OR | BPF_K:
 937	case BPF_ALU64 | BPF_OR | BPF_K:
 938		if (is_12b_int(imm)) {
 939			emit(rv_ori(rd, rd, imm), ctx);
 940		} else {
 941			emit_imm(RV_REG_T1, imm, ctx);
 942			emit(rv_or(rd, rd, RV_REG_T1), ctx);
 943		}
 944		if (!is64 && !aux->verifier_zext)
 945			emit_zext_32(rd, ctx);
 946		break;
 947	case BPF_ALU | BPF_XOR | BPF_K:
 948	case BPF_ALU64 | BPF_XOR | BPF_K:
 949		if (is_12b_int(imm)) {
 950			emit(rv_xori(rd, rd, imm), ctx);
 951		} else {
 952			emit_imm(RV_REG_T1, imm, ctx);
 953			emit(rv_xor(rd, rd, RV_REG_T1), ctx);
 954		}
 955		if (!is64 && !aux->verifier_zext)
 956			emit_zext_32(rd, ctx);
 957		break;
 958	case BPF_ALU | BPF_MUL | BPF_K:
 959	case BPF_ALU64 | BPF_MUL | BPF_K:
 960		emit_imm(RV_REG_T1, imm, ctx);
 961		emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
 962		     rv_mulw(rd, rd, RV_REG_T1), ctx);
 963		if (!is64 && !aux->verifier_zext)
 964			emit_zext_32(rd, ctx);
 965		break;
 966	case BPF_ALU | BPF_DIV | BPF_K:
 967	case BPF_ALU64 | BPF_DIV | BPF_K:
 968		emit_imm(RV_REG_T1, imm, ctx);
 969		emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
 970		     rv_divuw(rd, rd, RV_REG_T1), ctx);
 971		if (!is64 && !aux->verifier_zext)
 972			emit_zext_32(rd, ctx);
 973		break;
 974	case BPF_ALU | BPF_MOD | BPF_K:
 975	case BPF_ALU64 | BPF_MOD | BPF_K:
 976		emit_imm(RV_REG_T1, imm, ctx);
 977		emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
 978		     rv_remuw(rd, rd, RV_REG_T1), ctx);
 979		if (!is64 && !aux->verifier_zext)
 980			emit_zext_32(rd, ctx);
 981		break;
 982	case BPF_ALU | BPF_LSH | BPF_K:
 983	case BPF_ALU64 | BPF_LSH | BPF_K:
 984		emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
 985		if (!is64)
 986			emit_zext_32(rd, ctx);
 987		break;
 988	case BPF_ALU | BPF_RSH | BPF_K:
 989	case BPF_ALU64 | BPF_RSH | BPF_K:
 990		emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
 991		if (!is64)
 992			emit_zext_32(rd, ctx);
 993		break;
 994	case BPF_ALU | BPF_ARSH | BPF_K:
 995	case BPF_ALU64 | BPF_ARSH | BPF_K:
 996		emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
 997		if (!is64)
 998			emit_zext_32(rd, ctx);
 999		break;
1000
1001	/* JUMP off */
1002	case BPF_JMP | BPF_JA:
1003		rvoff = rv_offset(i + off, i, ctx);
1004		if (!is_21b_int(rvoff)) {
1005			pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
1006			       i, rvoff);
1007			return -1;
1008		}
1009
1010		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
1011		break;
1012
1013	/* IF (dst COND src) JUMP off */
1014	case BPF_JMP | BPF_JEQ | BPF_X:
1015	case BPF_JMP32 | BPF_JEQ | BPF_X:
1016		if (rv_offset_check(&rvoff, off, i, ctx))
1017			return -1;
1018		if (!is64)
1019			emit_zext_32_rd_rs(&rd, &rs, ctx);
1020		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
1021		break;
1022	case BPF_JMP | BPF_JGT | BPF_X:
1023	case BPF_JMP32 | BPF_JGT | BPF_X:
1024		if (rv_offset_check(&rvoff, off, i, ctx))
1025			return -1;
1026		if (!is64)
1027			emit_zext_32_rd_rs(&rd, &rs, ctx);
1028		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
1029		break;
1030	case BPF_JMP | BPF_JLT | BPF_X:
1031	case BPF_JMP32 | BPF_JLT | BPF_X:
1032		if (rv_offset_check(&rvoff, off, i, ctx))
1033			return -1;
1034		if (!is64)
1035			emit_zext_32_rd_rs(&rd, &rs, ctx);
1036		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
1037		break;
1038	case BPF_JMP | BPF_JGE | BPF_X:
1039	case BPF_JMP32 | BPF_JGE | BPF_X:
1040		if (rv_offset_check(&rvoff, off, i, ctx))
1041			return -1;
1042		if (!is64)
1043			emit_zext_32_rd_rs(&rd, &rs, ctx);
1044		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
1045		break;
1046	case BPF_JMP | BPF_JLE | BPF_X:
1047	case BPF_JMP32 | BPF_JLE | BPF_X:
1048		if (rv_offset_check(&rvoff, off, i, ctx))
1049			return -1;
1050		if (!is64)
1051			emit_zext_32_rd_rs(&rd, &rs, ctx);
1052		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
1053		break;
1054	case BPF_JMP | BPF_JNE | BPF_X:
1055	case BPF_JMP32 | BPF_JNE | BPF_X:
1056		if (rv_offset_check(&rvoff, off, i, ctx))
1057			return -1;
1058		if (!is64)
1059			emit_zext_32_rd_rs(&rd, &rs, ctx);
1060		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
1061		break;
1062	case BPF_JMP | BPF_JSGT | BPF_X:
1063	case BPF_JMP32 | BPF_JSGT | BPF_X:
1064		if (rv_offset_check(&rvoff, off, i, ctx))
1065			return -1;
1066		if (!is64)
1067			emit_sext_32_rd_rs(&rd, &rs, ctx);
1068		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
1069		break;
1070	case BPF_JMP | BPF_JSLT | BPF_X:
1071	case BPF_JMP32 | BPF_JSLT | BPF_X:
1072		if (rv_offset_check(&rvoff, off, i, ctx))
1073			return -1;
1074		if (!is64)
1075			emit_sext_32_rd_rs(&rd, &rs, ctx);
1076		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
1077		break;
1078	case BPF_JMP | BPF_JSGE | BPF_X:
1079	case BPF_JMP32 | BPF_JSGE | BPF_X:
1080		if (rv_offset_check(&rvoff, off, i, ctx))
1081			return -1;
1082		if (!is64)
1083			emit_sext_32_rd_rs(&rd, &rs, ctx);
1084		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
1085		break;
1086	case BPF_JMP | BPF_JSLE | BPF_X:
1087	case BPF_JMP32 | BPF_JSLE | BPF_X:
1088		if (rv_offset_check(&rvoff, off, i, ctx))
1089			return -1;
1090		if (!is64)
1091			emit_sext_32_rd_rs(&rd, &rs, ctx);
1092		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
1093		break;
1094	case BPF_JMP | BPF_JSET | BPF_X:
1095	case BPF_JMP32 | BPF_JSET | BPF_X:
1096		if (rv_offset_check(&rvoff, off, i, ctx))
1097			return -1;
1098		if (!is64)
1099			emit_zext_32_rd_rs(&rd, &rs, ctx);
1100		emit(rv_and(RV_REG_T1, rd, rs), ctx);
1101		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
1102		break;
1103
1104	/* IF (dst COND imm) JUMP off */
1105	case BPF_JMP | BPF_JEQ | BPF_K:
1106	case BPF_JMP32 | BPF_JEQ | BPF_K:
1107		if (rv_offset_check(&rvoff, off, i, ctx))
1108			return -1;
1109		emit_imm(RV_REG_T1, imm, ctx);
1110		if (!is64)
1111			emit_zext_32_rd_t1(&rd, ctx);
1112		emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx);
1113		break;
1114	case BPF_JMP | BPF_JGT | BPF_K:
1115	case BPF_JMP32 | BPF_JGT | BPF_K:
1116		if (rv_offset_check(&rvoff, off, i, ctx))
1117			return -1;
1118		emit_imm(RV_REG_T1, imm, ctx);
1119		if (!is64)
1120			emit_zext_32_rd_t1(&rd, ctx);
1121		emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx);
1122		break;
1123	case BPF_JMP | BPF_JLT | BPF_K:
1124	case BPF_JMP32 | BPF_JLT | BPF_K:
1125		if (rv_offset_check(&rvoff, off, i, ctx))
1126			return -1;
1127		emit_imm(RV_REG_T1, imm, ctx);
1128		if (!is64)
1129			emit_zext_32_rd_t1(&rd, ctx);
1130		emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx);
1131		break;
1132	case BPF_JMP | BPF_JGE | BPF_K:
1133	case BPF_JMP32 | BPF_JGE | BPF_K:
1134		if (rv_offset_check(&rvoff, off, i, ctx))
1135			return -1;
1136		emit_imm(RV_REG_T1, imm, ctx);
1137		if (!is64)
1138			emit_zext_32_rd_t1(&rd, ctx);
1139		emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx);
1140		break;
1141	case BPF_JMP | BPF_JLE | BPF_K:
1142	case BPF_JMP32 | BPF_JLE | BPF_K:
1143		if (rv_offset_check(&rvoff, off, i, ctx))
1144			return -1;
1145		emit_imm(RV_REG_T1, imm, ctx);
1146		if (!is64)
1147			emit_zext_32_rd_t1(&rd, ctx);
1148		emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx);
1149		break;
1150	case BPF_JMP | BPF_JNE | BPF_K:
1151	case BPF_JMP32 | BPF_JNE | BPF_K:
1152		if (rv_offset_check(&rvoff, off, i, ctx))
1153			return -1;
1154		emit_imm(RV_REG_T1, imm, ctx);
1155		if (!is64)
1156			emit_zext_32_rd_t1(&rd, ctx);
1157		emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx);
1158		break;
1159	case BPF_JMP | BPF_JSGT | BPF_K:
1160	case BPF_JMP32 | BPF_JSGT | BPF_K:
1161		if (rv_offset_check(&rvoff, off, i, ctx))
1162			return -1;
1163		emit_imm(RV_REG_T1, imm, ctx);
1164		if (!is64)
1165			emit_sext_32_rd(&rd, ctx);
1166		emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx);
1167		break;
1168	case BPF_JMP | BPF_JSLT | BPF_K:
1169	case BPF_JMP32 | BPF_JSLT | BPF_K:
1170		if (rv_offset_check(&rvoff, off, i, ctx))
1171			return -1;
1172		emit_imm(RV_REG_T1, imm, ctx);
1173		if (!is64)
1174			emit_sext_32_rd(&rd, ctx);
1175		emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx);
1176		break;
1177	case BPF_JMP | BPF_JSGE | BPF_K:
1178	case BPF_JMP32 | BPF_JSGE | BPF_K:
1179		if (rv_offset_check(&rvoff, off, i, ctx))
1180			return -1;
1181		emit_imm(RV_REG_T1, imm, ctx);
1182		if (!is64)
1183			emit_sext_32_rd(&rd, ctx);
1184		emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx);
1185		break;
1186	case BPF_JMP | BPF_JSLE | BPF_K:
1187	case BPF_JMP32 | BPF_JSLE | BPF_K:
1188		if (rv_offset_check(&rvoff, off, i, ctx))
1189			return -1;
1190		emit_imm(RV_REG_T1, imm, ctx);
1191		if (!is64)
1192			emit_sext_32_rd(&rd, ctx);
1193		emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx);
1194		break;
1195	case BPF_JMP | BPF_JSET | BPF_K:
1196	case BPF_JMP32 | BPF_JSET | BPF_K:
1197		if (rv_offset_check(&rvoff, off, i, ctx))
1198			return -1;
1199		emit_imm(RV_REG_T1, imm, ctx);
1200		if (!is64)
1201			emit_zext_32_rd_t1(&rd, ctx);
1202		emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
1203		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
1204		break;
1205
1206	/* function call */
1207	case BPF_JMP | BPF_CALL:
1208	{
1209		bool fixed;
1210		int i, ret;
1211		u64 addr;
1212
1213		mark_call(ctx);
1214		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
1215					    &fixed);
1216		if (ret < 0)
1217			return ret;
1218		if (fixed) {
1219			emit_imm(RV_REG_T1, addr, ctx);
1220		} else {
1221			i = ctx->ninsns;
1222			emit_imm(RV_REG_T1, addr, ctx);
1223			for (i = ctx->ninsns - i; i < 8; i++) {
1224				/* nop */
1225				emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0),
1226				     ctx);
1227			}
1228		}
1229		emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx);
1230		rd = bpf_to_rv_reg(BPF_REG_0, ctx);
1231		emit(rv_addi(rd, RV_REG_A0, 0), ctx);
1232		break;
1233	}
1234	/* tail call */
1235	case BPF_JMP | BPF_TAIL_CALL:
1236		if (emit_bpf_tail_call(i, ctx))
1237			return -1;
1238		break;
1239
1240	/* function return */
1241	case BPF_JMP | BPF_EXIT:
1242		if (i == ctx->prog->len - 1)
1243			break;
1244
1245		rvoff = epilogue_offset(ctx);
1246		if (is_21b_check(rvoff, i))
1247			return -1;
1248		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
1249		break;
1250
1251	/* dst = imm64 */
1252	case BPF_LD | BPF_IMM | BPF_DW:
1253	{
1254		struct bpf_insn insn1 = insn[1];
1255		u64 imm64;
1256
1257		imm64 = (u64)insn1.imm << 32 | (u32)imm;
1258		emit_imm(rd, imm64, ctx);
1259		return 1;
1260	}
1261
1262	/* LDX: dst = *(size *)(src + off) */
1263	case BPF_LDX | BPF_MEM | BPF_B:
1264		if (is_12b_int(off)) {
1265			emit(rv_lbu(rd, off, rs), ctx);
1266			break;
1267		}
1268
1269		emit_imm(RV_REG_T1, off, ctx);
1270		emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1271		emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
1272		if (insn_is_zext(&insn[1]))
1273			return 1;
1274		break;
1275	case BPF_LDX | BPF_MEM | BPF_H:
1276		if (is_12b_int(off)) {
1277			emit(rv_lhu(rd, off, rs), ctx);
1278			break;
1279		}
1280
1281		emit_imm(RV_REG_T1, off, ctx);
1282		emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1283		emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
1284		if (insn_is_zext(&insn[1]))
1285			return 1;
1286		break;
1287	case BPF_LDX | BPF_MEM | BPF_W:
1288		if (is_12b_int(off)) {
1289			emit(rv_lwu(rd, off, rs), ctx);
1290			break;
1291		}
1292
1293		emit_imm(RV_REG_T1, off, ctx);
1294		emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1295		emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
1296		if (insn_is_zext(&insn[1]))
1297			return 1;
1298		break;
1299	case BPF_LDX | BPF_MEM | BPF_DW:
1300		if (is_12b_int(off)) {
1301			emit(rv_ld(rd, off, rs), ctx);
1302			break;
1303		}
1304
1305		emit_imm(RV_REG_T1, off, ctx);
1306		emit(rv_add(RV_REG_T1, RV_REG_T1, rs), ctx);
1307		emit(rv_ld(rd, 0, RV_REG_T1), ctx);
1308		break;
1309
1310	/* ST: *(size *)(dst + off) = imm */
1311	case BPF_ST | BPF_MEM | BPF_B:
1312		emit_imm(RV_REG_T1, imm, ctx);
1313		if (is_12b_int(off)) {
1314			emit(rv_sb(rd, off, RV_REG_T1), ctx);
1315			break;
1316		}
1317
1318		emit_imm(RV_REG_T2, off, ctx);
1319		emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
1320		emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
1321		break;
1322
1323	case BPF_ST | BPF_MEM | BPF_H:
1324		emit_imm(RV_REG_T1, imm, ctx);
1325		if (is_12b_int(off)) {
1326			emit(rv_sh(rd, off, RV_REG_T1), ctx);
1327			break;
1328		}
1329
1330		emit_imm(RV_REG_T2, off, ctx);
1331		emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
1332		emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
1333		break;
1334	case BPF_ST | BPF_MEM | BPF_W:
1335		emit_imm(RV_REG_T1, imm, ctx);
1336		if (is_12b_int(off)) {
1337			emit(rv_sw(rd, off, RV_REG_T1), ctx);
1338			break;
1339		}
1340
1341		emit_imm(RV_REG_T2, off, ctx);
1342		emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
1343		emit(rv_sw(RV_REG_T2, 0, RV_REG_T1), ctx);
1344		break;
1345	case BPF_ST | BPF_MEM | BPF_DW:
1346		emit_imm(RV_REG_T1, imm, ctx);
1347		if (is_12b_int(off)) {
1348			emit(rv_sd(rd, off, RV_REG_T1), ctx);
1349			break;
1350		}
1351
1352		emit_imm(RV_REG_T2, off, ctx);
1353		emit(rv_add(RV_REG_T2, RV_REG_T2, rd), ctx);
1354		emit(rv_sd(RV_REG_T2, 0, RV_REG_T1), ctx);
1355		break;
1356
1357	/* STX: *(size *)(dst + off) = src */
1358	case BPF_STX | BPF_MEM | BPF_B:
1359		if (is_12b_int(off)) {
1360			emit(rv_sb(rd, off, rs), ctx);
1361			break;
1362		}
1363
1364		emit_imm(RV_REG_T1, off, ctx);
1365		emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
1366		emit(rv_sb(RV_REG_T1, 0, rs), ctx);
1367		break;
1368	case BPF_STX | BPF_MEM | BPF_H:
1369		if (is_12b_int(off)) {
1370			emit(rv_sh(rd, off, rs), ctx);
1371			break;
1372		}
1373
1374		emit_imm(RV_REG_T1, off, ctx);
1375		emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
1376		emit(rv_sh(RV_REG_T1, 0, rs), ctx);
1377		break;
1378	case BPF_STX | BPF_MEM | BPF_W:
1379		if (is_12b_int(off)) {
1380			emit(rv_sw(rd, off, rs), ctx);
1381			break;
1382		}
1383
1384		emit_imm(RV_REG_T1, off, ctx);
1385		emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
1386		emit(rv_sw(RV_REG_T1, 0, rs), ctx);
1387		break;
1388	case BPF_STX | BPF_MEM | BPF_DW:
1389		if (is_12b_int(off)) {
1390			emit(rv_sd(rd, off, rs), ctx);
1391			break;
1392		}
1393
1394		emit_imm(RV_REG_T1, off, ctx);
1395		emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
1396		emit(rv_sd(RV_REG_T1, 0, rs), ctx);
1397		break;
1398	/* STX XADD: lock *(u32 *)(dst + off) += src */
1399	case BPF_STX | BPF_XADD | BPF_W:
1400	/* STX XADD: lock *(u64 *)(dst + off) += src */
1401	case BPF_STX | BPF_XADD | BPF_DW:
1402		if (off) {
1403			if (is_12b_int(off)) {
1404				emit(rv_addi(RV_REG_T1, rd, off), ctx);
1405			} else {
1406				emit_imm(RV_REG_T1, off, ctx);
1407				emit(rv_add(RV_REG_T1, RV_REG_T1, rd), ctx);
1408			}
1409
1410			rd = RV_REG_T1;
1411		}
1412
1413		emit(BPF_SIZE(code) == BPF_W ?
1414		     rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) :
1415		     rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx);
1416		break;
1417	default:
1418		pr_err("bpf-jit: unknown opcode %02x\n", code);
1419		return -EINVAL;
1420	}
1421
1422	return 0;
1423}
1424
1425static void build_prologue(struct rv_jit_context *ctx)
1426{
1427	int stack_adjust = 0, store_offset, bpf_stack_adjust;
1428
1429	if (seen_reg(RV_REG_RA, ctx))
1430		stack_adjust += 8;
1431	stack_adjust += 8; /* RV_REG_FP */
1432	if (seen_reg(RV_REG_S1, ctx))
1433		stack_adjust += 8;
1434	if (seen_reg(RV_REG_S2, ctx))
1435		stack_adjust += 8;
1436	if (seen_reg(RV_REG_S3, ctx))
1437		stack_adjust += 8;
1438	if (seen_reg(RV_REG_S4, ctx))
1439		stack_adjust += 8;
1440	if (seen_reg(RV_REG_S5, ctx))
1441		stack_adjust += 8;
1442	if (seen_reg(RV_REG_S6, ctx))
1443		stack_adjust += 8;
1444
1445	stack_adjust = round_up(stack_adjust, 16);
1446	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
1447	stack_adjust += bpf_stack_adjust;
1448
1449	store_offset = stack_adjust - 8;
1450
1451	/* First instruction is always setting the tail-call-counter
1452	 * (TCC) register. This instruction is skipped for tail calls.
1453	 */
1454	emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);
1455
1456	emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
1457
1458	if (seen_reg(RV_REG_RA, ctx)) {
1459		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_RA), ctx);
1460		store_offset -= 8;
1461	}
1462	emit(rv_sd(RV_REG_SP, store_offset, RV_REG_FP), ctx);
1463	store_offset -= 8;
1464	if (seen_reg(RV_REG_S1, ctx)) {
1465		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S1), ctx);
1466		store_offset -= 8;
1467	}
1468	if (seen_reg(RV_REG_S2, ctx)) {
1469		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S2), ctx);
1470		store_offset -= 8;
1471	}
1472	if (seen_reg(RV_REG_S3, ctx)) {
1473		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S3), ctx);
1474		store_offset -= 8;
1475	}
1476	if (seen_reg(RV_REG_S4, ctx)) {
1477		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S4), ctx);
1478		store_offset -= 8;
1479	}
1480	if (seen_reg(RV_REG_S5, ctx)) {
1481		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S5), ctx);
1482		store_offset -= 8;
1483	}
1484	if (seen_reg(RV_REG_S6, ctx)) {
1485		emit(rv_sd(RV_REG_SP, store_offset, RV_REG_S6), ctx);
1486		store_offset -= 8;
1487	}
1488
1489	emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
1490
1491	if (bpf_stack_adjust)
1492		emit(rv_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust), ctx);
1493
1494	/* Program contains calls and tail calls, so RV_REG_TCC need
1495	 * to be saved across calls.
1496	 */
1497	if (seen_tail_call(ctx) && seen_call(ctx))
1498		emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
1499
1500	ctx->stack_size = stack_adjust;
1501}
1502
/* Emit the function epilogue for a normal return.
 *
 * NOTE(review): __build_epilogue() is defined earlier in this file and
 * not visible here; the register argument (RV_REG_RA) presumably
 * selects the register to jump through, i.e. a plain return to the
 * caller as opposed to the tail-call variant — confirm against its
 * definition.
 */
static void build_epilogue(struct rv_jit_context *ctx)
{
	__build_epilogue(RV_REG_RA, ctx);
}
1507
1508static int build_body(struct rv_jit_context *ctx, bool extra_pass)
1509{
1510	const struct bpf_prog *prog = ctx->prog;
1511	int i;
1512
1513	for (i = 0; i < prog->len; i++) {
1514		const struct bpf_insn *insn = &prog->insnsi[i];
1515		int ret;
1516
1517		ret = emit_insn(insn, ctx, extra_pass);
1518		if (ret > 0) {
1519			i++;
1520			if (ctx->insns == NULL)
1521				ctx->offset[i] = ctx->ninsns;
1522			continue;
1523		}
1524		if (ctx->insns == NULL)
1525			ctx->offset[i] = ctx->ninsns;
1526		if (ret)
1527			return ret;
1528	}
1529	return 0;
1530}
1531
/* Pre-fill a freshly allocated JIT image with illegal instructions so
 * that stray execution of unwritten bytes traps. Per the RISC-V ISA
 * spec, the all-zeroes pattern is a defined-illegal instruction, so
 * zeroing the area suffices.
 */
static void bpf_fill_ill_insns(void *area, unsigned int size)
{
	memset(area, 0, size);
}
1536
/* Synchronize the instruction cache with the freshly written JIT image
 * over [start, end), so the generated code is safe to execute.
 */
static void bpf_flush_icache(void *start, void *end)
{
	flush_icache_range((unsigned long)start, (unsigned long)end);
}
1541
/* Opt in to verifier-inserted zero-extension pseudo instructions for
 * 32-bit subregister writes; the LDX cases in emit_insn() detect them
 * via insn_is_zext() and skip emitting a redundant extension.
 */
bool bpf_jit_needs_zext(void)
{
	return true;
}
1546
/* Main JIT entry point: translate @prog into a RV64 image.
 *
 * The JIT runs in two passes: the first computes per-insn offsets and
 * the total image size without emitting code, the second emits into
 * the allocated image. For programs with subprograms (prog->is_func),
 * jit_data persists across invocations and an "extra pass" re-emits
 * the image once all callee addresses are known.
 *
 * Returns the JITed program, or the original program on any failure
 * (the core falls back to the interpreter in that case).
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	bool tmp_blinded = false, extra_pass = false;
	struct bpf_prog *tmp, *orig_prog = prog;
	struct rv_jit_data *jit_data;
	struct rv_jit_context *ctx;
	unsigned int image_size;

	if (!prog->jit_requested)
		return orig_prog;

	/* Constant blinding (Spectre mitigation); on failure just run
	 * the original program unJITed.
	 */
	tmp = bpf_jit_blind_constants(prog);
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	/* jit_data survives between passes for multi-function programs. */
	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}

	ctx = &jit_data->ctx;

	/* A non-NULL offset table means the sizing pass already ran in
	 * a previous invocation: this is the extra pass, re-emit only.
	 */
	if (ctx->offset) {
		extra_pass = true;
		image_size = sizeof(u32) * ctx->ninsns;
		goto skip_init_ctx;
	}

	ctx->prog = prog;
	ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL);
	if (!ctx->offset) {
		prog = orig_prog;
		goto out_offset;
	}

	/* First pass generates the ctx->offset, but does not emit an image. */
	if (build_body(ctx, extra_pass)) {
		prog = orig_prog;
		goto out_offset;
	}
	build_prologue(ctx);
	ctx->epilogue_offset = ctx->ninsns;
	build_epilogue(ctx);

	/* Allocate image, now that we know the size. */
	image_size = sizeof(u32) * ctx->ninsns;
	jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
						sizeof(u32),
						bpf_fill_ill_insns);
	if (!jit_data->header) {
		prog = orig_prog;
		goto out_offset;
	}

	/* Second, real pass, that actually emits the image. */
	ctx->insns = (u32 *)jit_data->image;
skip_init_ctx:
	/* Restart emission from the top of the image. */
	ctx->ninsns = 0;

	build_prologue(ctx);
	if (build_body(ctx, extra_pass)) {
		bpf_jit_binary_free(jit_data->header);
		prog = orig_prog;
		goto out_offset;
	}
	build_epilogue(ctx);

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, image_size, 2, ctx->insns);

	prog->bpf_func = (void *)ctx->insns;
	prog->jited = 1;
	prog->jited_len = image_size;

	bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);

	/* Free the pass-to-pass state unless another extra pass is still
	 * expected for this subprogram.
	 */
	if (!prog->is_func || extra_pass) {
out_offset:
		kfree(ctx->offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}