// SPDX-License-Identifier: GPL-2.0
/*
 * BPF JIT compiler for PA-RISC (32-bit)
 *
 * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and
 * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/libgcc.h>
/*
 * Stack layout during BPF program execution (note: stack grows up):
 *
 *   HPPA32 sp =>  +----------+ <= HPPA32 fp
 *                 |   ...    | HPPA32 callee-saved registers
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS)
 *                 |  lo(FP)  | JIT scratch space for BPF registers
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS
 *                 |          |       - 4 * BPF_JIT_SCRATCH_REGS)
 *                 |   ...    | BPF program stack
 *                 |   ...    | Function call stack
 *                 +----------+
 */
enum {
	/* Stack layout - these are offsets from top of JIT scratch space. */
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	BPF_R0_TEMP_HI,
	BPF_R0_TEMP_LO,
	BPF_JIT_SCRATCH_REGS,
};

/* Number of callee-saved registers stored to stack: rp, r3-r18. */
#define NR_SAVED_REGISTERS	(18 - 3 + 1 + 8)

/* Offset from fp for BPF registers stored on stack. */
#define STACK_OFFSET(k)	(- (NR_SAVED_REGISTERS + k + 1))
#define STACK_ALIGN	FRAME_SIZE
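/*
 * Worked example (a sketch, not generated code): NR_SAVED_REGISTERS is
 * (18 - 3 + 1 + 8) == 24 words, so with REG_SIZE == 4 the first scratch
 * slot (k == 0) resolves to STACK_OFFSET(0) == -25 words, i.e. it lives
 * at sp - 100 bytes, directly below the callee-saved register area.
 */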
#define EXIT_PTR_LOAD(reg)	hppa_ldw(-0x08, HPPA_REG_SP, reg)
#define EXIT_PTR_STORE(reg)	hppa_stw(reg, -0x08, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, reg, nop)

#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
#define TMP_REG_R0	(MAX_BPF_JIT_REG + 2)
static const s8 regmap[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF. */
	[BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1},		/* HI/LOW */

	/* Arguments from eBPF program to in-kernel function. */
	[BPF_REG_1] = {HPPA_R(3), HPPA_R(4)},
	[BPF_REG_2] = {HPPA_R(5), HPPA_R(6)},
	[BPF_REG_3] = {HPPA_R(7), HPPA_R(8)},
	[BPF_REG_4] = {HPPA_R(9), HPPA_R(10)},
	[BPF_REG_5] = {HPPA_R(11), HPPA_R(12)},

	[BPF_REG_6] = {HPPA_R(13), HPPA_R(14)},
	[BPF_REG_7] = {HPPA_R(15), HPPA_R(16)},
	/*
	 * Callee-saved registers that in-kernel function will preserve.
	 * Stored on the stack.
	 */
	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},

	/* Read-only frame pointer to access BPF stack. Not needed. */
	[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},

	/* Temporary register for blinding constants. Stored on the stack. */
	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
	/*
	 * Temporary registers used by the JIT to operate on registers stored
	 * on the stack. Save t0 and t1 to be used as temporaries in generated
	 * code.
	 */
	[TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2},
	[TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4},

	/* Temporary space for BPF_R0 during libgcc and millicode calls. */
	[TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)},
};
static s8 hi(const s8 *r)
{
	return *r;
}

static s8 lo(const s8 *r)
{
	return *(r + 1);
}
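/*
 * Illustrative usage (a sketch): hi(regmap[BPF_REG_1]) yields HPPA_R(3)
 * and lo(regmap[BPF_REG_1]) yields HPPA_R(4).  For stacked registers such
 * as BPF_REG_8 the values are negative STACK_OFFSET() word indices rather
 * than register numbers, which is what is_stacked() below keys on.
 */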
static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && (rs == rd))
		return;
	REG_SET_SEEN(ctx, rs);
	emit(hppa_copy(rs, rd), ctx);
}
static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, r1);
	REG_SET_SEEN(ctx, r2);
	REG_SET_SEEN(ctx, r3);
	if (OPTIMIZE_HPPA && (r1 == r2)) {
		emit(hppa_copy(HPPA_REG_ZERO, r3), ctx);
	} else {
		emit(hppa_xor(r1, r2, r3), ctx);
	}
}
static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx)
{
	u32 lower = im11(imm);

	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
		emit(hppa_ldi(imm, rd), ctx);
		return;
	}
	emit(hppa_ldil(imm, rd), ctx);
	if (OPTIMIZE_HPPA && (lower == 0))
		return;
	emit(hppa_ldo(lower, rd, rd), ctx);
}
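/*
 * Example of the emitted sequences (a sketch in rough PA-RISC assembly,
 * not verbatim JIT output):
 *
 *   emit_imm(rd, 5, ctx)        ->  ldi   5,rd
 *   emit_imm(rd, 0x12345, ctx)  ->  ldil  L%0x12345,rd
 *                                   ldo   R%0x12345(rd),rd
 *
 * Immediates that fit in 14 bits take a single instruction; larger ones
 * are built from the upper bits (ldil) plus the low 11 bits (ldo).
 */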
static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx)
{
	/* Emit immediate into lower bits. */
	REG_SET_SEEN(ctx, lo(rd));
	emit_imm(lo(rd), imm, ctx);

	/* Sign-extend into upper bits. */
	REG_SET_SEEN(ctx, hi(rd));
	if (imm >= 0)
		emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	else
		emit(hppa_ldi(-1, hi(rd)), ctx);
}
static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
		       struct hppa_jit_context *ctx)
{
	emit_imm(hi(rd), imm_hi, ctx);
	emit_imm(lo(rd), imm_lo, ctx);
}
static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
{
	const s8 *r0 = regmap[BPF_REG_0];
	int i;

	if (is_tail_call) {
		/*
		 * Skips first instruction of prologue which initializes tail
		 * call counter. Assumes t0 contains address of target program,
		 * see emit_bpf_tail_call.
		 */
		emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
		emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
		/* in delay slot: */
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);
		return;
	}

	/* load epilogue function pointer and jump to it. */
	/* exit point is either directly below, or the outermost TCC exit function */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/* NOTE: we are 32-bit and big-endian, so return the lower 32-bit value */
	emit_hppa_copy(lo(r0), HPPA_REG_RET0, ctx);

	/* Restore callee-saved registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_ldw(-REG_SIZE * (8 + (i-3)), HPPA_REG_SP, HPPA_R(i)), ctx);
	}

	/* load original return pointer (stored by outermost TCC function) */
	emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx);
	emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
	/* in delay slot: */
	emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx);
}
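/*
 * Rough shape of the emitted exit path (a sketch, register restores
 * abbreviated):
 *
 *   ldw  -0x08(sp),rp    ; EXIT_PTR_LOAD: epilogue or outermost TCC exit
 *   bv   r0(rp)          ; lands directly below for the main program
 *   copy lo(r0),ret0     ; 32-bit return value
 *   ...                  ; restore r3-r18 as needed
 *   ldw  -0x14(sp),rp    ; original return pointer
 *   bv   r0(rp)
 *   ldw  -0x04(sp),sp    ; delay slot: restore caller's sp
 */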
static bool is_stacked(s8 reg)
{
	return reg < 0;
}
static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp,
				      u16 offset_sp, struct hppa_jit_context *ctx)
{
	if (is_stacked(hi(reg))) {
		emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx);
		emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx);
		reg = tmp;
	} else {
		REG_SET_SEEN(ctx, hi(reg));
		REG_SET_SEEN(ctx, lo(reg));
	}
	return reg;
}

static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp,
			       struct hppa_jit_context *ctx)
{
	return bpf_get_reg64_offset(reg, tmp, 0, ctx);
}
static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp,
				   bool must_load, struct hppa_jit_context *ctx)
{
	if (must_load)
		return bpf_get_reg64(reg, tmp, ctx);

	if (is_stacked(hi(reg))) {
		emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx);
		reg = tmp;
	} else {
		REG_SET_SEEN(ctx, hi(reg));
		REG_SET_SEEN(ctx, lo(reg));
	}
	return reg;
}
static void bpf_put_reg64(const s8 *reg, const s8 *src,
			  struct hppa_jit_context *ctx)
{
	if (is_stacked(hi(reg))) {
		emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
	}
}

static void bpf_save_R0(struct hppa_jit_context *ctx)
{
	bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
}

static void bpf_restore_R0(struct hppa_jit_context *ctx)
{
	bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
}
static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp,
			       struct hppa_jit_context *ctx)
{
	if (is_stacked(lo(reg))) {
		emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx);
		reg = tmp;
	} else {
		REG_SET_SEEN(ctx, lo(reg));
	}
	return reg;
}
static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp,
				   struct hppa_jit_context *ctx)
{
	if (ctx->prog->aux->verifier_zext)
		return bpf_get_reg32(reg, tmp, ctx);

	if (is_stacked(hi(reg))) {
		reg = tmp;
	} else {
		REG_SET_SEEN(ctx, lo(reg));
	}
	return reg;
}
static void bpf_put_reg32(const s8 *reg, const s8 *src,
			  struct hppa_jit_context *ctx)
{
	if (is_stacked(lo(reg))) {
		REG_SET_SEEN(ctx, lo(src));
		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
		if (!ctx->prog->aux->verifier_zext) {
			REG_SET_SEEN(ctx, hi(reg));
			emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
		}
	} else if (!ctx->prog->aux->verifier_zext) {
		REG_SET_SEEN(ctx, hi(reg));
		emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
	}
}
/* Extern hppa millicode functions. */
extern void $$mulI(void);
extern void $$divU(void);
extern void $$remU(void);
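/*
 * Millicode calling convention (a summary, see the PA-RISC runtime
 * architecture documents): operands travel in %arg0/%arg1, the result
 * comes back in %ret1, and the return address is expected in %r31 rather
 * than %rp, which is why emit_call_millicode() below branches with be,l
 * through %r31.
 */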
static void emit_call_millicode(void *func, const s8 arg0,
		const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u32 func_addr;

	emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
	emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);

	/* libgcc overwrites HPPA_REG_RET0/1; save R0 temporarily in dest. */
	if (arg0 != HPPA_REG_RET1)
		bpf_save_R0(ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
	/* skip the following be_l instruction if divisor is zero. */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx);
	}

	/* Note: millicode functions use r31 as return pointer instead of rp */
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
	emit(hppa_nop(), ctx); /* this nop is needed here for the delay slot */

	/* Note: millicode functions return the result in RET1, not RET0 */
	emit_hppa_copy(HPPA_REG_RET1, arg0, ctx);

	/* restore HPPA_REG_RET0/1, temporarily saved in dest. */
	if (arg0 != HPPA_REG_RET1)
		bpf_restore_R0(ctx);
}
static void emit_call_libgcc_ll(void *func, const s8 *arg0,
		const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u32 func_addr;

	emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx);
	emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx);
	emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx);

	/* libgcc overwrites HPPA_REG_RET0/_RET1, so keep a copy of R0 on the stack */
	if (hi(arg0) != HPPA_REG_RET0)
		bpf_save_R0(ctx);

	emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
	/* zero out the following be_l instruction if divisor is 0 (and set default values) */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx);
	}
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx);

	/* restore HPPA_REG_RET0/_RET1 */
	if (hi(arg0) != HPPA_REG_RET0)
		bpf_restore_R0(ctx);
}
static void emit_jump(s32 paoff, bool force_far,
		      struct hppa_jit_context *ctx)
{
	unsigned long pc, addr;

	/* Note: allocate 2 instructions for jumps if force_far is set. */
	if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) {
		/* use bl: short branch followed by nop() */
		emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx);
		if (force_far)
			emit(hppa_nop(), ctx);
		return;
	}

	pc = (uintptr_t) &ctx->insns[ctx->ninsns];
	addr = pc + (paoff * HPPA_INSN_SIZE);
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); // be,l,n addr(sr4,r31), %sr0, %r31
}
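/*
 * The two shapes emit_jump() can produce (a sketch):
 *
 *   near (17-bit displacement):  b,l   paoff,%r0
 *                                nop               ; only if force_far
 *
 *   far:                         ldil  L%addr,%r31
 *                                be,l  R%addr(%sr4,%r31)
 *
 * force_far pads the short form with a nop so both shapes occupy the
 * same space and branch offsets computed earlier stay valid.
 */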
static void emit_alu_i64(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (0 && op == BPF_MOV)
		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
	else
		rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm32(rd, imm, ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		if (imm >= 0)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		if (imm < 0)
			emit_imm(hi(rd), -1, ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		if (imm < 0) {
			emit_imm(HPPA_REG_T0, -1, ctx);
			emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx);
		}
		break;
	case BPF_LSH:
		if (imm == 0) {
			/* Do nothing. */
		} else if (imm > 32) {
			imm -= 32;
			emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(lo(rd), hi(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else {
			emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx);
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		}
		break;
	case BPF_RSH:
		if (imm == 0) {
			/* Do nothing. */
		} else if (imm > 32) {
			imm -= 32;
			emit(hppa_shr(hi(rd), imm, lo(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_shr(hi(rd), imm, hi(rd)), ctx);
		}
		break;
	case BPF_ARSH:
		if (imm == 0) {
			/* Do nothing. */
		} else if (imm > 32) {
			imm -= 32;
			emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx);
		}
		break;
	}

	bpf_put_reg64(dst, rd, ctx);
}
static void emit_alu_i32(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm(lo(rd), imm, ctx);
		break;
	case BPF_ADD:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		break;
	case BPF_LSH:
		if (imm)
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		break;
	case BPF_RSH:
		if (imm)
			emit(hppa_shr(lo(rd), imm, lo(rd)), ctx);
		break;
	case BPF_ARSH:
		if (imm)
			emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx);
		break;
	}

	bpf_put_reg32(dst, rd, ctx);
}
static void emit_alu_r64(const s8 *dst, const s8 *src,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd;
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);

	if (op == BPF_MOV)
		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
	else
		rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP src */
	switch (op) {
	case BPF_MOV:
		emit_hppa_copy(lo(rs), lo(rd), ctx);
		emit_hppa_copy(hi(rs), hi(rd), ctx);
		break;
	case BPF_ADD:
		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_SUB:
		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_AND:
		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_OR:
		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_XOR:
		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
		emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx);
		break;
	case BPF_MUL:
		emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx);
		break;
	case BPF_DIV:
		emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx);
		break;
	case BPF_MOD:
		emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx);
		break;
	case BPF_LSH:
		emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx);
		break;
	case BPF_RSH:
		emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx);
		break;
	case BPF_ARSH:
		emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx);
		break;
	case BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);
		emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx);
		break;
	}

	bpf_put_reg64(dst, rd, ctx);
}
static void emit_alu_r32(const s8 *dst, const s8 *src,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd;
	const s8 *rs = bpf_get_reg32(src, tmp2, ctx);

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP src */
	switch (op) {
	case BPF_MOV:
		emit_hppa_copy(lo(rs), lo(rd), ctx);
		break;
	case BPF_ADD:
		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_AND:
		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_OR:
		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
		break;
	case BPF_MUL:
		emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_DIV:
		emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_MOD:
		emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_LSH:
		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
		emit(hppa_mtsar(HPPA_REG_T0), ctx);
		emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_RSH:
		emit(hppa_mtsar(lo(rs)), ctx);
		emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_ARSH: /* sign-extending arithmetic shift right */
		// emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx);
		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
		emit(hppa_mtsar(HPPA_REG_T0), ctx);
		emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);  // sub r0,rd,rd
		break;
	}

	bpf_put_reg32(dst, rd, ctx);
}
static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	int e, s = ctx->ninsns;
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx);
	const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx);

	/*
	 * NO_JUMP skips over the rest of the instructions and the
	 * emit_jump, meaning the BPF branch is not taken.
	 * JUMP skips directly to the emit_jump, meaning
	 * the BPF branch is taken.
	 *
	 * The fallthrough case results in the BPF branch being taken.
	 */
#define NO_JUMP(idx)	(2 + (idx) - 1)
#define JUMP(idx)	(0 + (idx) - 1)
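	/*
	 * Worked example (a sketch): for the 64-bit JEQ below, the emitted
	 * sequence is
	 *
	 *   bne hi(rs1),hi(rs2),NO_JUMP(1)  ; skip 2 insns, branch not taken
	 *   bne lo(rs1),lo(rs2),NO_JUMP(0)  ; skip 1 insn, branch not taken
	 *   <emit_jump paoff>               ; fallthrough, branch taken
	 *
	 * The idx argument counts the compare instructions still to come,
	 * so the displacement lands on (JUMP) or just past (NO_JUMP) the
	 * final emit_jump sequence.
	 */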
	switch (op) {
	case BPF_JEQ:
		emit(hppa_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(hi(rs1), hi(rs2), JUMP(1)), ctx);
		emit(hppa_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSET:
		emit(hppa_and(hi(rs1), hi(rs2), HPPA_REG_T0), ctx);
		emit(hppa_and(lo(rs1), lo(rs2), HPPA_REG_T1), ctx);
		emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx);
		emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx);
		break;
	}
#undef NO_JUMP
#undef JUMP

	e = ctx->ninsns;
	/* Adjust for extra insns. */
	paoff -= (e - s);

	emit_jump(paoff, true, ctx);
	return 0;
}
static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx)
{
	int e, s;
	bool far = false;
	int off;

	if (op == BPF_JSET) {
		/*
		 * BPF_JSET is a special case: it has no inverse so we always
		 * treat it as a far branch.
		 */
		emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
		paoff -= 1; /* reduce offset due to hppa_and() above */
		rd = HPPA_REG_T0;
		rs = HPPA_REG_ZERO;
		op = BPF_JNE;
		far = true;
	}

	s = ctx->ninsns;

	if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12)) {
		op = invert_bpf_cond(op);
		far = true;
	}

	/*
	 * For a far branch, the condition is negated and we jump over the
	 * branch itself, and the three instructions from emit_jump.
	 * For a near branch, just use paoff.
	 */
	off = far ? (HPPA_BRANCH_DISPLACEMENT - 1) : paoff - HPPA_BRANCH_DISPLACEMENT;

	switch (op) {
	/* IF (dst COND src) JUMP off */
	case BPF_JEQ:
		emit(hppa_beq(rd, rs, off), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(rd, rs, off), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(rd, rs, off), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgeu(rd, rs, off), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bleu(rd, rs, off), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(rd, rs, off), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(rd, rs, off), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(rd, rs, off), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bge(rd, rs, off), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_ble(rd, rs, off), ctx);
		break;
	}

	if (far) {
		e = ctx->ninsns;
		/* Adjust for extra insns. */
		paoff -= (e - s);
		emit_jump(paoff, true, ctx);
	}
	return 0;
}
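/*
 * Example (a sketch): a near conditional branch that fits the 12-bit
 * displacement becomes a single compare-and-branch such as
 * "beq rd,rs,off".  When the target is out of range, the condition is
 * inverted so the compare-and-branch hops over the far-jump sequence
 * that emit_jump() appends:
 *
 *   bne  rd,rs,over          ; inverted condition
 *   ldil L%target,%r31       ; far jump (emit_jump)
 *   be,l R%target(%sr4,%r31)
 * over:
 */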
static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	int e, s = ctx->ninsns;
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx);
	const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx);

	e = ctx->ninsns;
	/* Adjust for extra insns. */
	paoff -= (e - s);

	if (emit_bcc(op, lo(rs1), lo(rs2), paoff, ctx))
		return -1;

	return 0;
}
static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *r0 = regmap[BPF_REG_0];
	const s8 *reg;
	const int offset_sp = 2 * STACK_ALIGN;

	/* make room for the outgoing call frame. */
	emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* load R1 & R2 in registers, R3-R5 to stack. */
	reg = bpf_get_reg64_offset(regmap[BPF_REG_5], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x48, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x44, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_4], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x40, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x3c, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_3], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x38, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x34, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_2], tmp, offset_sp, ctx);
	emit_hppa_copy(hi(reg), HPPA_REG_ARG3, ctx);
	emit_hppa_copy(lo(reg), HPPA_REG_ARG2, ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_1], tmp, offset_sp, ctx);
	emit_hppa_copy(hi(reg), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(reg), HPPA_REG_ARG0, ctx);

	/* backup TCC. */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);

	/*
	 * Use ldil() to load absolute address. Don't use emit_imm as the
	 * number of emitted instructions should not depend on the value of
	 * addr.
	 */
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	/* set return address in delay slot */
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore TCC. */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);

	/* tear down the call frame. */
	emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* set return value. */
	emit_hppa_copy(HPPA_REG_RET0, hi(r0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(r0), ctx);
}
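/*
 * Argument marshalling used above (a summary): the 64-bit BPF args R1
 * and R2 occupy the four hppa argument registers (arg0-arg3), while
 * R3-R5 are spilled to the outgoing argument area at sp-0x34..sp-0x48,
 * following the 32-bit PA-RISC convention for parameters that do not
 * fit in registers.
 */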
static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
{
	int off;
	const s8 *arr_reg = regmap[BPF_REG_2];
	const s8 *idx_reg = regmap[BPF_REG_3];
	struct bpf_array bpfa;
	struct bpf_prog bpfp;

	/* get address of TCC main exit function for error case into rp */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);

	/* max_entries = array->map.max_entries; */
	off = offsetof(struct bpf_array, map.max_entries);
	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
	emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx);

	/*
	 * if (index >= max_entries)
	 *	goto out;
	 */
	emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * if (--tcc < 0)
	 *	goto out;
	 */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *	goto out;
	 */
	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4);
	emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx);
	off = offsetof(struct bpf_array, ptrs);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * goto *(prog->bpf_func + 4);
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4);
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	/* Epilogue jumps to *(t0 + 4). */
	__build_epilogue(true, ctx);
	return 0;
}
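/*
 * Resulting control flow (a sketch in terms of the helpers above):
 * every guard branches over its EXIT_PTR_JUMP on success, so each
 * failure path funnels through rp, which was loaded with the TCC exit
 * address up front:
 *
 *   hppa_ldw   max_entries -> t1
 *   hppa_bltu  idx,t1,+2      ; in range? skip the exit jump
 *   EXIT_PTR_JUMP(rp)         ; out of range -> TCC exit
 *
 * On success the target program is entered one instruction past its
 * start, skipping the TCC initialization (see __build_epilogue()).
 */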
static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
			 struct hppa_jit_context *ctx, const u8 size)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 srcreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		srcreg = lo(rs);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		srcreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rs)), ctx);
		off = im11(off);
	}

	/* LDX: dst = *(size *)(src + off) */
	switch (size) {
	case BPF_B:
		emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_H:
		emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_W:
		emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_DW:
		emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx);
		emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx);
		break;
	}

	bpf_put_reg64(dst, rd, ctx);
	return 0;
}
static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
			  struct hppa_jit_context *ctx, const u8 size,
			  const u8 mode)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 dstreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		dstreg = lo(rd);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		dstreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rd)), ctx);
		off = im11(off);
	}

	/* ST: *(size *)(dst + off) = imm */
	switch (size) {
	case BPF_B:
		emit(hppa_stb(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_H:
		emit(hppa_sth(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_W:
		emit(hppa_stw(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_DW:
		emit(hppa_stw(hi(rs), off + 0, dstreg), ctx);
		emit(hppa_stw(lo(rs), off + 4, dstreg), ctx);
		break;
	}

	return 0;
}
static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
}

static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx);
	emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx);
	emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx);
}
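/*
 * Byte-swap example (a sketch): for rs = 0xAABBCCDD, emit_rev32() first
 * rotates by 16 (t1 = 0xCCDDAABB), re-deposits one byte and finishes
 * with a shift-pair, yielding rd = 0xDDCCBBAA.  emit_rev16() swaps the
 * two low bytes and clears the upper halfword, e.g. 0x0000AABB becomes
 * 0x0000BBAA.
 */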
static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx)
{
	const s8 *rd;
	const s8 *tmp1 = regmap[TMP_REG_1];

	rd = bpf_get_reg64(dst, tmp1, ctx);
	emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	bpf_put_reg64(dst, rd, ctx);
}
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, paoff, i = insn - ctx->prog->insnsi;
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	const s8 *dst = regmap[insn->dst_reg];
	const s8 *src = regmap[insn->src_reg];
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	if (0) printk("CLASS %03d  CODE %#02x ALU64:%d BPF_SIZE %#02x "
		"BPF_CODE %#02x  src_reg %d  dst_reg %d\n",
		BPF_CLASS(code), code, (code & BPF_ALU64) ? 1 : 0, BPF_SIZE(code),
		BPF_OP(code), insn->src_reg, insn->dst_reg);
	switch (code) {
	/* dst = src */
	case BPF_ALU64 | BPF_MOV | BPF_X:

	/* dst = dst OP src */
	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:

	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:

	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:

	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:

	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_K:

	case BPF_ALU64 | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:

	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r64(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU64 | BPF_NEG:
		emit_alu_r64(dst, tmp2, ctx, BPF_OP(code));
		break;

	/* dst = dst OP imm */
	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_alu_i64(dst, imm, ctx, BPF_OP(code));
		break;
	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext. */
			emit_zext64(dst, ctx);
			break;
		}
		fallthrough;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_X:

	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:

	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_DIV | BPF_K:

	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:

	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r32(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
		/*
		 * mul,div,mod are handled in the BPF_X case.
		 */
		emit_alu_i32(dst, imm, ctx, BPF_OP(code));
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		/*
		 * src is ignored: choose tmp2 as a dummy register since it
		 * is not on the stack.
		 */
		emit_alu_r32(dst, tmp2, ctx, BPF_OP(code));
		break;
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx);
			fallthrough;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Do nothing. */
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}

	case BPF_ALU | BPF_END | BPF_FROM_LE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			emit_rev16(lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 32:
			emit_rev32(lo(rd), lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Swap upper and lower halves, then each half. */
			emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx);
			emit_rev32(lo(rd), hi(rd), ctx);
			emit_rev32(HPPA_REG_T0, lo(rd), ctx);
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}
	/* JUMP off */
	case BPF_JMP | BPF_JA:
		paoff = hppa_offset(i, off, ctx);
		emit_jump(paoff, false, ctx);
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		int ret;
		u64 addr;

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		emit_call(fixed, addr, ctx);
		break;
	}

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		REG_SET_SEEN_ALL(ctx);
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;
	/* IF (dst COND src/imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_K:

	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_K:

	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_K:

	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_K:

	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_K:

	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_K:

	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_K:

	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_K:

	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_K:

	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_K:

	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		if (BPF_SRC(code) == BPF_K) {
			s = ctx->ninsns;
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
			e = ctx->ninsns;
			paoff -= (e - s);
		}
		if (is64)
			emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code));
		else
			emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code));
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;
		/* load epilogue function pointer and jump to it. */
		emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
		emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
		break;
	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u32 upper = insn1.imm;
		u32 lower = imm;
		const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);

		if (0 && bpf_pseudo_func(insn)) {
			WARN_ON(upper); /* we are 32-bit! */
			lower = (uintptr_t) dereference_function_descriptor(lower);
		}

		emit_imm64(rd, upper, lower, ctx);
		bpf_put_reg64(dst, rd, ctx);
		return 1;
	}
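	/*
	 * Example (a sketch): the 64-bit constant 0x1122334455667788
	 * arrives as two BPF instructions; insn->imm carries the low word
	 * 0x55667788 and insn[1].imm the high word 0x11223344, which
	 * emit_imm64() above materializes into hi(rd)/lo(rd).
	 */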
	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code)))
			return -1;
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:

	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		if (BPF_CLASS(code) == BPF_ST) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}

		if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
				   BPF_MODE(code)))
			return -1;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		pr_info_once(
			"bpf-jit: not supported: atomic operation %02x ***\n",
			insn->imm);
		return -EFAULT;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}
void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *dst, *reg;
	int stack_adjust = 0;
	int i;
	unsigned long addr;
	int bpf_stack_adjust;

	/*
	 * stack on hppa grows up, so if tail calls are used we need to
	 * allocate the maximum stack size
	 */
	if (REG_ALL_SEEN(ctx))
		bpf_stack_adjust = MAX_BPF_STACK;
	else
		bpf_stack_adjust = ctx->prog->aux->stack_depth;
	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);

	/* make space for callee-saved registers. */
	stack_adjust += NR_SAVED_REGISTERS * REG_SIZE;
	/* make space for BPF registers on stack. */
	stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE;
	/* make space for BPF stack. */
	stack_adjust += bpf_stack_adjust;
	/* round up for stack alignment. */
	stack_adjust = round_up(stack_adjust, STACK_ALIGN);
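	/*
	 * Worked example (a sketch, assuming the ten scratch slots from
	 * the enum above): with tail calls forcing bpf_stack_adjust to
	 * MAX_BPF_STACK (512), the sum is 24 * 4 + 10 * 4 + 512 = 648
	 * bytes, rounded up to the next STACK_ALIGN boundary by the
	 * round_up() above.
	 */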
	/*
	 * The first instruction sets the tail-call-counter (TCC) register.
	 * This instruction is skipped by tail calls.
	 * Use a temporary register instead of a caller-saved register initially.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);

	/*
	 * skip all initializations when called as BPF TAIL call.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
	emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx);

	/* set up hppa stack frame. */
	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);			// copy sp,r1 (=prev_sp)
	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);	// ldo stack_adjust(sp),sp (increase stack)
	emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);	// stw prev_sp,-0x04(sp)
	emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx);		// stw rp,-0x14(sp)
	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
	REG_FORCE_SEEN(ctx, HPPA_REG_T2);
	REG_FORCE_SEEN(ctx, HPPA_REG_T3);
	REG_FORCE_SEEN(ctx, HPPA_REG_T4);
	REG_FORCE_SEEN(ctx, HPPA_REG_T5);

	/* save callee-saved registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx);	// stw ri,-save_area(sp)
	}

	/*
	 * now really set the tail call counter (TCC) register.
	 */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);
	/*
	 * save epilogue function pointer for outer TCC call chain.
	 * The main TCC call stores the final RP on stack.
	 */
	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
	/* skip first two instructions of exit function, which jump to exit */
	addr += 2 * HPPA_INSN_SIZE;
	emit(hppa_ldil(addr, HPPA_REG_T2), ctx);
	emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx);
	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);
	/* load R1 & R2 from registers, R3-R5 from stack. */
	/* use HPPA_REG_R1 which holds the old stack pointer value */
	dst = regmap[BPF_REG_5];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_4];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_3];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_2];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_1];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	/* Set up BPF frame pointer. */
	dst = regmap[BPF_REG_FP];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS),
				      HPPA_REG_SP, lo(reg)), ctx);
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	emit(hppa_nop(), ctx);
}
void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}