x86/cfi,bpf: Fix bpf_struct_ops CFI
author    Peter Zijlstra <[email protected]>
          Fri, 15 Dec 2023 09:12:20 +0000 (10:12 +0100)
committer Alexei Starovoitov <[email protected]>
          Sat, 16 Dec 2023 00:25:55 +0000 (16:25 -0800)
BPF struct_ops uses __arch_prepare_bpf_trampoline() to write
trampolines for indirect function calls. These trampolines must have
matching CFI.

In order to obtain the correct CFI hash for the various methods, add a
matching structure that contains stub functions; the compiler will
generate correct CFI for these, which we can pilfer for the trampolines.
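
[ Editorial sketch, not part of the patch; all names below are
  hypothetical. A stub that shares the real method's prototype gets the
  same compiler-generated CFI preamble, whose hash can then be read at
  run time and stamped into the JITed trampoline: ]

    /* stub with the same signature as the struct_ops method */
    static int foo_test_stub(struct foo_state *state)
    {
            return 0;
    }

    static struct foo_ops __foo_ops_cfi_stubs = {
            .test = foo_test_stub,
    };

    /* when JITing the trampoline that backs .test: */
    u32 hash = cfi_get_func_hash(__foo_ops_cfi_stubs.test);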

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
arch/x86/include/asm/cfi.h
arch/x86/kernel/alternative.c
arch/x86/net/bpf_jit_comp.c
include/linux/bpf.h
kernel/bpf/bpf_struct_ops.c
net/bpf/bpf_dummy_struct_ops.c
net/ipv4/bpf_tcp_ca.c

diff --git a/arch/x86/include/asm/cfi.h b/arch/x86/include/asm/cfi.h
index 8779abd217b7a10c652da1c794a343f95db854b2..1a50b2cd4713853d8f65d7df6a2f62dfb71e32bf 100644
@@ -123,6 +123,8 @@ static inline int cfi_get_offset(void)
 }
 #define cfi_get_offset cfi_get_offset
 
+extern u32 cfi_get_func_hash(void *func);
+
 #else
 static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
 {
@@ -130,6 +132,10 @@ static inline enum bug_trap_type handle_cfi_failure(struct pt_regs *regs)
 }
 #define cfi_bpf_hash 0U
 #define cfi_bpf_subprog_hash 0U
+static inline u32 cfi_get_func_hash(void *func)
+{
+       return 0;
+}
 #endif /* CONFIG_CFI_CLANG */
 
 #endif /* _ASM_X86_CFI_H */
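
The 0-returning fallback above lets callers use the hash
unconditionally; a minimal usage sketch (editorial, mirroring the JIT
change below):

    u32 hash = cfi_get_func_hash(func_addr); /* 0 when CFI is disabled */
    emit_cfi(&prog, hash); /* falls back to a bare ENDBR without CFI */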
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index cb393efd5ccd0ded37eb5a01631b722bb4692ad4..49c2a62ba5e4e93a1a7c6227b7e58fe22b13e5db 100644
@@ -883,6 +883,28 @@ asm (
 "      .size   cfi_bpf_subprog_hash, 4                         \n"
 "      .popsection                                             \n"
 );
+
+u32 cfi_get_func_hash(void *func)
+{
+       u32 hash;
+
+       func -= cfi_get_offset();
+       switch (cfi_mode) {
+       case CFI_FINEIBT:
+               func += 7;
+               break;
+       case CFI_KCFI:
+               func += 1;
+               break;
+       default:
+               return 0;
+       }
+
+       if (get_kernel_nofault(hash, func))
+               return 0;
+
+       return hash;
+}
 #endif
 
 #ifdef CONFIG_FINEIBT
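
For reference (editorial note, derived from the __cfi_* preamble
templates elsewhere in this file), the offsets decoded above map onto
the preamble layouts as follows:

    /*
     * FineIBT preamble:
     *   endbr64              f3 0f 1e fa        bytes 0..3
     *   subl $hash, %r10d    41 81 ea <hash>    hash at bytes 7..10
     *
     * kCFI preamble:
     *   movl $hash, %eax     b8 <hash>          hash at bytes 1..4
     *
     * with the preamble starting at func - cfi_get_offset().
     */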
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 43b8c08fdf8d71f20e633ab1e81bee781a5df14a..c89a4abdd72652aa83ed6dfa291bc0efac16702f 100644
@@ -312,9 +312,8 @@ static void pop_callee_regs(u8 **pprog, bool *callee_regs_used)
  * in arch/x86/kernel/alternative.c
  */
 
-static void emit_fineibt(u8 **pprog, bool is_subprog)
+static void emit_fineibt(u8 **pprog, u32 hash)
 {
-       u32 hash = is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash;
        u8 *prog = *pprog;
 
        EMIT_ENDBR();
@@ -327,9 +326,8 @@ static void emit_fineibt(u8 **pprog, bool is_subprog)
        *pprog = prog;
 }
 
-static void emit_kcfi(u8 **pprog, bool is_subprog)
+static void emit_kcfi(u8 **pprog, u32 hash)
 {
-       u32 hash = is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash;
        u8 *prog = *pprog;
 
        EMIT1_off32(0xb8, hash);                        /* movl $hash, %eax     */
@@ -351,17 +349,17 @@ static void emit_kcfi(u8 **pprog, bool is_subprog)
        *pprog = prog;
 }
 
-static void emit_cfi(u8 **pprog, bool is_subprog)
+static void emit_cfi(u8 **pprog, u32 hash)
 {
        u8 *prog = *pprog;
 
        switch (cfi_mode) {
        case CFI_FINEIBT:
-               emit_fineibt(&prog, is_subprog);
+               emit_fineibt(&prog, hash);
                break;
 
        case CFI_KCFI:
-               emit_kcfi(&prog, is_subprog);
+               emit_kcfi(&prog, hash);
                break;
 
        default:
@@ -383,7 +381,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 {
        u8 *prog = *pprog;
 
-       emit_cfi(&prog, is_subprog);
+       emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
        /* BPF trampoline can be made to work without these nops,
         * but let's waste 5 bytes for now and optimize later
         */
@@ -2510,10 +2508,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
        u8 *prog;
        bool save_ret;
 
+       /*
+        * F_INDIRECT is only compatible with F_RET_FENTRY_RET; it is
+        * explicitly incompatible with F_CALL_ORIG | F_SKIP_FRAME | F_IP_ARG
+        * because @func_addr is overloaded.
+        */
+       WARN_ON_ONCE((flags & BPF_TRAMP_F_INDIRECT) &&
+                    (flags & ~(BPF_TRAMP_F_INDIRECT | BPF_TRAMP_F_RET_FENTRY_RET)));
+
        /* extra registers for struct arguments */
-       for (i = 0; i < m->nr_args; i++)
+       for (i = 0; i < m->nr_args; i++) {
                if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
                        nr_regs += (m->arg_size[i] + 7) / 8 - 1;
+       }
 
        /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
         * are passed through regs, the remains are through stack.
@@ -2596,20 +2603,27 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 
        prog = rw_image;
 
-       EMIT_ENDBR();
-       /*
-        * This is the direct-call trampoline, as such it needs accounting
-        * for the __fentry__ call.
-        */
-       x86_call_depth_emit_accounting(&prog, NULL);
+       if (flags & BPF_TRAMP_F_INDIRECT) {
+               /*
+                * Indirect call for bpf_struct_ops
+                */
+               emit_cfi(&prog, cfi_get_func_hash(func_addr));
+       } else {
+               /*
+                * Direct-call fentry stub; as such it needs accounting for the
+                * __fentry__ call.
+                */
+               x86_call_depth_emit_accounting(&prog, NULL);
+       }
        EMIT1(0x55);             /* push rbp */
        EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
-       if (!is_imm8(stack_size))
+       if (!is_imm8(stack_size)) {
                /* sub rsp, stack_size */
                EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
-       else
+       } else {
                /* sub rsp, stack_size */
                EMIT4(0x48, 0x83, 0xEC, stack_size);
+       }
        if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
                EMIT1(0x50);            /* push rax */
        /* mov QWORD PTR [rbp - rbx_off], rbx */
@@ -2643,10 +2657,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
                }
        }
 
-       if (fentry->nr_links)
+       if (fentry->nr_links) {
                if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
                               flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
                        return -EINVAL;
+       }
 
        if (fmod_ret->nr_links) {
                branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
@@ -2665,11 +2680,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
                restore_regs(m, &prog, regs_off);
                save_args(m, &prog, arg_stack_off, true);
 
-               if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+               if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
                        /* Before calling the original function, restore the
                         * tail_call_cnt from stack to rax.
                         */
                        RESTORE_TAIL_CALL_CNT(stack_size);
+               }
 
                if (flags & BPF_TRAMP_F_ORIG_STACK) {
                        emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
@@ -2698,17 +2714,19 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
                /* Update the branches saved in invoke_bpf_mod_ret with the
                 * aligned address of do_fexit.
                 */
-               for (i = 0; i < fmod_ret->nr_links; i++)
+               for (i = 0; i < fmod_ret->nr_links; i++) {
                        emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
                                            image + (branches[i] - (u8 *)rw_image), X86_JNE);
+               }
        }
 
-       if (fexit->nr_links)
+       if (fexit->nr_links) {
                if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
                               false, image, rw_image)) {
                        ret = -EINVAL;
                        goto cleanup;
                }
+       }
 
        if (flags & BPF_TRAMP_F_RESTORE_REGS)
                restore_regs(m, &prog, regs_off);
@@ -2725,11 +2743,12 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
                        ret = -EINVAL;
                        goto cleanup;
                }
-       } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+       } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
                /* Before running the original function, restore the
                 * tail_call_cnt from stack to rax.
                 */
                RESTORE_TAIL_CALL_CNT(stack_size);
+       }
 
        /* restore return value of orig_call or fentry prog back into RAX */
        if (save_ret)
@@ -2737,9 +2756,10 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
 
        emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
        EMIT1(0xC9); /* leave */
-       if (flags & BPF_TRAMP_F_SKIP_FRAME)
+       if (flags & BPF_TRAMP_F_SKIP_FRAME) {
                /* skip our return address and return to parent */
                EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+       }
        emit_return(&prog, image + (prog - (u8 *)rw_image));
        /* Make sure the trampoline generation logic doesn't overflow */
        if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9d84c376851af5de62ab1f8dfc4170c218ebadae..db46b3359bf5fc5569de336169d967980fc20fb5 100644
@@ -1060,6 +1060,17 @@ struct btf_func_model {
  */
 #define BPF_TRAMP_F_TAIL_CALL_CTX      BIT(7)
 
+/*
+ * Indicate the trampoline should be suitable to receive indirect calls;
+ * without this, indirectly calling the generated code can result in #UD/#CP,
+ * depending on the CFI options.
+ *
+ * Used by bpf_struct_ops.
+ *
+ * Incompatible with FENTRY usage; overloads the @func_addr argument.
+ */
+#define BPF_TRAMP_F_INDIRECT           BIT(8)
+
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86.
  */
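
A hedged usage sketch (mirroring the bpf_struct_ops.c change further
below): the flag is set up front, and the @func_addr argument is
overloaded to carry the CFI stub rather than a call target:

    u32 flags = BPF_TRAMP_F_INDIRECT;

    if (model->ret_size > 0)
            flags |= BPF_TRAMP_F_RET_FENTRY_RET;

    /* last argument: the stub whose CFI hash the JIT copies */
    arch_prepare_bpf_trampoline(NULL, image, image_end, model, flags,
                                tlinks, stub_func);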
@@ -1697,6 +1708,7 @@ struct bpf_struct_ops {
        struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
        u32 type_id;
        u32 value_id;
+       void *cfi_stubs;
 };
 
 #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
@@ -1710,6 +1722,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
 int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
                                      struct bpf_tramp_link *link,
                                      const struct btf_func_model *model,
+                                     void *stub_func,
                                      void *image, void *image_end);
 static inline bool bpf_try_module_get(const void *data, struct module *owner)
 {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 4d53c53fc5aaf6fa5c6aa91f18b83e581d1a533c..02068bd0e4d951afbf1a6c1c01e7652d2e8dd460 100644
@@ -352,17 +352,16 @@ const struct bpf_link_ops bpf_struct_ops_link_lops = {
 int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
                                      struct bpf_tramp_link *link,
                                      const struct btf_func_model *model,
-                                     void *image, void *image_end)
+                                     void *stub_func, void *image, void *image_end)
 {
-       u32 flags;
+       u32 flags = BPF_TRAMP_F_INDIRECT;
        int size;
 
        tlinks[BPF_TRAMP_FENTRY].links[0] = link;
        tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
-       /* BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
-        * and it must be used alone.
-        */
-       flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
+
+       if (model->ret_size > 0)
+               flags |= BPF_TRAMP_F_RET_FENTRY_RET;
 
        size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
        if (size < 0)
@@ -370,7 +369,7 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
        if (size > (unsigned long)image_end - (unsigned long)image)
                return -E2BIG;
        return arch_prepare_bpf_trampoline(NULL, image, image_end,
-                                          model, flags, tlinks, NULL);
+                                          model, flags, tlinks, stub_func);
 }
 
 static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
@@ -504,11 +503,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
                err = bpf_struct_ops_prepare_trampoline(tlinks, link,
                                                        &st_ops->func_models[i],
+                                                       *(void **)(st_ops->cfi_stubs + moff),
                                                        image, image_end);
                if (err < 0)
                        goto reset_unlock;
 
-               *(void **)(kdata + moff) = image;
+               *(void **)(kdata + moff) = image + cfi_get_offset();
                image += err;
 
                /* put prog_id to udata */
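
The resulting image layout (editorial sketch) is what makes the
cfi_get_offset() adjustment above necessary when the pointer is
published:

    /*
     * image + 0                : CFI preamble (kCFI hash / FineIBT gadget)
     * image + cfi_get_offset() : actual trampoline entry point
     *
     * Indirect callers must be handed the entry point, not the
     * preamble, hence *(void **)(kdata + moff) is set to
     * image + cfi_get_offset().
     */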
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 2748f9d77b18d2092ca25357a6ce8cf2e16579cc..8906f7bdf4a92ad24f23efe51c2ffb0f6fdd49e2 100644
@@ -12,6 +12,11 @@ extern struct bpf_struct_ops bpf_bpf_dummy_ops;
 /* A common type for test_N with return value in bpf_dummy_ops */
 typedef int (*dummy_ops_test_ret_fn)(struct bpf_dummy_ops_state *state, ...);
 
+static int dummy_ops_test_ret_function(struct bpf_dummy_ops_state *state, ...)
+{
+       return 0;
+}
+
 struct bpf_dummy_ops_test_args {
        u64 args[MAX_BPF_FUNC_ARGS];
        struct bpf_dummy_ops_state state;
@@ -62,7 +67,7 @@ static int dummy_ops_copy_args(struct bpf_dummy_ops_test_args *args)
 
 static int dummy_ops_call_op(void *image, struct bpf_dummy_ops_test_args *args)
 {
-       dummy_ops_test_ret_fn test = (void *)image;
+       dummy_ops_test_ret_fn test = (void *)image + cfi_get_offset();
        struct bpf_dummy_ops_state *state = NULL;
 
        /* state needs to be NULL if args[0] is 0 */
@@ -119,6 +124,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
        op_idx = prog->expected_attach_type;
        err = bpf_struct_ops_prepare_trampoline(tlinks, link,
                                                &st_ops->func_models[op_idx],
+                                               &dummy_ops_test_ret_function,
                                                image, image + PAGE_SIZE);
        if (err < 0)
                goto out;
@@ -219,6 +225,28 @@ static void bpf_dummy_unreg(void *kdata)
 {
 }
 
+static int bpf_dummy_test_1(struct bpf_dummy_ops_state *cb)
+{
+       return 0;
+}
+
+static int bpf_dummy_test_2(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
+                           char a3, unsigned long a4)
+{
+       return 0;
+}
+
+static int bpf_dummy_test_sleepable(struct bpf_dummy_ops_state *cb)
+{
+       return 0;
+}
+
+static struct bpf_dummy_ops __bpf_bpf_dummy_ops = {
+       .test_1 = bpf_dummy_test_1,
+       .test_2 = bpf_dummy_test_2,
+       .test_sleepable = bpf_dummy_test_sleepable,
+};
+
 struct bpf_struct_ops bpf_bpf_dummy_ops = {
        .verifier_ops = &bpf_dummy_verifier_ops,
        .init = bpf_dummy_init,
@@ -227,4 +255,5 @@ struct bpf_struct_ops bpf_bpf_dummy_ops = {
        .reg = bpf_dummy_reg,
        .unreg = bpf_dummy_unreg,
        .name = "bpf_dummy_ops",
+       .cfi_stubs = &__bpf_bpf_dummy_ops,
 };
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index c7bbd8f3c708ba258446a9f204bd7d15176370c9..634cfafa583dc931ea3111ffc002022605f17b2a 100644
@@ -271,6 +271,74 @@ static int bpf_tcp_ca_validate(void *kdata)
        return tcp_validate_congestion_control(kdata);
 }
 
+static u32 bpf_tcp_ca_ssthresh(struct sock *sk)
+{
+       return 0;
+}
+
+static void bpf_tcp_ca_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+{
+}
+
+static void bpf_tcp_ca_set_state(struct sock *sk, u8 new_state)
+{
+}
+
+static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+{
+}
+
+static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags)
+{
+}
+
+static void bpf_tcp_ca_pkts_acked(struct sock *sk, const struct ack_sample *sample)
+{
+}
+
+static u32 bpf_tcp_ca_min_tso_segs(struct sock *sk)
+{
+       return 0;
+}
+
+static void bpf_tcp_ca_cong_control(struct sock *sk, const struct rate_sample *rs)
+{
+}
+
+static u32 bpf_tcp_ca_undo_cwnd(struct sock *sk)
+{
+       return 0;
+}
+
+static u32 bpf_tcp_ca_sndbuf_expand(struct sock *sk)
+{
+       return 0;
+}
+
+static void __bpf_tcp_ca_init(struct sock *sk)
+{
+}
+
+static void __bpf_tcp_ca_release(struct sock *sk)
+{
+}
+
+static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = {
+       .ssthresh = bpf_tcp_ca_ssthresh,
+       .cong_avoid = bpf_tcp_ca_cong_avoid,
+       .set_state = bpf_tcp_ca_set_state,
+       .cwnd_event = bpf_tcp_ca_cwnd_event,
+       .in_ack_event = bpf_tcp_ca_in_ack_event,
+       .pkts_acked = bpf_tcp_ca_pkts_acked,
+       .min_tso_segs = bpf_tcp_ca_min_tso_segs,
+       .cong_control = bpf_tcp_ca_cong_control,
+       .undo_cwnd = bpf_tcp_ca_undo_cwnd,
+       .sndbuf_expand = bpf_tcp_ca_sndbuf_expand,
+
+       .init = __bpf_tcp_ca_init,
+       .release = __bpf_tcp_ca_release,
+};
+
 struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .verifier_ops = &bpf_tcp_ca_verifier_ops,
        .reg = bpf_tcp_ca_reg,
@@ -281,6 +349,7 @@ struct bpf_struct_ops bpf_tcp_congestion_ops = {
        .init = bpf_tcp_ca_init,
        .validate = bpf_tcp_ca_validate,
        .name = "tcp_congestion_ops",
+       .cfi_stubs = &__bpf_ops_tcp_congestion_ops,
 };
 
 static int __init bpf_tcp_ca_kfunc_init(void)