Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf...

author Jakub Kicinski <[email protected]>

Tue, 19 Dec 2023 00:46:07 +0000 (16:46 -0800)

committer Jakub Kicinski <[email protected]>

Tue, 19 Dec 2023 00:46:08 +0000 (16:46 -0800)
author Jakub Kicinski <[email protected]>
Tue, 19 Dec 2023 00:46:07 +0000 (16:46 -0800)
committer Jakub Kicinski <[email protected]>
Tue, 19 Dec 2023 00:46:08 +0000 (16:46 -0800)
diff --combined Documentation/netlink/specs/netdev.yaml

index f2c76d103bd86a832414b4d0bdfa56e893402301,aeec090e1387c4f0cba8d40efb44c54c03768273..3addac97068048a591c9f166bd1271975f2c21b6
--- 1/Documentation/netlink/specs/netdev.yaml
--- 2/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@@ -54,6 -54,10 +54,10 @@@ definitions
           name: hash
           doc:
             Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash().
+       -
+         name: vlan-tag
+         doc:
+           Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
     -
       type: flags
       name: xsk-flags
@@@ -66,10 -70,6 +70,10 @@@
           name: tx-checksum
           doc:
             L3 checksum HW offload is supported by the driver.
+ +  -
+ +    name: queue-type
+ +    type: enum
+ +    entries: [ rx, tx ]
   
   attribute-sets:
     -
@@@ -213,54 -213,6 +217,54 @@@
           name: recycle-released-refcnt
           type: uint
   
+ +  -
+ +    name: napi
+ +    attributes:
+ +      -
+ +        name: ifindex
+ +        doc: ifindex of the netdevice to which NAPI instance belongs.
+ +        type: u32
+ +        checks:
+ +          min: 1
+ +      -
+ +        name: id
+ +        doc: ID of the NAPI instance.
+ +        type: u32
+ +      -
+ +        name: irq
+ +        doc: The associated interrupt vector number for the napi
+ +        type: u32
+ +      -
+ +        name: pid
+ +        doc: PID of the napi thread, if NAPI is configured to operate in
+ +             threaded mode. If NAPI is not in threaded mode (i.e. uses normal
+ +             softirq context), the attribute will be absent.
+ +        type: u32
+ +  -
+ +    name: queue
+ +    attributes:
+ +      -
+ +        name: id
+ +        doc: Queue index; most queue types are indexed like a C array, with
+ +             indexes starting at 0 and ending at queue count - 1. Queue indexes
+ +             are scoped to an interface and queue type.
+ +        type: u32
+ +      -
+ +        name: ifindex
+ +        doc: ifindex of the netdevice to which the queue belongs.
+ +        type: u32
+ +        checks:
+ +          min: 1
+ +      -
+ +        name: type
+ +        doc: Queue type as rx, tx. Each queue type defines a separate ID space.
+ +        type: u32
+ +        enum: queue-type
+ +      -
+ +        name: napi-id
+ +        doc: ID of the NAPI instance which services this queue.
+ +        type: u32
+ +
   operations:
     list:
       -
@@@ -359,48 -311,6 +363,48 @@@
         dump:
           reply: *pp-stats-reply
         config-cond: page-pool-stats
+ +    -
+ +      name: queue-get
+ +      doc: Get queue information from the kernel.
+ +           Only configured queues will be reported (as opposed to all available
+ +           hardware queues).
+ +      attribute-set: queue
+ +      do:
+ +        request:
+ +          attributes:
+ +            - ifindex
+ +            - type
+ +            - id
+ +        reply: &queue-get-op
+ +          attributes:
+ +            - id
+ +            - type
+ +            - napi-id
+ +            - ifindex
+ +      dump:
+ +        request:
+ +          attributes:
+ +            - ifindex
+ +        reply: *queue-get-op
+ +    -
+ +      name: napi-get
+ +      doc: Get information about NAPI instances configured on the system.
+ +      attribute-set: napi
+ +      do:
+ +        request:
+ +          attributes:
+ +            - id
+ +        reply: &napi-get-op
+ +          attributes:
+ +            - id
+ +            - ifindex
+ +            - irq
+ +            - pid
+ +      dump:
+ +        request:
+ +          attributes:
+ +            - ifindex
+ +        reply: *napi-get-op
   
   mcast-groups:
     list:
diff --combined arch/x86/net/bpf_jit_comp.c

index e89e415aa7435311991a945575519a593a2abb44,c89a4abdd72652aa83ed6dfa291bc0efac16702f..bdacbb84456d97a00180c82f8f1a7e93ecd76f77
--- 1/arch/x86/net/bpf_jit_comp.c
--- 2/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@@ -17,6 -17,7 +17,7 @@@
   #include <asm/nospec-branch.h>
   #include <asm/text-patching.h>
   #include <asm/unwind.h>
+ #include <asm/cfi.h>
   
   static bool all_callee_regs_used[4] = {true, true, true, true};
   
@@@ -51,9 -52,11 +52,11 @@@ static u8 *emit_code(u8 *ptr, u32 bytes
         do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
   
   #ifdef CONFIG_X86_KERNEL_IBT
- #define EMIT_ENDBR()  EMIT(gen_endbr(), 4)
+ #define EMIT_ENDBR()          EMIT(gen_endbr(), 4)
+ #define EMIT_ENDBR_POISON()   EMIT(gen_endbr_poison(), 4)
   #else
   #define EMIT_ENDBR()
+ #define EMIT_ENDBR_POISON()
   #endif
   
   static bool is_imm8(int value)
@@@ -304,6 -307,69 +307,69 @@@ static void pop_callee_regs(u8 **pprog
         *pprog = prog;
   }
   
+ /*
+  * Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT
+  * in arch/x86/kernel/alternative.c
+  */
+ 
+ static void emit_fineibt(u8 **pprog, u32 hash)
+ {
+       u8 *prog = *pprog;
+ 
+       EMIT_ENDBR();
+       EMIT3_off32(0x41, 0x81, 0xea, hash);            /* subl $hash, %r10d    */
+       EMIT2(0x74, 0x07);                              /* jz.d8 +7             */
+       EMIT2(0x0f, 0x0b);                              /* ud2                  */
+       EMIT1(0x90);                                    /* nop                  */
+       EMIT_ENDBR_POISON();
+ 
+       *pprog = prog;
+ }
+ 
+ static void emit_kcfi(u8 **pprog, u32 hash)
+ {
+       u8 *prog = *pprog;
+ 
+       EMIT1_off32(0xb8, hash);                        /* movl $hash, %eax     */
+ #ifdef CONFIG_CALL_PADDING
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+       EMIT1(0x90);
+ #endif
+       EMIT_ENDBR();
+ 
+       *pprog = prog;
+ }
+ 
+ static void emit_cfi(u8 **pprog, u32 hash)
+ {
+       u8 *prog = *pprog;
+ 
+       switch (cfi_mode) {
+       case CFI_FINEIBT:
+               emit_fineibt(&prog, hash);
+               break;
+ 
+       case CFI_KCFI:
+               emit_kcfi(&prog, hash);
+               break;
+ 
+       default:
+               EMIT_ENDBR();
+               break;
+       }
+ 
+       *pprog = prog;
+ }
+ 
   /*
    * Emit x86-64 prologue code for BPF program.
    * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
@@@ -315,10 -381,10 +381,10 @@@ static void emit_prologue(u8 **pprog, u
   {
         u8 *prog = *pprog;
   
+       emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
         /* BPF trampoline can be made to work without these nops,
          * but let's waste 5 bytes for now and optimize later
          */
-       EMIT_ENDBR();
         memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
         prog += X86_PATCH_SIZE;
         if (!ebpf_from_cbpf) {
@@@ -2198,7 -2264,8 +2264,8 @@@ static void restore_regs(const struct b
   
   static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
                            struct bpf_tramp_link *l, int stack_size,
-                          int run_ctx_off, bool save_ret)
+                          int run_ctx_off, bool save_ret,
+                          void *image, void *rw_image)
   {
         u8 *prog = *pprog;
         u8 *jmp_insn;
@@@ -2226,7 -2293,7 +2293,7 @@@
         else
                 EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
   
-       if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog))
+       if (emit_rsb_call(&prog, bpf_trampoline_enter(p), image + (prog - (u8 *)rw_image)))
                 return -EINVAL;
         /* remember prog start time returned by __bpf_prog_enter */
         emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@@ -2250,7 -2317,7 +2317,7 @@@
                                (long) p->insnsi >> 32,
                                (u32) (long) p->insnsi);
         /* call JITed bpf program or interpreter */
-       if (emit_rsb_call(&prog, p->bpf_func, prog))
+       if (emit_rsb_call(&prog, p->bpf_func, image + (prog - (u8 *)rw_image)))
                 return -EINVAL;
   
         /*
@@@ -2277,7 -2344,7 +2344,7 @@@
                 EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off);
         else
                 EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
-       if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog))
+       if (emit_rsb_call(&prog, bpf_trampoline_exit(p), image + (prog - (u8 *)rw_image)))
                 return -EINVAL;
   
         *pprog = prog;
@@@ -2312,14 -2379,15 +2379,15 @@@ static int emit_cond_near_jump(u8 **ppr
   
   static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
                       struct bpf_tramp_links *tl, int stack_size,
-                     int run_ctx_off, bool save_ret)
+                     int run_ctx_off, bool save_ret,
+                     void *image, void *rw_image)
   {
         int i;
         u8 *prog = *pprog;
   
         for (i = 0; i < tl->nr_links; i++) {
                 if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
-                                   run_ctx_off, save_ret))
+                                   run_ctx_off, save_ret, image, rw_image))
                         return -EINVAL;
         }
         *pprog = prog;
@@@ -2328,7 -2396,8 +2396,8 @@@
   
   static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
                               struct bpf_tramp_links *tl, int stack_size,
-                             int run_ctx_off, u8 **branches)
+                             int run_ctx_off, u8 **branches,
+                             void *image, void *rw_image)
   {
         u8 *prog = *pprog;
         int i;
@@@ -2339,7 -2408,8 +2408,8 @@@
         emit_mov_imm32(&prog, false, BPF_REG_0, 0);
         emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
         for (i = 0; i < tl->nr_links; i++) {
-               if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true))
+               if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true,
+                                   image, rw_image))
                         return -EINVAL;
   
                 /* mod_ret prog stored return value into [rbp - 8]. Emit:
@@@ -2422,10 -2492,11 +2492,11 @@@
    * add rsp, 8                      // skip eth_type_trans's frame
    * ret                             // return to its caller
    */
- int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
-                               const struct btf_func_model *m, u32 flags,
-                               struct bpf_tramp_links *tlinks,
-                               void *func_addr)
+ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image,
+                                        void *rw_image_end, void *image,
+                                        const struct btf_func_model *m, u32 flags,
+                                        struct bpf_tramp_links *tlinks,
+                                        void *func_addr)
   {
         int i, ret, nr_regs = m->nr_args, stack_size = 0;
         int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
@@@ -2437,10 -2508,19 +2508,19 @@@
         u8 *prog;
         bool save_ret;
   
+       /*
+        * F_INDIRECT is only compatible with F_RET_FENTRY_RET; it is
+        * explicitly incompatible with F_CALL_ORIG | F_SKIP_FRAME | F_IP_ARG
+        * because of @func_addr.
+        */
+       WARN_ON_ONCE((flags & BPF_TRAMP_F_INDIRECT) &&
+                    (flags & ~(BPF_TRAMP_F_INDIRECT | BPF_TRAMP_F_RET_FENTRY_RET)));
+ 
         /* extra registers for struct arguments */
-       for (i = 0; i < m->nr_args; i++)
+       for (i = 0; i < m->nr_args; i++) {
                 if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
                         nr_regs += (m->arg_size[i] + 7) / 8 - 1;
+       }
   
         /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6
          * are passed through regs, the remains are through stack.
@@@ -2521,22 -2601,29 +2601,29 @@@
                 orig_call += X86_PATCH_SIZE;
         }
   
-       prog = image;
+       prog = rw_image;
   
-       EMIT_ENDBR();
-       /*
-        * This is the direct-call trampoline, as such it needs accounting
-        * for the __fentry__ call.
-        */
-       x86_call_depth_emit_accounting(&prog, NULL);
+       if (flags & BPF_TRAMP_F_INDIRECT) {
+               /*
+                * Indirect call for bpf_struct_ops
+                */
+               emit_cfi(&prog, cfi_get_func_hash(func_addr));
+       } else {
+               /*
+                * Direct-call fentry stub, as such it needs accounting for the
+                * __fentry__ call.
+                */
+               x86_call_depth_emit_accounting(&prog, NULL);
+       }
         EMIT1(0x55);             /* push rbp */
         EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
-       if (!is_imm8(stack_size))
+       if (!is_imm8(stack_size)) {
                 /* sub rsp, stack_size */
                 EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
-       else
+       } else {
                 /* sub rsp, stack_size */
                 EMIT4(0x48, 0x83, 0xEC, stack_size);
+       }
         if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
                 EMIT1(0x50);            /* push rax */
         /* mov QWORD PTR [rbp - rbx_off], rbx */
@@@ -2563,16 -2650,18 +2650,18 @@@
         if (flags & BPF_TRAMP_F_CALL_ORIG) {
                 /* arg1: mov rdi, im */
                 emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
-               if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
+               if (emit_rsb_call(&prog, __bpf_tramp_enter,
+                                 image + (prog - (u8 *)rw_image))) {
                         ret = -EINVAL;
                         goto cleanup;
                 }
         }
   
-       if (fentry->nr_links)
+       if (fentry->nr_links) {
                 if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
-                              flags & BPF_TRAMP_F_RET_FENTRY_RET))
+                              flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
                         return -EINVAL;
+       }
   
         if (fmod_ret->nr_links) {
                 branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *),
@@@ -2581,7 -2670,7 +2670,7 @@@
                         return -ENOMEM;
   
                 if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off,
-                                      run_ctx_off, branches)) {
+                                      run_ctx_off, branches, image, rw_image)) {
                         ret = -EINVAL;
                         goto cleanup;
                 }
@@@ -2591,25 -2680,26 +2680,26 @@@
                 restore_regs(m, &prog, regs_off);
                 save_args(m, &prog, arg_stack_off, true);
   
-               if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+               if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
                         /* Before calling the original function, restore the
                          * tail_call_cnt from stack to rax.
                          */
                         RESTORE_TAIL_CALL_CNT(stack_size);
+               }
   
                 if (flags & BPF_TRAMP_F_ORIG_STACK) {
                         emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
                         EMIT2(0xff, 0xd3); /* call *rbx */
                 } else {
                         /* call original function */
-                       if (emit_rsb_call(&prog, orig_call, prog)) {
+                       if (emit_rsb_call(&prog, orig_call, image + (prog - (u8 *)rw_image))) {
                                 ret = -EINVAL;
                                 goto cleanup;
                         }
                 }
                 /* remember return value in a stack for bpf prog to access */
                 emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
-               im->ip_after_call = prog;
+               im->ip_after_call = image + (prog - (u8 *)rw_image);
                 memcpy(prog, x86_nops[5], X86_PATCH_SIZE);
                 prog += X86_PATCH_SIZE;
         }
@@@ -2624,16 -2714,19 +2714,19 @@@
                 /* Update the branches saved in invoke_bpf_mod_ret with the
                  * aligned address of do_fexit.
                  */
-               for (i = 0; i < fmod_ret->nr_links; i++)
-                       emit_cond_near_jump(&branches[i], prog, branches[i],
-                                           X86_JNE);
+               for (i = 0; i < fmod_ret->nr_links; i++) {
+                       emit_cond_near_jump(&branches[i], image + (prog - (u8 *)rw_image),
+                                           image + (branches[i] - (u8 *)rw_image), X86_JNE);
+               }
         }
   
-       if (fexit->nr_links)
-               if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) {
+       if (fexit->nr_links) {
+               if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
+                              false, image, rw_image)) {
                         ret = -EINVAL;
                         goto cleanup;
                 }
+       }
   
         if (flags & BPF_TRAMP_F_RESTORE_REGS)
                 restore_regs(m, &prog, regs_off);
@@@ -2643,18 -2736,19 +2736,19 @@@
          * restored to R0.
          */
         if (flags & BPF_TRAMP_F_CALL_ORIG) {
-               im->ip_epilogue = prog;
+               im->ip_epilogue = image + (prog - (u8 *)rw_image);
                 /* arg1: mov rdi, im */
                 emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
-               if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
+               if (emit_rsb_call(&prog, __bpf_tramp_exit, image + (prog - (u8 *)rw_image))) {
                         ret = -EINVAL;
                         goto cleanup;
                 }
-       } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
+       } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
                 /* Before running the original function, restore the
                  * tail_call_cnt from stack to rax.
                  */
                 RESTORE_TAIL_CALL_CNT(stack_size);
+       }
   
         /* restore return value of orig_call or fentry prog back into RAX */
         if (save_ret)
@@@ -2662,22 -2756,94 +2756,94 @@@
   
         emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
         EMIT1(0xC9); /* leave */
-       if (flags & BPF_TRAMP_F_SKIP_FRAME)
+       if (flags & BPF_TRAMP_F_SKIP_FRAME) {
                 /* skip our return address and return to parent */
                 EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
-       emit_return(&prog, prog);
+       }
+       emit_return(&prog, image + (prog - (u8 *)rw_image));
         /* Make sure the trampoline generation logic doesn't overflow */
-       if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
+       if (WARN_ON_ONCE(prog > (u8 *)rw_image_end - BPF_INSN_SAFETY)) {
                 ret = -EFAULT;
                 goto cleanup;
         }
-       ret = prog - (u8 *)image;
+       ret = prog - (u8 *)rw_image + BPF_INSN_SAFETY;
   
   cleanup:
         kfree(branches);
         return ret;
   }
   
+ void *arch_alloc_bpf_trampoline(unsigned int size)
+ {
+       return bpf_prog_pack_alloc(size, jit_fill_hole);
+ }
+ 
+ void arch_free_bpf_trampoline(void *image, unsigned int size)
+ {
+       bpf_prog_pack_free(image, size);
+ }
+ 
+ void arch_protect_bpf_trampoline(void *image, unsigned int size)
+ {
+ }
+ 
+ void arch_unprotect_bpf_trampoline(void *image, unsigned int size)
+ {
+ }
+ 
+ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
+                               const struct btf_func_model *m, u32 flags,
+                               struct bpf_tramp_links *tlinks,
+                               void *func_addr)
+ {
+       void *rw_image, *tmp;
+       int ret;
+       u32 size = image_end - image;
+ 
+       /* rw_image doesn't need to be in module memory range, so we can
+        * use kvmalloc.
+        */
+       rw_image = kvmalloc(size, GFP_KERNEL);
+       if (!rw_image)
+               return -ENOMEM;
+ 
+       ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m,
+                                           flags, tlinks, func_addr);
+       if (ret < 0)
+               goto out;
+ 
+       tmp = bpf_arch_text_copy(image, rw_image, size);
+       if (IS_ERR(tmp))
+               ret = PTR_ERR(tmp);
+ out:
+       kvfree(rw_image);
+       return ret;
+ }
+ 
+ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+                            struct bpf_tramp_links *tlinks, void *func_addr)
+ {
+       struct bpf_tramp_image im;
+       void *image;
+       int ret;
+ 
+       /* Allocate a temporary buffer for __arch_prepare_bpf_trampoline().
+        * This will NOT cause fragmentation in direct map, as we do not
+        * call set_memory_*() on this buffer.
+        *
+        * We cannot use kvmalloc here, because we need image to be in
+        * module memory range.
+        */
+       image = bpf_jit_alloc_exec(PAGE_SIZE);
+       if (!image)
+               return -ENOMEM;
+ 
+       ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image,
+                                           m, flags, tlinks, func_addr);
+       bpf_jit_free_exec(image);
+       return ret;
+ }
+ 
   static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf)
   {
         u8 *jg_reloc, *prog = *pprog;
@@@ -2935,9 -3101,16 +3101,16 @@@ out_image
                         jit_data->header = header;
                         jit_data->rw_header = rw_header;
                 }
-               prog->bpf_func = (void *)image;
+               /*
+                * ctx.prog_offset is used when CFI preambles put code *before*
+                * the function. See emit_cfi(). For FineIBT specifically this code
+                * can also be executed and bpf_prog_kallsyms_add() will
+                * generate an additional symbol to cover this, hence also
+                * decrement proglen.
+                */
+               prog->bpf_func = (void *)image + cfi_get_offset();
                 prog->jited = 1;
-               prog->jited_len = proglen;
+               prog->jited_len = proglen - cfi_get_offset();
         } else {
                 prog = orig_prog;
         }
@@@ -2992,6 -3165,7 +3165,7 @@@ void bpf_jit_free(struct bpf_prog *prog
                         kvfree(jit_data->addrs);
                         kfree(jit_data);
                 }
+               prog->bpf_func = (void *)prog->bpf_func - cfi_get_offset();
                 hdr = bpf_jit_binary_pack_hdr(prog);
                 bpf_jit_binary_pack_free(hdr, NULL);
                 WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
@@@ -3025,49 -3199,3 +3199,49 @@@ void arch_bpf_stack_walk(bool (*consume
   #endif
         WARN(1, "verification of programs using bpf_throw should have failed\n");
   }
+ +
+ +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ +                             struct bpf_prog *new, struct bpf_prog *old)
+ +{
+ +      u8 *old_addr, *new_addr, *old_bypass_addr;
+ +      int ret;
+ +
+ +      old_bypass_addr = old ? NULL : poke->bypass_addr;
+ +      old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
+ +      new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
+ +
+ +      /*
+ +       * On program loading or teardown, the program's kallsym entry
+ +       * might not be in place, so we use __bpf_arch_text_poke to skip
+ +       * the kallsyms check.
+ +       */
+ +      if (new) {
+ +              ret = __bpf_arch_text_poke(poke->tailcall_target,
+ +                                         BPF_MOD_JUMP,
+ +                                         old_addr, new_addr);
+ +              BUG_ON(ret < 0);
+ +              if (!old) {
+ +                      ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ +                                                 BPF_MOD_JUMP,
+ +                                                 poke->bypass_addr,
+ +                                                 NULL);
+ +                      BUG_ON(ret < 0);
+ +              }
+ +      } else {
+ +              ret = __bpf_arch_text_poke(poke->tailcall_bypass,
+ +                                         BPF_MOD_JUMP,
+ +                                         old_bypass_addr,
+ +                                         poke->bypass_addr);
+ +              BUG_ON(ret < 0);
+ +              /* let other CPUs finish the execution of program
+ +               * so that it will not be possible to expose them
+ +               * to invalid nop, stack unwind, nop state
+ +               */
+ +              if (!ret)
+ +                      synchronize_rcu();
+ +              ret = __bpf_arch_text_poke(poke->tailcall_target,
+ +                                         BPF_MOD_JUMP,
+ +                                         old_addr, NULL);
+ +              BUG_ON(ret < 0);
+ +      }
+ +}
diff --combined drivers/net/ethernet/intel/ice/ice.h

index 50304e4a4fb020aca4f300bdec9ace141ebf8b87,9cf4ed3d28857da063cb6dca0a72d6444b57c6e8..2defac6d91680a08b6683b78a881cf6da451789c
--- 1/drivers/net/ethernet/intel/ice/ice.h
--- 2/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@@ -360,7 -360,6 +360,7 @@@ struct ice_vsi 
         /* RSS config */
         u16 rss_table_size;     /* HW RSS table size */
         u16 rss_size;           /* Allocated RSS queues */
+ +      u8 rss_hfunc;           /* User configured hash type */
         u8 *rss_hkey_user;      /* User configured hash keys */
         u8 *rss_lut_user;       /* User configured lookup table entries */
         u8 rss_lut_type;        /* used to configure Get/Set RSS LUT AQ call */
@@@ -571,10 -570,6 +571,10 @@@ struct ice_pf 
         struct ice_vsi_stats **vsi_stats;
         struct ice_sw *first_sw;        /* first switch created by firmware */
         u16 eswitch_mode;               /* current mode of eswitch */
+ +      struct dentry *ice_debugfs_pf;
+ +      struct dentry *ice_debugfs_pf_fwlog;
+ +      /* keep track of all the dentries for FW log modules */
+ +      struct dentry **ice_debugfs_pf_fwlog_modules;
         struct ice_vfs vfs;
         DECLARE_BITMAP(features, ICE_F_MAX);
         DECLARE_BITMAP(state, ICE_STATE_NBITS);
@@@ -660,7 -655,6 +660,7 @@@
   #define ICE_MAX_VF_AGG_NODES          32
         struct ice_agg_node vf_agg_node[ICE_MAX_VF_AGG_NODES];
         struct ice_dplls dplls;
+ +      struct device *hwmon_dev;
   };
   
   extern struct workqueue_struct *ice_lag_wq;
@@@ -894,11 -888,6 +894,11 @@@ static inline bool ice_is_adq_active(st
         return false;
   }
   
+ +void ice_debugfs_fwlog_init(struct ice_pf *pf);
+ +void ice_debugfs_init(void);
+ +void ice_debugfs_exit(void);
+ +void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module);
+ +
   bool netif_is_ice(const struct net_device *dev);
   int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
   int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
@@@ -930,7 -919,6 +930,7 @@@ int ice_set_rss_lut(struct ice_vsi *vsi
   int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
   int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed);
   int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
+ +int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc);
   void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
   int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
   void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
@@@ -1008,4 -996,6 +1008,6 @@@ static inline void ice_clear_rdma_cap(s
         set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
         clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
   }
+ 
+ extern const struct xdp_metadata_ops ice_xdp_md_ops;
   #endif /* _ICE_H_ */
diff --combined drivers/net/ethernet/intel/ice/ice_base.c

index edad5f9ab16ce031d18a1ee23886017744e20e57,a040f02a342e7b3e34a19bfcc399a2dbbcddc03a..b25b7f41596523e1061351383f3cff745e865716
--- 1/drivers/net/ethernet/intel/ice/ice_base.c
--- 2/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@@ -189,18 -189,10 +189,18 @@@ static void ice_free_q_vector(struct ic
         }
         q_vector = vsi->q_vectors[v_idx];
   
- -      ice_for_each_tx_ring(tx_ring, q_vector->tx)
+ +      ice_for_each_tx_ring(tx_ring, q_vector->tx) {
+ +              if (vsi->netdev)
+ +                      netif_queue_set_napi(vsi->netdev, tx_ring->q_index,
+ +                                           NETDEV_QUEUE_TYPE_TX, NULL);
                 tx_ring->q_vector = NULL;
- -      ice_for_each_rx_ring(rx_ring, q_vector->rx)
+ +      }
+ +      ice_for_each_rx_ring(rx_ring, q_vector->rx) {
+ +              if (vsi->netdev)
+ +                      netif_queue_set_napi(vsi->netdev, rx_ring->q_index,
+ +                                           NETDEV_QUEUE_TYPE_RX, NULL);
                 rx_ring->q_vector = NULL;
+ +      }
   
         /* only VSI with an associated netdev is set up with NAPI */
         if (vsi->netdev)
@@@ -527,6 -519,19 +527,19 @@@ static int ice_setup_rx_ctx(struct ice_
         return 0;
   }
   
+ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring)
+ {
+       void *ctx_ptr = &ring->pkt_ctx;
+       struct xsk_cb_desc desc = {};
+ 
+       XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff);
+       desc.src = &ctx_ptr;
+       desc.off = offsetof(struct ice_xdp_buff, pkt_ctx) -
+                  sizeof(struct xdp_buff);
+       desc.bytes = sizeof(ctx_ptr);
+       xsk_pool_fill_cb(ring->xsk_pool, &desc);
+ }
+ 
   /**
    * ice_vsi_cfg_rxq - Configure an Rx queue
    * @ring: the ring being configured
@@@ -561,6 -566,7 +574,7 @@@ int ice_vsi_cfg_rxq(struct ice_rx_ring 
                         if (err)
                                 return err;
                         xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
+                       ice_xsk_pool_fill_cb(ring);
   
                         dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
                                  ring->q_index);
@@@ -583,6 -589,7 +597,7 @@@
   
         xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq);
         ring->xdp.data = NULL;
+       ring->xdp_ext.pkt_ctx = &ring->pkt_ctx;
         err = ice_setup_rx_ctx(ring);
         if (err) {
                 dev_err(dev, "ice_setup_rx_ctx failed for RxQ %d, err %d\n",
diff --combined drivers/net/ethernet/intel/ice/ice_main.c

index 9b0c04d595ced6ed9051be9a65e5eaeb7696e634,86f704850aa6ed88f1d95832e06c571ec67adf4d..b97d116650beb00731ec1ac4677ef25cd177eb10
--- 1/drivers/net/ethernet/intel/ice/ice_main.c
--- 2/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@@ -14,7 -14,6 +14,7 @@@
   #include "ice_dcb_lib.h"
   #include "ice_dcb_nl.h"
   #include "ice_devlink.h"
+ +#include "ice_hwmon.h"
   /* Including ice_trace.h with CREATE_TRACE_POINTS defined will generate the
    * ice tracepoint functions. This must be done exactly once across the
    * ice driver.
@@@ -1252,32 -1251,6 +1252,32 @@@ ice_handle_link_event(struct ice_pf *pf
         return status;
   }
   
+ +/**
+ + * ice_get_fwlog_data - store the FW log data of an ARQ event in the FW log ring
+ + * @pf: PF that the FW log event is associated with
+ + * @event: event structure containing FW log data
+ + *
+ + * The ring entry at the current tail is cleared, filled from the event's
+ + * message buffer and the tail is advanced. If the ring is full afterwards,
+ + * the head is advanced too so that room is created.
+ + *
+ + * NOTE(review): the copy length comes straight from the descriptor's datalen
+ + * and is not bounded here against the PAGE_SIZE that gets cleared - confirm
+ + * that it always fits.
+ + */
+ +static void
+ +ice_get_fwlog_data(struct ice_pf *pf, struct ice_rq_event_info *event)
+ +{
+ +      struct ice_hw *hw = &pf->hw;
+ +      struct ice_fwlog_data *entry;
+ +
+ +      entry = &hw->fwlog_ring.rings[hw->fwlog_ring.tail];
+ +      entry->data_size = le16_to_cpu(event->desc.datalen);
+ +
+ +      memset(entry->data, 0, PAGE_SIZE);
+ +      memcpy(entry->data, event->msg_buf, entry->data_size);
+ +
+ +      ice_fwlog_ring_increment(&hw->fwlog_ring.tail, hw->fwlog_ring.size);
+ +
+ +      /* nothing else to do while the ring is not full */
+ +      if (!ice_fwlog_ring_full(&hw->fwlog_ring))
+ +              return;
+ +
+ +      /* the rings are full so bump the head to create room */
+ +      ice_fwlog_ring_increment(&hw->fwlog_ring.head, hw->fwlog_ring.size);
+ +}
+ +
+ +
   /**
    * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware
    * @pf: pointer to the PF private structure
@@@ -1559,8 -1532,8 +1559,8 @@@ static int __ice_clean_ctrlq(struct ice
   
                         ice_vc_process_vf_msg(pf, &event, &data);
                         break;
- -              case ice_aqc_opc_fw_logging:
- -                      ice_output_fw_log(hw, &event.desc, event.msg_buf);
+ +              case ice_aqc_opc_fw_logs_event:
+ +                      ice_get_fwlog_data(pf, &event);
                         break;
                 case ice_aqc_opc_lldp_set_mib_change:
                         ice_dcb_process_lldp_set_mib_change(pf, &event);
@@@ -3177,7 -3150,7 +3177,7 @@@ static irqreturn_t ice_misc_intr(int __
   
         if (oicr & PFINT_OICR_TSYN_TX_M) {
                 ena_mask &= ~PFINT_OICR_TSYN_TX_M;
- -              if (!hw->reset_ongoing && ice_ptp_pf_handles_tx_interrupt(pf))
+ +              if (ice_ptp_pf_handles_tx_interrupt(pf))
                         set_bit(ICE_MISC_THREAD_TX_TSTAMP, pf->misc_thread);
         }
   
@@@ -3402,11 -3375,9 +3402,11 @@@ static void ice_napi_add(struct ice_vs
         if (!vsi->netdev)
                 return;
   
- -      ice_for_each_q_vector(vsi, v_idx)
+ +      ice_for_each_q_vector(vsi, v_idx) {
                 netif_napi_add(vsi->netdev, &vsi->q_vectors[v_idx]->napi,
                                ice_napi_poll);
+ +              ice_q_vector_set_napi_queues(vsi->q_vectors[v_idx], false);
+ +      }
   }
   
   /**
@@@ -3426,6 -3397,7 +3426,7 @@@ static void ice_set_ops(struct ice_vsi 
   
         netdev->netdev_ops = &ice_netdev_ops;
         netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic;
+       netdev->xdp_metadata_ops = &ice_xdp_md_ops;
         ice_set_ethtool_ops(netdev);
   
         if (vsi->type != ICE_VSI_PF)
@@@ -4389,19 -4361,6 +4390,19 @@@ static void ice_print_wake_reason(struc
         dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
   }
   
+ +/**
+ + * ice_pf_fwlog_update_module - set the log level of a single FW log module
+ + * @pf: pointer to the PF struct
+ + * @log_level: log_level to use for the @module
+ + * @module: index of the module entry to update
+ + *
+ + * Only the fwlog_cfg module entry kept in the PF's HW struct is written.
+ + */
+ +void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module)
+ +{
+ +      pf->hw.fwlog_cfg.module_entries[module].log_level = log_level;
+ +}
+ +
+ +
   /**
    * ice_register_netdev - register netdev
    * @vsi: pointer to the VSI struct
@@@ -4727,8 -4686,6 +4728,8 @@@ static void ice_init_features(struct ic
   
         if (ice_init_lag(pf))
                 dev_warn(dev, "Failed to init link aggregation support\n");
+ +
+ +      ice_hwmon_init(pf);
   }
   
   static void ice_deinit_features(struct ice_pf *pf)
@@@ -5249,15 -5206,11 +5250,15 @@@ static void ice_remove(struct pci_dev *
                 msleep(100);
         }
   
+ +      ice_debugfs_exit();
+ +
         if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
                 set_bit(ICE_VF_RESETS_DISABLED, pf->state);
                 ice_free_vfs(pf);
         }
   
+ +      ice_hwmon_exit(pf);
+ +
         ice_service_task_stop(pf);
         ice_aq_cancel_waiting_tasks(pf);
         set_bit(ICE_DOWN, pf->state);
@@@ -5722,8 -5675,6 +5723,8 @@@ static int __init ice_module_init(void
                 goto err_dest_wq;
         }
   
+ +      ice_debugfs_init();
+ +
         status = pci_register_driver(&ice_driver);
         if (status) {
                 pr_err("failed to register PCI driver, err %d\n", status);
@@@ -5734,7 -5685,6 +5735,7 @@@
   
   err_dest_lag_wq:
         destroy_workqueue(ice_lag_wq);
+ +      ice_debugfs_exit();
   err_dest_wq:
         destroy_workqueue(ice_wq);
         return status;
@@@ -6093,6 -6043,23 +6094,23 @@@ ice_fix_features(struct net_device *net
         return features;
   }
   
+ /**
+  * ice_set_rx_rings_vlan_proto - propagate the stripped VLAN proto to Rx rings
+  * @vsi: PF's VSI
+  * @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order
+  *
+  * Writes @vlan_ethertype into the packet context of every allocated Rx ring
+  * of @vsi, so packet processing code can read it from the ring directly.
+  */
+ static void
+ ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype)
+ {
+       u16 ring_idx;
+ 
+       ice_for_each_alloc_rxq(vsi, ring_idx) {
+               vsi->rx_rings[ring_idx]->pkt_ctx.vlan_proto = vlan_ethertype;
+       }
+ }
+ 
+ 
   /**
    * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI
    * @vsi: PF's VSI
@@@ -6135,6 -6102,9 +6153,9 @@@ ice_set_vlan_offload_features(struct ic
         if (strip_err || insert_err)
                 return -EIO;
   
+       ice_set_rx_rings_vlan_proto(vsi, enable_stripping ?
+                                   htons(vlan_ethertype) : 0);
+ 
         return 0;
   }
   
@@@ -7756,59 -7726,6 +7777,59 @@@ int ice_get_rss_key(struct ice_vsi *vsi
         return status;
   }
   
+ +/**
+ + * ice_set_rss_hfunc - Set RSS HASH function
+ + * @vsi: Pointer to VSI structure
+ + * @hfunc: hash function (ICE_AQ_VSI_Q_OPT_RSS_*)
+ + *
+ + * Nothing is done when @hfunc already equals vsi->rss_hfunc. Only the
+ + * Toeplitz and symmetric Toeplitz functions are accepted. After a successful
+ + * VSI update the cached q_opt_rss and rss_hfunc are refreshed and the
+ + * symmetry setting of the existing RSS configurations is adjusted.
+ + *
+ + * Returns 0 on success, negative on failure (-EOPNOTSUPP for an unsupported
+ + * @hfunc, -ENOMEM when the VSI context cannot be allocated, otherwise the
+ + * error reported by ice_update_vsi() or ice_set_rss_cfg_symm())
+ + */
+ +int ice_set_rss_hfunc(struct ice_vsi *vsi, u8 hfunc)
+ +{
+ +      struct ice_hw *hw = &vsi->back->hw;
+ +      struct ice_vsi_ctx *ctx;
+ +      bool symm;
+ +      int err;
+ +
+ +      if (hfunc == vsi->rss_hfunc)
+ +              return 0;
+ +
+ +      if (hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ &&
+ +          hfunc != ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ)
+ +              return -EOPNOTSUPP;
+ +
+ +      ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ +      if (!ctx)
+ +              return -ENOMEM;
+ +
+ +      /* The queue-options section is marked valid, so carry over the
+ +       * current tc and flags values unchanged and replace only the hash
+ +       * function bits of q_opt_rss.
+ +       */
+ +      ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID);
+ +      ctx->info.q_opt_rss = vsi->info.q_opt_rss;
+ +      ctx->info.q_opt_rss &= ~ICE_AQ_VSI_Q_OPT_RSS_HASH_M;
+ +      ctx->info.q_opt_rss |=
+ +              FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hfunc);
+ +      ctx->info.q_opt_tc = vsi->info.q_opt_tc;
+ +      /* flags must come from the cached flags, not from q_opt_rss */
+ +      ctx->info.q_opt_flags = vsi->info.q_opt_flags;
+ +
+ +      err = ice_update_vsi(hw, vsi->idx, ctx, NULL);
+ +      if (err) {
+ +              dev_err(ice_pf_to_dev(vsi->back), "Failed to configure RSS hash for VSI %d, error %d\n",
+ +                      vsi->vsi_num, err);
+ +      } else {
+ +              vsi->info.q_opt_rss = ctx->info.q_opt_rss;
+ +              vsi->rss_hfunc = hfunc;
+ +              netdev_info(vsi->netdev, "Hash function set to: %sToeplitz\n",
+ +                          hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ ?
+ +                          "Symmetric " : "");
+ +      }
+ +      kfree(ctx);
+ +      if (err)
+ +              return err;
+ +
+ +      /* Fix the symmetry setting for all existing RSS configurations */
+ +      symm = !!(hfunc == ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ);
+ +      return ice_set_rss_cfg_symm(hw, vsi, symm);
+ +}
+ +
+ +
   /**
    * ice_bridge_getlink - Get the hardware bridge mode
    * @skb: skb buff
@@@ -8244,12 -8161,13 +8265,12 @@@ static int ice_add_vsi_to_fdir(struct i
   
                 for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
                         enum ice_flow_priority prio;
- -                      u64 prof_id;
   
                         /* add this VSI to FDir profile for this flow */
                         prio = ICE_FLOW_PRIO_NORMAL;
                         prof = hw->fdir_prof[flow];
- -                      prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
- -                      status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id,
+ +                      status = ice_flow_add_entry(hw, ICE_BLK_FD,
+ +                                                  prof->prof_id[tun],
                                                     prof->vsi_h[0], vsi->idx,
                                                     prio, prof->fdir_seg[tun],
                                                     &entry_h);
diff --combined drivers/net/ethernet/intel/ice/ice_ptp.c

index e9e59f4b5580b2da1dfcf62e484ee45205ecf9af,a4d3a9ee409a0d4911a4e160164df29b23ffacf4..239cf8a2ee8011aa645d55277ee6b2534de9e7e8
--- 1/drivers/net/ethernet/intel/ice/ice_ptp.c
--- 2/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@@ -7,7 -7,7 +7,7 @@@
   
   #define E810_OUT_PROP_DELAY_NS 1
   
- -#define UNKNOWN_INCVAL_E822 0x100000000ULL
+ +#define UNKNOWN_INCVAL_E82X 0x100000000ULL
   
   static const struct ptp_pin_desc ice_pin_desc_e810t[] = {
         /* name    idx   func         chan */
@@@ -705,9 -705,7 +705,9 @@@ static enum ice_tx_tstamp_work ice_ptp_
   
                 /* Read the Tx ready status first */
                 err = ice_get_phy_tx_tstamp_ready(&pf->hw, i, &tstamp_ready);
- -              if (err || tstamp_ready)
+ +              if (err)
+ +                      break;
+ +              else if (tstamp_ready)
                         return ICE_TX_TSTAMP_WORK_PENDING;
         }
   
@@@ -877,7 -875,7 +877,7 @@@ ice_ptp_release_tx_tracker(struct ice_p
   }
   
   /**
- - * ice_ptp_init_tx_e822 - Initialize tracking for Tx timestamps
+ + * ice_ptp_init_tx_e82x - Initialize tracking for Tx timestamps
    * @pf: Board private structure
    * @tx: the Tx tracking structure to initialize
    * @port: the port this structure tracks
@@@ -888,11 -886,11 +888,11 @@@
    * registers into chunks based on the port number.
    */
   static int
- -ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
+ +ice_ptp_init_tx_e82x(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
   {
         tx->block = port / ICE_PORTS_PER_QUAD;
- -      tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E822;
- -      tx->len = INDEX_PER_PORT_E822;
+ +      tx->offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT_E82X;
+ +      tx->len = INDEX_PER_PORT_E82X;
         tx->verify_cached = 0;
   
         return ice_ptp_alloc_tx_tracker(tx);
@@@ -1095,10 -1093,10 +1095,10 @@@ static u64 ice_base_incval(struct ice_p
   
         if (ice_is_e810(hw))
                 incval = ICE_PTP_NOMINAL_INCVAL_E810;
- -      else if (ice_e822_time_ref(hw) < NUM_ICE_TIME_REF_FREQ)
- -              incval = ice_e822_nominal_incval(ice_e822_time_ref(hw));
+ +      else if (ice_e82x_time_ref(hw) < NUM_ICE_TIME_REF_FREQ)
+ +              incval = ice_e82x_nominal_incval(ice_e82x_time_ref(hw));
         else
- -              incval = UNKNOWN_INCVAL_E822;
+ +              incval = UNKNOWN_INCVAL_E82X;
   
         dev_dbg(ice_pf_to_dev(pf), "PTP: using base increment value of 0x%016llx\n",
                 incval);
@@@ -1127,10 -1125,10 +1127,10 @@@ static int ice_ptp_check_tx_fifo(struc
   
         /* need to read FIFO state */
         if (offs == 0 || offs == 1)
- -              err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO01_STATUS,
+ +              err = ice_read_quad_reg_e82x(hw, quad, Q_REG_FIFO01_STATUS,
                                              &val);
         else
- -              err = ice_read_quad_reg_e822(hw, quad, Q_REG_FIFO23_STATUS,
+ +              err = ice_read_quad_reg_e82x(hw, quad, Q_REG_FIFO23_STATUS,
                                              &val);
   
         if (err) {
@@@ -1158,7 -1156,7 +1158,7 @@@
                 dev_dbg(ice_pf_to_dev(pf),
                         "Port %d Tx FIFO still not empty; resetting quad %d\n",
                         port->port_num, quad);
- -              ice_ptp_reset_ts_memory_quad_e822(hw, quad);
+ +              ice_ptp_reset_ts_memory_quad_e82x(hw, quad);
                 port->tx_fifo_busy_cnt = FIFO_OK;
                 return 0;
         }
@@@ -1203,8 -1201,8 +1203,8 @@@ static void ice_ptp_wait_for_offsets(st
   
         tx_err = ice_ptp_check_tx_fifo(port);
         if (!tx_err)
- -              tx_err = ice_phy_cfg_tx_offset_e822(hw, port->port_num);
- -      rx_err = ice_phy_cfg_rx_offset_e822(hw, port->port_num);
+ +              tx_err = ice_phy_cfg_tx_offset_e82x(hw, port->port_num);
+ +      rx_err = ice_phy_cfg_rx_offset_e82x(hw, port->port_num);
         if (tx_err || rx_err) {
                 /* Tx and/or Rx offset not yet configured, try again later */
                 kthread_queue_delayed_work(pf->ptp.kworker,
@@@ -1233,7 -1231,7 +1233,7 @@@ ice_ptp_port_phy_stop(struct ice_ptp_po
   
         kthread_cancel_delayed_work_sync(&ptp_port->ov_work);
   
- -      err = ice_stop_phy_timer_e822(hw, port, true);
+ +      err = ice_stop_phy_timer_e82x(hw, port, true);
         if (err)
                 dev_err(ice_pf_to_dev(pf), "PTP failed to set PHY port %d down, err %d\n",
                         port, err);
@@@ -1276,7 -1274,7 +1276,7 @@@ ice_ptp_port_phy_restart(struct ice_ptp
         ptp_port->tx_fifo_busy_cnt = 0;
   
         /* Start the PHY timer in Vernier mode */
- -      err = ice_start_phy_timer_e822(hw, port);
+ +      err = ice_start_phy_timer_e82x(hw, port);
         if (err)
                 goto out_unlock;
   
@@@ -1325,7 -1323,7 +1325,7 @@@ void ice_ptp_link_change(struct ice_pf 
         case ICE_PHY_E810:
                 /* Do not reconfigure E810 PHY */
                 return;
- -      case ICE_PHY_E822:
+ +      case ICE_PHY_E82X:
                 ice_ptp_port_phy_restart(ptp_port);
                 return;
         default:
@@@ -1351,7 -1349,7 +1351,7 @@@ static int ice_ptp_tx_ena_intr(struct i
         ice_ptp_reset_ts_memory(hw);
   
         for (quad = 0; quad < ICE_MAX_QUAD; quad++) {
- -              err = ice_read_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
+ +              err = ice_read_quad_reg_e82x(hw, quad, Q_REG_TX_MEM_GBL_CFG,
                                              &val);
                 if (err)
                         break;
@@@ -1365,7 -1363,7 +1365,7 @@@
                         val &= ~Q_REG_TX_MEM_GBL_CFG_INTR_ENA_M;
                 }
   
- -              err = ice_write_quad_reg_e822(hw, quad, Q_REG_TX_MEM_GBL_CFG,
+ +              err = ice_write_quad_reg_e82x(hw, quad, Q_REG_TX_MEM_GBL_CFG,
                                               val);
                 if (err)
                         break;
@@@ -1603,7 -1601,7 +1603,7 @@@ static int ice_ptp_cfg_clkout(struct ic
         if (ice_is_e810(hw))
                 start_time -= E810_OUT_PROP_DELAY_NS;
         else
- -              start_time -= ice_e822_pps_delay(ice_e822_time_ref(hw));
+ +              start_time -= ice_e82x_pps_delay(ice_e82x_time_ref(hw));
   
         /* 2. Write TARGET time */
         wr32(hw, GLTSYN_TGT_L(chan, tmr_idx), lower_32_bits(start_time));
@@@ -1842,7 -1840,7 +1842,7 @@@ ice_ptp_settime64(struct ptp_clock_inf
         ice_ptp_enable_all_clkout(pf);
   
         /* Recalibrate and re-enable timestamp blocks for E822/E823 */
- -      if (hw->phy_model == ICE_PHY_E822)
+ +      if (hw->phy_model == ICE_PHY_E82X)
                 ice_ptp_restart_all_phy(pf);
   exit:
         if (err) {
@@@ -2129,30 -2127,26 +2129,26 @@@ int ice_ptp_set_ts_config(struct ice_p
   }
   
   /**
-  * ice_ptp_rx_hwtstamp - Check for an Rx timestamp
-  * @rx_ring: Ring to get the VSI info
+  * ice_ptp_get_rx_hwts - Get packet Rx timestamp in ns
    * @rx_desc: Receive descriptor
-  * @skb: Particular skb to send timestamp with
+  * @pkt_ctx: Packet context to get the cached time
    *
    * The driver receives a notification in the receive descriptor with timestamp.
-  * The timestamp is in ns, so we must convert the result first.
    */
- void
- ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
-                   union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
+ u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
+                       const struct ice_pkt_ctx *pkt_ctx)
   {
-       struct skb_shared_hwtstamps *hwtstamps;
         u64 ts_ns, cached_time;
         u32 ts_high;
   
         if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID))
-               return;
+               return 0;
   
-       cached_time = READ_ONCE(rx_ring->cached_phctime);
+       cached_time = READ_ONCE(pkt_ctx->cached_phctime);
   
         /* Do not report a timestamp if we don't have a cached PHC time */
         if (!cached_time)
-               return;
+               return 0;
   
         /* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached
          * PHC value, rather than accessing the PF. This also allows us to
@@@ -2163,9 -2157,7 +2159,7 @@@
         ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
         ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high);
   
-       hwtstamps = skb_hwtstamps(skb);
-       memset(hwtstamps, 0, sizeof(*hwtstamps));
-       hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
+       return ts_ns;
   }
   
   /**
@@@ -2442,54 -2434,6 +2436,54 @@@ enum ice_tx_tstamp_work ice_ptp_process
         }
   }
   
+ +/**
+ + * ice_ptp_maybe_trigger_tx_interrupt - Trigger Tx timestamp interrupt
+ + * @pf: Board private structure
+ + *
+ + * The device PHY issues Tx timestamp interrupts to the driver for processing
+ + * timestamp data from the PHY. It will not interrupt again until all
+ + * current timestamp data is read. In rare circumstances, it is possible that
+ + * the driver fails to read all outstanding data.
+ + *
+ + * To avoid getting permanently stuck, periodically check if the PHY has
+ + * outstanding timestamp data. If so, trigger an interrupt from software to
+ + * process this data.
+ + *
+ + * Nothing is done on E810 devices or when this PF does not own the source
+ + * timer. Quads whose ready status cannot be read are skipped.
+ + */
+ +static void ice_ptp_maybe_trigger_tx_interrupt(struct ice_pf *pf)
+ +{
+ +      struct device *dev = ice_pf_to_dev(pf);
+ +      struct ice_hw *hw = &pf->hw;
+ +      unsigned int quad;
+ +
+ +      if (ice_is_e810(hw) || !ice_pf_src_tmr_owned(pf))
+ +              return;
+ +
+ +      /* stop at the first quad that reports waiting timestamps */
+ +      for (quad = 0; quad < ICE_MAX_QUAD; quad++) {
+ +              u64 tstamp_ready;
+ +
+ +              if (ice_get_phy_tx_tstamp_ready(hw, quad, &tstamp_ready))
+ +                      continue;
+ +
+ +              if (tstamp_ready)
+ +                      break;
+ +      }
+ +
+ +      /* loop ran to completion: no quad has outstanding data */
+ +      if (quad == ICE_MAX_QUAD)
+ +              return;
+ +
+ +      /* Trigger a software interrupt, to ensure this data
+ +       * gets processed.
+ +       */
+ +      dev_dbg(dev, "PTP periodic task detected waiting timestamps. Triggering Tx timestamp interrupt now.\n");
+ +
+ +      wr32(hw, PFINT_OICR, PFINT_OICR_TSYN_TX_M);
+ +      ice_flush(hw);
+ +}
+ +
+ +
   static void ice_ptp_periodic_work(struct kthread_work *work)
   {
         struct ice_ptp *ptp = container_of(work, struct ice_ptp, work.work);
@@@ -2501,8 -2445,6 +2495,8 @@@
   
         err = ice_ptp_update_cached_phctime(pf);
   
+ +      ice_ptp_maybe_trigger_tx_interrupt(pf);
+ +
         /* Run twice a second or reschedule if phc update failed */
         kthread_queue_delayed_work(ptp->kworker, &ptp->work,
                                    msecs_to_jiffies(err ? 10 : 500));
@@@ -2520,10 -2462,12 +2514,10 @@@ void ice_ptp_reset(struct ice_pf *pf
         int err, itr = 1;
         u64 time_diff;
   
- -      if (test_bit(ICE_PFR_REQ, pf->state))
+ +      if (test_bit(ICE_PFR_REQ, pf->state) ||
+ +          !ice_pf_src_tmr_owned(pf))
                 goto pfr;
   
- -      if (!ice_pf_src_tmr_owned(pf))
- -              goto reset_ts;
- -
         err = ice_ptp_init_phc(hw);
         if (err)
                 goto err;
@@@ -2567,6 -2511,10 +2561,6 @@@
                         goto err;
         }
   
- -reset_ts:
- -      /* Restart the PHY timestamping block */
- -      ice_ptp_reset_phy_timestamping(pf);
- -
   pfr:
         /* Init Tx structures */
         if (ice_is_e810(&pf->hw)) {
@@@ -2574,7 -2522,7 +2568,7 @@@
         } else {
                 kthread_init_delayed_work(&ptp->port.ov_work,
                                           ice_ptp_wait_for_offsets);
- -              err = ice_ptp_init_tx_e822(pf, &ptp->port.tx,
+ +              err = ice_ptp_init_tx_e82x(pf, &ptp->port.tx,
                                            ptp->port.port_num);
         }
         if (err)
@@@ -2582,11 -2530,6 +2576,11 @@@
   
         set_bit(ICE_FLAG_PTP, pf->flags);
   
+ +      /* Restart the PHY timestamping block */
+ +      if (!test_bit(ICE_PFR_REQ, pf->state) &&
+ +          ice_pf_src_tmr_owned(pf))
+ +              ice_ptp_restart_all_phy(pf);
+ +
         /* Start periodic work going */
         kthread_queue_delayed_work(ptp->kworker, &ptp->work, 0);
   
@@@ -2947,11 -2890,11 +2941,11 @@@ static int ice_ptp_init_port(struct ice
         switch (hw->phy_model) {
         case ICE_PHY_E810:
                 return ice_ptp_init_tx_e810(pf, &ptp_port->tx);
- -      case ICE_PHY_E822:
+ +      case ICE_PHY_E82X:
                 kthread_init_delayed_work(&ptp_port->ov_work,
                                           ice_ptp_wait_for_offsets);
   
- -              return ice_ptp_init_tx_e822(pf, &ptp_port->tx,
+ +              return ice_ptp_init_tx_e82x(pf, &ptp_port->tx,
                                             ptp_port->port_num);
         default:
                 return -ENODEV;
@@@ -3038,7 -2981,7 +3032,7 @@@ static void ice_ptp_remove_auxbus_devic
   static void ice_ptp_init_tx_interrupt_mode(struct ice_pf *pf)
   {
         switch (pf->hw.phy_model) {
- -      case ICE_PHY_E822:
+ +      case ICE_PHY_E82X:
                 /* E822 based PHY has the clock owner process the interrupt
                  * for all ports.
                  */
diff --combined drivers/net/ethernet/intel/ice/ice_ptp.h

index d7928106140970ec92957b3cf2eb1a0bec73fd03,5c6450e4f2f2d05a593e9ef54081f43eaec3b494..032653a7a133cde3d716579f9cd9eac326521ae5
--- 1/drivers/net/ethernet/intel/ice/ice_ptp.h
--- 2/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@@ -147,7 -147,7 +147,7 @@@ struct ice_ptp_tx 
   
   /* Quad and port information for initializing timestamp blocks */
   #define INDEX_PER_QUAD                        64
- -#define INDEX_PER_PORT_E822           16
+ +#define INDEX_PER_PORT_E82X           16
   #define INDEX_PER_PORT_E810           64
   
   /**
@@@ -298,9 -298,8 +298,8 @@@ void ice_ptp_extts_event(struct ice_pf 
   s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
   enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf);
   
- void
- ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
-                   union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
+ u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
+                       const struct ice_pkt_ctx *pkt_ctx);
   void ice_ptp_reset(struct ice_pf *pf);
   void ice_ptp_prepare_for_reset(struct ice_pf *pf);
   void ice_ptp_init(struct ice_pf *pf);
@@@ -329,9 -328,14 +328,14 @@@ static inline bool ice_ptp_process_ts(s
   {
         return true;
   }
- static inline void
- ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
-                   union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
+ 
+ static inline u64
+ ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc,
+                   const struct ice_pkt_ctx *pkt_ctx)
+ {
+       return 0; /* inline stub: ignores both arguments and always reports 0 */
+ }
+ 
   static inline void ice_ptp_reset(struct ice_pf *pf) { }
   static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
   static inline void ice_ptp_init(struct ice_pf *pf) { }
diff --combined drivers/net/veth.c

index 977861c46b1fe16a27872b5df76e067409ae2da2,1efdbe4b92f53ac0dae90def3c7d6ecf3f9ffb03..578e36ea1589c11f1ca26b6e05a84b455d22999e
--- 1/drivers/net/veth.c
--- 2/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@@ -790,8 -790,7 +790,8 @@@ static int veth_convert_skb_to_xdp_buff
   
                         skb_add_rx_frag(nskb, i, page, page_offset, size,
                                         truesize);
- -                      if (skb_copy_bits(skb, off, page_address(page),
+ +                      if (skb_copy_bits(skb, off,
+ +                                        page_address(page) + page_offset,
                                           size)) {
                                 consume_skb(nskb);
                                 goto drop;
@@@ -1723,6 -1722,24 +1723,24 @@@ static int veth_xdp_rx_hash(const struc
         return 0;
   }
   
+ /* XDP metadata hook: report the hardware-accelerated VLAN tag of the skb
+  * attached to the veth XDP buffer.
+  *
+  * Returns -ENODATA when the buffer has no skb, otherwise the result of
+  * __vlan_hwaccel_get_tag(); *vlan_proto is written only when that call
+  * returns 0.
+  */
+ static int veth_xdp_rx_vlan_tag(const struct xdp_md *ctx, __be16 *vlan_proto,
+                               u16 *vlan_tci)
+ {
+       const struct veth_xdp_buff *vbuf = (void *)ctx;
+       int ret;
+ 
+       if (!vbuf->skb)
+               return -ENODATA;
+ 
+       ret = __vlan_hwaccel_get_tag(vbuf->skb, vlan_tci);
+       if (!ret)
+               *vlan_proto = vbuf->skb->vlan_proto;
+ 
+       return ret;
+ }
+ 
+ 
   static const struct net_device_ops veth_netdev_ops = {
         .ndo_init            = veth_dev_init,
         .ndo_open            = veth_open,
@@@ -1747,6 -1764,7 +1765,7 @@@
   static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
         .xmo_rx_timestamp               = veth_xdp_rx_timestamp,
         .xmo_rx_hash                    = veth_xdp_rx_hash,
+       .xmo_rx_vlan_tag                = veth_xdp_rx_vlan_tag,
   };
   
   #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
diff --combined include/linux/bpf.h

index d5d8e2083610be2cd60b95a0283d58fa5f8468b8,5e694934cf37a31a25e7726653f513d495dadc7f..2f54cc0436c4d26444df3756344b08f46800c93f
--- 1/include/linux/bpf.h
--- 2/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@@ -29,6 -29,7 +29,7 @@@
   #include <linux/rcupdate_trace.h>
   #include <linux/static_call.h>
   #include <linux/memcontrol.h>
+ #include <linux/cfi.h>
   
   struct bpf_verifier_env;
   struct bpf_verifier_log;
@@@ -51,6 -52,10 +52,10 @@@ struct module
   struct bpf_func_state;
   struct ftrace_ops;
   struct cgroup;
+ struct bpf_token;
+ struct user_namespace;
+ struct super_block;
+ struct inode;
   
   extern struct idr btf_idr;
   extern spinlock_t btf_idr_lock;
@@@ -106,7 -111,11 +111,11 @@@ struct bpf_map_ops 
         /* funcs called by prog_array and perf_event_array map */
         void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
                                 int fd);
-       void (*map_fd_put_ptr)(void *ptr);
+       /* If need_defer is true, the implementation should guarantee that
+        * the to-be-put element is still alive before the bpf program, which
+        * may manipulate it, exits.
+        */
+       void (*map_fd_put_ptr)(struct bpf_map *map, void *ptr, bool need_defer);
         int (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf);
         u32 (*map_fd_sys_lookup_elem)(void *ptr);
         void (*map_seq_show_elem)(struct bpf_map *map, void *key,
@@@ -272,7 -281,11 +281,11 @@@ struct bpf_map 
          */
         atomic64_t refcnt ____cacheline_aligned;
         atomic64_t usercnt;
-       struct work_struct work;
+       /* rcu is used before freeing and work is only used during freeing */
+       union {
+               struct work_struct work;
+               struct rcu_head rcu;
+       };
         struct mutex freeze_mutex;
         atomic64_t writecnt;
         /* 'Ownership' of program-containing map is claimed by the first program
@@@ -288,6 -301,9 +301,9 @@@
         } owner;
         bool bypass_spec_v1;
         bool frozen; /* write-once; write-protected by freeze_mutex */
+       bool free_after_mult_rcu_gp;
+       bool free_after_rcu_gp;
+       atomic64_t sleepable_refcnt;
         s64 __percpu *elem_count;
   };
   
@@@ -1044,6 -1060,17 +1060,17 @@@ struct btf_func_model 
    */
   #define BPF_TRAMP_F_TAIL_CALL_CTX     BIT(7)
   
+ /*
+  * Indicate the trampoline should be suitable to receive indirect calls;
+  * without this indirectly calling the generated code can result in #UD/#CP,
+  * depending on the CFI options.
+  *
+  * Used by bpf_struct_ops.
+  *
+  * Incompatible with FENTRY usage, overloads @func_addr argument.
+  */
+ #define BPF_TRAMP_F_INDIRECT          BIT(8)
+ 
   /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
    * bytes on x86.
    */
@@@ -1083,10 -1110,17 +1110,17 @@@ struct bpf_tramp_run_ctx
    *      fexit = a set of program to run after original function
    */
   struct bpf_tramp_image;
- int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
+ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
                                 const struct btf_func_model *m, u32 flags,
                                 struct bpf_tramp_links *tlinks,
-                               void *orig_call);
+                               void *func_addr);
+ void *arch_alloc_bpf_trampoline(unsigned int size);
+ void arch_free_bpf_trampoline(void *image, unsigned int size);
+ void arch_protect_bpf_trampoline(void *image, unsigned int size);
+ void arch_unprotect_bpf_trampoline(void *image, unsigned int size);
+ int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
+                            struct bpf_tramp_links *tlinks, void *func_addr);
+ 
   u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
                                              struct bpf_tramp_run_ctx *run_ctx);
   void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
@@@ -1119,6 -1153,7 +1153,7 @@@ enum bpf_tramp_prog_type 
   
   struct bpf_tramp_image {
         void *image;
+       int size;
         struct bpf_ksym ksym;
         struct percpu_ref pcref;
         void *ip_after_call;
@@@ -1188,7 -1223,11 +1223,11 @@@ struct bpf_dispatcher 
   #endif
   };
   
- static __always_inline __nocfi unsigned int bpf_dispatcher_nop_func(
+ #ifndef __bpfcall
+ #define __bpfcall __nocfi
+ #endif
+ 
+ static __always_inline __bpfcall unsigned int bpf_dispatcher_nop_func(
         const void *ctx,
         const struct bpf_insn *insnsi,
         bpf_func_t bpf_func)
@@@ -1280,7 -1319,7 +1319,7 @@@ int arch_prepare_bpf_dispatcher(void *i
   
   #define DEFINE_BPF_DISPATCHER(name)                                   \
         __BPF_DISPATCHER_SC(name);                                      \
-       noinline __nocfi unsigned int bpf_dispatcher_##name##_func(     \
+       noinline __bpfcall unsigned int bpf_dispatcher_##name##_func(   \
                 const void *ctx,                                        \
                 const struct bpf_insn *insnsi,                          \
                 bpf_func_t bpf_func)                                    \
@@@ -1303,7 -1342,7 +1342,7 @@@
   void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
                                 struct bpf_prog *to);
   /* Called only from JIT-enabled code, so there's no need for stubs. */
- void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
+ void bpf_image_ksym_add(void *data, unsigned int size, struct bpf_ksym *ksym);
   void bpf_image_ksym_del(struct bpf_ksym *ksym);
   void bpf_ksym_add(struct bpf_ksym *ksym);
   void bpf_ksym_del(struct bpf_ksym *ksym);
@@@ -1430,6 -1469,9 +1469,9 @@@ struct bpf_prog_aux 
         struct bpf_kfunc_desc_tab *kfunc_tab;
         struct bpf_kfunc_btf_tab *kfunc_btf_tab;
         u32 size_poke_tab;
+ #ifdef CONFIG_FINEIBT
+       struct bpf_ksym ksym_prefix;
+ #endif
         struct bpf_ksym ksym;
         const struct bpf_prog_ops *ops;
         struct bpf_map **used_maps;
@@@ -1442,10 -1484,11 +1484,11 @@@
         int cgroup_atype; /* enum cgroup_bpf_attach_type */
         struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
         char name[BPF_OBJ_NAME_LEN];
-       unsigned int (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp);
+       u64 (*bpf_exception_cb)(u64 cookie, u64 sp, u64 bp, u64, u64);
   #ifdef CONFIG_SECURITY
         void *security;
   #endif
+       struct bpf_token *token;
         struct bpf_prog_offload *offload;
         struct btf *btf;
         struct bpf_func_info *func_info;
@@@ -1570,6 -1613,31 +1613,31 @@@ struct bpf_link_primer 
         u32 id;
   };
   
+ struct bpf_mount_opts {
+       kuid_t uid;  /* uid/gid/mode: presumably ownership and mode applied to the BPF FS mount — TODO confirm */
+       kgid_t gid;
+       umode_t mode;
+ 
+       /* BPF token-related delegation options; presumably what a BPF token derived from this BPF FS instance is allowed to do — TODO confirm encoding */
+       u64 delegate_cmds;     /* bpf() syscall commands */
+       u64 delegate_maps;     /* BPF map types */
+       u64 delegate_progs;    /* BPF program types */
+       u64 delegate_attachs;  /* BPF program attach types */
+ };
+ 
+ struct bpf_token {
+       struct work_struct work;  /* NOTE(review): presumably used for deferred teardown — confirm in the token implementation */
+       atomic64_t refcnt;  /* presumably adjusted through bpf_token_inc()/bpf_token_put() — TODO confirm */
+       struct user_namespace *userns;  /* per the BPF_TOKEN_CREATE description: the user namespace capability checks are performed in */
+       u64 allowed_cmds;     /* allowed bpf() syscall commands; presumably a bitmask, see bpf_token_allow_cmd() */
+       u64 allowed_maps;     /* allowed BPF map types; see bpf_token_allow_map_type() */
+       u64 allowed_progs;    /* allowed BPF program types; see bpf_token_allow_prog_type() */
+       u64 allowed_attachs;  /* allowed BPF program attach types; see bpf_token_allow_prog_type() */
+ #ifdef CONFIG_SECURITY
+       void *security;  /* only present with CONFIG_SECURITY; presumably an LSM-owned blob — TODO confirm */
+ #endif
+ };
+ 
   struct bpf_struct_ops_value;
   struct btf_member;
   
@@@ -1640,6 -1708,7 +1708,7 @@@ struct bpf_struct_ops 
         struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
         u32 type_id;
         u32 value_id;
+       void *cfi_stubs;
   };
   
   #if defined(CONFIG_BPF_JIT) && defined(CONFIG_BPF_SYSCALL)
@@@ -1653,6 -1722,7 +1722,7 @@@ int bpf_struct_ops_map_sys_lookup_elem(
   int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
                                       struct bpf_tramp_link *link,
                                       const struct btf_func_model *model,
+                                     void *stub_func,
                                       void *image, void *image_end);
   static inline bool bpf_try_module_get(const void *data, struct module *owner)
   {
@@@ -2027,6 -2097,7 +2097,7 @@@ static inline void bpf_enable_instrumen
         migrate_enable();
   }
   
+ extern const struct super_operations bpf_super_ops;
   extern const struct file_operations bpf_map_fops;
   extern const struct file_operations bpf_prog_fops;
   extern const struct file_operations bpf_iter_fops;
@@@ -2161,24 -2232,26 +2232,26 @@@ static inline void bpf_map_dec_elem_cou
   
   extern int sysctl_unprivileged_bpf_disabled;
   
- static inline bool bpf_allow_ptr_leaks(void)
+ bool bpf_token_capable(const struct bpf_token *token, int cap);
+ 
+ static inline bool bpf_allow_ptr_leaks(const struct bpf_token *token)
   {
-       return perfmon_capable();
+       return bpf_token_capable(token, CAP_PERFMON);
   }
   
- static inline bool bpf_allow_uninit_stack(void)
+ static inline bool bpf_allow_uninit_stack(const struct bpf_token *token)
   {
-       return perfmon_capable();
+       return bpf_token_capable(token, CAP_PERFMON);
   }
   
- static inline bool bpf_bypass_spec_v1(void)
+ static inline bool bpf_bypass_spec_v1(const struct bpf_token *token)
   {
-       return cpu_mitigations_off() || perfmon_capable();
+       return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
   }
   
- static inline bool bpf_bypass_spec_v4(void)
+ static inline bool bpf_bypass_spec_v4(const struct bpf_token *token)
   {
-       return cpu_mitigations_off() || perfmon_capable();
+       return cpu_mitigations_off() || bpf_token_capable(token, CAP_PERFMON);
   }
   
   int bpf_map_new_fd(struct bpf_map *map, int flags);
@@@ -2195,8 -2268,21 +2268,21 @@@ int bpf_link_new_fd(struct bpf_link *li
   struct bpf_link *bpf_link_get_from_fd(u32 ufd);
   struct bpf_link *bpf_link_get_curr_or_next(u32 *id);
   
+ void bpf_token_inc(struct bpf_token *token);
+ void bpf_token_put(struct bpf_token *token);
+ int bpf_token_create(union bpf_attr *attr);
+ struct bpf_token *bpf_token_get_from_fd(u32 ufd);
+ 
+ bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd);
+ bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type);
+ bool bpf_token_allow_prog_type(const struct bpf_token *token,
+                              enum bpf_prog_type prog_type,
+                              enum bpf_attach_type attach_type);
+ 
   int bpf_obj_pin_user(u32 ufd, int path_fd, const char __user *pathname);
   int bpf_obj_get_user(int path_fd, const char __user *pathname, int flags);
+ struct inode *bpf_get_inode(struct super_block *sb, const struct inode *dir,
+                           umode_t mode);
   
   #define BPF_ITER_FUNC_PREFIX "bpf_iter_"
   #define DEFINE_BPF_ITER_FUNC(target, args...)                 \
@@@ -2431,7 -2517,7 +2517,7 @@@ int btf_check_subprog_arg_match(struct 
   int btf_check_subprog_call(struct bpf_verifier_env *env, int subprog,
                            struct bpf_reg_state *regs);
   int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
-                         struct bpf_reg_state *reg, bool is_ex_cb);
+                         struct bpf_reg_state *reg, u32 *nargs);
   int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
                          struct btf *btf, const struct btf_type *t);
   const char *btf_find_decl_tag_value(const struct btf *btf, const struct btf_type *pt,
@@@ -2440,7 -2526,8 +2526,8 @@@
   struct bpf_prog *bpf_prog_by_id(u32 id);
   struct bpf_link *bpf_link_by_id(u32 id);
   
- const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+ const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id,
+                                                const struct bpf_prog *prog);
   void bpf_task_storage_free(struct task_struct *task);
   void bpf_cgrp_storage_free(struct cgroup *cgroup);
   bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
@@@ -2559,6 -2646,24 +2646,24 @@@ static inline int bpf_obj_get_user(cons
         return -EOPNOTSUPP;
   }
   
+ static inline bool bpf_token_capable(const struct bpf_token *token, int cap)
+ {
+       return capable(cap) || (cap != CAP_SYS_ADMIN && capable(CAP_SYS_ADMIN));  /* stub ignores token; CAP_SYS_ADMIN is accepted in place of any other capability */
+ }
+ 
+ static inline void bpf_token_inc(struct bpf_token *token)
+ {
+ }
+ 
+ static inline void bpf_token_put(struct bpf_token *token)
+ {
+ }
+ 
+ static inline struct bpf_token *bpf_token_get_from_fd(u32 ufd)
+ {
+       return ERR_PTR(-EOPNOTSUPP);
+ }
+ 
   static inline void __dev_flush(void)
   {
   }
@@@ -2682,7 -2787,7 +2787,7 @@@ static inline int btf_struct_access(str
   }
   
   static inline const struct bpf_func_proto *
- bpf_base_func_proto(enum bpf_func_id func_id)
+ bpf_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   {
         return NULL;
   }
@@@ -3179,9 -3284,6 +3284,9 @@@ enum bpf_text_poke_type 
   int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
                        void *addr1, void *addr2);
   
+ +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ +                             struct bpf_prog *new, struct bpf_prog *old);
+ +
   void *bpf_arch_text_copy(void *dst, void *src, size_t len);
   int bpf_arch_text_invalidate(void *dst, size_t len);
   
diff --combined include/linux/skbuff.h

index 7ce38874dbd1f4efe03a1fddc3d06c769ca4c994,df6ef42639d8bf1f8d9a3d641e8f0f8d3f0b17d9..ea5c8ab3ed00d027c39de5728d056bd44fc40d54
--- 1/include/linux/skbuff.h
--- 2/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -1069,7 -1069,7 +1069,7 @@@ struct sk_buff 
         refcount_t              users;
   
   #ifdef CONFIG_SKB_EXTENSIONS
- -      /* only useable after checking ->active_extensions != 0 */
+ +      /* only usable after checking ->active_extensions != 0 */
         struct skb_ext          *extensions;
   #endif
   };
@@@ -3311,7 -3311,7 +3311,7 @@@ static inline struct page *__dev_alloc_
                                              unsigned int order)
   {
         /* This piece of code contains several assumptions.
- -       * 1.  This is for device Rx, therefor a cold page is preferred.
+ +       * 1.  This is for device Rx, therefore a cold page is preferred.
          * 2.  The expectation is the user wants a compound page.
          * 3.  If requesting a order 0 page it will not be compound
          *     due to the check to see if order has a value in prep_new_page
@@@ -4247,10 -4247,13 +4247,13 @@@ static inline bool __skb_metadata_diffe
   {
         const void *a = skb_metadata_end(skb_a);
         const void *b = skb_metadata_end(skb_b);
-       /* Using more efficient variant than plain call to memcmp(). */
- #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
         u64 diffs = 0;
   
+       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
+           BITS_PER_LONG != 64)
+               goto slow;
+ 
+       /* Using more efficient variant than plain call to memcmp(). */
         switch (meta_len) {
   #define __it(x, op) (x -= sizeof(u##op))
   #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op))
@@@ -4270,11 -4273,11 +4273,11 @@@
                 fallthrough;
         case  4: diffs |= __it_diff(a, b, 32);
                 break;
+       default:
+ slow:
+               return memcmp(a - meta_len, b - meta_len, meta_len);
         }
         return diffs;
- #else
-       return memcmp(a - meta_len, b - meta_len, meta_len);
- #endif
   }
   
   static inline bool skb_metadata_differs(const struct sk_buff *skb_a,
diff --combined include/uapi/linux/bpf.h

index b1e8c5bdfc8242dd9f25b118bc9478349c290b95,e0545201b55f6ddfbdf0c0cce703a40fcd36f4a7..42f4d3090efe1502ab2d0f832c8ebfb6fa4209ed
--- 1/include/uapi/linux/bpf.h
--- 2/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@@ -847,6 -847,36 +847,36 @@@ union bpf_iter_link_info 
    *            Returns zero on success. On error, -1 is returned and *errno*
    *            is set appropriately.
    *
+  * BPF_TOKEN_CREATE
+  *    Description
+  *            Create BPF token with embedded information about what
+  *            BPF-related functionality it allows:
+  *            - a set of allowed bpf() syscall commands;
+  *            - a set of allowed BPF map types to be created with
+  *            BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed;
+  *            - a set of allowed BPF program types and BPF program attach
+  *            types to be loaded with BPF_PROG_LOAD command, if
+  *            BPF_PROG_LOAD itself is allowed.
+  *
+  *            BPF token is created (derived) from an instance of BPF FS,
+  *            assuming it has necessary delegation mount options specified.
+  *            This BPF token can be passed as an extra parameter to various
+  *            bpf() syscall commands to grant BPF subsystem functionality to
+  *            unprivileged processes.
+  *
+  *            When created, BPF token is "associated" with the owning
+  *            user namespace of BPF FS instance (super block) that it was
+  *            derived from, and subsequent BPF operations performed with
+  *            BPF token would be performing capabilities checks (i.e.,
+  *            CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within
+  *            that user namespace. Without BPF token, such capabilities
+  *            have to be granted in init user namespace, making bpf()
+  *            syscall incompatible with user namespace, for the most part.
+  *
+  *    Return
+  *            A new file descriptor (a nonnegative integer), or -1 if an
+  *            error occurred (in which case, *errno* is set appropriately).
+  *
    * NOTES
    *    eBPF objects (maps and programs) can be shared between processes.
    *
@@@ -901,6 -931,8 +931,8 @@@ enum bpf_cmd 
         BPF_ITER_CREATE,
         BPF_LINK_DETACH,
         BPF_PROG_BIND_MAP,
+       BPF_TOKEN_CREATE,
+       __MAX_BPF_CMD,
   };
   
   enum bpf_map_type {
@@@ -951,6 -983,7 +983,7 @@@
         BPF_MAP_TYPE_BLOOM_FILTER,
         BPF_MAP_TYPE_USER_RINGBUF,
         BPF_MAP_TYPE_CGRP_STORAGE,
+       __MAX_BPF_MAP_TYPE
   };
   
   /* Note that tracing related programs such as
@@@ -995,6 -1028,7 +1028,7 @@@ enum bpf_prog_type 
         BPF_PROG_TYPE_SK_LOOKUP,
         BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */
         BPF_PROG_TYPE_NETFILTER,
+       __MAX_BPF_PROG_TYPE
   };
   
   enum bpf_attach_type {
@@@ -1074,9 -1108,11 +1108,11 @@@ enum bpf_link_type 
         BPF_LINK_TYPE_TCX = 11,
         BPF_LINK_TYPE_UPROBE_MULTI = 12,
         BPF_LINK_TYPE_NETKIT = 13,
-       MAX_BPF_LINK_TYPE,
+       __MAX_BPF_LINK_TYPE,
   };
   
+ #define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE
+ 
   enum bpf_perf_event_type {
         BPF_PERF_EVENT_UNSPEC = 0,
         BPF_PERF_EVENT_UPROBE = 1,
@@@ -1401,6 -1437,7 +1437,7 @@@ union bpf_attr 
                  * to using 5 hash functions).
                  */
                 __u64   map_extra;
+               __u32   map_token_fd;
         };
   
         struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
@@@ -1470,6 -1507,7 +1507,7 @@@
                  * truncated), or smaller (if log buffer wasn't filled completely).
                  */
                 __u32           log_true_size;
+               __u32           prog_token_fd;
         };
   
         struct { /* anonymous struct used by BPF_OBJ_* commands */
@@@ -1582,6 -1620,7 +1620,7 @@@
                  * truncated), or smaller (if log buffer wasn't filled completely).
                  */
                 __u32           btf_log_true_size;
+               __u32           btf_token_fd;
         };
   
         struct {
@@@ -1712,6 -1751,11 +1751,11 @@@
                 __u32           flags;          /* extra flags */
         } prog_bind_map;
   
+       struct { /* struct used by BPF_TOKEN_CREATE command */
+               __u32           flags;
+               __u32           bpffs_fd;
+       } token_create;
+ 
   } __attribute__((aligned(8)));
   
   /* The description below is an attempt at providing documentation to eBPF
@@@ -6902,7 -6946,6 +6946,7 @@@ enum 
         BPF_TCP_LISTEN,
         BPF_TCP_CLOSING,        /* Now a valid state */
         BPF_TCP_NEW_SYN_RECV,
+ +      BPF_TCP_BOUND_INACTIVE,
   
         BPF_TCP_MAX_STATES      /* Leave at the end! */
   };
diff --combined include/uapi/linux/netdev.h

index 424c5e28f4951986b709f95b307b3f8b4d4655f5,966638b08ccfe1c3fb4f82a32705c5f0450350d6..93cb411adf72e4a13be01d32708f7d53eb89861d
--- 1/include/uapi/linux/netdev.h
--- 2/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@@ -44,10 -44,13 +44,13 @@@ enum netdev_xdp_act 
    *   timestamp via bpf_xdp_metadata_rx_timestamp().
    * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
    *   hash via bpf_xdp_metadata_rx_hash().
+  * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive
+  *   packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
    */
   enum netdev_xdp_rx_metadata {
         NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
         NETDEV_XDP_RX_METADATA_HASH = 2,
+       NETDEV_XDP_RX_METADATA_VLAN_TAG = 4,
   };
   
   /**
@@@ -62,11 -65,6 +65,11 @@@ enum netdev_xsk_flags 
         NETDEV_XSK_FLAGS_TX_CHECKSUM = 2,
   };
   
+ +enum netdev_queue_type {
+ +      NETDEV_QUEUE_TYPE_RX,
+ +      NETDEV_QUEUE_TYPE_TX,
+ +};
+ +
   enum {
         NETDEV_A_DEV_IFINDEX = 1,
         NETDEV_A_DEV_PAD,
@@@ -109,26 -107,6 +112,26 @@@ enum 
         NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1)
   };
   
+ +enum {
+ +      NETDEV_A_NAPI_IFINDEX = 1,
+ +      NETDEV_A_NAPI_ID,
+ +      NETDEV_A_NAPI_IRQ,
+ +      NETDEV_A_NAPI_PID,
+ +
+ +      __NETDEV_A_NAPI_MAX,
+ +      NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
+ +};
+ +
+ +enum {
+ +      NETDEV_A_QUEUE_ID = 1,
+ +      NETDEV_A_QUEUE_IFINDEX,
+ +      NETDEV_A_QUEUE_TYPE,
+ +      NETDEV_A_QUEUE_NAPI_ID,
+ +
+ +      __NETDEV_A_QUEUE_MAX,
+ +      NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
+ +};
+ +
   enum {
         NETDEV_CMD_DEV_GET = 1,
         NETDEV_CMD_DEV_ADD_NTF,
@@@ -139,8 -117,6 +142,8 @@@
         NETDEV_CMD_PAGE_POOL_DEL_NTF,
         NETDEV_CMD_PAGE_POOL_CHANGE_NTF,
         NETDEV_CMD_PAGE_POOL_STATS_GET,
+ +      NETDEV_CMD_QUEUE_GET,
+ +      NETDEV_CMD_NAPI_GET,
   
         __NETDEV_CMD_MAX,
         NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --combined kernel/bpf/arraymap.c

index c85ff9162a5cd44444746f0199e508d0f045b0c4,b5ec24b3563eb34cb5d633515560ef6383cfbcc3..13358675ff2edc723b64adf449f0edcd733428fd
--- 1/kernel/bpf/arraymap.c
--- 2/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@@ -82,7 -82,7 +82,7 @@@ static struct bpf_map *array_map_alloc(
         bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
         int numa_node = bpf_map_attr_numa_node(attr);
         u32 elem_size, index_mask, max_entries;
-       bool bypass_spec_v1 = bpf_bypass_spec_v1();
+       bool bypass_spec_v1 = bpf_bypass_spec_v1(NULL);
         u64 array_size, mask64;
         struct bpf_array *array;
   
@@@ -867,11 -867,11 +867,11 @@@ int bpf_fd_array_map_update_elem(struc
         }
   
         if (old_ptr)
-               map->ops->map_fd_put_ptr(old_ptr);
+               map->ops->map_fd_put_ptr(map, old_ptr, true);
         return 0;
   }
   
- static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+ static long __fd_array_map_delete_elem(struct bpf_map *map, void *key, bool need_defer)
   {
         struct bpf_array *array = container_of(map, struct bpf_array, map);
         void *old_ptr;
@@@ -890,13 -890,18 +890,18 @@@
         }
   
         if (old_ptr) {
-               map->ops->map_fd_put_ptr(old_ptr);
+               map->ops->map_fd_put_ptr(map, old_ptr, need_defer);
                 return 0;
         } else {
                 return -ENOENT;
         }
   }
   
+ static long fd_array_map_delete_elem(struct bpf_map *map, void *key)
+ {
+       return __fd_array_map_delete_elem(map, key, true);  /* this entry point always passes need_defer = true */
+ }
+ 
   static void *prog_fd_array_get_ptr(struct bpf_map *map,
                                    struct file *map_file, int fd)
   {
@@@ -913,8 -918,9 +918,9 @@@
         return prog;
   }
   
- static void prog_fd_array_put_ptr(void *ptr)
+ static void prog_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
   {
+       /* bpf_prog is freed after one RCU or tasks trace grace period */
         bpf_prog_put(ptr);
   }
   
@@@ -924,13 -930,13 +930,13 @@@ static u32 prog_fd_array_sys_lookup_ele
   }
   
   /* decrement refcnt of all bpf_progs that are stored in this map */
- static void bpf_fd_array_map_clear(struct bpf_map *map)
+ static void bpf_fd_array_map_clear(struct bpf_map *map, bool need_defer)
   {
         struct bpf_array *array = container_of(map, struct bpf_array, map);
         int i;
   
         for (i = 0; i < array->map.max_entries; i++)
-               fd_array_map_delete_elem(map, &i);
+               __fd_array_map_delete_elem(map, &i, need_defer);
   }
   
   static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
@@@ -1012,16 -1018,11 +1018,16 @@@ static void prog_array_map_poke_untrack
         mutex_unlock(&aux->poke_mutex);
   }
   
+ +void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
+ +                                    struct bpf_prog *new, struct bpf_prog *old)
+ +{
+ +      WARN_ON_ONCE(1);  /* weak default only warns (once); presumably architectures that use poke descriptors override it — TODO confirm */
+ +}
+ +
   static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
                                     struct bpf_prog *old,
                                     struct bpf_prog *new)
   {
- -      u8 *old_addr, *new_addr, *old_bypass_addr;
         struct prog_poke_elem *elem;
         struct bpf_array_aux *aux;
   
@@@ -1030,7 -1031,7 +1036,7 @@@
   
         list_for_each_entry(elem, &aux->poke_progs, list) {
                 struct bpf_jit_poke_descriptor *poke;
- -              int i, ret;
+ +              int i;
   
                 for (i = 0; i < elem->aux->size_poke_tab; i++) {
                         poke = &elem->aux->poke_tab[i];
@@@ -1049,10 -1050,21 +1055,10 @@@
                          *    activated, so tail call updates can arrive from here
                          *    while JIT is still finishing its final fixup for
                          *    non-activated poke entries.
- -                       * 3) On program teardown, the program's kallsym entry gets
- -                       *    removed out of RCU callback, but we can only untrack
- -                       *    from sleepable context, therefore bpf_arch_text_poke()
- -                       *    might not see that this is in BPF text section and
- -                       *    bails out with -EINVAL. As these are unreachable since
- -                       *    RCU grace period already passed, we simply skip them.
- -                       * 4) Also programs reaching refcount of zero while patching
+ +                       * 3) Also programs reaching refcount of zero while patching
                          *    is in progress is okay since we're protected under
                          *    poke_mutex and untrack the programs before the JIT
- -                       *    buffer is freed. When we're still in the middle of
- -                       *    patching and suddenly kallsyms entry of the program
- -                       *    gets evicted, we just skip the rest which is fine due
- -                       *    to point 3).
- -                       * 5) Any other error happening below from bpf_arch_text_poke()
- -                       *    is a unexpected bug.
+ +                       *    buffer is freed.
                          */
                         if (!READ_ONCE(poke->tailcall_target_stable))
                                 continue;
@@@ -1062,7 -1074,39 +1068,7 @@@
                             poke->tail_call.key != key)
                                 continue;
   
- -                      old_bypass_addr = old ? NULL : poke->bypass_addr;
- -                      old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
- -                      new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
- -
- -                      if (new) {
- -                              ret = bpf_arch_text_poke(poke->tailcall_target,
- -                                                       BPF_MOD_JUMP,
- -                                                       old_addr, new_addr);
- -                              BUG_ON(ret < 0 && ret != -EINVAL);
- -                              if (!old) {
- -                                      ret = bpf_arch_text_poke(poke->tailcall_bypass,
- -                                                               BPF_MOD_JUMP,
- -                                                               poke->bypass_addr,
- -                                                               NULL);
- -                                      BUG_ON(ret < 0 && ret != -EINVAL);
- -                              }
- -                      } else {
- -                              ret = bpf_arch_text_poke(poke->tailcall_bypass,
- -                                                       BPF_MOD_JUMP,
- -                                                       old_bypass_addr,
- -                                                       poke->bypass_addr);
- -                              BUG_ON(ret < 0 && ret != -EINVAL);
- -                              /* let other CPUs finish the execution of program
- -                               * so that it will not possible to expose them
- -                               * to invalid nop, stack unwind, nop state
- -                               */
- -                              if (!ret)
- -                                      synchronize_rcu();
- -                              ret = bpf_arch_text_poke(poke->tailcall_target,
- -                                                       BPF_MOD_JUMP,
- -                                                       old_addr, NULL);
- -                              BUG_ON(ret < 0 && ret != -EINVAL);
- -                      }
+ +                      bpf_arch_poke_desc_update(poke, new, old);
                 }
         }
   }
@@@ -1071,7 -1115,7 +1077,7 @@@ static void prog_array_map_clear_deferr
   {
         struct bpf_map *map = container_of(work, struct bpf_array_aux,
                                            work)->map;
-       bpf_fd_array_map_clear(map);
+       bpf_fd_array_map_clear(map, true);
         bpf_map_put(map);
   }
   
@@@ -1151,7 -1195,7 +1157,7 @@@ static struct bpf_event_entry *bpf_even
   {
         struct bpf_event_entry *ee;
   
-       ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
+       ee = kzalloc(sizeof(*ee), GFP_KERNEL);
         if (ee) {
                 ee->event = perf_file->private_data;
                 ee->perf_file = perf_file;
@@@ -1201,8 -1245,9 +1207,9 @@@ err_out
         return ee;
   }
   
- static void perf_event_fd_array_put_ptr(void *ptr)
+ static void perf_event_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
   {
+       /* bpf_perf_event is freed after one RCU grace period */
         bpf_event_entry_free_rcu(ptr);
   }
   
@@@ -1220,7 -1265,7 +1227,7 @@@ static void perf_event_fd_array_release
         for (i = 0; i < array->map.max_entries; i++) {
                 ee = READ_ONCE(array->ptrs[i]);
                 if (ee && ee->map_file == map_file)
-                       fd_array_map_delete_elem(map, &i);
+                       __fd_array_map_delete_elem(map, &i, true);
         }
         rcu_read_unlock();
   }
@@@ -1228,7 -1273,7 +1235,7 @@@
   static void perf_event_fd_array_map_free(struct bpf_map *map)
   {
         if (map->map_flags & BPF_F_PRESERVE_ELEMS)
-               bpf_fd_array_map_clear(map);
+               bpf_fd_array_map_clear(map, false);
         fd_array_map_free(map);
   }
   
@@@ -1256,7 -1301,7 +1263,7 @@@ static void *cgroup_fd_array_get_ptr(st
         return cgroup_get_from_fd(fd);
   }
   
- static void cgroup_fd_array_put_ptr(void *ptr)
+ static void cgroup_fd_array_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
   {
         /* cgroup_put free cgrp after a rcu grace period */
         cgroup_put(ptr);
@@@ -1264,7 -1309,7 +1271,7 @@@
   
   static void cgroup_fd_array_free(struct bpf_map *map)
   {
-       bpf_fd_array_map_clear(map);
+       bpf_fd_array_map_clear(map, false);
         fd_array_map_free(map);
   }
   
@@@ -1309,7 -1354,7 +1316,7 @@@ static void array_of_map_free(struct bp
          * is protected by fdget/fdput.
          */
         bpf_map_meta_free(map->inner_map_meta);
-       bpf_fd_array_map_clear(map);
+       bpf_fd_array_map_clear(map, false);
         fd_array_map_free(map);
   }
   
diff --combined kernel/bpf/core.c

index fe254ae035fe4956388897af24d38809530f8fb7,5aa6863ac33b30bc5944cf1614a05cb312ce5328..14ace23d517b7f3ae4f709685355b1bc8d7bf15a
--- 1/kernel/bpf/core.c
--- 2/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@@ -121,6 -121,9 +121,9 @@@ struct bpf_prog *bpf_prog_alloc_no_stat
   #endif
   
         INIT_LIST_HEAD_RCU(&fp->aux->ksym.lnode);
+ #ifdef CONFIG_FINEIBT
+       INIT_LIST_HEAD_RCU(&fp->aux->ksym_prefix.lnode);
+ #endif
         mutex_init(&fp->aux->used_maps_mutex);
         mutex_init(&fp->aux->dst_mutex);
   
@@@ -371,18 -374,14 +374,18 @@@ static int bpf_adj_delta_to_imm(struct 
   static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, s32 end_old,
                                 s32 end_new, s32 curr, const bool probe_pass)
   {
- -      const s32 off_min = S16_MIN, off_max = S16_MAX;
+ +      s64 off_min, off_max, off;
         s32 delta = end_new - end_old;
- -      s32 off;
   
- -      if (insn->code == (BPF_JMP32 | BPF_JA))
+ +      if (insn->code == (BPF_JMP32 | BPF_JA)) {
                 off = insn->imm;
- -      else
+ +              off_min = S32_MIN;
+ +              off_max = S32_MAX;
+ +      } else {
                 off = insn->off;
+ +              off_min = S16_MIN;
+ +              off_max = S16_MAX;
+ +      }
   
         if (curr < pos && curr + off + 1 >= end_old)
                 off += delta;
@@@ -679,7 -678,7 +682,7 @@@ static bool bpf_prog_kallsyms_candidate
   void bpf_prog_kallsyms_add(struct bpf_prog *fp)
   {
         if (!bpf_prog_kallsyms_candidate(fp) ||
-           !bpf_capable())
+           !bpf_token_capable(fp->aux->token, CAP_BPF))
                 return;
   
         bpf_prog_ksym_set_addr(fp);
@@@ -687,6 -686,23 +690,23 @@@
         fp->aux->ksym.prog = true;
   
         bpf_ksym_add(&fp->aux->ksym);
+ 
+ #ifdef CONFIG_FINEIBT
+       /*
+        * With FineIBT, code in the __cfi_foo() symbols can get executed,
+        * and hence the unwinder needs help.
+        */
+       if (cfi_mode != CFI_FINEIBT)
+               return;
+ 
+       snprintf(fp->aux->ksym_prefix.name, KSYM_NAME_LEN,
+                "__cfi_%s", fp->aux->ksym.name);
+ 
+       fp->aux->ksym_prefix.start = (unsigned long) fp->bpf_func - 16;
+       fp->aux->ksym_prefix.end   = (unsigned long) fp->bpf_func;
+ 
+       bpf_ksym_add(&fp->aux->ksym_prefix);
+ #endif
   }
   
   void bpf_prog_kallsyms_del(struct bpf_prog *fp)
@@@ -695,6 -711,11 +715,11 @@@
                 return;
   
         bpf_ksym_del(&fp->aux->ksym);
+ #ifdef CONFIG_FINEIBT
+       if (cfi_mode != CFI_FINEIBT)
+               return;
+       bpf_ksym_del(&fp->aux->ksym_prefix);
+ #endif
   }
   
   static struct bpf_ksym *bpf_ksym_find(unsigned long addr)
@@@ -932,20 -953,20 +957,20 @@@ out
         return ptr;
   }
   
- void bpf_prog_pack_free(struct bpf_binary_header *hdr)
+ void bpf_prog_pack_free(void *ptr, u32 size)
   {
         struct bpf_prog_pack *pack = NULL, *tmp;
         unsigned int nbits;
         unsigned long pos;
   
         mutex_lock(&pack_mutex);
-       if (hdr->size > BPF_PROG_PACK_SIZE) {
-               bpf_jit_free_exec(hdr);
+       if (size > BPF_PROG_PACK_SIZE) {
+               bpf_jit_free_exec(ptr);
                 goto out;
         }
   
         list_for_each_entry(tmp, &pack_list, list) {
-               if ((void *)hdr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > (void *)hdr) {
+               if (ptr >= tmp->ptr && (tmp->ptr + BPF_PROG_PACK_SIZE) > ptr) {
                         pack = tmp;
                         break;
                 }
@@@ -954,10 -975,10 +979,10 @@@
         if (WARN_ONCE(!pack, "bpf_prog_pack bug\n"))
                 goto out;
   
-       nbits = BPF_PROG_SIZE_TO_NBITS(hdr->size);
-       pos = ((unsigned long)hdr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
+       nbits = BPF_PROG_SIZE_TO_NBITS(size);
+       pos = ((unsigned long)ptr - (unsigned long)pack->ptr) >> BPF_PROG_CHUNK_SHIFT;
   
-       WARN_ONCE(bpf_arch_text_invalidate(hdr, hdr->size),
+       WARN_ONCE(bpf_arch_text_invalidate(ptr, size),
                   "bpf_prog_pack bug: missing bpf_arch_text_invalidate?\n");
   
         bitmap_clear(pack->bitmap, pos, nbits);
@@@ -1104,8 -1125,7 +1129,7 @@@ bpf_jit_binary_pack_alloc(unsigned int 
   
         *rw_header = kvmalloc(size, GFP_KERNEL);
         if (!*rw_header) {
-               bpf_arch_text_copy(&ro_header->size, &size, sizeof(size));
-               bpf_prog_pack_free(ro_header);
+               bpf_prog_pack_free(ro_header, size);
                 bpf_jit_uncharge_modmem(size);
                 return NULL;
         }
@@@ -1136,7 -1156,7 +1160,7 @@@ int bpf_jit_binary_pack_finalize(struc
         kvfree(rw_header);
   
         if (IS_ERR(ptr)) {
-               bpf_prog_pack_free(ro_header);
+               bpf_prog_pack_free(ro_header, ro_header->size);
                 return PTR_ERR(ptr);
         }
         return 0;
@@@ -1157,7 -1177,7 +1181,7 @@@ void bpf_jit_binary_pack_free(struct bp
   {
         u32 size = ro_header->size;
   
-       bpf_prog_pack_free(ro_header);
+       bpf_prog_pack_free(ro_header, size);
         kvfree(rw_header);
         bpf_jit_uncharge_modmem(size);
   }
@@@ -2668,12 -2688,16 +2692,16 @@@ void __bpf_free_used_maps(struct bpf_pr
                           struct bpf_map **used_maps, u32 len)
   {
         struct bpf_map *map;
+       bool sleepable;
         u32 i;
   
+       sleepable = aux->sleepable;
         for (i = 0; i < len; i++) {
                 map = used_maps[i];
                 if (map->ops->map_poke_untrack)
                         map->ops->map_poke_untrack(map, aux);
+               if (sleepable)
+                       atomic64_dec(&map->sleepable_refcnt);
                 bpf_map_put(map);
         }
   }
@@@ -2751,6 -2775,7 +2779,7 @@@ void bpf_prog_free(struct bpf_prog *fp
   
         if (aux->dst_prog)
                 bpf_prog_put(aux->dst_prog);
+       bpf_token_put(aux->token);
         INIT_WORK(&aux->work, bpf_prog_free_deferred);
         schedule_work(&aux->work);
   }
diff --combined net/core/filter.c

index 6d89a9cf33c9fd3e45f8c0d0790a070d874449e5,adcfc2c25754893891b3b013d245a3d9fe55e756..4ff6100c6a2733ffe3072d475758f46e3fcda21b
--- 1/net/core/filter.c
--- 2/net/core/filter.c
+++ b/net/core/filter.c
@@@ -87,7 -87,7 +87,7 @@@
   #include "dev.h"
   
   static const struct bpf_func_proto *
- bpf_sk_base_func_proto(enum bpf_func_id func_id);
+ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
   
   int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
   {
@@@ -1219,8 -1219,8 +1219,8 @@@ void sk_filter_uncharge(struct sock *sk
    */
   static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
   {
+ +      int optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
         u32 filter_size = bpf_prog_size(fp->prog->len);
- -      int optmem_max = READ_ONCE(sysctl_optmem_max);
   
         /* same check as in sock_kmalloc() */
         if (filter_size <= optmem_max &&
@@@ -1550,13 -1550,12 +1550,13 @@@ EXPORT_SYMBOL_GPL(sk_attach_filter)
   int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
   {
         struct bpf_prog *prog = __get_filter(fprog, sk);
- -      int err;
+ +      int err, optmem_max;
   
         if (IS_ERR(prog))
                 return PTR_ERR(prog);
   
- -      if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
+ +      optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ +      if (bpf_prog_size(prog->len) > optmem_max)
                 err = -ENOMEM;
         else
                 err = reuseport_attach_prog(sk, prog);
@@@ -1595,7 -1594,7 +1595,7 @@@ int sk_attach_bpf(u32 ufd, struct sock 
   int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
   {
         struct bpf_prog *prog;
- -      int err;
+ +      int err, optmem_max;
   
         if (sock_flag(sk, SOCK_FILTER_LOCKED))
                 return -EPERM;
@@@ -1623,8 -1622,7 +1623,8 @@@
                 }
         } else {
                 /* BPF_PROG_TYPE_SOCKET_FILTER */
- -              if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
+ +              optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
+ +              if (bpf_prog_size(prog->len) > optmem_max) {
                         err = -ENOMEM;
                         goto err_prog_put;
                 }
@@@ -2604,22 -2602,6 +2604,22 @@@ BPF_CALL_2(bpf_msg_cork_bytes, struct s
         return 0;
   }
   
+ +static void sk_msg_reset_curr(struct sk_msg *msg)
+ +{
+ +      u32 i = msg->sg.start;
+ +      u32 len = 0;
+ +
+ +      do {
+ +              len += sk_msg_elem(msg, i)->length;
+ +              sk_msg_iter_var_next(i);
+ +              if (len >= msg->sg.size)
+ +                      break;
+ +      } while (i != msg->sg.end);
+ +
+ +      msg->sg.curr = i;
+ +      msg->sg.copybreak = 0;
+ +}
+ +
   static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
         .func           = bpf_msg_cork_bytes,
         .gpl_only       = false,
@@@ -2739,7 -2721,6 +2739,7 @@@ BPF_CALL_4(bpf_msg_pull_data, struct sk
                       msg->sg.end - shift + NR_MSG_FRAG_IDS :
                       msg->sg.end - shift;
   out:
+ +      sk_msg_reset_curr(msg);
         msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
         msg->data_end = msg->data + bytes;
         return 0;
@@@ -2876,7 -2857,6 +2876,7 @@@ BPF_CALL_4(bpf_msg_push_data, struct sk
                 msg->sg.data[new] = rsge;
         }
   
+ +      sk_msg_reset_curr(msg);
         sk_msg_compute_data_pointers(msg);
         return 0;
   }
@@@ -3045,7 -3025,6 +3045,7 @@@ BPF_CALL_4(bpf_msg_pop_data, struct sk_
   
         sk_mem_uncharge(msg->sk, len - pop);
         msg->sg.size -= (len - pop);
+ +      sk_msg_reset_curr(msg);
         sk_msg_compute_data_pointers(msg);
         return 0;
   }
@@@ -7862,7 -7841,7 +7862,7 @@@ sock_filter_func_proto(enum bpf_func_i
         case BPF_FUNC_ktime_get_coarse_ns:
                 return &bpf_ktime_get_coarse_ns_proto;
         default:
-               return bpf_base_func_proto(func_id);
+               return bpf_base_func_proto(func_id, prog);
         }
   }
   
@@@ -7955,7 -7934,7 +7955,7 @@@ sock_addr_func_proto(enum bpf_func_id f
                         return NULL;
                 }
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -7974,7 -7953,7 +7974,7 @@@ sk_filter_func_proto(enum bpf_func_id f
         case BPF_FUNC_perf_event_output:
                 return &bpf_skb_event_output_proto;
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8161,7 -8140,7 +8161,7 @@@ tc_cls_act_func_proto(enum bpf_func_id 
   #endif
   #endif
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8220,7 -8199,7 +8220,7 @@@ xdp_func_proto(enum bpf_func_id func_id
   #endif
   #endif
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   
   #if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
@@@ -8281,7 -8260,7 +8281,7 @@@ sock_ops_func_proto(enum bpf_func_id fu
                 return &bpf_tcp_sock_proto;
   #endif /* CONFIG_INET */
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8323,7 -8302,7 +8323,7 @@@ sk_msg_func_proto(enum bpf_func_id func
                 return &bpf_get_cgroup_classid_curr_proto;
   #endif
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8367,7 -8346,7 +8367,7 @@@ sk_skb_func_proto(enum bpf_func_id func
                 return &bpf_skc_lookup_tcp_proto;
   #endif
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8378,7 -8357,7 +8378,7 @@@ flow_dissector_func_proto(enum bpf_func
         case BPF_FUNC_skb_load_bytes:
                 return &bpf_flow_dissector_load_bytes_proto;
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8405,7 -8384,7 +8405,7 @@@ lwt_out_func_proto(enum bpf_func_id fun
         case BPF_FUNC_skb_under_cgroup:
                 return &bpf_skb_under_cgroup_proto;
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -8580,7 -8559,7 +8580,7 @@@ static bool cg_skb_is_valid_access(int 
                 return false;
         case bpf_ctx_range(struct __sk_buff, data):
         case bpf_ctx_range(struct __sk_buff, data_end):
-               if (!bpf_capable())
+               if (!bpf_token_capable(prog->aux->token, CAP_BPF))
                         return false;
                 break;
         }
@@@ -8592,7 -8571,7 +8592,7 @@@
                 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
                         break;
                 case bpf_ctx_range(struct __sk_buff, tstamp):
-                       if (!bpf_capable())
+                       if (!bpf_token_capable(prog->aux->token, CAP_BPF))
                                 return false;
                         break;
                 default:
@@@ -11236,7 -11215,7 +11236,7 @@@ sk_reuseport_func_proto(enum bpf_func_i
         case BPF_FUNC_ktime_get_coarse_ns:
                 return &bpf_ktime_get_coarse_ns_proto;
         default:
-               return bpf_base_func_proto(func_id);
+               return bpf_base_func_proto(func_id, prog);
         }
   }
   
@@@ -11418,7 -11397,7 +11418,7 @@@ sk_lookup_func_proto(enum bpf_func_id f
         case BPF_FUNC_sk_release:
                 return &bpf_sk_release_proto;
         default:
-               return bpf_sk_base_func_proto(func_id);
+               return bpf_sk_base_func_proto(func_id, prog);
         }
   }
   
@@@ -11752,7 -11731,7 +11752,7 @@@ const struct bpf_func_proto bpf_sock_fr
   };
   
   static const struct bpf_func_proto *
- bpf_sk_base_func_proto(enum bpf_func_id func_id)
+ bpf_sk_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   {
         const struct bpf_func_proto *func;
   
@@@ -11781,10 -11760,10 +11781,10 @@@
         case BPF_FUNC_ktime_get_coarse_ns:
                 return &bpf_ktime_get_coarse_ns_proto;
         default:
-               return bpf_base_func_proto(func_id);
+               return bpf_base_func_proto(func_id, prog);
         }
   
-       if (!perfmon_capable())
+       if (!bpf_token_capable(prog->aux->token, CAP_PERFMON))
                 return NULL;
   
         return func;
diff --combined net/netfilter/nf_bpf_link.c

index 0e4beae421f8302cb0bc6cb798daf36366da0e4b,1969facac91c2d8a010ff9a1a928ce9980a57539..5257d5e7eb09d82aaa233f2417dfbca3e4c1f97f
--- 1/net/netfilter/nf_bpf_link.c
--- 2/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@@ -31,7 -31,7 +31,7 @@@ struct bpf_nf_link 
   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
   static const struct nf_defrag_hook *
   get_proto_defrag_hook(struct bpf_nf_link *link,
- -                    const struct nf_defrag_hook __rcu *global_hook,
+ +                    const struct nf_defrag_hook __rcu **ptr_global_hook,
                       const char *mod)
   {
         const struct nf_defrag_hook *hook;
@@@ -39,7 -39,7 +39,7 @@@
   
         /* RCU protects us from races against module unloading */
         rcu_read_lock();
- -      hook = rcu_dereference(global_hook);
+ +      hook = rcu_dereference(*ptr_global_hook);
         if (!hook) {
                 rcu_read_unlock();
                 err = request_module(mod);
@@@ -47,7 -47,7 +47,7 @@@
                         return ERR_PTR(err < 0 ? err : -EINVAL);
   
                 rcu_read_lock();
- -              hook = rcu_dereference(global_hook);
+ +              hook = rcu_dereference(*ptr_global_hook);
         }
   
         if (hook && try_module_get(hook->owner)) {
@@@ -78,7 -78,7 +78,7 @@@ static int bpf_nf_enable_defrag(struct 
         switch (link->hook_ops.pf) {
   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
         case NFPROTO_IPV4:
- -              hook = get_proto_defrag_hook(link, nf_defrag_v4_hook, "nf_defrag_ipv4");
+ +              hook = get_proto_defrag_hook(link, &nf_defrag_v4_hook, "nf_defrag_ipv4");
                 if (IS_ERR(hook))
                         return PTR_ERR(hook);
   
@@@ -87,7 -87,7 +87,7 @@@
   #endif
   #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
         case NFPROTO_IPV6:
- -              hook = get_proto_defrag_hook(link, nf_defrag_v6_hook, "nf_defrag_ipv6");
+ +              hook = get_proto_defrag_hook(link, &nf_defrag_v6_hook, "nf_defrag_ipv6");
                 if (IS_ERR(hook))
                         return PTR_ERR(hook);
   
@@@ -314,7 -314,7 +314,7 @@@ static bool nf_is_valid_access(int off
   static const struct bpf_func_proto *
   bpf_nf_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
   {
-       return bpf_base_func_proto(func_id);
+       return bpf_base_func_proto(func_id, prog);
   }
   
   const struct bpf_verifier_ops netfilter_verifier_ops = {
diff --combined tools/include/uapi/linux/netdev.h

index 424c5e28f4951986b709f95b307b3f8b4d4655f5,966638b08ccfe1c3fb4f82a32705c5f0450350d6..93cb411adf72e4a13be01d32708f7d53eb89861d
--- 1/tools/include/uapi/linux/netdev.h
--- 2/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@@ -44,10 -44,13 +44,13 @@@ enum netdev_xdp_act 
    *   timestamp via bpf_xdp_metadata_rx_timestamp().
    * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet
    *   hash via bpf_xdp_metadata_rx_hash().
+  * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive
+  *   packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag().
    */
   enum netdev_xdp_rx_metadata {
         NETDEV_XDP_RX_METADATA_TIMESTAMP = 1,
         NETDEV_XDP_RX_METADATA_HASH = 2,
+       NETDEV_XDP_RX_METADATA_VLAN_TAG = 4,
   };
   
   /**
@@@ -62,11 -65,6 +65,11 @@@ enum netdev_xsk_flags 
         NETDEV_XSK_FLAGS_TX_CHECKSUM = 2,
   };
   
+ +enum netdev_queue_type {
+ +      NETDEV_QUEUE_TYPE_RX,
+ +      NETDEV_QUEUE_TYPE_TX,
+ +};
+ +
   enum {
         NETDEV_A_DEV_IFINDEX = 1,
         NETDEV_A_DEV_PAD,
@@@ -109,26 -107,6 +112,26 @@@ enum 
         NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1)
   };
   
+ +enum {
+ +      NETDEV_A_NAPI_IFINDEX = 1,
+ +      NETDEV_A_NAPI_ID,
+ +      NETDEV_A_NAPI_IRQ,
+ +      NETDEV_A_NAPI_PID,
+ +
+ +      __NETDEV_A_NAPI_MAX,
+ +      NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
+ +};
+ +
+ +enum {
+ +      NETDEV_A_QUEUE_ID = 1,
+ +      NETDEV_A_QUEUE_IFINDEX,
+ +      NETDEV_A_QUEUE_TYPE,
+ +      NETDEV_A_QUEUE_NAPI_ID,
+ +
+ +      __NETDEV_A_QUEUE_MAX,
+ +      NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
+ +};
+ +
   enum {
         NETDEV_CMD_DEV_GET = 1,
         NETDEV_CMD_DEV_ADD_NTF,
@@@ -139,8 -117,6 +142,8 @@@
         NETDEV_CMD_PAGE_POOL_DEL_NTF,
         NETDEV_CMD_PAGE_POOL_CHANGE_NTF,
         NETDEV_CMD_PAGE_POOL_STATS_GET,
+ +      NETDEV_CMD_QUEUE_GET,
+ +      NETDEV_CMD_NAPI_GET,
   
         __NETDEV_CMD_MAX,
         NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
author	Jakub Kicinski <[email protected]>
	Tue, 19 Dec 2023 00:46:07 +0000 (16:46 -0800)
committer	Jakub Kicinski <[email protected]>
	Tue, 19 Dec 2023 00:46:08 +0000 (16:46 -0800)
		1	2
Documentation/netlink/specs/netdev.yaml	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/net/bpf_jit_comp.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ice/ice.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ice/ice_base.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ice/ice_main.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ice/ice_ptp.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/ethernet/intel/ice/ice_ptp.h	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/net/veth.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/bpf.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/skbuff.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/bpf.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/uapi/linux/netdev.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/bpf/arraymap.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/bpf/core.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/core/filter.c	patch \|	diff1 \|	diff2 \|	blob \| history
net/netfilter/nf_bpf_link.c	patch \|	diff1 \|	diff2 \|	blob \| history
tools/include/uapi/linux/netdev.h	patch \|	diff1 \|	diff2 \|	blob \| history