// SPDX-License-Identifier: GPL-2.0-only
/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <linux/bpf_lsm.h>
#include <linux/bpf_verifier.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>

#include "../cgroup/cgroup-internal.h"

DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);

/*
 * cgroup bpf destruction makes heavy use of work items and there can be a lot
 * of concurrent destructions.  Use a separate workqueue so that cgroup bpf
 * destruction work items don't end up filling up max_active of system_wq
 * which may lead to deadlock.
 */
static struct workqueue_struct *cgroup_bpf_destroy_wq;

static int __init cgroup_bpf_wq_init(void)
{
	cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
	if (!cgroup_bpf_destroy_wq)
		panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
	return 0;
}
core_initcall(cgroup_bpf_wq_init);

/* __always_inline is necessary to prevent indirect call through run_prog
 * function pointer.
 */
static __always_inline int
bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
		      enum cgroup_bpf_attach_type atype,
		      const void *ctx, bpf_prog_run_fn run_prog,
		      int retval, u32 *ret_flags)
{
	const struct bpf_prog_array_item *item;
	const struct bpf_prog *prog;
	const struct bpf_prog_array *array;
	struct bpf_run_ctx *old_run_ctx;
	struct bpf_cg_run_ctx run_ctx;
	u32 func_ret;

	run_ctx.retval = retval;
	migrate_disable();
	rcu_read_lock();
	array = rcu_dereference(cgrp->effective[atype]);
	item = &array->items[0];
	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
	while ((prog = READ_ONCE(item->prog))) {
		run_ctx.prog_item = item;
		func_ret = run_prog(prog, ctx);
		if (ret_flags) {
			*(ret_flags) |= (func_ret >> 1);
			func_ret &= 1;
		}
		if (!func_ret && !IS_ERR_VALUE((long)run_ctx.retval))
			run_ctx.retval = -EPERM;
		item++;
	}
	bpf_reset_run_ctx(old_run_ctx);
	rcu_read_unlock();
	migrate_enable();
	return run_ctx.retval;
}

unsigned int __cgroup_bpf_run_lsm_sock(const void *ctx,
				       const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct sock *sk;
	struct cgroup *cgrp;
	int ret = 0;
	u64 *args;

	args = (u64 *)ctx;
	sk = (void *)(unsigned long)args[0];
	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

unsigned int __cgroup_bpf_run_lsm_socket(const void *ctx,
					 const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct socket *sock;
	struct cgroup *cgrp;
	int ret = 0;
	u64 *args;

	args = (u64 *)ctx;
	sock = (void *)(unsigned long)args[0];
	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	cgrp = sock_cgroup_ptr(&sock->sk->sk_cgrp_data);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

unsigned int __cgroup_bpf_run_lsm_current(const void *ctx,
					  const struct bpf_insn *insn)
{
	const struct bpf_prog *shim_prog;
	struct cgroup *cgrp;
	int ret = 0;

	/*shim_prog = container_of(insn, struct bpf_prog, insnsi);*/
	shim_prog = (const struct bpf_prog *)((void *)insn - offsetof(struct bpf_prog, insnsi));

	/* We rely on trampoline's __bpf_prog_enter_lsm_cgroup to grab RCU read lock. */
	cgrp = task_dfl_cgroup(current);
	if (likely(cgrp))
		ret = bpf_prog_run_array_cg(&cgrp->bpf,
					    shim_prog->aux->cgroup_atype,
					    ctx, bpf_prog_run, 0, NULL);
	return ret;
}

#ifdef CONFIG_BPF_LSM
struct cgroup_lsm_atype {
	u32 attach_btf_id;
	int refcnt;
};

static struct cgroup_lsm_atype cgroup_lsm_atype[CGROUP_LSM_NUM];

static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	int i;

	lockdep_assert_held(&cgroup_mutex);

	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);

	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
		if (cgroup_lsm_atype[i].attach_btf_id == attach_btf_id)
			return CGROUP_LSM_START + i;

	for (i = 0; i < ARRAY_SIZE(cgroup_lsm_atype); i++)
		if (cgroup_lsm_atype[i].attach_btf_id == 0)
			return CGROUP_LSM_START + i;

	return -E2BIG;
}

void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype)
{
	int i = cgroup_atype - CGROUP_LSM_START;

	lockdep_assert_held(&cgroup_mutex);

	WARN_ON_ONCE(cgroup_lsm_atype[i].attach_btf_id &&
		     cgroup_lsm_atype[i].attach_btf_id != attach_btf_id);

	cgroup_lsm_atype[i].attach_btf_id = attach_btf_id;
	cgroup_lsm_atype[i].refcnt++;
}

void bpf_cgroup_atype_put(int cgroup_atype)
{
	int i = cgroup_atype - CGROUP_LSM_START;

	cgroup_lock();
	if (--cgroup_lsm_atype[i].refcnt <= 0)
		cgroup_lsm_atype[i].attach_btf_id = 0;
	WARN_ON_ONCE(cgroup_lsm_atype[i].refcnt < 0);
	cgroup_unlock();
}
#else
static enum cgroup_bpf_attach_type
bpf_cgroup_atype_find(enum bpf_attach_type attach_type, u32 attach_btf_id)
{
	if (attach_type != BPF_LSM_CGROUP)
		return to_cgroup_bpf_attach_type(attach_type);
	return -EOPNOTSUPP;
}
#endif /* CONFIG_BPF_LSM */

void cgroup_bpf_offline(struct cgroup *cgrp)
{
	cgroup_get(cgrp);
	percpu_ref_kill(&cgrp->bpf.refcnt);
}

static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storages[stype]);
}

static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
				     struct bpf_cgroup_storage *new_storages[],
				     enum bpf_attach_type type,
				     struct bpf_prog *prog,
				     struct cgroup *cgrp)
{
	enum bpf_cgroup_storage_type stype;
	struct bpf_cgroup_storage_key key;
	struct bpf_map *map;

	key.cgroup_inode_id = cgroup_id(cgrp);
	key.attach_type = type;

	for_each_cgroup_storage_type(stype) {
		map = prog->aux->cgroup_storage[stype];
		if (!map)
			continue;

		storages[stype] = cgroup_storage_lookup((void *)map, &key, false);
		if (storages[stype])
			continue;

		storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storages[stype])) {
			bpf_cgroup_storages_free(new_storages);
			return -ENOMEM;
		}

		new_storages[stype] = storages[stype];
	}

	return 0;
}

static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
				       struct bpf_cgroup_storage *src[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		dst[stype] = src[stype];
}

static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
				     struct cgroup *cgrp,
				     enum bpf_attach_type attach_type)
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
}

/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
 * doesn't free link memory, which will eventually be done by bpf_link's
 * release() callback, when its last FD is closed.
 */
static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
{
	cgroup_put(link->cgroup);
	link->cgroup = NULL;
}

/**
 * cgroup_bpf_release() - put references of all bpf programs and
 *                        release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
static void cgroup_bpf_release(struct work_struct *work)
{
	struct cgroup *p, *cgrp = container_of(work, struct cgroup,
					       bpf.release_work);
	struct bpf_prog_array *old_array;
	struct list_head *storages = &cgrp->bpf.storages;
	struct bpf_cgroup_storage *storage, *stmp;

	unsigned int atype;

	cgroup_lock();

	for (atype = 0; atype < ARRAY_SIZE(cgrp->bpf.progs); atype++) {
		struct hlist_head *progs = &cgrp->bpf.progs[atype];
		struct bpf_prog_list *pl;
		struct hlist_node *pltmp;

		hlist_for_each_entry_safe(pl, pltmp, progs, node) {
			hlist_del(&pl->node);
			if (pl->prog) {
				if (pl->prog->expected_attach_type == BPF_LSM_CGROUP)
					bpf_trampoline_unlink_cgroup_shim(pl->prog);
				bpf_prog_put(pl->prog);
			}
			if (pl->link) {
				if (pl->link->link.prog->expected_attach_type == BPF_LSM_CGROUP)
					bpf_trampoline_unlink_cgroup_shim(pl->link->link.prog);
				bpf_cgroup_link_auto_detach(pl->link);
			}
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key[atype]);
		}
		old_array = rcu_dereference_protected(
				cgrp->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));
		bpf_prog_array_free(old_array);
	}

	list_for_each_entry_safe(storage, stmp, storages, list_cg) {
		bpf_cgroup_storage_unlink(storage);
		bpf_cgroup_storage_free(storage);
	}

	cgroup_unlock();

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);
	cgroup_put(cgrp);
}

/**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 *                           of bpf cgroup data
 * @ref: percpu ref counter structure
 */
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);

	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
	queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
}

/* Get the underlying bpf_prog of a bpf_prog_list entry, regardless of whether
 * it is attached through a link or as a direct prog.
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
	if (pl->prog)
		return pl->prog;
	if (pl->link)
		return pl->link->link.prog;
	return NULL;
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct hlist_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	hlist_for_each_entry(pl, head, node) {
		if (!prog_list_prog(pl))
			continue;
		cnt++;
	}
	return cnt;
}

/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum cgroup_bpf_attach_type atype)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[atype];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[atype]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}
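
/* Illustrative userspace sketch, built separately against libbpf, of how the
 * attach-flag rules enforced by hierarchy_allows_attach() look from the
 * syscall side. Assumes a cgroup-v2 directory fd; the helper name
 * attach_egress_prog() is hypothetical.
 *
 *	#include <bpf/bpf.h>
 *
 *	static int attach_egress_prog(int cg_fd, int prog_fd)
 *	{
 *		// BPF_F_ALLOW_MULTI permits independent programs at every
 *		// level of the hierarchy; with no flags, a non-overridable
 *		// program in an ancestor makes this attach fail with -EPERM.
 *		return bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS,
 *				       BPF_F_ALLOW_MULTI);
 *	}
 */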

/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_prog_array **array)
{
	struct bpf_prog_array_item *item;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[atype]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
			continue;

		hlist_for_each_entry(pl, &p->bpf.progs[atype], node) {
			if (!prog_list_prog(pl))
				continue;

			item = &progs->items[cnt];
			item->prog = prog_list_prog(pl);
			bpf_cgroup_storages_assign(item->cgroup_storage,
						   pl->storage);
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	*array = progs;
	return 0;
}

static void activate_effective_progs(struct cgroup *cgrp,
				     enum cgroup_bpf_attach_type atype,
				     struct bpf_prog_array *old_array)
{
	old_array = rcu_replace_pointer(cgrp->bpf.effective[atype], old_array,
					lockdep_is_held(&cgroup_mutex));
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}

/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array *arrays[NR] = {};
	struct cgroup *p;
	int ret, i;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
			      GFP_KERNEL);
	if (ret)
		return ret;

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_get(p);

	for (i = 0; i < NR; i++)
		INIT_HLIST_HEAD(&cgrp->bpf.progs[i]);

	INIT_LIST_HEAD(&cgrp->bpf.storages);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);

	return -ENOMEM;
}

static int update_effective_progs(struct cgroup *cgrp,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		err = compute_effective_progs(desc, atype, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
			if (unlikely(desc->bpf.inactive)) {
				bpf_prog_array_free(desc->bpf.inactive);
				desc->bpf.inactive = NULL;
			}
			continue;
		}

		activate_effective_progs(desc, atype, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}

#define BPF_CGROUP_MAX_PROGS 64

static struct bpf_prog_list *find_attach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	/* single-attach case */
	if (!allow_multi) {
		if (hlist_empty(progs))
			return NULL;
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	hlist_for_each_entry(pl, progs, node) {
		if (prog && pl->prog == prog && prog != replace_prog)
			/* disallow attaching the same prog twice */
			return ERR_PTR(-EINVAL);
		if (link && pl->link == link)
			/* disallow attaching the same link twice */
			return ERR_PTR(-EINVAL);
	}

	/* direct prog multi-attach w/ replacement case */
	if (replace_prog) {
		hlist_for_each_entry(pl, progs, node) {
			if (pl->prog == replace_prog)
				/* a match found */
				return pl;
		}
		/* prog to replace not found for cgroup */
		return ERR_PTR(-ENOENT);
	}

	return NULL;
}

/**
 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to attach
 * @link: A link to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_attach(struct cgroup *cgrp,
			       struct bpf_prog *prog, struct bpf_prog *replace_prog,
			       struct bpf_cgroup_link *link,
			       enum bpf_attach_type type, u32 flags)
{
	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_cgroup_storage *new_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_prog *new_prog = prog ? : link->link.prog;
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	int err;

	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
		/* invalid combination */
		return -EINVAL;
	if (link && (prog || replace_prog))
		/* only either link or prog/replace_prog can be specified */
		return -EINVAL;
	if (!!replace_prog != !!(flags & BPF_F_REPLACE))
		/* replace_prog implies BPF_F_REPLACE, and vice versa */
		return -EINVAL;

	atype = bpf_cgroup_atype_find(type, new_prog->aux->attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];

	if (!hierarchy_allows_attach(cgrp, atype))
		return -EPERM;

	if (!hlist_empty(progs) && cgrp->bpf.flags[atype] != saved_flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
		 */
		return -EPERM;

	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	pl = find_attach_entry(progs, prog, link, replace_prog,
			       flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	if (bpf_cgroup_storages_alloc(storage, new_storage, type,
				      prog ? : link->link.prog, cgrp))
		return -ENOMEM;

	if (pl) {
		old_prog = pl->prog;
	} else {
		struct hlist_node *last = NULL;

		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			bpf_cgroup_storages_free(new_storage);
			return -ENOMEM;
		}
		if (hlist_empty(progs))
			hlist_add_head(&pl->node, progs);
		else
			hlist_for_each(last, progs) {
				if (last->next)
					continue;
				hlist_add_behind(&pl->node, last);
				break;
			}
	}

	pl->prog = prog;
	pl->link = link;
	bpf_cgroup_storages_assign(pl->storage, storage);
	cgrp->bpf.flags[atype] = saved_flags;

	if (type == BPF_LSM_CGROUP) {
		err = bpf_trampoline_link_cgroup_shim(new_prog, atype);
		if (err)
			goto cleanup;
	}

	err = update_effective_progs(cgrp, atype);
	if (err)
		goto cleanup_trampoline;

	if (old_prog) {
		if (type == BPF_LSM_CGROUP)
			bpf_trampoline_unlink_cgroup_shim(old_prog);
		bpf_prog_put(old_prog);
	} else {
		static_branch_inc(&cgroup_bpf_enabled_key[atype]);
	}
	bpf_cgroup_storages_link(new_storage, cgrp, type);
	return 0;

cleanup_trampoline:
	if (type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(new_prog);

cleanup:
	if (old_prog) {
		pl->prog = old_prog;
		pl->link = NULL;
	}
	bpf_cgroup_storages_free(new_storage);
	if (!old_prog) {
		hlist_del(&pl->node);
		kfree(pl);
	}
	return err;
}

static int cgroup_bpf_attach(struct cgroup *cgrp,
			     struct bpf_prog *prog, struct bpf_prog *replace_prog,
			     struct bpf_cgroup_link *link,
			     enum bpf_attach_type type,
			     u32 flags)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
	cgroup_unlock();
	return ret;
}

/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
static void replace_effective_prog(struct cgroup *cgrp,
				   enum cgroup_bpf_attach_type atype,
				   struct bpf_cgroup_link *link)
{
	struct bpf_prog_array_item *item;
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->link == link)
					goto found;
				pos++;
			}
		}
found:
		BUG_ON(!cg);
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));
		item = &progs->items[pos];
		WRITE_ONCE(item->prog, link->link.prog);
	}
}

/**
 * __cgroup_bpf_replace() - Replace link's program and propagate the change
 *                          to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @link: A link for which to replace BPF program
 * @new_prog: &struct bpf_prog for the target BPF program with its refcnt
 *            incremented
 *
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_replace(struct cgroup *cgrp,
				struct bpf_cgroup_link *link,
				struct bpf_prog *new_prog)
{
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	bool found = false;

	atype = bpf_cgroup_atype_find(link->type, new_prog->aux->attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];

	if (link->link.prog->type != new_prog->type)
		return -EINVAL;

	hlist_for_each_entry(pl, progs, node) {
		if (pl->link == link) {
			found = true;
			break;
		}
	}
	if (!found)
		return -ENOENT;

	old_prog = xchg(&link->link.prog, new_prog);
	replace_effective_prog(cgrp, atype, link);
	bpf_prog_put(old_prog);
	return 0;
}

static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
			      struct bpf_prog *old_prog)
{
	struct bpf_cgroup_link *cg_link;
	int ret;

	cg_link = container_of(link, struct bpf_cgroup_link, link);

	cgroup_lock();
	/* link might have been auto-released by dying cgroup, so fail */
	if (!cg_link->cgroup) {
		ret = -ENOLINK;
		goto out_unlock;
	}
	if (old_prog && link->prog != old_prog) {
		ret = -EPERM;
		goto out_unlock;
	}
	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
out_unlock:
	cgroup_unlock();
	return ret;
}

static struct bpf_prog_list *find_detach_entry(struct hlist_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	if (!allow_multi) {
		if (hlist_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return ERR_PTR(-ENOENT);

		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog==NULL) in legacy mode
		 */
		return hlist_entry(progs->first, typeof(*pl), node);
	}

	if (!prog && !link)
		/* to detach MULTI prog the user has to specify valid FD
		 * of the program or link to be detached
		 */
		return ERR_PTR(-EINVAL);

	/* find the prog or link and detach it */
	hlist_for_each_entry(pl, progs, node) {
		if (pl->prog == prog && pl->link == link)
			return pl;
	}
	return ERR_PTR(-ENOENT);
}

/**
 * purge_effective_progs() - After compute_effective_progs fails to alloc new
 *                           cgrp->bpf.inactive table we can recover by
 *                           recomputing the array in place.
 *
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @atype: Type of detach operation
 */
static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
				  struct bpf_cgroup_link *link,
				  enum cgroup_bpf_attach_type atype)
{
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct hlist_head *head;
	struct cgroup *cg;
	int pos;

	/* recompute effective prog array in place */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link or prog in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[atype];
			hlist_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->prog == prog && pl->link == link)
					goto found;
				pos++;
			}
		}

		/* no link or prog match, skip the cgroup of this layer */
		continue;
found:
		progs = rcu_dereference_protected(
				desc->bpf.effective[atype],
				lockdep_is_held(&cgroup_mutex));

		/* Remove the program from the array */
		WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
			  "Failed to purge a prog from array at index %d", pos);
	}
}

/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup which descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			       struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	struct hlist_head *progs;
	u32 attach_btf_id = 0;
	u32 flags;

	if (prog)
		attach_btf_id = prog->aux->attach_btf_id;
	if (link)
		attach_btf_id = link->link.prog->aux->attach_btf_id;

	atype = bpf_cgroup_atype_find(type, attach_btf_id);
	if (atype < 0)
		return -EINVAL;

	progs = &cgrp->bpf.progs[atype];
	flags = cgrp->bpf.flags[atype];

	if (prog && link)
		/* only one of prog or link can be specified */
		return -EINVAL;

	pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	/* mark it deleted, so it's ignored while recomputing effective */
	old_prog = pl->prog;
	pl->prog = NULL;
	pl->link = NULL;

	if (update_effective_progs(cgrp, atype)) {
		/* if updating the effective arrays failed, restore the
		 * entry and purge it from the effective arrays in place
		 */
		pl->prog = old_prog;
		pl->link = link;
		purge_effective_progs(cgrp, old_prog, link, atype);
	}

	/* now can actually delete it from this cgroup list */
	hlist_del(&pl->node);

	kfree(pl);
	if (hlist_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[atype] = 0;
	if (old_prog) {
		if (type == BPF_LSM_CGROUP)
			bpf_trampoline_unlink_cgroup_shim(old_prog);
		bpf_prog_put(old_prog);
	}
	static_branch_dec(&cgroup_bpf_enabled_key[atype]);
	return 0;
}

static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			     enum bpf_attach_type type)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
	cgroup_unlock();
	return ret;
}
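
/* Illustrative userspace counterpart, built separately against libbpf, of a
 * detach request that ends up in cgroup_bpf_detach() above. For
 * BPF_F_ALLOW_MULTI attachments the exact program fd must be supplied; legacy
 * NONE/OVERRIDE attachments may pass an invalid fd (see find_detach_entry()).
 * The helper name detach_egress_prog() is hypothetical.
 *
 *	#include <bpf/bpf.h>
 *
 *	static int detach_egress_prog(int cg_fd, int prog_fd)
 *	{
 *		return bpf_prog_detach2(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS);
 *	}
 */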

/* Must be called with cgroup_mutex held to avoid races. */
static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			      union bpf_attr __user *uattr)
{
	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
	bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	enum cgroup_bpf_attach_type from_atype, to_atype;
	enum cgroup_bpf_attach_type atype;
	struct bpf_prog_array *effective;
	int cnt, ret = 0, i;
	int total_cnt = 0;
	u32 flags;

	if (effective_query && prog_attach_flags)
		return -EINVAL;

	if (type == BPF_LSM_CGROUP) {
		if (!effective_query && attr->query.prog_cnt &&
		    prog_ids && !prog_attach_flags)
			return -EINVAL;

		from_atype = CGROUP_LSM_START;
		to_atype = CGROUP_LSM_END;
		flags = 0;
	} else {
		from_atype = to_cgroup_bpf_attach_type(type);
		if (from_atype < 0)
			return -EINVAL;
		to_atype = from_atype;
		flags = cgrp->bpf.flags[from_atype];
	}

	for (atype = from_atype; atype <= to_atype; atype++) {
		if (effective_query) {
			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
							      lockdep_is_held(&cgroup_mutex));
			total_cnt += bpf_prog_array_length(effective);
		} else {
			total_cnt += prog_list_length(&cgrp->bpf.progs[atype]);
		}
	}

	/* always output uattr->query.attach_flags as 0 during effective query */
	flags = effective_query ? 0 : flags;
	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
		return -EFAULT;
	if (attr->query.prog_cnt == 0 || !prog_ids || !total_cnt)
		/* return early if user requested only program count + flags */
		return 0;

	if (attr->query.prog_cnt < total_cnt) {
		total_cnt = attr->query.prog_cnt;
		ret = -ENOSPC;
	}

	for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
		if (effective_query) {
			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
							      lockdep_is_held(&cgroup_mutex));
			cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
			ret = bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
		} else {
			struct hlist_head *progs;
			struct bpf_prog_list *pl;
			struct bpf_prog *prog;
			u32 id;

			progs = &cgrp->bpf.progs[atype];
			cnt = min_t(int, prog_list_length(progs), total_cnt);
			i = 0;
			hlist_for_each_entry(pl, progs, node) {
				prog = prog_list_prog(pl);
				id = prog->aux->id;
				if (copy_to_user(prog_ids + i, &id, sizeof(id)))
					return -EFAULT;
				if (++i == cnt)
					break;
			}

			if (prog_attach_flags) {
				flags = cgrp->bpf.flags[atype];

				for (i = 0; i < cnt; i++)
					if (copy_to_user(prog_attach_flags + i,
							 &flags, sizeof(flags)))
						return -EFAULT;
				prog_attach_flags += cnt;
			}
		}

		prog_ids += cnt;
		total_cnt -= cnt;
	}
	return ret;
}

static int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
			    union bpf_attr __user *uattr)
{
	int ret;

	cgroup_lock();
	ret = __cgroup_bpf_query(cgrp, attr, uattr);
	cgroup_unlock();
	return ret;
}
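
/* Illustrative userspace sketch, built separately against libbpf, of the
 * query interface served by __cgroup_bpf_query() above. Passing
 * BPF_F_QUERY_EFFECTIVE as query_flags walks the effective array (including
 * inherited programs) instead of the locally attached list. The helper name
 * print_egress_progs() is hypothetical.
 *
 *	#include <stdio.h>
 *	#include <bpf/bpf.h>
 *
 *	static void print_egress_progs(int cg_fd)
 *	{
 *		__u32 prog_ids[64], attach_flags = 0;
 *		__u32 cnt = 64;	// in: capacity of prog_ids; out: prog count
 *
 *		if (bpf_prog_query(cg_fd, BPF_CGROUP_INET_EGRESS, 0,
 *				   &attach_flags, prog_ids, &cnt))
 *			return;
 *		while (cnt--)
 *			printf("prog id %u (attach_flags 0x%x)\n",
 *			       prog_ids[cnt], attach_flags);
 *	}
 */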

int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct bpf_prog *replace_prog = NULL;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
	    (attr->attach_flags & BPF_F_REPLACE)) {
		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
		if (IS_ERR(replace_prog)) {
			cgroup_put(cgrp);
			return PTR_ERR(replace_prog);
		}
	}

	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
				attr->attach_type, attr->attach_flags);

	if (replace_prog)
		bpf_prog_put(replace_prog);
	cgroup_put(cgrp);
	return ret;
}

int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
	if (prog)
		bpf_prog_put(prog);

	cgroup_put(cgrp);
	return ret;
}

static void bpf_cgroup_link_release(struct bpf_link *link)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);
	struct cgroup *cg;

	/* link might have been auto-detached by dying cgroup already,
	 * in that case our work is done here
	 */
	if (!cg_link->cgroup)
		return;

	cgroup_lock();

	/* re-check cgroup under lock again */
	if (!cg_link->cgroup) {
		cgroup_unlock();
		return;
	}

	WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
				    cg_link->type));
	if (cg_link->type == BPF_LSM_CGROUP)
		bpf_trampoline_unlink_cgroup_shim(cg_link->link.prog);

	cg = cg_link->cgroup;
	cg_link->cgroup = NULL;

	cgroup_unlock();

	cgroup_put(cg);
}

static void bpf_cgroup_link_dealloc(struct bpf_link *link)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);

	kfree(cg_link);
}

static int bpf_cgroup_link_detach(struct bpf_link *link)
{
	bpf_cgroup_link_release(link);

	return 0;
}

static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
					struct seq_file *seq)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);
	u64 cg_id = 0;

	cgroup_lock();
	if (cg_link->cgroup)
		cg_id = cgroup_id(cg_link->cgroup);
	cgroup_unlock();

	seq_printf(seq,
		   "cgroup_id:\t%llu\n"
		   "attach_type:\t%d\n",
		   cg_id,
		   cg_link->type);
}

static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
					  struct bpf_link_info *info)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);
	u64 cg_id = 0;

	cgroup_lock();
	if (cg_link->cgroup)
		cg_id = cgroup_id(cg_link->cgroup);
	cgroup_unlock();

	info->cgroup.cgroup_id = cg_id;
	info->cgroup.attach_type = cg_link->type;
	return 0;
}

static const struct bpf_link_ops bpf_cgroup_link_lops = {
	.release = bpf_cgroup_link_release,
	.dealloc = bpf_cgroup_link_dealloc,
	.detach = bpf_cgroup_link_detach,
	.update_prog = cgroup_bpf_replace,
	.show_fdinfo = bpf_cgroup_link_show_fdinfo,
	.fill_link_info = bpf_cgroup_link_fill_link_info,
};

int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_link_primer link_primer;
	struct bpf_cgroup_link *link;
	struct cgroup *cgrp;
	int err;

	if (attr->link_create.flags)
		return -EINVAL;

	cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_cgroup;
	}
	bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
		      prog);
	link->cgroup = cgrp;
	link->type = attr->link_create.attach_type;

	err = bpf_link_prime(&link->link, &link_primer);
	if (err) {
		kfree(link);
		goto out_put_cgroup;
	}

	err = cgroup_bpf_attach(cgrp, NULL, NULL, link,
				link->type, BPF_F_ALLOW_MULTI);
	if (err) {
		bpf_link_cleanup(&link_primer);
		goto out_put_cgroup;
	}

	return bpf_link_settle(&link_primer);

out_put_cgroup:
	cgroup_put(cgrp);
	return err;
}
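
/* Illustrative userspace sketch, built separately against libbpf, of the
 * link-based attach path that lands in cgroup_bpf_link_attach() above. Links
 * are implicitly BPF_F_ALLOW_MULTI and are auto-detached when the cgroup
 * dies; the returned link fd pins the attachment. The helper name
 * attach_egress_link() is hypothetical.
 *
 *	#include <bpf/bpf.h>
 *
 *	static int attach_egress_link(int cg_fd, int prog_fd)
 *	{
 *		// returns a new link fd on success, a negative error otherwise
 *		return bpf_link_create(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS,
 *				       NULL);
 *	}
 */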

int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_query(cgrp, attr, uattr);

	cgroup_put(cgrp);
	return ret;
}

/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @atype: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @type must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * For egress packets, this function can return:
 *   NET_XMIT_SUCCESS    (0)    - continue with packet output
 *   NET_XMIT_DROP       (1)    - drop packet and notify TCP to call cwr
 *   NET_XMIT_CN         (2)    - continue with packet output and notify TCP
 *                                to call cwr
 *   -err                       - drop packet
 *
 * For ingress packets, this function will return -EPERM if any
 * attached program was found and it returned a value != 1 during execution.
 * Otherwise 0 is returned.
 */
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum cgroup_bpf_attach_type atype)
{
	unsigned int offset = -skb_network_offset(skb);
	struct sock *save_sk;
	void *saved_data_end;
	struct cgroup *cgrp;
	int ret;

	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);

	/* compute pointers for the bpf prog */
	bpf_compute_and_save_data_end(skb, &saved_data_end);

	if (atype == CGROUP_INET_EGRESS) {
		u32 flags = 0;
		bool cn;

		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, skb,
					    __bpf_prog_run_save_cb, 0, &flags);

		/* Return values of CGROUP EGRESS BPF programs are:
		 *   0: drop packet
		 *   1: keep packet
		 *   2: drop packet and cn
		 *   3: keep packet and cn
		 *
		 * The returned value is then converted to one of the NET_XMIT
		 * or an error code that is then interpreted as drop packet
		 * (and no cn):
		 *   0: NET_XMIT_SUCCESS  skb should be transmitted
		 *   1: NET_XMIT_DROP     skb should be dropped and cn
		 *   2: NET_XMIT_CN       skb should be transmitted and cn
		 *   3: -err              skb should be dropped
		 */

		cn = flags & BPF_RET_SET_CN;
		if (ret && !IS_ERR_VALUE((long)ret))
			ret = -EFAULT;
		if (!ret)
			ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
		else
			ret = (cn ? NET_XMIT_DROP : ret);
	} else {
		ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
					    skb, __bpf_prog_run_save_cb, 0,
					    NULL);
		if (ret && !IS_ERR_VALUE((long)ret))
			ret = -EFAULT;
	}
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
	skb->sk = save_sk;

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
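
/* Illustrative BPF-side sketch, built separately with clang -target bpf
 * against libbpf headers, of an egress program exercising the 0..3 return
 * convention documented above. Bit 0 is the keep/drop verdict consumed as
 * func_ret, bit 1 reaches *ret_flags in bpf_prog_run_array_cg() as
 * BPF_RET_SET_CN. The 1400-byte threshold is an arbitrary example value.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("cgroup_skb/egress")
 *	int egress_keep_cn(struct __sk_buff *skb)
 *	{
 *		if (skb->len > 1400)
 *			return 3;	// keep packet, request congestion notification
 *		return 1;		// keep packet
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */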

/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @atype: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and it returned a value != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum cgroup_bpf_attach_type atype)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);

	return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0,
				     NULL);
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);

1462 /**
1463  * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
1464  *                                       provided by user sockaddr
1465  * @sk: sock struct that will use sockaddr
1466  * @uaddr: sockaddr struct provided by user
1467  * @uaddrlen: Pointer to the size of the sockaddr struct provided by user. It is
1468  *            read-only for AF_INET[6] uaddr but can be modified for AF_UNIX
1469  *            uaddr.
1470  * @atype: The type of program to be executed
1471  * @t_ctx: Pointer to attach type specific context
1472  * @flags: Pointer to u32 which contains higher bits of BPF program
1473  *         return value (OR'ed together).
1474  *
1475  * The socket is expected to be of type INET, INET6 or UNIX.
1476  *
1477  * This function will return %-EPERM if an attached program is found and
1478  * its returned value was != 1 during execution. In all other cases, 0 is returned.
1479  */
1480 int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
1481                                       struct sockaddr *uaddr,
1482                                       int *uaddrlen,
1483                                       enum cgroup_bpf_attach_type atype,
1484                                       void *t_ctx,
1485                                       u32 *flags)
1486 {
1487         struct bpf_sock_addr_kern ctx = {
1488                 .sk = sk,
1489                 .uaddr = uaddr,
1490                 .t_ctx = t_ctx,
1491         };
1492         struct sockaddr_storage unspec;
1493         struct cgroup *cgrp;
1494         int ret;
1495
1496         /* Check the socket family, since not all sockets represent a
1497          * network endpoint (e.g. AF_UNIX).
1498          */
1499         if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6 &&
1500             sk->sk_family != AF_UNIX)
1501                 return 0;
1502
1503         if (!ctx.uaddr) {
1504                 memset(&unspec, 0, sizeof(unspec));
1505                 ctx.uaddr = (struct sockaddr *)&unspec;
1506                 ctx.uaddrlen = 0;
1507         } else {
1508                 ctx.uaddrlen = *uaddrlen;
1509         }
1510
1511         cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1512         ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run,
1513                                     0, flags);
1514
1515         if (!ret && uaddr)
1516                 *uaddrlen = ctx.uaddrlen;
1517
1518         return ret;
1519 }
1520 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
1521
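/*
 * Illustrative, standalone sketch of a sock_addr program run through
 * this hook; not part of this file. Assumes libbpf's helpers and
 * <bpf/bpf_endian.h>; the redirect target 127.0.0.1:4040 is
 * hypothetical.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *	#include <bpf/bpf_endian.h>
 *
 *	SEC("cgroup/connect4")
 *	int rewrite_connect(struct bpf_sock_addr *ctx)
 *	{
 *		ctx->user_ip4 = bpf_htonl(0x7f000001);	// 127.0.0.1
 *		ctx->user_port = bpf_htons(4040);
 *		return 1;	// allow the (rewritten) connect
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
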
1522 /**
1523  * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
1524  * @sk: socket to get cgroup from
1525  * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
1526  * sk with connection information (IP addresses, etc.). May not contain
1527  * cgroup info if it is a req sock.
1528  * @atype: The type of program to be executed
1529  *
1530  * The socket passed is expected to be of type INET or INET6.
1531  *
1532  * The program type passed in via @atype must be suitable for sock_ops
1533  * filtering. No further check is performed to assert that.
1534  *
1535  * This function will return %-EPERM if an attached program was found
1536  * and it returned != 1 during execution. In all other cases, 0 is returned.
1537  */
1538 int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
1539                                      struct bpf_sock_ops_kern *sock_ops,
1540                                      enum cgroup_bpf_attach_type atype)
1541 {
1542         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1543
1544         return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
1545                                      0, NULL);
1546 }
1547 EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
1548
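/*
 * Illustrative, standalone sketch of a sock_ops program run through
 * this hook; not part of this file. Assumes libbpf headers; the RTO
 * value is hypothetical.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("sockops")
 *	int tune_syn_rto(struct bpf_sock_ops *skops)
 *	{
 *		if (skops->op == BPF_SOCK_OPS_TIMEOUT_INIT)
 *			skops->reply = 10;	// custom initial RTO
 *		return 1;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
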
1549 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
1550                                       short access, enum cgroup_bpf_attach_type atype)
1551 {
1552         struct cgroup *cgrp;
1553         struct bpf_cgroup_dev_ctx ctx = {
1554                 .access_type = (access << 16) | dev_type,
1555                 .major = major,
1556                 .minor = minor,
1557         };
1558         int ret;
1559
1560         rcu_read_lock();
1561         cgrp = task_dfl_cgroup(current);
1562         ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1563                                     NULL);
1564         rcu_read_unlock();
1565
1566         return ret;
1567 }
1568
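/*
 * Illustrative, standalone sketch of a device program run through this
 * hook; not part of this file. It unpacks access_type exactly as it is
 * packed above ((access << 16) | dev_type). Assumes libbpf headers;
 * the program name is hypothetical.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("cgroup/dev")
 *	int deny_char_mknod(struct bpf_cgroup_dev_ctx *ctx)
 *	{
 *		int type = ctx->access_type & 0xffff;
 *		int access = ctx->access_type >> 16;
 *
 *		if (type == BPF_DEVCG_DEV_CHAR && (access & BPF_DEVCG_ACC_MKNOD))
 *			return 0;	// deny -> -EPERM
 *		return 1;		// allow
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
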
1569 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
1570 {
1571         /* The flags argument is currently unused, but provides the
1572          * ability to extend the API. The verifier checks that its
1573          * value is correct.
1574          */
1575         enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
1576         struct bpf_cgroup_storage *storage;
1577         struct bpf_cg_run_ctx *ctx;
1578         void *ptr;
1579
1580         /* get current cgroup storage from BPF run context */
1581         ctx = container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1582         storage = ctx->prog_item->cgroup_storage[stype];
1583
1584         if (stype == BPF_CGROUP_STORAGE_SHARED)
1585                 ptr = &READ_ONCE(storage->buf)->data[0];
1586         else
1587                 ptr = this_cpu_ptr(storage->percpu_buf);
1588
1589         return (unsigned long)ptr;
1590 }
1591
1592 const struct bpf_func_proto bpf_get_local_storage_proto = {
1593         .func           = bpf_get_local_storage,
1594         .gpl_only       = false,
1595         .ret_type       = RET_PTR_TO_MAP_VALUE,
1596         .arg1_type      = ARG_CONST_MAP_PTR,
1597         .arg2_type      = ARG_ANYTHING,
1598 };
1599
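/*
 * Illustrative, standalone sketch of a program using this helper; not
 * part of this file. Assumes libbpf headers; map and program names are
 * hypothetical. For BPF_MAP_TYPE_CGROUP_STORAGE the helper never
 * returns NULL, so the pointer can be used directly.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
 *		__type(key, struct bpf_cgroup_storage_key);
 *		__type(value, __u64);
 *	} pkt_counter SEC(".maps");
 *
 *	SEC("cgroup_skb/egress")
 *	int count_egress(struct __sk_buff *skb)
 *	{
 *		__u64 *val = bpf_get_local_storage(&pkt_counter, 0);
 *
 *		__sync_fetch_and_add(val, 1);
 *		return 1;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
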
1600 BPF_CALL_0(bpf_get_retval)
1601 {
1602         struct bpf_cg_run_ctx *ctx =
1603                 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1604
1605         return ctx->retval;
1606 }
1607
1608 const struct bpf_func_proto bpf_get_retval_proto = {
1609         .func           = bpf_get_retval,
1610         .gpl_only       = false,
1611         .ret_type       = RET_INTEGER,
1612 };
1613
1614 BPF_CALL_1(bpf_set_retval, int, retval)
1615 {
1616         struct bpf_cg_run_ctx *ctx =
1617                 container_of(current->bpf_ctx, struct bpf_cg_run_ctx, run_ctx);
1618
1619         ctx->retval = retval;
1620         return 0;
1621 }
1622
1623 const struct bpf_func_proto bpf_set_retval_proto = {
1624         .func           = bpf_set_retval,
1625         .gpl_only       = false,
1626         .ret_type       = RET_INTEGER,
1627         .arg1_type      = ARG_ANYTHING,
1628 };
1629
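/*
 * Illustrative, standalone sketch of bpf_set_retval() in a hook where
 * it is allowed (see cgroup_common_func_proto() below); not part of
 * this file. Returning 0 with an error already stored in retval keeps
 * that error instead of the default -EPERM. Assumes libbpf headers;
 * the program name is hypothetical.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	#define EACCES 13	// from <errno.h>
 *
 *	SEC("cgroup/setsockopt")
 *	int reject_with_eacces(struct bpf_sockopt *ctx)
 *	{
 *		bpf_set_retval(-EACCES);	// caller sees -EACCES
 *		return 0;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
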
1630 static const struct bpf_func_proto *
1631 cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
1632 {
1633         const struct bpf_func_proto *func_proto;
1634
1635         func_proto = cgroup_common_func_proto(func_id, prog);
1636         if (func_proto)
1637                 return func_proto;
1638
1639         func_proto = cgroup_current_func_proto(func_id, prog);
1640         if (func_proto)
1641                 return func_proto;
1642
1643         switch (func_id) {
1644         case BPF_FUNC_perf_event_output:
1645                 return &bpf_event_output_data_proto;
1646         default:
1647                 return bpf_base_func_proto(func_id, prog);
1648         }
1649 }
1650
1651 static bool cgroup_dev_is_valid_access(int off, int size,
1652                                        enum bpf_access_type type,
1653                                        const struct bpf_prog *prog,
1654                                        struct bpf_insn_access_aux *info)
1655 {
1656         const int size_default = sizeof(__u32);
1657
1658         if (type == BPF_WRITE)
1659                 return false;
1660
1661         if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
1662                 return false;
1663         /* The verifier guarantees that size > 0. */
1664         if (off % size != 0)
1665                 return false;
1666
1667         switch (off) {
1668         case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
1669                 bpf_ctx_record_field_size(info, size_default);
1670                 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
1671                         return false;
1672                 break;
1673         default:
1674                 if (size != size_default)
1675                         return false;
1676         }
1677
1678         return true;
1679 }
1680
1681 const struct bpf_prog_ops cg_dev_prog_ops = {
1682 };
1683
1684 const struct bpf_verifier_ops cg_dev_verifier_ops = {
1685         .get_func_proto         = cgroup_dev_func_proto,
1686         .is_valid_access        = cgroup_dev_is_valid_access,
1687 };
1688
1689 /**
1690  * __cgroup_bpf_run_filter_sysctl() - Run a program on sysctl
1691  *
1692  * @head: sysctl table header
1693  * @table: sysctl table
1694  * @write: sysctl is being read (= 0) or written (= 1)
1695  * @buf: pointer to buffer (in and out)
1696  * @pcount: value-result argument: value is size of buffer pointed to by @buf,
1697  *      result is size of the new value if the program set one, initial value
1698  *      otherwise
1699  * @ppos: value-result argument: value is position at which read from or write
1700  *      to sysctl is happening, result is new position if program overrode it,
1701  *      initial value otherwise
1702  * @atype: type of program to be executed
1703  *
1704  * Program is run when sysctl is being accessed, either read or written, and
1705  * can allow or deny such access.
1706  *
1707  * This function will return %-EPERM if an attached program is found and
1708  * returned value != 1 during execution. In all other cases 0 is returned.
1709  */
1710 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
1711                                    const struct ctl_table *table, int write,
1712                                    char **buf, size_t *pcount, loff_t *ppos,
1713                                    enum cgroup_bpf_attach_type atype)
1714 {
1715         struct bpf_sysctl_kern ctx = {
1716                 .head = head,
1717                 .table = table,
1718                 .write = write,
1719                 .ppos = ppos,
1720                 .cur_val = NULL,
1721                 .cur_len = PAGE_SIZE,
1722                 .new_val = NULL,
1723                 .new_len = 0,
1724                 .new_updated = 0,
1725         };
1726         struct cgroup *cgrp;
1727         loff_t pos = 0;
1728         int ret;
1729
1730         ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
1731         if (!ctx.cur_val ||
1732             table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
1733                 /* Let BPF program decide how to proceed. */
1734                 ctx.cur_len = 0;
1735         }
1736
1737         if (write && *buf && *pcount) {
1738                 /* The BPF program should be able to override the new value
1739                  * with a buffer bigger than the one provided by the user.
1740                  */
1741                 ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
1742                 ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
1743                 if (ctx.new_val) {
1744                         memcpy(ctx.new_val, *buf, ctx.new_len);
1745                 } else {
1746                         /* Let BPF program decide how to proceed. */
1747                         ctx.new_len = 0;
1748                 }
1749         }
1750
1751         rcu_read_lock();
1752         cgrp = task_dfl_cgroup(current);
1753         ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0,
1754                                     NULL);
1755         rcu_read_unlock();
1756
1757         kfree(ctx.cur_val);
1758
1759         if (ret == 1 && ctx.new_updated) {
1760                 kfree(*buf);
1761                 *buf = ctx.new_val;
1762                 *pcount = ctx.new_len;
1763         } else {
1764                 kfree(ctx.new_val);
1765         }
1766
1767         return ret;
1768 }
1769
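/*
 * Illustrative, standalone sketch of a sysctl program run through this
 * hook; not part of this file. Setting a new value via
 * bpf_sysctl_set_new_value() flips new_updated, so the kernel uses the
 * program's buffer (the "ret == 1 && ctx.new_updated" path above).
 * Assumes libbpf headers; the forced value is hypothetical.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	SEC("cgroup/sysctl")
 *	int clamp_writes(struct bpf_sysctl *ctx)
 *	{
 *		char v[] = "1024";
 *
 *		if (ctx->write)
 *			bpf_sysctl_set_new_value(ctx, v, sizeof(v) - 1);
 *		return 1;	// allow the (possibly rewritten) access
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
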
1770 #ifdef CONFIG_NET
1771 static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen,
1772                              struct bpf_sockopt_buf *buf)
1773 {
1774         if (unlikely(max_optlen < 0))
1775                 return -EINVAL;
1776
1777         if (unlikely(max_optlen > PAGE_SIZE)) {
1778                 /* We don't expose optvals that are greater than PAGE_SIZE
1779                  * to the BPF program.
1780                  */
1781                 max_optlen = PAGE_SIZE;
1782         }
1783
1784         if (max_optlen <= sizeof(buf->data)) {
1785                 /* When the optval fits into BPF_SOCKOPT_KERN_BUF_SIZE
1786                  * bytes, avoid the cost of kzalloc().
1787                  */
1788                 ctx->optval = buf->data;
1789                 ctx->optval_end = ctx->optval + max_optlen;
1790                 return max_optlen;
1791         }
1792
1793         ctx->optval = kzalloc(max_optlen, GFP_USER);
1794         if (!ctx->optval)
1795                 return -ENOMEM;
1796
1797         ctx->optval_end = ctx->optval + max_optlen;
1798
1799         return max_optlen;
1800 }
1801
1802 static void sockopt_free_buf(struct bpf_sockopt_kern *ctx,
1803                              struct bpf_sockopt_buf *buf)
1804 {
1805         if (ctx->optval == buf->data)
1806                 return;
1807         kfree(ctx->optval);
1808 }
1809
1810 static bool sockopt_buf_allocated(struct bpf_sockopt_kern *ctx,
1811                                   struct bpf_sockopt_buf *buf)
1812 {
1813         return ctx->optval != buf->data;
1814 }
1815
1816 int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
1817                                        int *optname, sockptr_t optval,
1818                                        int *optlen, char **kernel_optval)
1819 {
1820         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1821         struct bpf_sockopt_buf buf = {};
1822         struct bpf_sockopt_kern ctx = {
1823                 .sk = sk,
1824                 .level = *level,
1825                 .optname = *optname,
1826         };
1827         int ret, max_optlen;
1828
1829         /* Allocate a bit more than the initial user buffer for the
1830          * BPF program. The canonical use case is overriding
1831          * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
1832          */
1833         max_optlen = max_t(int, 16, *optlen);
1834         max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
1835         if (max_optlen < 0)
1836                 return max_optlen;
1837
1838         ctx.optlen = *optlen;
1839
1840         if (copy_from_sockptr(ctx.optval, optval,
1841                               min(*optlen, max_optlen))) {
1842                 ret = -EFAULT;
1843                 goto out;
1844         }
1845
1846         lock_sock(sk);
1847         ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
1848                                     &ctx, bpf_prog_run, 0, NULL);
1849         release_sock(sk);
1850
1851         if (ret)
1852                 goto out;
1853
1854         if (ctx.optlen == -1) {
1855                 /* optlen set to -1, bypass kernel */
1856                 ret = 1;
1857         } else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
1858                 /* optlen is out of bounds */
1859                 if (*optlen > PAGE_SIZE && ctx.optlen >= 0) {
1860                         pr_info_once("bpf setsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
1861                                      ctx.optlen, max_optlen);
1862                         ret = 0;
1863                         goto out;
1864                 }
1865                 ret = -EFAULT;
1866         } else {
1867                 /* optlen within bounds, run kernel handler */
1868                 ret = 0;
1869
1870                 /* export any potential modifications */
1871                 *level = ctx.level;
1872                 *optname = ctx.optname;
1873
1874                 /* optlen == 0 from BPF indicates that we should
1875                  * use original userspace data.
1876                  */
1877                 if (ctx.optlen != 0) {
1878                         *optlen = ctx.optlen;
1879                         /* We've used bpf_sockopt_kern->buf as an intermediary
1880                          * storage, but the BPF program indicates that we need
1881                          * to pass this data to the kernel setsockopt handler.
1882                          * No way to export on-stack buf, have to allocate a
1883                          * new buffer.
1884                          */
1885                         if (!sockopt_buf_allocated(&ctx, &buf)) {
1886                                 void *p = kmalloc(ctx.optlen, GFP_USER);
1887
1888                                 if (!p) {
1889                                         ret = -ENOMEM;
1890                                         goto out;
1891                                 }
1892                                 memcpy(p, ctx.optval, ctx.optlen);
1893                                 *kernel_optval = p;
1894                         } else {
1895                                 *kernel_optval = ctx.optval;
1896                         }
1897                         /* export and don't free sockopt buf */
1898                         return 0;
1899                 }
1900         }
1901
1902 out:
1903         sockopt_free_buf(&ctx, &buf);
1904         return ret;
1905 }
1906
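/*
 * Illustrative, standalone sketch of the canonical use case named
 * above: overriding TCP_CONGESTION(nv) with TCP_CONGESTION(cubic); not
 * part of this file. Assumes libbpf headers; SOL_TCP/TCP_CONGESTION
 * values are from <netinet/tcp.h>.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	#define SOL_TCP		6
 *	#define TCP_CONGESTION	13
 *
 *	SEC("cgroup/setsockopt")
 *	int override_cc(struct bpf_sockopt *ctx)
 *	{
 *		char cubic[] = "cubic";
 *
 *		if (ctx->level != SOL_TCP || ctx->optname != TCP_CONGESTION)
 *			return 1;	// pass through untouched
 *		if (ctx->optval + sizeof(cubic) > ctx->optval_end)
 *			return 0;	// not enough room -> -EPERM
 *		__builtin_memcpy(ctx->optval, cubic, sizeof(cubic));
 *		ctx->optlen = sizeof(cubic);
 *		return 1;	// run the kernel handler with the new value
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
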
1907 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
1908                                        int optname, sockptr_t optval,
1909                                        sockptr_t optlen, int max_optlen,
1910                                        int retval)
1911 {
1912         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1913         struct bpf_sockopt_buf buf = {};
1914         struct bpf_sockopt_kern ctx = {
1915                 .sk = sk,
1916                 .level = level,
1917                 .optname = optname,
1918                 .current_task = current,
1919         };
1920         int orig_optlen;
1921         int ret;
1922
1923         orig_optlen = max_optlen;
1924         ctx.optlen = max_optlen;
1925         max_optlen = sockopt_alloc_buf(&ctx, max_optlen, &buf);
1926         if (max_optlen < 0)
1927                 return max_optlen;
1928
1929         if (!retval) {
1930                 /* If kernel getsockopt finished successfully,
1931                  * copy whatever was returned to the user back
1932                  * into our temporary buffer. Set optlen to the
1933                  * one that kernel returned as well to let
1934                  * BPF programs inspect the value.
1935                  */
1936                 if (copy_from_sockptr(&ctx.optlen, optlen,
1937                                       sizeof(ctx.optlen))) {
1938                         ret = -EFAULT;
1939                         goto out;
1940                 }
1941
1942                 if (ctx.optlen < 0) {
1943                         ret = -EFAULT;
1944                         goto out;
1945                 }
1946                 orig_optlen = ctx.optlen;
1947
1948                 if (copy_from_sockptr(ctx.optval, optval,
1949                                       min(ctx.optlen, max_optlen))) {
1950                         ret = -EFAULT;
1951                         goto out;
1952                 }
1953         }
1954
1955         lock_sock(sk);
1956         ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
1957                                     &ctx, bpf_prog_run, retval, NULL);
1958         release_sock(sk);
1959
1960         if (ret < 0)
1961                 goto out;
1962
1963         if (!sockptr_is_null(optval) &&
1964             (ctx.optlen > max_optlen || ctx.optlen < 0)) {
1965                 if (orig_optlen > PAGE_SIZE && ctx.optlen >= 0) {
1966                         pr_info_once("bpf getsockopt: ignoring program buffer with optlen=%d (max_optlen=%d)\n",
1967                                      ctx.optlen, max_optlen);
1968                         ret = retval;
1969                         goto out;
1970                 }
1971                 ret = -EFAULT;
1972                 goto out;
1973         }
1974
1975         if (ctx.optlen != 0) {
1976                 if (!sockptr_is_null(optval) &&
1977                     copy_to_sockptr(optval, ctx.optval, ctx.optlen)) {
1978                         ret = -EFAULT;
1979                         goto out;
1980                 }
1981                 if (copy_to_sockptr(optlen, &ctx.optlen, sizeof(ctx.optlen))) {
1982                         ret = -EFAULT;
1983                         goto out;
1984                 }
1985         }
1986
1987 out:
1988         sockopt_free_buf(&ctx, &buf);
1989         return ret;
1990 }
1991
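/*
 * Illustrative, standalone sketch of a getsockopt program run through
 * this hook; not part of this file. It rewrites the value the kernel
 * returned and clears retval so the caller sees success. Assumes
 * libbpf headers; the spoofed value is hypothetical.
 *
 *	#include <linux/bpf.h>
 *	#include <bpf/bpf_helpers.h>
 *
 *	#define SOL_TCP		6
 *	#define TCP_CONGESTION	13
 *
 *	SEC("cgroup/getsockopt")
 *	int spoof_cc(struct bpf_sockopt *ctx)
 *	{
 *		char fake[] = "reno";
 *
 *		if (ctx->level != SOL_TCP || ctx->optname != TCP_CONGESTION)
 *			return 1;
 *		if (ctx->optval + sizeof(fake) > ctx->optval_end)
 *			return 0;	// -EPERM
 *		__builtin_memcpy(ctx->optval, fake, sizeof(fake));
 *		ctx->optlen = sizeof(fake);
 *		ctx->retval = 0;	// report success to userspace
 *		return 1;
 *	}
 *
 *	char _license[] SEC("license") = "GPL";
 */
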
1992 int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
1993                                             int optname, void *optval,
1994                                             int *optlen, int retval)
1995 {
1996         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
1997         struct bpf_sockopt_kern ctx = {
1998                 .sk = sk,
1999                 .level = level,
2000                 .optname = optname,
2001                 .optlen = *optlen,
2002                 .optval = optval,
2003                 .optval_end = optval + *optlen,
2004                 .current_task = current,
2005         };
2006         int ret;
2007
2008         /* Note that __cgroup_bpf_run_filter_getsockopt doesn't copy
2009          * user data back into the BPF buffer when retval != 0. This is
2010          * done as an optimization to avoid an extra copy, assuming the
2011          * kernel won't populate the data in case of an error.
2012          * Here we always pass the data, and memset() should be
2013          * called if that data shouldn't be "exported".
2014          */
2015
2016         ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
2017                                     &ctx, bpf_prog_run, retval, NULL);
2018         if (ret < 0)
2019                 return ret;
2020
2021         if (ctx.optlen > *optlen)
2022                 return -EFAULT;
2023
2024         /* BPF programs can shrink the buffer; export the modifications.
2025          */
2026         if (ctx.optlen != 0)
2027                 *optlen = ctx.optlen;
2028
2029         return ret;
2030 }
2031 #endif
2032
2033 static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
2034                               size_t *lenp)
2035 {
2036         ssize_t tmp_ret = 0, ret;
2037
2038         if (dir->header.parent) {
2039                 tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
2040                 if (tmp_ret < 0)
2041                         return tmp_ret;
2042         }
2043
2044         ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
2045         if (ret < 0)
2046                 return ret;
2047         *bufp += ret;
2048         *lenp -= ret;
2049         ret += tmp_ret;
2050
2051         /* Avoid leading slash. */
2052         if (!ret)
2053                 return ret;
2054
2055         tmp_ret = strscpy(*bufp, "/", *lenp);
2056         if (tmp_ret < 0)
2057                 return tmp_ret;
2058         *bufp += tmp_ret;
2059         *lenp -= tmp_ret;
2060
2061         return ret + tmp_ret;
2062 }
2063
2064 BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
2065            size_t, buf_len, u64, flags)
2066 {
2067         ssize_t tmp_ret = 0, ret;
2068
2069         if (!buf)
2070                 return -EINVAL;
2071
2072         if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
2073                 if (!ctx->head)
2074                         return -EINVAL;
2075                 tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
2076                 if (tmp_ret < 0)
2077                         return tmp_ret;
2078         }
2079
2080         ret = strscpy(buf, ctx->table->procname, buf_len);
2081
2082         return ret < 0 ? ret : tmp_ret + ret;
2083 }
2084
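/*
 * Worked example: for /proc/sys/net/ipv4/tcp_mem, sysctl_cpy_dir()
 * builds "net/ipv4/" and the table name is appended, so a program
 * sees:
 *
 *	bpf_sysctl_get_name(ctx, buf, sizeof(buf), 0)
 *		-> "net/ipv4/tcp_mem"
 *	bpf_sysctl_get_name(ctx, buf, sizeof(buf), BPF_F_SYSCTL_BASE_NAME)
 *		-> "tcp_mem"
 */
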
2085 static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
2086         .func           = bpf_sysctl_get_name,
2087         .gpl_only       = false,
2088         .ret_type       = RET_INTEGER,
2089         .arg1_type      = ARG_PTR_TO_CTX,
2090         .arg2_type      = ARG_PTR_TO_MEM,
2091         .arg3_type      = ARG_CONST_SIZE,
2092         .arg4_type      = ARG_ANYTHING,
2093 };
2094
2095 static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
2096                              size_t src_len)
2097 {
2098         if (!dst)
2099                 return -EINVAL;
2100
2101         if (!dst_len)
2102                 return -E2BIG;
2103
2104         if (!src || !src_len) {
2105                 memset(dst, 0, dst_len);
2106                 return -EINVAL;
2107         }
2108
2109         memcpy(dst, src, min(dst_len, src_len));
2110
2111         if (dst_len > src_len) {
2112                 memset(dst + src_len, '\0', dst_len - src_len);
2113                 return src_len;
2114         }
2115
2116         dst[dst_len - 1] = '\0';
2117
2118         return -E2BIG;
2119 }
2120
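/*
 * Worked example: with dst_len == 8 and src == "512\n" (src_len == 4),
 * copy_sysctl_value() copies all 4 bytes, zero-fills the remaining 4
 * and returns 4. With dst_len == 2 it copies "51", then overwrites the
 * last byte with '\0' (leaving "5") and returns -E2BIG.
 */
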
2121 BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
2122            char *, buf, size_t, buf_len)
2123 {
2124         return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
2125 }
2126
2127 static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
2128         .func           = bpf_sysctl_get_current_value,
2129         .gpl_only       = false,
2130         .ret_type       = RET_INTEGER,
2131         .arg1_type      = ARG_PTR_TO_CTX,
2132         .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
2133         .arg3_type      = ARG_CONST_SIZE,
2134 };
2135
2136 BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
2137            size_t, buf_len)
2138 {
2139         if (!ctx->write) {
2140                 if (buf && buf_len)
2141                         memset(buf, '\0', buf_len);
2142                 return -EINVAL;
2143         }
2144         return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
2145 }
2146
2147 static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
2148         .func           = bpf_sysctl_get_new_value,
2149         .gpl_only       = false,
2150         .ret_type       = RET_INTEGER,
2151         .arg1_type      = ARG_PTR_TO_CTX,
2152         .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
2153         .arg3_type      = ARG_CONST_SIZE,
2154 };
2155
2156 BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
2157            const char *, buf, size_t, buf_len)
2158 {
2159         if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
2160                 return -EINVAL;
2161
2162         if (buf_len > PAGE_SIZE - 1)
2163                 return -E2BIG;
2164
2165         memcpy(ctx->new_val, buf, buf_len);
2166         ctx->new_len = buf_len;
2167         ctx->new_updated = 1;
2168
2169         return 0;
2170 }
2171
2172 static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
2173         .func           = bpf_sysctl_set_new_value,
2174         .gpl_only       = false,
2175         .ret_type       = RET_INTEGER,
2176         .arg1_type      = ARG_PTR_TO_CTX,
2177         .arg2_type      = ARG_PTR_TO_MEM | MEM_RDONLY,
2178         .arg3_type      = ARG_CONST_SIZE,
2179 };
2180
2181 static const struct bpf_func_proto *
2182 sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2183 {
2184         const struct bpf_func_proto *func_proto;
2185
2186         func_proto = cgroup_common_func_proto(func_id, prog);
2187         if (func_proto)
2188                 return func_proto;
2189
2190         func_proto = cgroup_current_func_proto(func_id, prog);
2191         if (func_proto)
2192                 return func_proto;
2193
2194         switch (func_id) {
2195         case BPF_FUNC_sysctl_get_name:
2196                 return &bpf_sysctl_get_name_proto;
2197         case BPF_FUNC_sysctl_get_current_value:
2198                 return &bpf_sysctl_get_current_value_proto;
2199         case BPF_FUNC_sysctl_get_new_value:
2200                 return &bpf_sysctl_get_new_value_proto;
2201         case BPF_FUNC_sysctl_set_new_value:
2202                 return &bpf_sysctl_set_new_value_proto;
2203         case BPF_FUNC_ktime_get_coarse_ns:
2204                 return &bpf_ktime_get_coarse_ns_proto;
2205         case BPF_FUNC_perf_event_output:
2206                 return &bpf_event_output_data_proto;
2207         default:
2208                 return bpf_base_func_proto(func_id, prog);
2209         }
2210 }
2211
2212 static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
2213                                    const struct bpf_prog *prog,
2214                                    struct bpf_insn_access_aux *info)
2215 {
2216         const int size_default = sizeof(__u32);
2217
2218         if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
2219                 return false;
2220
2221         switch (off) {
2222         case bpf_ctx_range(struct bpf_sysctl, write):
2223                 if (type != BPF_READ)
2224                         return false;
2225                 bpf_ctx_record_field_size(info, size_default);
2226                 return bpf_ctx_narrow_access_ok(off, size, size_default);
2227         case bpf_ctx_range(struct bpf_sysctl, file_pos):
2228                 if (type == BPF_READ) {
2229                         bpf_ctx_record_field_size(info, size_default);
2230                         return bpf_ctx_narrow_access_ok(off, size, size_default);
2231                 } else {
2232                         return size == size_default;
2233                 }
2234         default:
2235                 return false;
2236         }
2237 }
2238
2239 static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
2240                                      const struct bpf_insn *si,
2241                                      struct bpf_insn *insn_buf,
2242                                      struct bpf_prog *prog, u32 *target_size)
2243 {
2244         struct bpf_insn *insn = insn_buf;
2245         u32 read_size;
2246
2247         switch (si->off) {
2248         case offsetof(struct bpf_sysctl, write):
2249                 *insn++ = BPF_LDX_MEM(
2250                         BPF_SIZE(si->code), si->dst_reg, si->src_reg,
2251                         bpf_target_off(struct bpf_sysctl_kern, write,
2252                                        sizeof_field(struct bpf_sysctl_kern,
2253                                                     write),
2254                                        target_size));
2255                 break;
2256         case offsetof(struct bpf_sysctl, file_pos):
2257                 /* ppos is a pointer, so it should be accessed via indirect
2258                  * loads and stores. Also, for stores an additional temporary
2259                  * register is used, since neither src_reg nor dst_reg can be
2260                  * overridden (see the worked example after this function).
2261                  */
2262                 if (type == BPF_WRITE) {
2263                         int treg = BPF_REG_9;
2264
2265                         if (si->src_reg == treg || si->dst_reg == treg)
2266                                 --treg;
2267                         if (si->src_reg == treg || si->dst_reg == treg)
2268                                 --treg;
2269                         *insn++ = BPF_STX_MEM(
2270                                 BPF_DW, si->dst_reg, treg,
2271                                 offsetof(struct bpf_sysctl_kern, tmp_reg));
2272                         *insn++ = BPF_LDX_MEM(
2273                                 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
2274                                 treg, si->dst_reg,
2275                                 offsetof(struct bpf_sysctl_kern, ppos));
2276                         *insn++ = BPF_RAW_INSN(
2277                                 BPF_CLASS(si->code) | BPF_MEM | BPF_SIZEOF(u32),
2278                                 treg, si->src_reg,
2279                                 bpf_ctx_narrow_access_offset(
2280                                         0, sizeof(u32), sizeof(loff_t)),
2281                                 si->imm);
2282                         *insn++ = BPF_LDX_MEM(
2283                                 BPF_DW, treg, si->dst_reg,
2284                                 offsetof(struct bpf_sysctl_kern, tmp_reg));
2285                 } else {
2286                         *insn++ = BPF_LDX_MEM(
2287                                 BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
2288                                 si->dst_reg, si->src_reg,
2289                                 offsetof(struct bpf_sysctl_kern, ppos));
2290                         read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
2291                         *insn++ = BPF_LDX_MEM(
2292                                 BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
2293                                 bpf_ctx_narrow_access_offset(
2294                                         0, read_size, sizeof(loff_t)));
2295                 }
2296                 *target_size = sizeof(u32);
2297                 break;
2298         }
2299
2300         return insn - insn_buf;
2301 }
2302
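/*
 * Worked example (assuming little-endian and that neither src_reg nor
 * dst_reg is R9): a program's "ctx->file_pos = w2" store with dst_reg
 * == r1 and src_reg == r2 is rewritten to, roughly:
 *
 *	*(u64 *)(r1 + offsetof(struct bpf_sysctl_kern, tmp_reg)) = r9; // spill
 *	r9 = *(u64 *)(r1 + offsetof(struct bpf_sysctl_kern, ppos));
 *	*(u32 *)(r9 + 0) = w2;                                         // *ppos
 *	r9 = *(u64 *)(r1 + offsetof(struct bpf_sysctl_kern, tmp_reg)); // fill
 */
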
2303 const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
2304         .get_func_proto         = sysctl_func_proto,
2305         .is_valid_access        = sysctl_is_valid_access,
2306         .convert_ctx_access     = sysctl_convert_ctx_access,
2307 };
2308
2309 const struct bpf_prog_ops cg_sysctl_prog_ops = {
2310 };
2311
2312 #ifdef CONFIG_NET
2313 BPF_CALL_1(bpf_get_netns_cookie_sockopt, struct bpf_sockopt_kern *, ctx)
2314 {
2315         const struct net *net = ctx ? sock_net(ctx->sk) : &init_net;
2316
2317         return net->net_cookie;
2318 }
2319
2320 static const struct bpf_func_proto bpf_get_netns_cookie_sockopt_proto = {
2321         .func           = bpf_get_netns_cookie_sockopt,
2322         .gpl_only       = false,
2323         .ret_type       = RET_INTEGER,
2324         .arg1_type      = ARG_PTR_TO_CTX_OR_NULL,
2325 };
2326 #endif
2327
2328 static const struct bpf_func_proto *
2329 cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2330 {
2331         const struct bpf_func_proto *func_proto;
2332
2333         func_proto = cgroup_common_func_proto(func_id, prog);
2334         if (func_proto)
2335                 return func_proto;
2336
2337         func_proto = cgroup_current_func_proto(func_id, prog);
2338         if (func_proto)
2339                 return func_proto;
2340
2341         switch (func_id) {
2342 #ifdef CONFIG_NET
2343         case BPF_FUNC_get_netns_cookie:
2344                 return &bpf_get_netns_cookie_sockopt_proto;
2345         case BPF_FUNC_sk_storage_get:
2346                 return &bpf_sk_storage_get_proto;
2347         case BPF_FUNC_sk_storage_delete:
2348                 return &bpf_sk_storage_delete_proto;
2349         case BPF_FUNC_setsockopt:
2350                 if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
2351                         return &bpf_sk_setsockopt_proto;
2352                 return NULL;
2353         case BPF_FUNC_getsockopt:
2354                 if (prog->expected_attach_type == BPF_CGROUP_SETSOCKOPT)
2355                         return &bpf_sk_getsockopt_proto;
2356                 return NULL;
2357 #endif
2358 #ifdef CONFIG_INET
2359         case BPF_FUNC_tcp_sock:
2360                 return &bpf_tcp_sock_proto;
2361 #endif
2362         case BPF_FUNC_perf_event_output:
2363                 return &bpf_event_output_data_proto;
2364         default:
2365                 return bpf_base_func_proto(func_id, prog);
2366         }
2367 }
2368
2369 static bool cg_sockopt_is_valid_access(int off, int size,
2370                                        enum bpf_access_type type,
2371                                        const struct bpf_prog *prog,
2372                                        struct bpf_insn_access_aux *info)
2373 {
2374         const int size_default = sizeof(__u32);
2375
2376         if (off < 0 || off >= sizeof(struct bpf_sockopt))
2377                 return false;
2378
2379         if (off % size != 0)
2380                 return false;
2381
2382         if (type == BPF_WRITE) {
2383                 switch (off) {
2384                 case offsetof(struct bpf_sockopt, retval):
2385                         if (size != size_default)
2386                                 return false;
2387                         return prog->expected_attach_type ==
2388                                 BPF_CGROUP_GETSOCKOPT;
2389                 case offsetof(struct bpf_sockopt, optname):
2390                         fallthrough;
2391                 case offsetof(struct bpf_sockopt, level):
2392                         if (size != size_default)
2393                                 return false;
2394                         return prog->expected_attach_type ==
2395                                 BPF_CGROUP_SETSOCKOPT;
2396                 case offsetof(struct bpf_sockopt, optlen):
2397                         return size == size_default;
2398                 default:
2399                         return false;
2400                 }
2401         }
2402
2403         switch (off) {
2404         case offsetof(struct bpf_sockopt, sk):
2405                 if (size != sizeof(__u64))
2406                         return false;
2407                 info->reg_type = PTR_TO_SOCKET;
2408                 break;
2409         case offsetof(struct bpf_sockopt, optval):
2410                 if (size != sizeof(__u64))
2411                         return false;
2412                 info->reg_type = PTR_TO_PACKET;
2413                 break;
2414         case offsetof(struct bpf_sockopt, optval_end):
2415                 if (size != sizeof(__u64))
2416                         return false;
2417                 info->reg_type = PTR_TO_PACKET_END;
2418                 break;
2419         case offsetof(struct bpf_sockopt, retval):
2420                 if (size != size_default)
2421                         return false;
2422                 return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
2423         default:
2424                 if (size != size_default)
2425                         return false;
2426                 break;
2427         }
2428         return true;
2429 }
2430
2431 #define CG_SOCKOPT_READ_FIELD(F)                                        \
2432         BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),       \
2433                     si->dst_reg, si->src_reg,                           \
2434                     offsetof(struct bpf_sockopt_kern, F))
2435
2436 #define CG_SOCKOPT_WRITE_FIELD(F)                                       \
2437         BPF_RAW_INSN((BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F) |    \
2438                       BPF_MEM | BPF_CLASS(si->code)),                   \
2439                      si->dst_reg, si->src_reg,                          \
2440                      offsetof(struct bpf_sockopt_kern, F),              \
2441                      si->imm)
2442
2443 static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
2444                                          const struct bpf_insn *si,
2445                                          struct bpf_insn *insn_buf,
2446                                          struct bpf_prog *prog,
2447                                          u32 *target_size)
2448 {
2449         struct bpf_insn *insn = insn_buf;
2450
2451         switch (si->off) {
2452         case offsetof(struct bpf_sockopt, sk):
2453                 *insn++ = CG_SOCKOPT_READ_FIELD(sk);
2454                 break;
2455         case offsetof(struct bpf_sockopt, level):
2456                 if (type == BPF_WRITE)
2457                         *insn++ = CG_SOCKOPT_WRITE_FIELD(level);
2458                 else
2459                         *insn++ = CG_SOCKOPT_READ_FIELD(level);
2460                 break;
2461         case offsetof(struct bpf_sockopt, optname):
2462                 if (type == BPF_WRITE)
2463                         *insn++ = CG_SOCKOPT_WRITE_FIELD(optname);
2464                 else
2465                         *insn++ = CG_SOCKOPT_READ_FIELD(optname);
2466                 break;
2467         case offsetof(struct bpf_sockopt, optlen):
2468                 if (type == BPF_WRITE)
2469                         *insn++ = CG_SOCKOPT_WRITE_FIELD(optlen);
2470                 else
2471                         *insn++ = CG_SOCKOPT_READ_FIELD(optlen);
2472                 break;
2473         case offsetof(struct bpf_sockopt, retval):
2474                 BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
2475
2476                 if (type == BPF_WRITE) {
2477                         int treg = BPF_REG_9;
2478
2479                         if (si->src_reg == treg || si->dst_reg == treg)
2480                                 --treg;
2481                         if (si->src_reg == treg || si->dst_reg == treg)
2482                                 --treg;
2483                         *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
2484                                               offsetof(struct bpf_sockopt_kern, tmp_reg));
2485                         *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
2486                                               treg, si->dst_reg,
2487                                               offsetof(struct bpf_sockopt_kern, current_task));
2488                         *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
2489                                               treg, treg,
2490                                               offsetof(struct task_struct, bpf_ctx));
2491                         *insn++ = BPF_RAW_INSN(BPF_CLASS(si->code) | BPF_MEM |
2492                                                BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
2493                                                treg, si->src_reg,
2494                                                offsetof(struct bpf_cg_run_ctx, retval),
2495                                                si->imm);
2496                         *insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
2497                                               offsetof(struct bpf_sockopt_kern, tmp_reg));
2498                 } else {
2499                         *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
2500                                               si->dst_reg, si->src_reg,
2501                                               offsetof(struct bpf_sockopt_kern, current_task));
2502                         *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
2503                                               si->dst_reg, si->dst_reg,
2504                                               offsetof(struct task_struct, bpf_ctx));
2505                         *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
2506                                               si->dst_reg, si->dst_reg,
2507                                               offsetof(struct bpf_cg_run_ctx, retval));
2508                 }
2509                 break;
2510         case offsetof(struct bpf_sockopt, optval):
2511                 *insn++ = CG_SOCKOPT_READ_FIELD(optval);
2512                 break;
2513         case offsetof(struct bpf_sockopt, optval_end):
2514                 *insn++ = CG_SOCKOPT_READ_FIELD(optval_end);
2515                 break;
2516         }
2517
2518         return insn - insn_buf;
2519 }
2520
2521 static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
2522                                    bool direct_write,
2523                                    const struct bpf_prog *prog)
2524 {
2525         /* Nothing to do for the sockopt argument. The data is kzalloc'ed.
2526          */
2527         return 0;
2528 }
2529
2530 const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
2531         .get_func_proto         = cg_sockopt_func_proto,
2532         .is_valid_access        = cg_sockopt_is_valid_access,
2533         .convert_ctx_access     = cg_sockopt_convert_ctx_access,
2534         .gen_prologue           = cg_sockopt_get_prologue,
2535 };
2536
2537 const struct bpf_prog_ops cg_sockopt_prog_ops = {
2538 };
2539
2540 /* Common helpers for cgroup hooks. */
2541 const struct bpf_func_proto *
2542 cgroup_common_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2543 {
2544         switch (func_id) {
2545         case BPF_FUNC_get_local_storage:
2546                 return &bpf_get_local_storage_proto;
2547         case BPF_FUNC_get_retval:
2548                 switch (prog->expected_attach_type) {
2549                 case BPF_CGROUP_INET_INGRESS:
2550                 case BPF_CGROUP_INET_EGRESS:
2551                 case BPF_CGROUP_SOCK_OPS:
2552                 case BPF_CGROUP_UDP4_RECVMSG:
2553                 case BPF_CGROUP_UDP6_RECVMSG:
2554                 case BPF_CGROUP_UNIX_RECVMSG:
2555                 case BPF_CGROUP_INET4_GETPEERNAME:
2556                 case BPF_CGROUP_INET6_GETPEERNAME:
2557                 case BPF_CGROUP_UNIX_GETPEERNAME:
2558                 case BPF_CGROUP_INET4_GETSOCKNAME:
2559                 case BPF_CGROUP_INET6_GETSOCKNAME:
2560                 case BPF_CGROUP_UNIX_GETSOCKNAME:
2561                         return NULL;
2562                 default:
2563                         return &bpf_get_retval_proto;
2564                 }
2565         case BPF_FUNC_set_retval:
2566                 switch (prog->expected_attach_type) {
2567                 case BPF_CGROUP_INET_INGRESS:
2568                 case BPF_CGROUP_INET_EGRESS:
2569                 case BPF_CGROUP_SOCK_OPS:
2570                 case BPF_CGROUP_UDP4_RECVMSG:
2571                 case BPF_CGROUP_UDP6_RECVMSG:
2572                 case BPF_CGROUP_UNIX_RECVMSG:
2573                 case BPF_CGROUP_INET4_GETPEERNAME:
2574                 case BPF_CGROUP_INET6_GETPEERNAME:
2575                 case BPF_CGROUP_UNIX_GETPEERNAME:
2576                 case BPF_CGROUP_INET4_GETSOCKNAME:
2577                 case BPF_CGROUP_INET6_GETSOCKNAME:
2578                 case BPF_CGROUP_UNIX_GETSOCKNAME:
2579                         return NULL;
2580                 default:
2581                         return &bpf_set_retval_proto;
2582                 }
2583         default:
2584                 return NULL;
2585         }
2586 }
2587
2588 /* Common helpers for cgroup hooks with valid process context. */
2589 const struct bpf_func_proto *
2590 cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2591 {
2592         switch (func_id) {
2593         case BPF_FUNC_get_current_uid_gid:
2594                 return &bpf_get_current_uid_gid_proto;
2595         case BPF_FUNC_get_current_comm:
2596                 return &bpf_get_current_comm_proto;
2597 #ifdef CONFIG_CGROUP_NET_CLASSID
2598         case BPF_FUNC_get_cgroup_classid:
2599                 return &bpf_get_cgroup_classid_curr_proto;
2600 #endif
2601         case BPF_FUNC_current_task_under_cgroup:
2602                 return &bpf_current_task_under_cgroup_proto;
2603         default:
2604                 return NULL;
2605         }
2606 }