linux.git: net/core/bpf_sk_storage.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2019 Facebook  */
3 #include <linux/rculist.h>
4 #include <linux/list.h>
5 #include <linux/hash.h>
6 #include <linux/types.h>
7 #include <linux/spinlock.h>
8 #include <linux/bpf.h>
9 #include <linux/btf.h>
10 #include <linux/btf_ids.h>
11 #include <linux/bpf_local_storage.h>
12 #include <net/bpf_sk_storage.h>
13 #include <net/sock.h>
14 #include <uapi/linux/sock_diag.h>
15 #include <uapi/linux/btf.h>
16 #include <linux/rcupdate_trace.h>
17
18 DEFINE_BPF_STORAGE_CACHE(sk_cache);
19
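/* Return this map's storage data for @sk, or NULL if the socket holds no
 * element in this map.  With cacheit_lockit set, the element found is also
 * promoted into the map's cache slot to speed up later lookups.
 */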
20 static struct bpf_local_storage_data *
21 bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
22 {
23         struct bpf_local_storage *sk_storage;
24         struct bpf_local_storage_map *smap;
25
26         sk_storage =
27                 rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
28         if (!sk_storage)
29                 return NULL;
30
31         smap = (struct bpf_local_storage_map *)map;
32         return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
33 }
34
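/* Remove this map's element from @sk's storage.  Returns -ENOENT if the
 * socket holds no element for this map.
 */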
35 static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
36 {
37         struct bpf_local_storage_data *sdata;
38
39         sdata = bpf_sk_storage_lookup(sk, map, false);
40         if (!sdata)
41                 return -ENOENT;
42
43         bpf_selem_unlink(SELEM(sdata));
44
45         return 0;
46 }
47
48 /* Called by __sk_destruct() & bpf_sk_storage_clone() */
49 void bpf_sk_storage_free(struct sock *sk)
50 {
51         struct bpf_local_storage_elem *selem;
52         struct bpf_local_storage *sk_storage;
53         bool free_sk_storage = false;
54         struct hlist_node *n;
55
56         rcu_read_lock();
57         sk_storage = rcu_dereference(sk->sk_bpf_storage);
58         if (!sk_storage) {
59                 rcu_read_unlock();
60                 return;
61         }
62
63         /* Neither the bpf_prog nor the bpf-map's syscall
64          * could be modifying the sk_storage->list now.
65          * Thus, no elem can be added to or deleted from the
66          * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
67          *
68          * It is racing with bpf_local_storage_map_free() alone
69          * when unlinking elem from the sk_storage->list and
70          * the map's bucket->list.
71          */
72         raw_spin_lock_bh(&sk_storage->lock);
73         hlist_for_each_entry_safe(selem, n, &sk_storage->list, snode) {
74                 /* Always unlink from map before unlinking from
75                  * sk_storage.
76                  */
77                 bpf_selem_unlink_map(selem);
78                 free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
79                                                                   selem, true);
80         }
81         raw_spin_unlock_bh(&sk_storage->lock);
82         rcu_read_unlock();
83
84         if (free_sk_storage)
85                 kfree_rcu(sk_storage, rcu);
86 }
87
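/* Map destruction: give the cache slot back and let the common local
 * storage code unlink and free any elements still owned by this map.
 */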
88 static void bpf_sk_storage_map_free(struct bpf_map *map)
89 {
90         struct bpf_local_storage_map *smap;
91
92         smap = (struct bpf_local_storage_map *)map;
93         bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
94         bpf_local_storage_map_free(smap, NULL);
95 }
96
97 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
98 {
99         struct bpf_local_storage_map *smap;
100
101         smap = bpf_local_storage_map_alloc(attr);
102         if (IS_ERR(smap))
103                 return ERR_CAST(smap);
104
105         smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
106         return &smap->map;
107 }
108
109 static int notsupp_get_next_key(struct bpf_map *map, void *key,
110                                 void *next_key)
111 {
112         return -ENOTSUPP;
113 }
114
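/* Syscall-side (bpf(2)) map operations.  The map key is a socket fd in the
 * calling process; the fd is resolved to a socket for the duration of the
 * operation.
 */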
115 static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
116 {
117         struct bpf_local_storage_data *sdata;
118         struct socket *sock;
119         int fd, err;
120
121         fd = *(int *)key;
122         sock = sockfd_lookup(fd, &err);
123         if (sock) {
124                 sdata = bpf_sk_storage_lookup(sock->sk, map, true);
125                 sockfd_put(sock);
126                 return sdata ? sdata->data : NULL;
127         }
128
129         return ERR_PTR(err);
130 }
131
132 static int bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
133                                          void *value, u64 map_flags)
134 {
135         struct bpf_local_storage_data *sdata;
136         struct socket *sock;
137         int fd, err;
138
139         fd = *(int *)key;
140         sock = sockfd_lookup(fd, &err);
141         if (sock) {
142                 sdata = bpf_local_storage_update(
143                         sock->sk, (struct bpf_local_storage_map *)map, value,
144                         map_flags);
145                 sockfd_put(sock);
146                 return PTR_ERR_OR_ZERO(sdata);
147         }
148
149         return err;
150 }
151
152 static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
153 {
154         struct socket *sock;
155         int fd, err;
156
157         fd = *(int *)key;
158         sock = sockfd_lookup(fd, &err);
159         if (sock) {
160                 err = bpf_sk_storage_del(sock->sk, map);
161                 sockfd_put(sock);
162                 return err;
163         }
164
165         return err;
166 }
167
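/* Allocate a copy of @selem for the cloned socket @newsk.  The map value is
 * copied under its bpf_spin_lock when the map defines one.
 */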
168 static struct bpf_local_storage_elem *
169 bpf_sk_storage_clone_elem(struct sock *newsk,
170                           struct bpf_local_storage_map *smap,
171                           struct bpf_local_storage_elem *selem)
172 {
173         struct bpf_local_storage_elem *copy_selem;
174
175         copy_selem = bpf_selem_alloc(smap, newsk, NULL, true);
176         if (!copy_selem)
177                 return NULL;
178
179         if (map_value_has_spin_lock(&smap->map))
180                 copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
181                                       SDATA(selem)->data, true);
182         else
183                 copy_map_value(&smap->map, SDATA(copy_selem)->data,
184                                SDATA(selem)->data);
185
186         return copy_selem;
187 }
188
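/* Called when a socket is cloned (e.g. a TCP child created from a
 * listener): copy every element whose map was created with BPF_F_CLONE
 * from the parent @sk into @newsk.
 */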
189 int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
190 {
191         struct bpf_local_storage *new_sk_storage = NULL;
192         struct bpf_local_storage *sk_storage;
193         struct bpf_local_storage_elem *selem;
194         int ret = 0;
195
196         RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);
197
198         rcu_read_lock();
199         sk_storage = rcu_dereference(sk->sk_bpf_storage);
200
201         if (!sk_storage || hlist_empty(&sk_storage->list))
202                 goto out;
203
204         hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
205                 struct bpf_local_storage_elem *copy_selem;
206                 struct bpf_local_storage_map *smap;
207                 struct bpf_map *map;
208
209                 smap = rcu_dereference(SDATA(selem)->smap);
210                 if (!(smap->map.map_flags & BPF_F_CLONE))
211                         continue;
212
213                 /* Note that for lockless listeners adding a new element
214                  * here can race with cleanup in bpf_local_storage_map_free.
215                  * Try to grab map refcnt to make sure that it's still
216                  * alive and prevent concurrent removal.
217                  */
218                 map = bpf_map_inc_not_zero(&smap->map);
219                 if (IS_ERR(map))
220                         continue;
221
222                 copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
223                 if (!copy_selem) {
224                         ret = -ENOMEM;
225                         bpf_map_put(map);
226                         goto out;
227                 }
228
229                 if (new_sk_storage) {
230                         bpf_selem_link_map(smap, copy_selem);
231                         bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
232                 } else {
233                         ret = bpf_local_storage_alloc(newsk, smap, copy_selem);
234                         if (ret) {
235                                 kfree(copy_selem);
236                                 atomic_sub(smap->elem_size,
237                                            &newsk->sk_omem_alloc);
238                                 bpf_map_put(map);
239                                 goto out;
240                         }
241
242                         new_sk_storage =
243                                 rcu_dereference(copy_selem->local_storage);
244                 }
245                 bpf_map_put(map);
246         }
247
248 out:
249         rcu_read_unlock();
250
251         /* In case of an error, don't free anything explicitly here;
252          * the caller is responsible for calling bpf_sk_storage_free.
253          */
254
255         return ret;
256 }
257
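/* bpf_sk_storage_get() helper: return a pointer to this map's value for
 * @sk.  With BPF_SK_STORAGE_GET_F_CREATE, a missing element is created,
 * zero-filled or initialized from @value when @value is non-NULL.  Returns
 * NULL on any failure, so BPF programs must check the result.
 */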
258 BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
259            void *, value, u64, flags)
260 {
261         struct bpf_local_storage_data *sdata;
262
263         WARN_ON_ONCE(!bpf_rcu_lock_held());
264         if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
265                 return (unsigned long)NULL;
266
267         sdata = bpf_sk_storage_lookup(sk, map, true);
268         if (sdata)
269                 return (unsigned long)sdata->data;
270
271         if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
272             /* Cannot add a new elem to a sk that is going
273              * away.  Otherwise, the new elem may leak (and
274              * cause other memory issues during map
275              * destruction).
276              */
277             refcount_inc_not_zero(&sk->sk_refcnt)) {
278                 sdata = bpf_local_storage_update(
279                         sk, (struct bpf_local_storage_map *)map, value,
280                         BPF_NOEXIST);
281                 /* sk must be a fullsock (guaranteed by verifier),
282                  * so sock_gen_put() is unnecessary.
283                  */
284                 sock_put(sk);
285                 return IS_ERR(sdata) ?
286                         (unsigned long)NULL : (unsigned long)sdata->data;
287         }
288
289         return (unsigned long)NULL;
290 }
291
292 BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
293 {
294         WARN_ON_ONCE(!bpf_rcu_lock_held());
295         if (!sk || !sk_fullsock(sk))
296                 return -EINVAL;
297
298         if (refcount_inc_not_zero(&sk->sk_refcnt)) {
299                 int err;
300
301                 err = bpf_sk_storage_del(sk, map);
302                 sock_put(sk);
303                 return err;
304         }
305
306         return -ENOENT;
307 }
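/* A minimal usage sketch from the BPF program side (assumes libbpf's
 * bpf_helpers.h; the map and function names are illustrative only):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, __u64);
 *	} sk_cnt SEC(".maps");
 *
 *	SEC("sockops")
 *	int count_sockops(struct bpf_sock_ops *ctx)
 *	{
 *		struct bpf_sock *sk = ctx->sk;
 *		__u64 *cnt;
 *
 *		if (!sk)
 *			return 1;
 *		cnt = bpf_sk_storage_get(&sk_cnt, sk, NULL,
 *					 BPF_SK_STORAGE_GET_F_CREATE);
 *		if (cnt)
 *			__sync_fetch_and_add(cnt, 1);
 *		return 1;
 *	}
 */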
308
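/* Storage memory is charged against the owning socket's optmem allowance,
 * mirroring the limit applied by sock_kmalloc().
 */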
309 static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
310                                  void *owner, u32 size)
311 {
312         struct sock *sk = (struct sock *)owner;
313
314         /* same check as in sock_kmalloc() */
315         if (size <= sysctl_optmem_max &&
316             atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
317                 atomic_add(size, &sk->sk_omem_alloc);
318                 return 0;
319         }
320
321         return -ENOMEM;
322 }
323
324 static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
325                                     void *owner, u32 size)
326 {
327         struct sock *sk = owner;
328
329         atomic_sub(size, &sk->sk_omem_alloc);
330 }
331
332 static struct bpf_local_storage __rcu **
333 bpf_sk_storage_ptr(void *owner)
334 {
335         struct sock *sk = owner;
336
337         return &sk->sk_bpf_storage;
338 }
339
340 static int sk_storage_map_btf_id;
341 const struct bpf_map_ops sk_storage_map_ops = {
342         .map_meta_equal = bpf_map_meta_equal,
343         .map_alloc_check = bpf_local_storage_map_alloc_check,
344         .map_alloc = bpf_sk_storage_map_alloc,
345         .map_free = bpf_sk_storage_map_free,
346         .map_get_next_key = notsupp_get_next_key,
347         .map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
348         .map_update_elem = bpf_fd_sk_storage_update_elem,
349         .map_delete_elem = bpf_fd_sk_storage_delete_elem,
350         .map_check_btf = bpf_local_storage_map_check_btf,
351         .map_btf_name = "bpf_local_storage_map",
352         .map_btf_id = &sk_storage_map_btf_id,
353         .map_local_storage_charge = bpf_sk_storage_charge,
354         .map_local_storage_uncharge = bpf_sk_storage_uncharge,
355         .map_owner_storage_ptr = bpf_sk_storage_ptr,
356 };
357
358 const struct bpf_func_proto bpf_sk_storage_get_proto = {
359         .func           = bpf_sk_storage_get,
360         .gpl_only       = false,
361         .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
362         .arg1_type      = ARG_CONST_MAP_PTR,
363         .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
364         .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
365         .arg4_type      = ARG_ANYTHING,
366 };
367
368 const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
369         .func           = bpf_sk_storage_get,
370         .gpl_only       = false,
371         .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
372         .arg1_type      = ARG_CONST_MAP_PTR,
373         .arg2_type      = ARG_PTR_TO_CTX, /* context is 'struct sock' */
374         .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
375         .arg4_type      = ARG_ANYTHING,
376 };
377
378 const struct bpf_func_proto bpf_sk_storage_delete_proto = {
379         .func           = bpf_sk_storage_delete,
380         .gpl_only       = false,
381         .ret_type       = RET_INTEGER,
382         .arg1_type      = ARG_CONST_MAP_PTR,
383         .arg2_type      = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
384 };
385
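/* Decide whether a tracing program may use the sk_storage helpers.  fentry
 * and fexit programs attached to a bpf_sk_storage*() function are refused;
 * raw tracepoints and iterators are always allowed since bpf_sk_storage has
 * no tracepoints of its own.
 */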
386 static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
387 {
388         const struct btf *btf_vmlinux;
389         const struct btf_type *t;
390         const char *tname;
391         u32 btf_id;
392
393         if (prog->aux->dst_prog)
394                 return false;
395
396         /* Ensure the tracing program is not tracing
397          * any bpf_sk_storage*() function while also
398          * using the bpf_sk_storage_(get|delete) helpers.
399          */
400         switch (prog->expected_attach_type) {
401         case BPF_TRACE_ITER:
402         case BPF_TRACE_RAW_TP:
403                 /* bpf_sk_storage has no trace point */
404                 return true;
405         case BPF_TRACE_FENTRY:
406         case BPF_TRACE_FEXIT:
407                 btf_vmlinux = bpf_get_btf_vmlinux();
408                 btf_id = prog->aux->attach_btf_id;
409                 t = btf_type_by_id(btf_vmlinux, btf_id);
410                 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
411                 return !!strncmp(tname, "bpf_sk_storage",
412                                  strlen("bpf_sk_storage"));
413         default:
414                 return false;
415         }
416
417         return false;
418 }
419
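/* Tracing variants of the get/delete helpers.  They may be invoked from
 * contexts where taking the storage lock is not safe, so calls from hardirq
 * or NMI context bail out early.
 */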
420 BPF_CALL_4(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
421            void *, value, u64, flags)
422 {
423         WARN_ON_ONCE(!bpf_rcu_lock_held());
424         if (in_hardirq() || in_nmi())
425                 return (unsigned long)NULL;
426
427         return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags);
428 }
429
430 BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
431            struct sock *, sk)
432 {
433         WARN_ON_ONCE(!bpf_rcu_lock_held());
434         if (in_hardirq() || in_nmi())
435                 return -EPERM;
436
437         return ____bpf_sk_storage_delete(map, sk);
438 }
439
440 const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
441         .func           = bpf_sk_storage_get_tracing,
442         .gpl_only       = false,
443         .ret_type       = RET_PTR_TO_MAP_VALUE_OR_NULL,
444         .arg1_type      = ARG_CONST_MAP_PTR,
445         .arg2_type      = ARG_PTR_TO_BTF_ID,
446         .arg2_btf_id    = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
447         .arg3_type      = ARG_PTR_TO_MAP_VALUE_OR_NULL,
448         .arg4_type      = ARG_ANYTHING,
449         .allowed        = bpf_sk_storage_tracing_allowed,
450 };
451
452 const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
453         .func           = bpf_sk_storage_delete_tracing,
454         .gpl_only       = false,
455         .ret_type       = RET_INTEGER,
456         .arg1_type      = ARG_CONST_MAP_PTR,
457         .arg2_type      = ARG_PTR_TO_BTF_ID,
458         .arg2_btf_id    = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
459         .allowed        = bpf_sk_storage_tracing_allowed,
460 };
461
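/* sock_diag support: dump the sk_storage of a socket as nested netlink
 * attributes, either for all maps or only for the maps requested by the
 * dumper.
 */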
462 struct bpf_sk_storage_diag {
463         u32 nr_maps;
464         struct bpf_map *maps[];
465 };
466
467 /* The reply will be like:
468  * INET_DIAG_BPF_SK_STORAGES (nla_nest)
469  *      SK_DIAG_BPF_STORAGE (nla_nest)
470  *              SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
471  *              SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
472  *      SK_DIAG_BPF_STORAGE (nla_nest)
473  *              SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
474  *              SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
475  *      ....
476  */
477 static int nla_value_size(u32 value_size)
478 {
479         /* SK_DIAG_BPF_STORAGE (nla_nest)
480          *      SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
481          *      SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
482          */
483         return nla_total_size(0) + nla_total_size(sizeof(u32)) +
484                 nla_total_size_64bit(value_size);
485 }
486
487 void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
488 {
489         u32 i;
490
491         if (!diag)
492                 return;
493
494         for (i = 0; i < diag->nr_maps; i++)
495                 bpf_map_put(diag->maps[i]);
496
497         kfree(diag);
498 }
499 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);
500
501 static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
502                            const struct bpf_map *map)
503 {
504         u32 i;
505
506         for (i = 0; i < diag->nr_maps; i++) {
507                 if (diag->maps[i] == map)
508                         return true;
509         }
510
511         return false;
512 }
513
514 struct bpf_sk_storage_diag *
515 bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
516 {
517         struct bpf_sk_storage_diag *diag;
518         struct nlattr *nla;
519         u32 nr_maps = 0;
520         int rem, err;
521
522         /* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN as
523          * the map_alloc_check() side also does.
524          */
525         if (!bpf_capable())
526                 return ERR_PTR(-EPERM);
527
528         nla_for_each_nested(nla, nla_stgs, rem) {
529                 if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
530                         nr_maps++;
531         }
532
533         diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
534         if (!diag)
535                 return ERR_PTR(-ENOMEM);
536
537         nla_for_each_nested(nla, nla_stgs, rem) {
538                 struct bpf_map *map;
539                 int map_fd;
540
541                 if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
542                         continue;
543
544                 map_fd = nla_get_u32(nla);
545                 map = bpf_map_get(map_fd);
546                 if (IS_ERR(map)) {
547                         err = PTR_ERR(map);
548                         goto err_free;
549                 }
550                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
551                         bpf_map_put(map);
552                         err = -EINVAL;
553                         goto err_free;
554                 }
555                 if (diag_check_dup(diag, map)) {
556                         bpf_map_put(map);
557                         err = -EEXIST;
558                         goto err_free;
559                 }
560                 diag->maps[diag->nr_maps++] = map;
561         }
562
563         return diag;
564
565 err_free:
566         bpf_sk_storage_diag_free(diag);
567         return ERR_PTR(err);
568 }
569 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);
570
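/* Emit one SK_DIAG_BPF_STORAGE nest (map id plus a copy of the map value)
 * for @sdata into @skb.
 */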
571 static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
572 {
573         struct nlattr *nla_stg, *nla_value;
574         struct bpf_local_storage_map *smap;
575
576         /* It cannot exceed max nlattr's payload */
577         /* The map value cannot exceed the max nlattr payload */
578
579         nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
580         if (!nla_stg)
581                 return -EMSGSIZE;
582
583         smap = rcu_dereference(sdata->smap);
584         if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
585                 goto errout;
586
587         nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
588                                       smap->map.value_size,
589                                       SK_DIAG_BPF_STORAGE_PAD);
590         if (!nla_value)
591                 goto errout;
592
593         if (map_value_has_spin_lock(&smap->map))
594                 copy_map_value_locked(&smap->map, nla_data(nla_value),
595                                       sdata->data, true);
596         else
597                 copy_map_value(&smap->map, nla_data(nla_value), sdata->data);
598
599         nla_nest_end(skb, nla_stg);
600         return 0;
601
602 errout:
603         nla_nest_cancel(skb, nla_stg);
604         return -EMSGSIZE;
605 }
606
607 static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
608                                        int stg_array_type,
609                                        unsigned int *res_diag_size)
610 {
611         /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
612         unsigned int diag_size = nla_total_size(0);
613         struct bpf_local_storage *sk_storage;
614         struct bpf_local_storage_elem *selem;
615         struct bpf_local_storage_map *smap;
616         struct nlattr *nla_stgs;
617         unsigned int saved_len;
618         int err = 0;
619
620         rcu_read_lock();
621
622         sk_storage = rcu_dereference(sk->sk_bpf_storage);
623         if (!sk_storage || hlist_empty(&sk_storage->list)) {
624                 rcu_read_unlock();
625                 return 0;
626         }
627
628         nla_stgs = nla_nest_start(skb, stg_array_type);
629         if (!nla_stgs)
630                 /* Continue to learn diag_size */
631                 err = -EMSGSIZE;
632
633         saved_len = skb->len;
634         hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
635                 smap = rcu_dereference(SDATA(selem)->smap);
636                 diag_size += nla_value_size(smap->map.value_size);
637
638                 if (nla_stgs && diag_get(SDATA(selem), skb))
639                         /* Continue to learn diag_size */
640                         err = -EMSGSIZE;
641         }
642
643         rcu_read_unlock();
644
645         if (nla_stgs) {
646                 if (saved_len == skb->len)
647                         nla_nest_cancel(skb, nla_stgs);
648                 else
649                         nla_nest_end(skb, nla_stgs);
650         }
651
652         if (diag_size == nla_total_size(0)) {
653                 *res_diag_size = 0;
654                 return 0;
655         }
656
657         *res_diag_size = diag_size;
658         return err;
659 }
660
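/* Dump the requested maps (or all of them when @diag has none) for @sk.
 * *res_diag_size is always set to the space the full dump needs, even when
 * the skb runs out of room and -EMSGSIZE is returned.
 */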
661 int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
662                             struct sock *sk, struct sk_buff *skb,
663                             int stg_array_type,
664                             unsigned int *res_diag_size)
665 {
666         /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
667         unsigned int diag_size = nla_total_size(0);
668         struct bpf_local_storage *sk_storage;
669         struct bpf_local_storage_data *sdata;
670         struct nlattr *nla_stgs;
671         unsigned int saved_len;
672         int err = 0;
673         u32 i;
674
675         *res_diag_size = 0;
676
677         /* No map has been specified.  Dump all. */
678         if (!diag->nr_maps)
679                 return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
680                                                    res_diag_size);
681
682         rcu_read_lock();
683         sk_storage = rcu_dereference(sk->sk_bpf_storage);
684         if (!sk_storage || hlist_empty(&sk_storage->list)) {
685                 rcu_read_unlock();
686                 return 0;
687         }
688
689         nla_stgs = nla_nest_start(skb, stg_array_type);
690         if (!nla_stgs)
691                 /* Continue to learn diag_size */
692                 err = -EMSGSIZE;
693
694         saved_len = skb->len;
695         for (i = 0; i < diag->nr_maps; i++) {
696                 sdata = bpf_local_storage_lookup(sk_storage,
697                                 (struct bpf_local_storage_map *)diag->maps[i],
698                                 false);
699
700                 if (!sdata)
701                         continue;
702
703                 diag_size += nla_value_size(diag->maps[i]->value_size);
704
705                 if (nla_stgs && diag_get(sdata, skb))
706                         /* Continue to learn diag_size */
707                         err = -EMSGSIZE;
708         }
709         rcu_read_unlock();
710
711         if (nla_stgs) {
712                 if (saved_len == skb->len)
713                         nla_nest_cancel(skb, nla_stgs);
714                 else
715                         nla_nest_end(skb, nla_stgs);
716         }
717
718         if (diag_size == nla_total_size(0)) {
719                 *res_diag_size = 0;
720                 return 0;
721         }
722
723         *res_diag_size = diag_size;
724         return err;
725 }
726 EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);
727
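/* bpf_iter support: iterate over every element of one sk_storage map,
 * walking the map's buckets under RCU and tracking the current position in
 * the private seq_file state below.
 */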
728 struct bpf_iter_seq_sk_storage_map_info {
729         struct bpf_map *map;
730         unsigned int bucket_id;
731         unsigned skip_elems;
732 };
733
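/* Find the element after @prev_selem: first continue within the current
 * bucket, then scan the remaining buckets, skipping entries already shown.
 * Returns with the RCU read lock held when an element is found.
 */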
734 static struct bpf_local_storage_elem *
735 bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
736                                  struct bpf_local_storage_elem *prev_selem)
737         __acquires(RCU) __releases(RCU)
738 {
739         struct bpf_local_storage *sk_storage;
740         struct bpf_local_storage_elem *selem;
741         u32 skip_elems = info->skip_elems;
742         struct bpf_local_storage_map *smap;
743         u32 bucket_id = info->bucket_id;
744         u32 i, count, n_buckets;
745         struct bpf_local_storage_map_bucket *b;
746
747         smap = (struct bpf_local_storage_map *)info->map;
748         n_buckets = 1U << smap->bucket_log;
749         if (bucket_id >= n_buckets)
750                 return NULL;
751
752         /* try to find next selem in the same bucket */
753         selem = prev_selem;
754         count = 0;
755         while (selem) {
756                 selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
757                                          struct bpf_local_storage_elem, map_node);
758                 if (!selem) {
759                         /* not found, unlock and go to the next bucket */
760                         b = &smap->buckets[bucket_id++];
761                         rcu_read_unlock();
762                         skip_elems = 0;
763                         break;
764                 }
765                 sk_storage = rcu_dereference(selem->local_storage);
766                 if (sk_storage) {
767                         info->skip_elems = skip_elems + count;
768                         return selem;
769                 }
770                 count++;
771         }
772
773         for (i = bucket_id; i < (1U << smap->bucket_log); i++) {
774                 b = &smap->buckets[i];
775                 rcu_read_lock();
776                 count = 0;
777                 hlist_for_each_entry_rcu(selem, &b->list, map_node) {
778                         sk_storage = rcu_dereference(selem->local_storage);
779                         if (sk_storage && count >= skip_elems) {
780                                 info->bucket_id = i;
781                                 info->skip_elems = count;
782                                 return selem;
783                         }
784                         count++;
785                 }
786                 rcu_read_unlock();
787                 skip_elems = 0;
788         }
789
790         info->bucket_id = i;
791         info->skip_elems = 0;
792         return NULL;
793 }
794
795 static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
796 {
797         struct bpf_local_storage_elem *selem;
798
799         selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
800         if (!selem)
801                 return NULL;
802
803         if (*pos == 0)
804                 ++*pos;
805         return selem;
806 }
807
808 static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
809                                          loff_t *pos)
810 {
811         struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
812
813         ++*pos;
814         ++info->skip_elems;
815         return bpf_sk_storage_map_seq_find_next(seq->private, v);
816 }
817
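/* Context handed to the attached BPF_TRACE_ITER program: one (map, sk,
 * value) triple per element, plus one final call with sk and value NULL to
 * signal the end of the iteration.
 */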
818 struct bpf_iter__bpf_sk_storage_map {
819         __bpf_md_ptr(struct bpf_iter_meta *, meta);
820         __bpf_md_ptr(struct bpf_map *, map);
821         __bpf_md_ptr(struct sock *, sk);
822         __bpf_md_ptr(void *, value);
823 };
824
825 DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
826                      struct bpf_map *map, struct sock *sk,
827                      void *value)
828
829 static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
830                                          struct bpf_local_storage_elem *selem)
831 {
832         struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
833         struct bpf_iter__bpf_sk_storage_map ctx = {};
834         struct bpf_local_storage *sk_storage;
835         struct bpf_iter_meta meta;
836         struct bpf_prog *prog;
837         int ret = 0;
838
839         meta.seq = seq;
840         prog = bpf_iter_get_info(&meta, selem == NULL);
841         if (prog) {
842                 ctx.meta = &meta;
843                 ctx.map = info->map;
844                 if (selem) {
845                         sk_storage = rcu_dereference(selem->local_storage);
846                         ctx.sk = sk_storage->owner;
847                         ctx.value = SDATA(selem)->data;
848                 }
849                 ret = bpf_iter_run_prog(prog, &ctx);
850         }
851
852         return ret;
853 }
854
855 static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
856 {
857         return __bpf_sk_storage_map_seq_show(seq, v);
858 }
859
860 static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
861         __releases(RCU)
862 {
863         if (!v)
864                 (void)__bpf_sk_storage_map_seq_show(seq, v);
865         else
866                 rcu_read_unlock();
867 }
868
869 static int bpf_iter_init_sk_storage_map(void *priv_data,
870                                         struct bpf_iter_aux_info *aux)
871 {
872         struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;
873
874         seq_info->map = aux->map;
875         return 0;
876 }
877
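/* Attach an iterator to a map: take a reference with uref, require a
 * sk_storage map, and reject programs that would read past the map's value
 * size.
 */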
878 static int bpf_iter_attach_map(struct bpf_prog *prog,
879                                union bpf_iter_link_info *linfo,
880                                struct bpf_iter_aux_info *aux)
881 {
882         struct bpf_map *map;
883         int err = -EINVAL;
884
885         if (!linfo->map.map_fd)
886                 return -EBADF;
887
888         map = bpf_map_get_with_uref(linfo->map.map_fd);
889         if (IS_ERR(map))
890                 return PTR_ERR(map);
891
892         if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
893                 goto put_map;
894
895         if (prog->aux->max_rdonly_access > map->value_size) {
896                 err = -EACCES;
897                 goto put_map;
898         }
899
900         aux->map = map;
901         return 0;
902
903 put_map:
904         bpf_map_put_with_uref(map);
905         return err;
906 }
907
908 static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
909 {
910         bpf_map_put_with_uref(aux->map);
911 }
912
913 static const struct seq_operations bpf_sk_storage_map_seq_ops = {
914         .start  = bpf_sk_storage_map_seq_start,
915         .next   = bpf_sk_storage_map_seq_next,
916         .stop   = bpf_sk_storage_map_seq_stop,
917         .show   = bpf_sk_storage_map_seq_show,
918 };
919
920 static const struct bpf_iter_seq_info iter_seq_info = {
921         .seq_ops                = &bpf_sk_storage_map_seq_ops,
922         .init_seq_private       = bpf_iter_init_sk_storage_map,
923         .fini_seq_private       = NULL,
924         .seq_priv_size          = sizeof(struct bpf_iter_seq_sk_storage_map_info),
925 };
926
927 static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
928         .target                 = "bpf_sk_storage_map",
929         .attach_target          = bpf_iter_attach_map,
930         .detach_target          = bpf_iter_detach_map,
931         .show_fdinfo            = bpf_iter_map_show_fdinfo,
932         .fill_link_info         = bpf_iter_map_fill_link_info,
933         .ctx_arg_info_size      = 2,
934         .ctx_arg_info           = {
935                 { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
936                   PTR_TO_BTF_ID_OR_NULL },
937                 { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
938                   PTR_TO_BUF | PTR_MAYBE_NULL },
939         },
940         .seq_info               = &iter_seq_info,
941 };
942
943 static int __init bpf_sk_storage_map_iter_init(void)
944 {
945         bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
946                 btf_sock_ids[BTF_SOCK_TYPE_SOCK];
947         return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
948 }
949 late_initcall(bpf_sk_storage_map_iter_init);