Git Repo - linux.git/blame - kernel/bpf/arraymap.c
bpf: Add redirect_peer helper
5b497af4 1// SPDX-License-Identifier: GPL-2.0-only
28fbcfa0 2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
81ed18ab 3 * Copyright (c) 2016,2017 Facebook
28fbcfa0 AS 4 */
5#include <linux/bpf.h>
a26ca7c9 6#include <linux/btf.h>
28fbcfa0 7#include <linux/err.h>
28fbcfa0 AS 8#include <linux/slab.h>
9#include <linux/mm.h>
04fd61ab 10#include <linux/filter.h>
0cdf5640 11#include <linux/perf_event.h>
a26ca7c9 12#include <uapi/linux/btf.h>
1e6c62a8 13#include <linux/rcupdate_trace.h>
28fbcfa0 14
56f668df MKL 15#include "map_in_map.h"
16
6e71b04a 17#define ARRAY_CREATE_FLAG_MASK \
792caccc SL 18 (BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK | \
19 BPF_F_PRESERVE_ELEMS)
6e71b04a 20
a10423b8 AS 21static void bpf_array_free_percpu(struct bpf_array *array)
22{
23 int i;
24
32fff239 25 for (i = 0; i < array->map.max_entries; i++) {
a10423b8 26 free_percpu(array->pptrs[i]);
32fff239 ED 27 cond_resched();
28 }
a10423b8 AS 29}
30
31static int bpf_array_alloc_percpu(struct bpf_array *array)
32{
33 void __percpu *ptr;
34 int i;
35
36 for (i = 0; i < array->map.max_entries; i++) {
37 ptr = __alloc_percpu_gfp(array->elem_size, 8,
38 GFP_USER | __GFP_NOWARN);
39 if (!ptr) {
40 bpf_array_free_percpu(array);
41 return -ENOMEM;
42 }
43 array->pptrs[i] = ptr;
32fff239 44 cond_resched();
a10423b8 AS 45 }
46
47 return 0;
48}
49
28fbcfa0 50/* Called from syscall */
5dc4c4b7 51int array_map_alloc_check(union bpf_attr *attr)
28fbcfa0 52{
a10423b8 53 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
96eabe7a 54 int numa_node = bpf_map_attr_numa_node(attr);
28fbcfa0 AS 55
56 /* check sanity of attributes */
57 if (attr->max_entries == 0 || attr->key_size != 4 ||
6e71b04a CF 58 attr->value_size == 0 ||
59 attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
591fe988 60 !bpf_map_flags_access_ok(attr->map_flags) ||
96eabe7a 61 (percpu && numa_node != NUMA_NO_NODE))
ad46061f 62 return -EINVAL;
28fbcfa0 63
fc970227 AN 64 if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
65 attr->map_flags & BPF_F_MMAPABLE)
66 return -EINVAL;
67
792caccc SL 68 if (attr->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
69 attr->map_flags & BPF_F_PRESERVE_ELEMS)
70 return -EINVAL;
71
7984c27c 72 if (attr->value_size > KMALLOC_MAX_SIZE)
01b3f521 AS 73 /* if value_size is bigger, the user space won't be able to
74 * access the elements.
75 */
ad46061f JK 76 return -E2BIG;
77
78 return 0;
79}
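/* Editor's illustrative sketch (not part of this file): a user-space
 * BPF_MAP_CREATE request that passes the checks above, issued via the raw
 * bpf(2) syscall. key_size must be 4, value_size must be non-zero and no
 * larger than KMALLOC_MAX_SIZE, and only flags in ARRAY_CREATE_FLAG_MASK
 * are accepted.
 *
 *   #include <linux/bpf.h>
 *   #include <string.h>
 *   #include <sys/syscall.h>
 *   #include <unistd.h>
 *
 *   int create_array_map(void)
 *   {
 *           union bpf_attr attr;
 *
 *           memset(&attr, 0, sizeof(attr));
 *           attr.map_type    = BPF_MAP_TYPE_ARRAY;
 *           attr.key_size    = 4;        // arrays are always indexed by u32
 *           attr.value_size  = 64;
 *           attr.max_entries = 256;
 *           attr.map_flags   = 0;        // e.g. BPF_F_MMAPABLE would also pass
 *           return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
 *   }
 */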
80
81static struct bpf_map *array_map_alloc(union bpf_attr *attr)
82{
83 bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
9c2d63b8 84 int ret, numa_node = bpf_map_attr_numa_node(attr);
ad46061f 85 u32 elem_size, index_mask, max_entries;
2c78ee89 86 bool bypass_spec_v1 = bpf_bypass_spec_v1();
9c2d63b8 87 u64 cost, array_size, mask64;
b936ca64 88 struct bpf_map_memory mem;
ad46061f 89 struct bpf_array *array;
01b3f521 90
28fbcfa0 AS 91 elem_size = round_up(attr->value_size, 8);
92
b2157399 93 max_entries = attr->max_entries;
b2157399 94
bbeb6e43 DB 95 /* On 32 bit archs roundup_pow_of_two() with max_entries that has
96 * upper most bit set in u32 space is undefined behavior due to
97 * resulting 1U << 32, so do it manually here in u64 space.
98 */
99 mask64 = fls_long(max_entries - 1);
100 mask64 = 1ULL << mask64;
101 mask64 -= 1;
102
103 index_mask = mask64;
2c78ee89 104 if (!bypass_spec_v1) {
b2157399 AS 105 /* round up array size to nearest power of 2,
106 * since cpu will speculate within index_mask limits
107 */
108 max_entries = index_mask + 1;
bbeb6e43 DB 109 /* Check for overflows. */
110 if (max_entries < attr->max_entries)
111 return ERR_PTR(-E2BIG);
112 }
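/* Editor's note (illustrative, not part of this file): a worked example of
 * the masking logic above. For attr->max_entries = 5, fls_long(4) = 3, so
 * mask64 = (1ULL << 3) - 1 = 7; with Spectre v1 mitigation active
 * (!bypass_spec_v1), max_entries is rounded up to index_mask + 1 = 8. For
 * attr->max_entries = 0xC0000000 the u64 math yields index_mask = 0xFFFFFFFF,
 * and index_mask + 1 wraps to 0 in u32 - exactly the overflow that the
 * "max_entries < attr->max_entries" check rejects with -E2BIG.
 */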
b2157399 113
a10423b8 114 array_size = sizeof(*array);
fc970227 115 if (percpu) {
b2157399 116 array_size += (u64) max_entries * sizeof(void *);
fc970227 AN 117 } else {
118 /* rely on vmalloc() to return page-aligned memory and
119 * ensure array->value is exactly page-aligned
120 */
121 if (attr->map_flags & BPF_F_MMAPABLE) {
122 array_size = PAGE_ALIGN(array_size);
123 array_size += PAGE_ALIGN((u64) max_entries * elem_size);
124 } else {
125 array_size += (u64) max_entries * elem_size;
126 }
127 }
a10423b8 AS 128
129 /* make sure there is no u32 overflow later in round_up() */
9c2d63b8 130 cost = array_size;
c85d6913 131 if (percpu)
9c2d63b8 132 cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
9c2d63b8 133
b936ca64 134 ret = bpf_map_charge_init(&mem, cost);
9c2d63b8 DB 135 if (ret < 0)
136 return ERR_PTR(ret);
daaf427c 137
28fbcfa0 138 /* allocate all map elements and zero-initialize them */
fc970227 AN 139 if (attr->map_flags & BPF_F_MMAPABLE) {
140 void *data;
141
142 /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
143 data = bpf_map_area_mmapable_alloc(array_size, numa_node);
144 if (!data) {
145 bpf_map_charge_finish(&mem);
146 return ERR_PTR(-ENOMEM);
147 }
148 array = data + PAGE_ALIGN(sizeof(struct bpf_array))
149 - offsetof(struct bpf_array, value);
150 } else {
151 array = bpf_map_area_alloc(array_size, numa_node);
152 }
b936ca64 RG 153 if (!array) {
154 bpf_map_charge_finish(&mem);
d407bd25 155 return ERR_PTR(-ENOMEM);
b936ca64 156 }
b2157399 157 array->index_mask = index_mask;
2c78ee89 158 array->map.bypass_spec_v1 = bypass_spec_v1;
28fbcfa0 AS 159
160 /* copy mandatory map attributes */
32852649 161 bpf_map_init_from_attr(&array->map, attr);
b936ca64 162 bpf_map_charge_move(&array->map.memory, &mem);
28fbcfa0 AS 163 array->elem_size = elem_size;
164
9c2d63b8 165 if (percpu && bpf_array_alloc_percpu(array)) {
b936ca64 166 bpf_map_charge_finish(&array->map.memory);
d407bd25 167 bpf_map_area_free(array);
a10423b8 AS 168 return ERR_PTR(-ENOMEM);
169 }
a10423b8 170
28fbcfa0 171 return &array->map;
28fbcfa0 AS 172}
173
174/* Called from syscall or from eBPF program */
175static void *array_map_lookup_elem(struct bpf_map *map, void *key)
176{
177 struct bpf_array *array = container_of(map, struct bpf_array, map);
178 u32 index = *(u32 *)key;
179
a10423b8 180 if (unlikely(index >= array->map.max_entries))
28fbcfa0 AS 181 return NULL;
182
b2157399 183 return array->value + array->elem_size * (index & array->index_mask);
28fbcfa0 AS 184}
185
d8eca5bb DB 186static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm,
187 u32 off)
188{
189 struct bpf_array *array = container_of(map, struct bpf_array, map);
190
191 if (map->max_entries != 1)
192 return -ENOTSUPP;
193 if (off >= map->value_size)
194 return -EINVAL;
195
196 *imm = (unsigned long)array->value;
197 return 0;
198}
199
200static int array_map_direct_value_meta(const struct bpf_map *map, u64 imm,
201 u32 *off)
202{
203 struct bpf_array *array = container_of(map, struct bpf_array, map);
204 u64 base = (unsigned long)array->value;
205 u64 range = array->elem_size;
206
207 if (map->max_entries != 1)
208 return -ENOTSUPP;
209 if (imm < base || imm >= base + range)
210 return -ENOENT;
211
212 *off = imm - base;
213 return 0;
214}
215
81ed18ab AS 216/* emit BPF instructions equivalent to C code of array_map_lookup_elem() */
217static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
218{
b2157399 219 struct bpf_array *array = container_of(map, struct bpf_array, map);
81ed18ab 220 struct bpf_insn *insn = insn_buf;
fad73a1a 221 u32 elem_size = round_up(map->value_size, 8);
81ed18ab AS 222 const int ret = BPF_REG_0;
223 const int map_ptr = BPF_REG_1;
224 const int index = BPF_REG_2;
225
226 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
227 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
2c78ee89 228 if (!map->bypass_spec_v1) {
b2157399 AS 229 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
230 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
231 } else {
232 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
233 }
fad73a1a MKL 234
235 if (is_power_of_2(elem_size)) {
81ed18ab AS 236 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
237 } else {
238 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
239 }
240 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
241 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
242 *insn++ = BPF_MOV64_IMM(ret, 0);
243 return insn - insn_buf;
244}
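/* Editor's sketch (illustrative only) of what the emitted sequence above
 * computes, written as rough C; the names mirror the const regs above:
 *
 *   map_ptr += offsetof(struct bpf_array, value);   // BPF_ALU64_IMM(BPF_ADD, ...)
 *   ret = *(u32 *)index;                            // BPF_LDX_MEM(BPF_W, ...)
 *   if (ret >= map->max_entries)
 *           return NULL;                            // ret = 0 at the end
 *   if (!map->bypass_spec_v1)
 *           ret &= array->index_mask;               // Spectre v1 mitigation
 *   ret *= elem_size;                               // LSH by ilog2() if power of 2
 *   return (void *)(ret + map_ptr);                 // pointer to the element
 */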
245
a10423b8 AS 246/* Called from eBPF program */
247static void *percpu_array_map_lookup_elem(struct bpf_map *map, void *key)
248{
249 struct bpf_array *array = container_of(map, struct bpf_array, map);
250 u32 index = *(u32 *)key;
251
252 if (unlikely(index >= array->map.max_entries))
253 return NULL;
254
b2157399 255 return this_cpu_ptr(array->pptrs[index & array->index_mask]);
a10423b8 AS 256}
257
15a07b33 AS 258int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value)
259{
260 struct bpf_array *array = container_of(map, struct bpf_array, map);
261 u32 index = *(u32 *)key;
262 void __percpu *pptr;
263 int cpu, off = 0;
264 u32 size;
265
266 if (unlikely(index >= array->map.max_entries))
267 return -ENOENT;
268
269 /* per_cpu areas are zero-filled and bpf programs can only
270 * access 'value_size' of them, so copying rounded areas
271 * will not leak any kernel data
272 */
273 size = round_up(map->value_size, 8);
274 rcu_read_lock();
b2157399 275 pptr = array->pptrs[index & array->index_mask];
15a07b33 AS 276 for_each_possible_cpu(cpu) {
277 bpf_long_memcpy(value + off, per_cpu_ptr(pptr, cpu), size);
278 off += size;
279 }
280 rcu_read_unlock();
281 return 0;
282}
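/* Editor's sketch (illustrative; assumes libbpf's bpf_map_lookup_elem() and
 * libbpf_num_possible_cpus()): reading a per-cpu array element from user
 * space. The buffer must hold round_up(value_size, 8) bytes for every
 * possible CPU, matching the copy loop above.
 *
 *   int ncpus = libbpf_num_possible_cpus();
 *   __u64 *values = calloc(ncpus, sizeof(__u64));   // value_size == 8 here
 *   __u32 key = 0;
 *
 *   if (!bpf_map_lookup_elem(map_fd, &key, values))
 *           for (int cpu = 0; cpu < ncpus; cpu++)
 *                   printf("cpu%d: %llu\n", cpu, values[cpu]);
 */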
283
28fbcfa0 AS 284/* Called from syscall */
285static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
286{
287 struct bpf_array *array = container_of(map, struct bpf_array, map);
8fe45924 288 u32 index = key ? *(u32 *)key : U32_MAX;
28fbcfa0 AS 289 u32 *next = (u32 *)next_key;
290
291 if (index >= array->map.max_entries) {
292 *next = 0;
293 return 0;
294 }
295
296 if (index == array->map.max_entries - 1)
297 return -ENOENT;
298
299 *next = index + 1;
300 return 0;
301}
302
303/* Called from syscall or from eBPF program */
304static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
305 u64 map_flags)
306{
307 struct bpf_array *array = container_of(map, struct bpf_array, map);
308 u32 index = *(u32 *)key;
96049f3a 309 char *val;
28fbcfa0 310
96049f3a 311 if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
28fbcfa0 AS 312 /* unknown flags */
313 return -EINVAL;
314
a10423b8 315 if (unlikely(index >= array->map.max_entries))
28fbcfa0 AS 316 /* all elements were pre-allocated, cannot insert a new one */
317 return -E2BIG;
318
96049f3a 319 if (unlikely(map_flags & BPF_NOEXIST))
daaf427c 320 /* all elements already exist */
28fbcfa0 AS 321 return -EEXIST;
322
96049f3a AS 323 if (unlikely((map_flags & BPF_F_LOCK) &&
324 !map_value_has_spin_lock(map)))
325 return -EINVAL;
326
327 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
b2157399 328 memcpy(this_cpu_ptr(array->pptrs[index & array->index_mask]),
a10423b8 329 value, map->value_size);
96049f3a AS 330 } else {
331 val = array->value +
332 array->elem_size * (index & array->index_mask);
333 if (map_flags & BPF_F_LOCK)
334 copy_map_value_locked(map, val, value, false);
335 else
336 copy_map_value(map, val, value);
337 }
28fbcfa0 AS 338 return 0;
339}
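/* Editor's note (illustrative; assumes libbpf's bpf_map_update_elem()):
 * because every array slot is pre-allocated, BPF_NOEXIST can never succeed
 * here, while BPF_ANY and BPF_EXIST both overwrite the slot in place:
 *
 *   bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);  // fails, EEXIST
 *   bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);      // overwrites slot
 */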
340
15a07b33 AS 341int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
342 u64 map_flags)
343{
344 struct bpf_array *array = container_of(map, struct bpf_array, map);
345 u32 index = *(u32 *)key;
346 void __percpu *pptr;
347 int cpu, off = 0;
348 u32 size;
349
350 if (unlikely(map_flags > BPF_EXIST))
351 /* unknown flags */
352 return -EINVAL;
353
354 if (unlikely(index >= array->map.max_entries))
355 /* all elements were pre-allocated, cannot insert a new one */
356 return -E2BIG;
357
358 if (unlikely(map_flags == BPF_NOEXIST))
359 /* all elements already exist */
360 return -EEXIST;
361
362 /* the user space will provide round_up(value_size, 8) bytes that
363 * will be copied into per-cpu area. bpf programs can only access
364 * value_size of it. During lookup the same extra bytes will be
365 * returned or zeros which were zero-filled by percpu_alloc,
366 * so no kernel data leaks possible
367 */
368 size = round_up(map->value_size, 8);
369 rcu_read_lock();
b2157399 370 pptr = array->pptrs[index & array->index_mask];
15a07b33 AS 371 for_each_possible_cpu(cpu) {
372 bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
373 off += size;
374 }
375 rcu_read_unlock();
376 return 0;
377}
378
28fbcfa0 AS 379/* Called from syscall or from eBPF program */
380static int array_map_delete_elem(struct bpf_map *map, void *key)
381{
382 return -EINVAL;
383}
384
fc970227 AN 385static void *array_map_vmalloc_addr(struct bpf_array *array)
386{
387 return (void *)round_down((unsigned long)array, PAGE_SIZE);
388}
389
28fbcfa0 AS 390/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
391static void array_map_free(struct bpf_map *map)
392{
393 struct bpf_array *array = container_of(map, struct bpf_array, map);
394
a10423b8 AS 395 if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
396 bpf_array_free_percpu(array);
397
fc970227 AN 398 if (array->map.map_flags & BPF_F_MMAPABLE)
399 bpf_map_area_free(array_map_vmalloc_addr(array));
400 else
401 bpf_map_area_free(array);
28fbcfa0 AS 402}
403
a26ca7c9 MKL 404static void array_map_seq_show_elem(struct bpf_map *map, void *key,
405 struct seq_file *m)
406{
407 void *value;
408
409 rcu_read_lock();
410
411 value = array_map_lookup_elem(map, key);
412 if (!value) {
413 rcu_read_unlock();
414 return;
415 }
416
2824ecb7 DB 417 if (map->btf_key_type_id)
418 seq_printf(m, "%u: ", *(u32 *)key);
9b2cf328 419 btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
a26ca7c9 MKL 420 seq_puts(m, "\n");
421
422 rcu_read_unlock();
423}
424
c7b27c37 YS 425static void percpu_array_map_seq_show_elem(struct bpf_map *map, void *key,
426 struct seq_file *m)
427{
428 struct bpf_array *array = container_of(map, struct bpf_array, map);
429 u32 index = *(u32 *)key;
430 void __percpu *pptr;
431 int cpu;
432
433 rcu_read_lock();
434
435 seq_printf(m, "%u: {\n", *(u32 *)key);
436 pptr = array->pptrs[index & array->index_mask];
437 for_each_possible_cpu(cpu) {
438 seq_printf(m, "\tcpu%d: ", cpu);
439 btf_type_seq_show(map->btf, map->btf_value_type_id,
440 per_cpu_ptr(pptr, cpu), m);
441 seq_puts(m, "\n");
442 }
443 seq_puts(m, "}\n");
444
445 rcu_read_unlock();
446}
447
e8d2bec0 448static int array_map_check_btf(const struct bpf_map *map,
1b2b234b 449 const struct btf *btf,
e8d2bec0 DB 450 const struct btf_type *key_type,
451 const struct btf_type *value_type)
a26ca7c9 452{
a26ca7c9 MKL 453 u32 int_data;
454
2824ecb7 DB 455 /* One exception for keyless BTF: .bss/.data/.rodata map */
456 if (btf_type_is_void(key_type)) {
457 if (map->map_type != BPF_MAP_TYPE_ARRAY ||
458 map->max_entries != 1)
459 return -EINVAL;
460
461 if (BTF_INFO_KIND(value_type->info) != BTF_KIND_DATASEC)
462 return -EINVAL;
463
464 return 0;
465 }
466
e8d2bec0 467 if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
a26ca7c9 MKL 468 return -EINVAL;
469
470 int_data = *(u32 *)(key_type + 1);
e8d2bec0 DB 471 /* bpf array can only take a u32 key. This check makes sure
472 * that the btf matches the attr used during map_create.
a26ca7c9 473 */
e8d2bec0 474 if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
a26ca7c9 MKL 475 return -EINVAL;
476
477 return 0;
478}
479
b2e2f0e6 480static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
fc970227 AN 481{
482 struct bpf_array *array = container_of(map, struct bpf_array, map);
483 pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
484
485 if (!(map->map_flags & BPF_F_MMAPABLE))
486 return -EINVAL;
487
333291ce AN 488 if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) >
489 PAGE_ALIGN((u64)array->map.max_entries * array->elem_size))
490 return -EINVAL;
491
492 return remap_vmalloc_range(vma, array_map_vmalloc_addr(array),
493 vma->vm_pgoff + pgoff);
fc970227 AN 494}
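/* Editor's sketch (illustrative only): mapping a BPF_F_MMAPABLE array into
 * user space. Offset 0 corresponds to element 0 because array->value is
 * page-aligned (see array_map_alloc() above), and the length check in
 * array_map_mmap() caps the mapping at PAGE_ALIGN(max_entries * elem_size).
 *
 *   size_t len = 256 * 64;   // max_entries * round_up(value_size, 8)
 *   void *vals = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *                     MAP_SHARED, map_fd, 0);
 */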
495
134fede4 MKL 496static bool array_map_meta_equal(const struct bpf_map *meta0,
497 const struct bpf_map *meta1)
498{
499 return meta0->max_entries == meta1->max_entries &&
500 bpf_map_meta_equal(meta0, meta1);
501}
502
d3cc2ab5 YS 503struct bpf_iter_seq_array_map_info {
504 struct bpf_map *map;
505 void *percpu_value_buf;
506 u32 index;
507};
508
509static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
510{
511 struct bpf_iter_seq_array_map_info *info = seq->private;
512 struct bpf_map *map = info->map;
513 struct bpf_array *array;
514 u32 index;
515
516 if (info->index >= map->max_entries)
517 return NULL;
518
519 if (*pos == 0)
520 ++*pos;
521 array = container_of(map, struct bpf_array, map);
522 index = info->index & array->index_mask;
523 if (info->percpu_value_buf)
524 return array->pptrs[index];
525 return array->value + array->elem_size * index;
526}
527
528static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
529{
530 struct bpf_iter_seq_array_map_info *info = seq->private;
531 struct bpf_map *map = info->map;
532 struct bpf_array *array;
533 u32 index;
534
535 ++*pos;
536 ++info->index;
537 if (info->index >= map->max_entries)
538 return NULL;
539
540 array = container_of(map, struct bpf_array, map);
541 index = info->index & array->index_mask;
542 if (info->percpu_value_buf)
543 return array->pptrs[index];
544 return array->value + array->elem_size * index;
545}
546
547static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
548{
549 struct bpf_iter_seq_array_map_info *info = seq->private;
550 struct bpf_iter__bpf_map_elem ctx = {};
551 struct bpf_map *map = info->map;
552 struct bpf_iter_meta meta;
553 struct bpf_prog *prog;
554 int off = 0, cpu = 0;
555 void __percpu **pptr;
556 u32 size;
557
558 meta.seq = seq;
559 prog = bpf_iter_get_info(&meta, v == NULL);
560 if (!prog)
561 return 0;
562
563 ctx.meta = &meta;
564 ctx.map = info->map;
565 if (v) {
566 ctx.key = &info->index;
567
568 if (!info->percpu_value_buf) {
569 ctx.value = v;
570 } else {
571 pptr = v;
572 size = round_up(map->value_size, 8);
573 for_each_possible_cpu(cpu) {
574 bpf_long_memcpy(info->percpu_value_buf + off,
575 per_cpu_ptr(pptr, cpu),
576 size);
577 off += size;
578 }
579 ctx.value = info->percpu_value_buf;
580 }
581 }
582
583 return bpf_iter_run_prog(prog, &ctx);
584}
585
586static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
587{
588 return __bpf_array_map_seq_show(seq, v);
589}
590
591static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
592{
593 if (!v)
594 (void)__bpf_array_map_seq_show(seq, NULL);
595}
596
597static int bpf_iter_init_array_map(void *priv_data,
598 struct bpf_iter_aux_info *aux)
599{
600 struct bpf_iter_seq_array_map_info *seq_info = priv_data;
601 struct bpf_map *map = aux->map;
602 void *value_buf;
603 u32 buf_size;
604
605 if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
606 buf_size = round_up(map->value_size, 8) * num_possible_cpus();
607 value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
608 if (!value_buf)
609 return -ENOMEM;
610
611 seq_info->percpu_value_buf = value_buf;
612 }
613
614 seq_info->map = map;
615 return 0;
616}
617
618static void bpf_iter_fini_array_map(void *priv_data)
619{
620 struct bpf_iter_seq_array_map_info *seq_info = priv_data;
621
622 kfree(seq_info->percpu_value_buf);
623}
624
625static const struct seq_operations bpf_array_map_seq_ops = {
626 .start = bpf_array_map_seq_start,
627 .next = bpf_array_map_seq_next,
628 .stop = bpf_array_map_seq_stop,
629 .show = bpf_array_map_seq_show,
630};
631
632static const struct bpf_iter_seq_info iter_seq_info = {
633 .seq_ops = &bpf_array_map_seq_ops,
634 .init_seq_private = bpf_iter_init_array_map,
635 .fini_seq_private = bpf_iter_fini_array_map,
636 .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
637};
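/* Editor's sketch (illustrative; assumes the "iter/bpf_map_elem" section name
 * and the BPF_SEQ_PRINTF helper from the kernel selftests): a minimal BPF
 * program consuming the iterator wired up above. ctx->key and ctx->value are
 * the pointers filled in by __bpf_array_map_seq_show().
 *
 *   SEC("iter/bpf_map_elem")
 *   int dump_array_elem(struct bpf_iter__bpf_map_elem *ctx)
 *   {
 *           u32 *key = ctx->key;
 *           u64 *val = ctx->value;
 *
 *           if (!key || !val)
 *                   return 0;
 *           BPF_SEQ_PRINTF(ctx->meta->seq, "%u: %llu\n", *key, *val);
 *           return 0;
 *   }
 */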
638
41c48f3a 639static int array_map_btf_id;
40077e0c 640const struct bpf_map_ops array_map_ops = {
134fede4 641 .map_meta_equal = array_map_meta_equal,
ad46061f 642 .map_alloc_check = array_map_alloc_check,
28fbcfa0 AS 643 .map_alloc = array_map_alloc,
644 .map_free = array_map_free,
645 .map_get_next_key = array_map_get_next_key,
646 .map_lookup_elem = array_map_lookup_elem,
647 .map_update_elem = array_map_update_elem,
648 .map_delete_elem = array_map_delete_elem,
81ed18ab 649 .map_gen_lookup = array_map_gen_lookup,
d8eca5bb DB 650 .map_direct_value_addr = array_map_direct_value_addr,
651 .map_direct_value_meta = array_map_direct_value_meta,
fc970227 652 .map_mmap = array_map_mmap,
a26ca7c9 MKL 653 .map_seq_show_elem = array_map_seq_show_elem,
654 .map_check_btf = array_map_check_btf,
c60f2d28 BV 655 .map_lookup_batch = generic_map_lookup_batch,
656 .map_update_batch = generic_map_update_batch,
41c48f3a AI 657 .map_btf_name = "bpf_array",
658 .map_btf_id = &array_map_btf_id,
d3cc2ab5 659 .iter_seq_info = &iter_seq_info,
28fbcfa0 AS 660};
661
2872e9ac 662static int percpu_array_map_btf_id;
40077e0c 663const struct bpf_map_ops percpu_array_map_ops = {
f4d05259 664 .map_meta_equal = bpf_map_meta_equal,
ad46061f 665 .map_alloc_check = array_map_alloc_check,
a10423b8 AS 666 .map_alloc = array_map_alloc,
667 .map_free = array_map_free,
668 .map_get_next_key = array_map_get_next_key,
669 .map_lookup_elem = percpu_array_map_lookup_elem,
670 .map_update_elem = array_map_update_elem,
671 .map_delete_elem = array_map_delete_elem,
c7b27c37 672 .map_seq_show_elem = percpu_array_map_seq_show_elem,
e8d2bec0 673 .map_check_btf = array_map_check_btf,
2872e9ac AI 674 .map_btf_name = "bpf_array",
675 .map_btf_id = &percpu_array_map_btf_id,
d3cc2ab5 676 .iter_seq_info = &iter_seq_info,
a10423b8 AS 677};
678
ad46061f 679static int fd_array_map_alloc_check(union bpf_attr *attr)
04fd61ab 680{
2a36f0b9 681 /* only file descriptors can be stored in this type of map */
04fd61ab 682 if (attr->value_size != sizeof(u32))
ad46061f 683 return -EINVAL;
591fe988 DB 684 /* Program read-only/write-only not supported for special maps yet. */
685 if (attr->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG))
686 return -EINVAL;
ad46061f 687 return array_map_alloc_check(attr);
04fd61ab AS 688}
689
2a36f0b9 690static void fd_array_map_free(struct bpf_map *map)
04fd61ab AS 691{
692 struct bpf_array *array = container_of(map, struct bpf_array, map);
693 int i;
694
04fd61ab AS 695 /* make sure it's empty */
696 for (i = 0; i < array->map.max_entries; i++)
2a36f0b9 697 BUG_ON(array->ptrs[i] != NULL);
d407bd25 DB 698
699 bpf_map_area_free(array);
04fd61ab AS 700}
701
2a36f0b9 702static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
04fd61ab 703{
3b4a63f6 704 return ERR_PTR(-EOPNOTSUPP);
04fd61ab AS 705}
706
14dc6f04 MKL 707/* only called from syscall */
708int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value)
709{
710 void **elem, *ptr;
711 int ret = 0;
712
713 if (!map->ops->map_fd_sys_lookup_elem)
714 return -ENOTSUPP;
715
716 rcu_read_lock();
717 elem = array_map_lookup_elem(map, key);
718 if (elem && (ptr = READ_ONCE(*elem)))
719 *value = map->ops->map_fd_sys_lookup_elem(ptr);
720 else
721 ret = -ENOENT;
722 rcu_read_unlock();
723
724 return ret;
725}
726
04fd61ab 727/* only called from syscall */
d056a788 DB 728int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
729 void *key, void *value, u64 map_flags)
04fd61ab AS 730{
731 struct bpf_array *array = container_of(map, struct bpf_array, map);
2a36f0b9 732 void *new_ptr, *old_ptr;
04fd61ab AS 733 u32 index = *(u32 *)key, ufd;
734
735 if (map_flags != BPF_ANY)
736 return -EINVAL;
737
738 if (index >= array->map.max_entries)
739 return -E2BIG;
740
741 ufd = *(u32 *)value;
d056a788 742 new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
2a36f0b9 WN 743 if (IS_ERR(new_ptr))
744 return PTR_ERR(new_ptr);
04fd61ab 745
da765a2f DB 746 if (map->ops->map_poke_run) {
747 mutex_lock(&array->aux->poke_mutex);
748 old_ptr = xchg(array->ptrs + index, new_ptr);
749 map->ops->map_poke_run(map, index, old_ptr, new_ptr);
750 mutex_unlock(&array->aux->poke_mutex);
751 } else {
752 old_ptr = xchg(array->ptrs + index, new_ptr);
753 }
754
2a36f0b9 WN 755 if (old_ptr)
756 map->ops->map_fd_put_ptr(old_ptr);
04fd61ab AS 757 return 0;
758}
759
2a36f0b9 760static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
04fd61ab AS 761{
762 struct bpf_array *array = container_of(map, struct bpf_array, map);
2a36f0b9 763 void *old_ptr;
04fd61ab AS 764 u32 index = *(u32 *)key;
765
766 if (index >= array->map.max_entries)
767 return -E2BIG;
768
da765a2f DB 769 if (map->ops->map_poke_run) {
770 mutex_lock(&array->aux->poke_mutex);
771 old_ptr = xchg(array->ptrs + index, NULL);
772 map->ops->map_poke_run(map, index, old_ptr, NULL);
773 mutex_unlock(&array->aux->poke_mutex);
774 } else {
775 old_ptr = xchg(array->ptrs + index, NULL);
776 }
777
2a36f0b9 WN 778 if (old_ptr) {
779 map->ops->map_fd_put_ptr(old_ptr);
04fd61ab AS 780 return 0;
781 } else {
782 return -ENOENT;
783 }
784}
785
d056a788 DB 786static void *prog_fd_array_get_ptr(struct bpf_map *map,
787 struct file *map_file, int fd)
2a36f0b9 WN 788{
789 struct bpf_array *array = container_of(map, struct bpf_array, map);
790 struct bpf_prog *prog = bpf_prog_get(fd);
d056a788 791
2a36f0b9 WN 792 if (IS_ERR(prog))
793 return prog;
794
795 if (!bpf_prog_array_compatible(array, prog)) {
796 bpf_prog_put(prog);
797 return ERR_PTR(-EINVAL);
798 }
d056a788 799
2a36f0b9 WN 800 return prog;
801}
802
803static void prog_fd_array_put_ptr(void *ptr)
804{
1aacde3d 805 bpf_prog_put(ptr);
2a36f0b9 WN 806}
807
14dc6f04 MKL 808static u32 prog_fd_array_sys_lookup_elem(void *ptr)
809{
810 return ((struct bpf_prog *)ptr)->aux->id;
811}
812
04fd61ab 813/* decrement refcnt of all bpf_progs that are stored in this map */
ba6b8de4 814static void bpf_fd_array_map_clear(struct bpf_map *map)
04fd61ab AS 815{
816 struct bpf_array *array = container_of(map, struct bpf_array, map);
817 int i;
818
819 for (i = 0; i < array->map.max_entries; i++)
2a36f0b9 820 fd_array_map_delete_elem(map, &i);
04fd61ab AS 821}
822
a7c19db3 YS 823static void prog_array_map_seq_show_elem(struct bpf_map *map, void *key,
824 struct seq_file *m)
825{
826 void **elem, *ptr;
827 u32 prog_id;
828
829 rcu_read_lock();
830
831 elem = array_map_lookup_elem(map, key);
832 if (elem) {
833 ptr = READ_ONCE(*elem);
834 if (ptr) {
835 seq_printf(m, "%u: ", *(u32 *)key);
836 prog_id = prog_fd_array_sys_lookup_elem(ptr);
837 btf_type_seq_show(map->btf, map->btf_value_type_id,
838 &prog_id, m);
839 seq_puts(m, "\n");
840 }
841 }
842
843 rcu_read_unlock();
844}
845
da765a2f DB 846struct prog_poke_elem {
847 struct list_head list;
848 struct bpf_prog_aux *aux;
849};
850
851static int prog_array_map_poke_track(struct bpf_map *map,
852 struct bpf_prog_aux *prog_aux)
853{
854 struct prog_poke_elem *elem;
855 struct bpf_array_aux *aux;
856 int ret = 0;
857
858 aux = container_of(map, struct bpf_array, map)->aux;
859 mutex_lock(&aux->poke_mutex);
860 list_for_each_entry(elem, &aux->poke_progs, list) {
861 if (elem->aux == prog_aux)
862 goto out;
863 }
864
865 elem = kmalloc(sizeof(*elem), GFP_KERNEL);
866 if (!elem) {
867 ret = -ENOMEM;
868 goto out;
869 }
870
871 INIT_LIST_HEAD(&elem->list);
872 /* We must track the program's aux info at this point in time
873 * since the program pointer itself may not be stable yet, see
874 * also comment in prog_array_map_poke_run().
875 */
876 elem->aux = prog_aux;
877
878 list_add_tail(&elem->list, &aux->poke_progs);
879out:
880 mutex_unlock(&aux->poke_mutex);
881 return ret;
882}
883
884static void prog_array_map_poke_untrack(struct bpf_map *map,
885 struct bpf_prog_aux *prog_aux)
886{
887 struct prog_poke_elem *elem, *tmp;
888 struct bpf_array_aux *aux;
889
890 aux = container_of(map, struct bpf_array, map)->aux;
891 mutex_lock(&aux->poke_mutex);
892 list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
893 if (elem->aux == prog_aux) {
894 list_del_init(&elem->list);
895 kfree(elem);
896 break;
897 }
898 }
899 mutex_unlock(&aux->poke_mutex);
900}
901
902static void prog_array_map_poke_run(struct bpf_map *map, u32 key,
903 struct bpf_prog *old,
904 struct bpf_prog *new)
905{
ebf7d1f5 906 u8 *old_addr, *new_addr, *old_bypass_addr;
da765a2f DB 907 struct prog_poke_elem *elem;
908 struct bpf_array_aux *aux;
909
da765a2f DB 910 aux = container_of(map, struct bpf_array, map)->aux;
911 WARN_ON_ONCE(!mutex_is_locked(&aux->poke_mutex));
912
913 list_for_each_entry(elem, &aux->poke_progs, list) {
914 struct bpf_jit_poke_descriptor *poke;
915 int i, ret;
916
917 for (i = 0; i < elem->aux->size_poke_tab; i++) {
918 poke = &elem->aux->poke_tab[i];
919
920 /* Few things to be aware of:
921 *
922 * 1) We can only ever access aux in this context, but
923 * not aux->prog since it might not be stable yet and
924 * there could be danger of use after free otherwise.
925 * 2) Initially when we start tracking aux, the program
926 * is not JITed yet and also does not have a kallsyms
cf71b174
MF
927 * entry. We skip these as poke->tailcall_target_stable
928 * is not active yet. The JIT will do the final fixup
929 * before setting it stable. The various
930 * poke->tailcall_target_stable are successively
931 * activated, so tail call updates can arrive from here
932 * while JIT is still finishing its final fixup for
933 * non-activated poke entries.
da765a2f DB 934 * 3) On program teardown, the program's kallsym entry gets
935 * removed out of RCU callback, but we can only untrack
936 * from sleepable context, therefore bpf_arch_text_poke()
937 * might not see that this is in BPF text section and
938 * bails out with -EINVAL. As these are unreachable since
939 * RCU grace period already passed, we simply skip them.
940 * 4) Also programs reaching refcount of zero while patching
941 * is in progress are okay since we're protected under
942 * poke_mutex and untrack the programs before the JIT
943 * buffer is freed. When we're still in the middle of
944 * patching and suddenly kallsyms entry of the program
945 * gets evicted, we just skip the rest which is fine due
946 * to point 3).
947 * 5) Any other error happening below from bpf_arch_text_poke()
948 * is an unexpected bug.
949 */
cf71b174 950 if (!READ_ONCE(poke->tailcall_target_stable))
da765a2f DB 951 continue;
952 if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
953 continue;
954 if (poke->tail_call.map != map ||
955 poke->tail_call.key != key)
956 continue;
957
ebf7d1f5 MF 958 old_bypass_addr = old ? NULL : poke->bypass_addr;
959 old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL;
960 new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL;
961
962 if (new) {
963 ret = bpf_arch_text_poke(poke->tailcall_target,
964 BPF_MOD_JUMP,
965 old_addr, new_addr);
966 BUG_ON(ret < 0 && ret != -EINVAL);
967 if (!old) {
968 ret = bpf_arch_text_poke(poke->tailcall_bypass,
969 BPF_MOD_JUMP,
970 poke->bypass_addr,
971 NULL);
972 BUG_ON(ret < 0 && ret != -EINVAL);
973 }
974 } else {
975 ret = bpf_arch_text_poke(poke->tailcall_bypass,
976 BPF_MOD_JUMP,
977 old_bypass_addr,
978 poke->bypass_addr);
979 BUG_ON(ret < 0 && ret != -EINVAL);
980 /* let other CPUs finish the execution of program
981 * so that it will not be possible to expose them
982 * to invalid nop, stack unwind, nop state
983 */
984 if (!ret)
985 synchronize_rcu();
986 ret = bpf_arch_text_poke(poke->tailcall_target,
987 BPF_MOD_JUMP,
988 old_addr, NULL);
989 BUG_ON(ret < 0 && ret != -EINVAL);
990 }
da765a2f DB 991 }
992 }
993}
994
995static void prog_array_map_clear_deferred(struct work_struct *work)
996{
997 struct bpf_map *map = container_of(work, struct bpf_array_aux,
998 work)->map;
999 bpf_fd_array_map_clear(map);
1000 bpf_map_put(map);
1001}
1002
1003static void prog_array_map_clear(struct bpf_map *map)
1004{
1005 struct bpf_array_aux *aux = container_of(map, struct bpf_array,
1006 map)->aux;
1007 bpf_map_inc(map);
1008 schedule_work(&aux->work);
1009}
1010
2beee5f5 DB 1011static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
1012{
1013 struct bpf_array_aux *aux;
1014 struct bpf_map *map;
1015
1016 aux = kzalloc(sizeof(*aux), GFP_KERNEL);
1017 if (!aux)
1018 return ERR_PTR(-ENOMEM);
1019
da765a2f DB 1020 INIT_WORK(&aux->work, prog_array_map_clear_deferred);
1021 INIT_LIST_HEAD(&aux->poke_progs);
1022 mutex_init(&aux->poke_mutex);
1023
2beee5f5 DB 1024 map = array_map_alloc(attr);
1025 if (IS_ERR(map)) {
1026 kfree(aux);
1027 return map;
1028 }
1029
1030 container_of(map, struct bpf_array, map)->aux = aux;
da765a2f DB 1031 aux->map = map;
1032
2beee5f5 DB 1033 return map;
1034}
1035
1036static void prog_array_map_free(struct bpf_map *map)
1037{
da765a2f 1038 struct prog_poke_elem *elem, *tmp;
2beee5f5 DB 1039 struct bpf_array_aux *aux;
1040
1041 aux = container_of(map, struct bpf_array, map)->aux;
da765a2f DB 1042 list_for_each_entry_safe(elem, tmp, &aux->poke_progs, list) {
1043 list_del_init(&elem->list);
1044 kfree(elem);
1045 }
2beee5f5 DB 1046 kfree(aux);
1047 fd_array_map_free(map);
1048}
1049
f4d05259 MKL 1050/* prog_array->aux->{type,jited} is a runtime binding.
1051 * Doing static check alone in the verifier is not enough.
1052 * Thus, prog_array_map cannot be used as an inner_map
1053 * and map_meta_equal is not implemented.
1054 */
2872e9ac 1055static int prog_array_map_btf_id;
40077e0c 1056const struct bpf_map_ops prog_array_map_ops = {
ad46061f 1057 .map_alloc_check = fd_array_map_alloc_check,
2beee5f5 DB 1058 .map_alloc = prog_array_map_alloc,
1059 .map_free = prog_array_map_free,
da765a2f DB 1060 .map_poke_track = prog_array_map_poke_track,
1061 .map_poke_untrack = prog_array_map_poke_untrack,
1062 .map_poke_run = prog_array_map_poke_run,
04fd61ab 1063 .map_get_next_key = array_map_get_next_key,
2a36f0b9 1064 .map_lookup_elem = fd_array_map_lookup_elem,
2a36f0b9 WN 1065 .map_delete_elem = fd_array_map_delete_elem,
1066 .map_fd_get_ptr = prog_fd_array_get_ptr,
1067 .map_fd_put_ptr = prog_fd_array_put_ptr,
14dc6f04 1068 .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem,
da765a2f 1069 .map_release_uref = prog_array_map_clear,
a7c19db3 1070 .map_seq_show_elem = prog_array_map_seq_show_elem,
2872e9ac AI 1071 .map_btf_name = "bpf_array",
1072 .map_btf_id = &prog_array_map_btf_id,
04fd61ab AS 1073};
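/* Editor's sketch (illustrative; assumes libbpf's BTF-style map definition
 * macros): the usual program-side consumer of a prog array is
 * bpf_tail_call(), whose targets are the program fds installed through
 * bpf_fd_array_map_update_elem() above.
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
 *           __uint(max_entries, 8);
 *           __uint(key_size, sizeof(__u32));
 *           __uint(value_size, sizeof(__u32));
 *   } jmp_table SEC(".maps");
 *
 *   // in a program: jump to the slot; execution falls through if it is empty
 *   bpf_tail_call(ctx, &jmp_table, slot);
 */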
1074
3b1efb19 DB 1075static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
1076 struct file *map_file)
ea317b26 1077{
3b1efb19 DB 1078 struct bpf_event_entry *ee;
1079
858d68f1 1080 ee = kzalloc(sizeof(*ee), GFP_ATOMIC);
3b1efb19 DB 1081 if (ee) {
1082 ee->event = perf_file->private_data;
1083 ee->perf_file = perf_file;
1084 ee->map_file = map_file;
1085 }
1086
1087 return ee;
1088}
1089
1090static void __bpf_event_entry_free(struct rcu_head *rcu)
1091{
1092 struct bpf_event_entry *ee;
1093
1094 ee = container_of(rcu, struct bpf_event_entry, rcu);
1095 fput(ee->perf_file);
1096 kfree(ee);
1097}
1098
1099static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
1100{
1101 call_rcu(&ee->rcu, __bpf_event_entry_free);
ea317b26 KX 1102}
1103
d056a788 DB 1104static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
1105 struct file *map_file, int fd)
ea317b26 1106{
3b1efb19 DB 1107 struct bpf_event_entry *ee;
1108 struct perf_event *event;
1109 struct file *perf_file;
f91840a3 1110 u64 value;
ea317b26 1111
3b1efb19 DB 1112 perf_file = perf_event_get(fd);
1113 if (IS_ERR(perf_file))
1114 return perf_file;
e03e7ee3 1115
f91840a3 1116 ee = ERR_PTR(-EOPNOTSUPP);
3b1efb19 1117 event = perf_file->private_data;
97562633 1118 if (perf_event_read_local(event, &value, NULL, NULL) == -EOPNOTSUPP)
3b1efb19 DB 1119 goto err_out;
1120
f91840a3 AS 1121 ee = bpf_event_entry_gen(perf_file, map_file);
1122 if (ee)
1123 return ee;
1124 ee = ERR_PTR(-ENOMEM);
3b1efb19 DB 1125err_out:
1126 fput(perf_file);
1127 return ee;
ea317b26 KX 1128}
1129
1130static void perf_event_fd_array_put_ptr(void *ptr)
1131{
3b1efb19 DB 1132 bpf_event_entry_free_rcu(ptr);
1133}
1134
1135static void perf_event_fd_array_release(struct bpf_map *map,
1136 struct file *map_file)
1137{
1138 struct bpf_array *array = container_of(map, struct bpf_array, map);
1139 struct bpf_event_entry *ee;
1140 int i;
1141
792caccc SL 1142 if (map->map_flags & BPF_F_PRESERVE_ELEMS)
1143 return;
1144
3b1efb19 DB 1145 rcu_read_lock();
1146 for (i = 0; i < array->map.max_entries; i++) {
1147 ee = READ_ONCE(array->ptrs[i]);
1148 if (ee && ee->map_file == map_file)
1149 fd_array_map_delete_elem(map, &i);
1150 }
1151 rcu_read_unlock();
ea317b26 KX 1152}
1153
792caccc SL 1154static void perf_event_fd_array_map_free(struct bpf_map *map)
1155{
1156 if (map->map_flags & BPF_F_PRESERVE_ELEMS)
1157 bpf_fd_array_map_clear(map);
1158 fd_array_map_free(map);
1159}
1160
2872e9ac 1161static int perf_event_array_map_btf_id;
40077e0c 1162const struct bpf_map_ops perf_event_array_map_ops = {
f4d05259 1163 .map_meta_equal = bpf_map_meta_equal,
ad46061f JK 1164 .map_alloc_check = fd_array_map_alloc_check,
1165 .map_alloc = array_map_alloc,
792caccc 1166 .map_free = perf_event_fd_array_map_free,
ea317b26 KX 1167 .map_get_next_key = array_map_get_next_key,
1168 .map_lookup_elem = fd_array_map_lookup_elem,
ea317b26 KX 1169 .map_delete_elem = fd_array_map_delete_elem,
1170 .map_fd_get_ptr = perf_event_fd_array_get_ptr,
1171 .map_fd_put_ptr = perf_event_fd_array_put_ptr,
3b1efb19 1172 .map_release = perf_event_fd_array_release,
e8d2bec0 1173 .map_check_btf = map_check_no_btf,
2872e9ac AI 1174 .map_btf_name = "bpf_array",
1175 .map_btf_id = &perf_event_array_map_btf_id,
ea317b26 KX 1176};
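/* Editor's sketch (illustrative; assumes libbpf's BTF-style map definition
 * macros): perf event arrays are typically filled from user space with one
 * perf event fd per CPU and written to from a program with the
 * bpf_perf_event_output() helper:
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 *           __uint(key_size, sizeof(int));
 *           __uint(value_size, sizeof(int));
 *   } events SEC(".maps");
 *
 *   // in a program: push a sample to the current CPU's ring buffer
 *   bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &data, sizeof(data));
 */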
1177
60d20f91 1178#ifdef CONFIG_CGROUPS
4ed8ec52 MKL 1179static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
1180 struct file *map_file /* not used */,
1181 int fd)
1182{
1183 return cgroup_get_from_fd(fd);
1184}
1185
1186static void cgroup_fd_array_put_ptr(void *ptr)
1187{
1188 /* cgroup_put() frees cgrp after an RCU grace period */
1189 cgroup_put(ptr);
1190}
1191
1192static void cgroup_fd_array_free(struct bpf_map *map)
1193{
1194 bpf_fd_array_map_clear(map);
1195 fd_array_map_free(map);
1196}
1197
2872e9ac 1198static int cgroup_array_map_btf_id;
40077e0c 1199const struct bpf_map_ops cgroup_array_map_ops = {
f4d05259 1200 .map_meta_equal = bpf_map_meta_equal,
ad46061f JK 1201 .map_alloc_check = fd_array_map_alloc_check,
1202 .map_alloc = array_map_alloc,
4ed8ec52 MKL 1203 .map_free = cgroup_fd_array_free,
1204 .map_get_next_key = array_map_get_next_key,
1205 .map_lookup_elem = fd_array_map_lookup_elem,
1206 .map_delete_elem = fd_array_map_delete_elem,
1207 .map_fd_get_ptr = cgroup_fd_array_get_ptr,
1208 .map_fd_put_ptr = cgroup_fd_array_put_ptr,
e8d2bec0 1209 .map_check_btf = map_check_no_btf,
2872e9ac AI 1210 .map_btf_name = "bpf_array",
1211 .map_btf_id = &cgroup_array_map_btf_id,
4ed8ec52 1212};
4ed8ec52 1213#endif
56f668df MKL 1214
1215static struct bpf_map *array_of_map_alloc(union bpf_attr *attr)
1216{
1217 struct bpf_map *map, *inner_map_meta;
1218
1219 inner_map_meta = bpf_map_meta_alloc(attr->inner_map_fd);
1220 if (IS_ERR(inner_map_meta))
1221 return inner_map_meta;
1222
ad46061f 1223 map = array_map_alloc(attr);
56f668df MKL 1224 if (IS_ERR(map)) {
1225 bpf_map_meta_free(inner_map_meta);
1226 return map;
1227 }
1228
1229 map->inner_map_meta = inner_map_meta;
1230
1231 return map;
1232}
1233
1234static void array_of_map_free(struct bpf_map *map)
1235{
1236 /* map->inner_map_meta is only accessed by syscall which
1237 * is protected by fdget/fdput.
1238 */
1239 bpf_map_meta_free(map->inner_map_meta);
1240 bpf_fd_array_map_clear(map);
1241 fd_array_map_free(map);
1242}
1243
1244static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
1245{
1246 struct bpf_map **inner_map = array_map_lookup_elem(map, key);
1247
1248 if (!inner_map)
1249 return NULL;
1250
1251 return READ_ONCE(*inner_map);
1252}
1253
7b0c2a05 DB 1254static u32 array_of_map_gen_lookup(struct bpf_map *map,
1255 struct bpf_insn *insn_buf)
1256{
b2157399 1257 struct bpf_array *array = container_of(map, struct bpf_array, map);
7b0c2a05 DB 1258 u32 elem_size = round_up(map->value_size, 8);
1259 struct bpf_insn *insn = insn_buf;
1260 const int ret = BPF_REG_0;
1261 const int map_ptr = BPF_REG_1;
1262 const int index = BPF_REG_2;
1263
1264 *insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
1265 *insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
2c78ee89 1266 if (!map->bypass_spec_v1) {
b2157399 AS 1267 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
1268 *insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
1269 } else {
1270 *insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
1271 }
7b0c2a05 DB 1272 if (is_power_of_2(elem_size))
1273 *insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
1274 else
1275 *insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
1276 *insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
1277 *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
1278 *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
1279 *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
1280 *insn++ = BPF_MOV64_IMM(ret, 0);
1281
1282 return insn - insn_buf;
1283}
1284
2872e9ac 1285static int array_of_maps_map_btf_id;
40077e0c 1286const struct bpf_map_ops array_of_maps_map_ops = {
ad46061f 1287 .map_alloc_check = fd_array_map_alloc_check,
56f668df MKL 1288 .map_alloc = array_of_map_alloc,
1289 .map_free = array_of_map_free,
1290 .map_get_next_key = array_map_get_next_key,
1291 .map_lookup_elem = array_of_map_lookup_elem,
1292 .map_delete_elem = fd_array_map_delete_elem,
1293 .map_fd_get_ptr = bpf_map_fd_get_ptr,
1294 .map_fd_put_ptr = bpf_map_fd_put_ptr,
14dc6f04 1295 .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
7b0c2a05 1296 .map_gen_lookup = array_of_map_gen_lookup,
e8d2bec0 1297 .map_check_btf = map_check_no_btf,
2872e9ac AI 1298 .map_btf_name = "bpf_array",
1299 .map_btf_id = &array_of_maps_map_btf_id,
56f668df 1300};