// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2019 Facebook */

#include <linux/bpf.h>
#include <linux/bpf_verifier.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/numa.h>
#include <linux/seq_file.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
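
/* A BPF_MAP_TYPE_STRUCT_OPS map holds a single kernel struct of func
 * ptrs (e.g. tcp_congestion_ops).  Updating its one element plugs
 * bpf_progs into those func ptrs through per-member trampolines and
 * registers the struct with the owning subsystem; deleting the
 * element unregisters it.
 */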

enum bpf_struct_ops_state {
	BPF_STRUCT_OPS_STATE_INIT,
	BPF_STRUCT_OPS_STATE_INUSE,
	BPF_STRUCT_OPS_STATE_TOBEFREE,
};

#define BPF_STRUCT_OPS_COMMON_VALUE			\
	refcount_t refcnt;				\
	enum bpf_struct_ops_state state

struct bpf_struct_ops_value {
	BPF_STRUCT_OPS_COMMON_VALUE;
	char data[] ____cacheline_aligned_in_smp;
};

struct bpf_struct_ops_map {
	struct bpf_map map;
	struct rcu_head rcu;
	const struct bpf_struct_ops *st_ops;
	/* protect map_update */
	struct mutex lock;
	/* progs has all the bpf_progs that are populated
	 * to the func ptrs of the kernel's struct
	 * (in kvalue.data).
	 */
	struct bpf_prog **progs;
	/* image is a page that has all the trampolines
	 * that store the func args before calling the bpf_prog.
	 * A PAGE_SIZE "image" is enough to store all the
	 * trampolines for "progs[]".
	 */
	void *image;
	/* uvalue->data stores the kernel struct
	 * (e.g. tcp_congestion_ops) in a form that is more useful
	 * to userspace than the kvalue.  For example,
	 * the bpf_prog's id is stored instead of the kernel
	 * address of a func ptr.
	 */
	struct bpf_struct_ops_value *uvalue;
	/* kvalue.data stores the actual kernel struct
	 * (e.g. tcp_congestion_ops) that will be
	 * registered to the kernel subsystem.
	 */
	struct bpf_struct_ops_value kvalue;
};

#define VALUE_PREFIX "bpf_struct_ops_"
#define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)

/* bpf_struct_ops_##_name (e.g. bpf_struct_ops_tcp_congestion_ops) is
 * the map's value exposed to userspace, and its btf-type-id is
 * stored at map->btf_vmlinux_value_type_id.
 */
#define BPF_STRUCT_OPS_TYPE(_name)				\
extern struct bpf_struct_ops bpf_##_name;			\
								\
struct bpf_struct_ops_##_name {					\
	BPF_STRUCT_OPS_COMMON_VALUE;				\
	struct _name data ____cacheline_aligned_in_smp;		\
};
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
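
/* For illustration (assuming tcp_congestion_ops is listed in
 * bpf_struct_ops_types.h), the macro above expands to:
 *
 *	extern struct bpf_struct_ops bpf_tcp_congestion_ops;
 *
 *	struct bpf_struct_ops_tcp_congestion_ops {
 *		refcount_t refcnt;
 *		enum bpf_struct_ops_state state;
 *		struct tcp_congestion_ops data ____cacheline_aligned_in_smp;
 *	};
 */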

enum {
#define BPF_STRUCT_OPS_TYPE(_name) BPF_STRUCT_OPS_TYPE_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
	__NR_BPF_STRUCT_OPS_TYPE,
};

static struct bpf_struct_ops * const bpf_struct_ops[] = {
#define BPF_STRUCT_OPS_TYPE(_name)				\
	[BPF_STRUCT_OPS_TYPE_##_name] = &bpf_##_name,
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE
};

const struct bpf_verifier_ops bpf_struct_ops_verifier_ops = {
};

const struct bpf_prog_ops bpf_struct_ops_prog_ops = {
#ifdef CONFIG_NET
	.test_run = bpf_struct_ops_test_run,
#endif
};

static const struct btf_type *module_type;
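
/* Scan every registered struct_ops against btf_vmlinux: resolve the
 * struct's BTF id and its VALUE_PREFIX-ed value type, reject anonymous
 * and bitfield members, and distill each func ptr member into a
 * btf_func_model so that a trampoline can be prepared for it later.
 */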
void bpf_struct_ops_init(struct btf *btf, struct bpf_verifier_log *log)
{
	s32 type_id, value_id, module_id;
	const struct btf_member *member;
	struct bpf_struct_ops *st_ops;
	const struct btf_type *t;
	char value_name[128];
	const char *mname;
	u32 i, j;

	/* Ensure BTF type is emitted for "struct bpf_struct_ops_##_name" */
#define BPF_STRUCT_OPS_TYPE(_name) BTF_TYPE_EMIT(struct bpf_struct_ops_##_name);
#include "bpf_struct_ops_types.h"
#undef BPF_STRUCT_OPS_TYPE

	module_id = btf_find_by_name_kind(btf, "module", BTF_KIND_STRUCT);
	if (module_id < 0) {
		pr_warn("Cannot find struct module in btf_vmlinux\n");
		return;
	}
	module_type = btf_type_by_id(btf, module_id);

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		st_ops = bpf_struct_ops[i];

		if (strlen(st_ops->name) + VALUE_PREFIX_LEN >=
		    sizeof(value_name)) {
			pr_warn("struct_ops name %s is too long\n",
				st_ops->name);
			continue;
		}
		sprintf(value_name, "%s%s", VALUE_PREFIX, st_ops->name);

		value_id = btf_find_by_name_kind(btf, value_name,
						 BTF_KIND_STRUCT);
		if (value_id < 0) {
			pr_warn("Cannot find struct %s in btf_vmlinux\n",
				value_name);
			continue;
		}

		type_id = btf_find_by_name_kind(btf, st_ops->name,
						BTF_KIND_STRUCT);
		if (type_id < 0) {
			pr_warn("Cannot find struct %s in btf_vmlinux\n",
				st_ops->name);
			continue;
		}
		t = btf_type_by_id(btf, type_id);
		if (btf_type_vlen(t) > BPF_STRUCT_OPS_MAX_NR_MEMBERS) {
			pr_warn("Cannot support #%u members in struct %s\n",
				btf_type_vlen(t), st_ops->name);
			continue;
		}

		for_each_member(j, t, member) {
			const struct btf_type *func_proto;

			mname = btf_name_by_offset(btf, member->name_off);
			if (!*mname) {
				pr_warn("anon member in struct %s is not supported\n",
					st_ops->name);
				break;
			}

			if (__btf_member_bitfield_size(t, member)) {
				pr_warn("bit field member %s in struct %s is not supported\n",
					mname, st_ops->name);
				break;
			}

			func_proto = btf_type_resolve_func_ptr(btf,
							       member->type,
							       NULL);
			if (func_proto &&
			    btf_distill_func_proto(log, btf,
						   func_proto, mname,
						   &st_ops->func_models[j])) {
				pr_warn("Error in parsing func ptr %s in struct %s\n",
					mname, st_ops->name);
				break;
			}
		}

		if (j == btf_type_vlen(t)) {
			if (st_ops->init(btf)) {
				pr_warn("Error in init bpf_struct_ops %s\n",
					st_ops->name);
			} else {
				st_ops->type_id = type_id;
				st_ops->type = t;
				st_ops->value_id = value_id;
				st_ops->value_type = btf_type_by_id(btf,
								    value_id);
			}
		}
	}
}

extern struct btf *btf_vmlinux;

static const struct bpf_struct_ops *
bpf_struct_ops_find_value(u32 value_id)
{
	unsigned int i;

	if (!value_id || !btf_vmlinux)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		if (bpf_struct_ops[i]->value_id == value_id)
			return bpf_struct_ops[i];
	}

	return NULL;
}

const struct bpf_struct_ops *bpf_struct_ops_find(u32 type_id)
{
	unsigned int i;

	if (!type_id || !btf_vmlinux)
		return NULL;

	for (i = 0; i < ARRAY_SIZE(bpf_struct_ops); i++) {
		if (bpf_struct_ops[i]->type_id == type_id)
			return bpf_struct_ops[i];
	}

	return NULL;
}
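
/* The map holds exactly one element (key 0), so iteration is just
 * NULL -> 0, and asking for the key after 0 returns -ENOENT.
 */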
static int bpf_struct_ops_map_get_next_key(struct bpf_map *map, void *key,
					   void *next_key)
{
	if (key && *(u32 *)key == 0)
		return -ENOENT;

	*(u32 *)next_key = 0;
	return 0;
}

int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
				       void *value)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	enum bpf_struct_ops_state state;

	if (unlikely(*(u32 *)key != 0))
		return -ENOENT;

	kvalue = &st_map->kvalue;
	/* Pairs with smp_store_release() during map_update */
	state = smp_load_acquire(&kvalue->state);
	if (state == BPF_STRUCT_OPS_STATE_INIT) {
		memset(value, 0, map->value_size);
		return 0;
	}

	/* No lock is needed.  state and refcnt do not need
	 * to be updated together atomically.
	 */
	uvalue = (struct bpf_struct_ops_value *)value;
	memcpy(uvalue, st_map->uvalue, map->value_size);
	uvalue->state = state;
	refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));

	return 0;
}

static void *bpf_struct_ops_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EINVAL);
}

static void bpf_struct_ops_map_put_progs(struct bpf_struct_ops_map *st_map)
{
	const struct btf_type *t = st_map->st_ops->type;
	u32 i;

	for (i = 0; i < btf_type_vlen(t); i++) {
		if (st_map->progs[i]) {
			bpf_prog_put(st_map->progs[i]);
			st_map->progs[i] = NULL;
		}
	}
}
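
/* Any byte of "data" not covered by a BTF member (a hole or tail
 * padding) must be zero, so userspace cannot smuggle bits that the
 * per-member checks in map_update_elem() would never look at.
 */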
static int check_zero_holes(const struct btf_type *t, void *data)
{
	const struct btf_member *member;
	u32 i, moff, msize, prev_mend = 0;
	const struct btf_type *mtype;

	for_each_member(i, t, member) {
		moff = __btf_member_bit_offset(t, member) / 8;
		if (moff > prev_mend &&
		    memchr_inv(data + prev_mend, 0, moff - prev_mend))
			return -EINVAL;

		mtype = btf_type_by_id(btf_vmlinux, member->type);
		mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
		if (IS_ERR(mtype))
			return PTR_ERR(mtype);
		prev_mend = moff + msize;
	}

	if (t->size > prev_mend &&
	    memchr_inv(data + prev_mend, 0, t->size - prev_mend))
		return -EINVAL;

	return 0;
}
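
/* A struct_ops prog is wired up like a one-prog FENTRY trampoline.
 * When the ops member returns a value (model->ret_size > 0),
 * BPF_TRAMP_F_RET_FENTRY_RET makes the trampoline return the prog's
 * return value to the kernel caller.
 */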
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_progs *tprogs,
				      struct bpf_prog *prog,
				      const struct btf_func_model *model,
				      void *image, void *image_end)
{
	u32 flags;

	tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
	tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
	flags = model->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
	return arch_prepare_bpf_trampoline(NULL, image, image_end,
					   model, flags, tprogs, NULL);
}
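
/* Updating element 0 is what registers a struct_ops map: copy the
 * user-supplied value, let st_ops->init_member() and the func ptr
 * loop fill in kvalue.data, make the trampoline image read-only and
 * executable, and finally hand kvalue.data to st_ops->reg().  The
 * INUSE state is published with a store-release only after reg()
 * succeeds.
 */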
static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 flags)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
	const struct bpf_struct_ops *st_ops = st_map->st_ops;
	struct bpf_struct_ops_value *uvalue, *kvalue;
	const struct btf_member *member;
	const struct btf_type *t = st_ops->type;
	struct bpf_tramp_progs *tprogs = NULL;
	void *udata, *kdata;
	int prog_fd, err = 0;
	void *image, *image_end;
	u32 i;

	if (flags)
		return -EINVAL;

	if (*(u32 *)key != 0)
		return -E2BIG;

	err = check_zero_holes(st_ops->value_type, value);
	if (err)
		return err;

	uvalue = (struct bpf_struct_ops_value *)value;
	err = check_zero_holes(t, uvalue->data);
	if (err)
		return err;

	if (uvalue->state || refcount_read(&uvalue->refcnt))
		return -EINVAL;

	tprogs = kcalloc(BPF_TRAMP_MAX, sizeof(*tprogs), GFP_KERNEL);
	if (!tprogs)
		return -ENOMEM;

	uvalue = (struct bpf_struct_ops_value *)st_map->uvalue;
	kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;

	mutex_lock(&st_map->lock);

	if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
		err = -EBUSY;
		goto unlock;
	}

	memcpy(uvalue, value, map->value_size);

	udata = &uvalue->data;
	kdata = &kvalue->data;
	image = st_map->image;
	image_end = st_map->image + PAGE_SIZE;

	for_each_member(i, t, member) {
		const struct btf_type *mtype, *ptype;
		struct bpf_prog *prog;
		u32 moff;

		moff = __btf_member_bit_offset(t, member) / 8;
		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
		if (ptype == module_type) {
			if (*(void **)(udata + moff))
				goto reset_unlock;
			*(void **)(kdata + moff) = BPF_MODULE_OWNER;
			continue;
		}

		err = st_ops->init_member(t, member, kdata, udata);
		if (err < 0)
			goto reset_unlock;

		/* The ->init_member() has handled this member */
		if (err > 0)
			continue;

		/* If st_ops->init_member does not handle it,
		 * we will only handle func ptrs and zeroed members
		 * here.  Reject everything else.
		 */

		/* All non-func-ptr members must be 0 */
		if (!ptype || !btf_type_is_func_proto(ptype)) {
			u32 msize;

			mtype = btf_type_by_id(btf_vmlinux, member->type);
			mtype = btf_resolve_size(btf_vmlinux, mtype, &msize);
			if (IS_ERR(mtype)) {
				err = PTR_ERR(mtype);
				goto reset_unlock;
			}

			if (memchr_inv(udata + moff, 0, msize)) {
				err = -EINVAL;
				goto reset_unlock;
			}

			continue;
		}

		prog_fd = (int)(*(unsigned long *)(udata + moff));
		/* Similar check to the attr->attach_prog_fd */
		if (!prog_fd)
			continue;

		prog = bpf_prog_get(prog_fd);
		if (IS_ERR(prog)) {
			err = PTR_ERR(prog);
			goto reset_unlock;
		}
		st_map->progs[i] = prog;

		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
		    prog->aux->attach_btf_id != st_ops->type_id ||
		    prog->expected_attach_type != i) {
			err = -EINVAL;
			goto reset_unlock;
		}

		err = bpf_struct_ops_prepare_trampoline(tprogs, prog,
							&st_ops->func_models[i],
							image, image_end);
		if (err < 0)
			goto reset_unlock;

		*(void **)(kdata + moff) = image;
		image += err;

		/* put prog->aux->id into udata */
		*(unsigned long *)(udata + moff) = prog->aux->id;
	}

	refcount_set(&kvalue->refcnt, 1);
	bpf_map_inc(map);

	set_memory_ro((long)st_map->image, 1);
	set_memory_x((long)st_map->image, 1);
	err = st_ops->reg(kdata);
	if (likely(!err)) {
		/* Pairs with smp_load_acquire() during lookup_elem().
		 * It ensures the above udata updates (e.g. prog->aux->id)
		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
		 */
		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
		goto unlock;
	}

	/* Error during st_ops->reg().  It is very unlikely since
	 * the above init_member() should have caught it earlier
	 * before reg().  The only possibility is if there was a race
	 * in registering the struct_ops (under the same name) to
	 * a sub-system through different struct_ops's maps.
	 */
	set_memory_nx((long)st_map->image, 1);
	set_memory_rw((long)st_map->image, 1);
	bpf_map_put(map);

reset_unlock:
	bpf_struct_ops_map_put_progs(st_map);
	memset(uvalue, 0, map->value_size);
	memset(kvalue, 0, map->value_size);
unlock:
	kfree(tprogs);
	mutex_unlock(&st_map->lock);
	return err;
}
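
/* Deleting element 0 unregisters the ops.  cmpxchg() makes the
 * INUSE -> TOBEFREE transition happen at most once, so unreg()
 * cannot be called twice under concurrent deletes.
 */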
static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
{
	enum bpf_struct_ops_state prev_state;
	struct bpf_struct_ops_map *st_map;

	st_map = (struct bpf_struct_ops_map *)map;
	prev_state = cmpxchg(&st_map->kvalue.state,
			     BPF_STRUCT_OPS_STATE_INUSE,
			     BPF_STRUCT_OPS_STATE_TOBEFREE);
	switch (prev_state) {
	case BPF_STRUCT_OPS_STATE_INUSE:
		st_map->st_ops->unreg(&st_map->kvalue.data);
		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
			bpf_map_put(map);
		return 0;
	case BPF_STRUCT_OPS_STATE_TOBEFREE:
		return -EINPROGRESS;
	case BPF_STRUCT_OPS_STATE_INIT:
		return -ENOENT;
	default:
		WARN_ON_ONCE(1);
		/* Should never happen.  Treat it as not found. */
		return -ENOENT;
	}
}

static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
					     struct seq_file *m)
{
	void *value;
	int err;

	value = kmalloc(map->value_size, GFP_USER | __GFP_NOWARN);
	if (!value)
		return;

	err = bpf_struct_ops_map_sys_lookup_elem(map, key, value);
	if (!err) {
		btf_type_seq_show(btf_vmlinux, map->btf_vmlinux_value_type_id,
				  value, m);
		seq_puts(m, "\n");
	}

	kfree(value);
}

static void bpf_struct_ops_map_free(struct bpf_map *map)
{
	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;

	if (st_map->progs)
		bpf_struct_ops_map_put_progs(st_map);
	bpf_map_area_free(st_map->progs);
	bpf_jit_free_exec(st_map->image);
	bpf_map_area_free(st_map->uvalue);
	bpf_map_area_free(st_map);
}
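
/* A struct_ops map is keyed by a u32, holds exactly one element and
 * takes no map_flags; its value type must be a btf_vmlinux type.
 */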
static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
{
	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
		return -EINVAL;
	return 0;
}

static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
{
	const struct bpf_struct_ops *st_ops;
	size_t st_map_size;
	struct bpf_struct_ops_map *st_map;
	const struct btf_type *t, *vt;
	struct bpf_map *map;

	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
	if (!st_ops)
		return ERR_PTR(-ENOTSUPP);

	vt = st_ops->value_type;
	if (attr->value_size != vt->size)
		return ERR_PTR(-EINVAL);

	t = st_ops->type;

	st_map_size = sizeof(*st_map) +
		      /* kvalue stores the
		       * struct bpf_struct_ops_tcp_congestion_ops
		       */
		      (vt->size - sizeof(struct bpf_struct_ops_value));

	st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
	if (!st_map)
		return ERR_PTR(-ENOMEM);

	st_map->st_ops = st_ops;
	map = &st_map->map;

	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
	st_map->progs =
		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_prog *),
				   NUMA_NO_NODE);
	st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
	if (!st_map->uvalue || !st_map->progs || !st_map->image) {
		bpf_struct_ops_map_free(map);
		return ERR_PTR(-ENOMEM);
	}

	mutex_init(&st_map->lock);
	set_vm_flush_reset_perms(st_map->image);
	bpf_map_init_from_attr(map, attr);

	return map;
}

static int bpf_struct_ops_map_btf_id;
const struct bpf_map_ops bpf_struct_ops_map_ops = {
	.map_alloc_check = bpf_struct_ops_map_alloc_check,
	.map_alloc = bpf_struct_ops_map_alloc,
	.map_free = bpf_struct_ops_map_free,
	.map_get_next_key = bpf_struct_ops_map_get_next_key,
	.map_lookup_elem = bpf_struct_ops_map_lookup_elem,
	.map_delete_elem = bpf_struct_ops_map_delete_elem,
	.map_update_elem = bpf_struct_ops_map_update_elem,
	.map_seq_show_elem = bpf_struct_ops_map_seq_show_elem,
	.map_btf_name = "bpf_struct_ops_map",
	.map_btf_id = &bpf_struct_ops_map_btf_id,
};
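
/* Userspace sketch (an illustration, not part of this file): with
 * libbpf, a struct_ops map is typically declared as
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops dctcp = {
 *		.init	= (void *)dctcp_init,
 *		.name	= "bpf_dctcp",
 *	};
 *
 * where dctcp_init is a SEC("struct_ops/...") prog, and is registered
 * with bpf_map__attach_struct_ops(), which ends up in the
 * map_update_elem() above with key 0.
 */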

/* "const void *" because some subsystems pass a const
 * pointer (e.g. const struct tcp_congestion_ops *)
 */
bool bpf_struct_ops_get(const void *kdata)
{
	struct bpf_struct_ops_value *kvalue;

	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);

	return refcount_inc_not_zero(&kvalue->refcnt);
}

static void bpf_struct_ops_put_rcu(struct rcu_head *head)
{
	struct bpf_struct_ops_map *st_map;

	st_map = container_of(head, struct bpf_struct_ops_map, rcu);
	bpf_map_put(&st_map->map);
}

void bpf_struct_ops_put(const void *kdata)
{
	struct bpf_struct_ops_value *kvalue;

	kvalue = container_of(kdata, struct bpf_struct_ops_value, data);
	if (refcount_dec_and_test(&kvalue->refcnt)) {
		struct bpf_struct_ops_map *st_map;

		st_map = container_of(kvalue, struct bpf_struct_ops_map,
				      kvalue);
		/* The struct_ops's function may switch to another struct_ops.
		 *
		 * For example, bpf_tcp_cc_x->init() may switch to
		 * another tcp_cc_y by calling
		 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
		 * During the switch, bpf_struct_ops_put(tcp_cc_x) is called
		 * and its map->refcnt may reach 0, which then frees its
		 * trampoline image while tcp_cc_x is still running.
		 *
		 * Thus, an RCU grace period is needed here.
		 */
		call_rcu(&st_map->rcu, bpf_struct_ops_put_rcu);
	}
}