// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)

struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[0] __aligned(8);
};

static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	return container_of(map, struct bpf_queue_stack, map);
}

static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
{
	return qs->head == qs->tail;
}

static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
{
	u32 head = qs->head + 1;

	if (unlikely(head >= qs->size))
		head = 0;

	return head == qs->tail;
}

/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 0 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK ||
	    !bpf_map_flags_access_ok(attr->map_flags))
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_map_memory mem = {0};
	struct bpf_queue_stack *qs;
	u64 size, queue_size, cost;

	size = (u64) attr->max_entries + 1;
	cost = queue_size = sizeof(*qs) + size * attr->value_size;

	ret = bpf_map_charge_init(&mem, cost);
	if (ret < 0)
		return ERR_PTR(ret);

	qs = bpf_map_area_alloc(queue_size, numa_node);
	if (!qs) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	memset(qs, 0, sizeof(*qs));

	bpf_map_init_from_attr(&qs->map, attr);

	bpf_map_charge_move(&qs->map.memory, &mem);
	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	bpf_map_area_free(qs);
}

static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	ptr = &qs->elements[qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}


static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	ptr = &qs->elements[index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}

/* Called from syscall or from eBPF program */
static int queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
				     u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	raw_spin_lock_irqsave(&qs->lock, irq_flags);

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}

/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}

const struct bpf_map_ops queue_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};

const struct bpf_map_ops stack_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = stack_map_pop_elem,
	.map_peek_elem = stack_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};
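
The push, pop and peek operations implemented above back the generic bpf_map_push_elem(), bpf_map_pop_elem() and bpf_map_peek_elem() helpers available to BPF programs. Below is a minimal sketch of how a libbpf-style program might exercise a BPF_MAP_TYPE_QUEUE; the map name, attach points and the __u32 payload are illustrative assumptions, not taken from this file.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* Queue/stack maps take no key, so only value_size and max_entries are set. */
struct {
	__uint(type, BPF_MAP_TYPE_QUEUE);
	__uint(max_entries, 16);
	__uint(value_size, sizeof(__u32));
} events_queue SEC(".maps");	/* hypothetical map name */

SEC("tracepoint/syscalls/sys_enter_write")	/* hypothetical attach point */
int producer(void *ctx)
{
	__u32 val = 42;	/* illustrative payload */

	/* With flags == 0 a full queue fails with -E2BIG; BPF_EXIST would
	 * overwrite the oldest element instead, as in queue_stack_map_push_elem().
	 */
	bpf_map_push_elem(&events_queue, &val, 0);
	return 0;
}

SEC("tracepoint/syscalls/sys_exit_write")	/* hypothetical attach point */
int consumer(void *ctx)
{
	__u32 val;

	/* Returns -ENOENT and zeroes 'val' when the queue is empty,
	 * matching __queue_map_get() above.
	 */
	bpf_map_pop_elem(&events_queue, &val);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";

From user space the same paths are reached through the map syscalls: bpf_map_update_elem() with a NULL key pushes an element, and bpf_map_lookup_and_delete_elem() pops one.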