// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
#include <linux/bpf.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/capability.h>
#include "percpu_freelist.h"

#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)


struct bpf_queue_stack {
	struct bpf_map map;
	raw_spinlock_t lock;
	u32 head, tail;
	u32 size; /* max_entries + 1 */

	char elements[0] __aligned(8);
};

static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	return container_of(map, struct bpf_queue_stack, map);
}

static bool queue_stack_map_is_empty(struct bpf_queue_stack *qs)
{
	return qs->head == qs->tail;
}

static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
{
	u32 head = qs->head + 1;

	if (unlikely(head >= qs->size))
		head = 0;

	return head == qs->tail;
}
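
/*
 * Illustrative note (derived from the checks above): the backing array has
 * qs->size == max_entries + 1 slots, so one slot is always left unused.
 * That spare slot lets "empty" (head == tail) be told apart from "full"
 * ((head + 1) % size == tail) without a separate element counter. For
 * example, with max_entries = 2 (size = 3):
 *
 *	head == tail == 0		-> empty
 *	after two pushes: head = 2	-> full, since (2 + 1) % 3 == tail == 0
 */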

/* Called from syscall */
static int queue_stack_map_alloc_check(union bpf_attr *attr)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 0 ||
	    attr->value_size == 0 ||
	    attr->map_flags & ~QUEUE_STACK_CREATE_FLAG_MASK)
		return -EINVAL;

	if (attr->value_size > KMALLOC_MAX_SIZE)
		/* if value_size is bigger, the user space won't be able to
		 * access the elements.
		 */
		return -E2BIG;

	return 0;
}

static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
	int ret, numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_queue_stack *qs;
	u64 size, queue_size, cost;

	size = (u64) attr->max_entries + 1;
	cost = queue_size = sizeof(*qs) + size * attr->value_size;
	if (cost >= U32_MAX - PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

	ret = bpf_map_precharge_memlock(cost);
	if (ret < 0)
		return ERR_PTR(ret);

	qs = bpf_map_area_alloc(queue_size, numa_node);
	if (!qs)
		return ERR_PTR(-ENOMEM);

	memset(qs, 0, sizeof(*qs));

	bpf_map_init_from_attr(&qs->map, attr);

	qs->map.pages = cost;
	qs->size = size;

	raw_spin_lock_init(&qs->lock);

	return &qs->map;
}
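
/*
 * Sizing sketch (hypothetical numbers, for illustration only): with
 * max_entries = 128 and value_size = 8, size is 129 and queue_size is
 * sizeof(*qs) + 129 * 8 bytes. The memlock cost charged above is that
 * byte count rounded up to whole pages and expressed in pages, i.e.
 * round_up(cost, PAGE_SIZE) >> PAGE_SHIFT.
 */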

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free(struct bpf_map *map)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);

	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
	 * so the programs (can be more than one that used this map) were
	 * disconnected from events. Wait for outstanding critical sections in
	 * these programs to complete
	 */
	synchronize_rcu();

	bpf_map_area_free(qs);
}

static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	ptr = &qs->elements[qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}


static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	ptr = &qs->elements[index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
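
/*
 * Note on ordering (derived from the two getters above): __queue_map_get()
 * consumes from qs->tail, the oldest element, giving FIFO behaviour, while
 * __stack_map_get() reads the slot just below qs->head, the newest element,
 * giving LIFO behaviour. Both map types share the same push path below.
 */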

/* Called from syscall or from eBPF program */
static int queue_map_peek_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int stack_map_peek_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, false);
}

/* Called from syscall or from eBPF program */
static int queue_map_pop_elem(struct bpf_map *map, void *value)
{
	return __queue_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int stack_map_pop_elem(struct bpf_map *map, void *value)
{
	return __stack_map_get(map, value, true);
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
				     u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	raw_spin_lock_irqsave(&qs->lock, irq_flags);

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	dst = &qs->elements[qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
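
/*
 * Behavioural note (derived from the code above): when the map is full and
 * BPF_EXIST is set, qs->tail is advanced before the new element is written,
 * so the discarded element is always the oldest one pushed - the front of
 * the queue for queue maps, or the bottom of the stack for stack maps.
 */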

/* Called from syscall or from eBPF program */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

/* Called from syscall */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}

const struct bpf_map_ops queue_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = queue_map_pop_elem,
	.map_peek_elem = queue_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};

const struct bpf_map_ops stack_map_ops = {
	.map_alloc_check = queue_stack_map_alloc_check,
	.map_alloc = queue_stack_map_alloc,
	.map_free = queue_stack_map_free,
	.map_lookup_elem = queue_stack_map_lookup_elem,
	.map_update_elem = queue_stack_map_update_elem,
	.map_delete_elem = queue_stack_map_delete_elem,
	.map_push_elem = queue_stack_map_push_elem,
	.map_pop_elem = stack_map_pop_elem,
	.map_peek_elem = stack_map_peek_elem,
	.map_get_next_key = queue_stack_map_get_next_key,
};
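
/*
 * Usage sketch (illustrative only, not part of this file; the map and
 * variable names below are made up): a BPF program drives these maps
 * through the bpf_map_push_elem(), bpf_map_pop_elem() and
 * bpf_map_peek_elem() helpers introduced alongside this map type, e.g.:
 *
 *	struct bpf_map_def SEC("maps") my_queue = {
 *		.type		= BPF_MAP_TYPE_QUEUE,
 *		.key_size	= 0,		// alloc_check requires key_size == 0
 *		.value_size	= sizeof(__u64),
 *		.max_entries	= 128,
 *	};
 *
 *	__u64 val = 42;
 *	bpf_map_push_elem(&my_queue, &val, BPF_ANY);	// enqueue
 *	bpf_map_pop_elem(&my_queue, &val);		// dequeue oldest (FIFO)
 *	bpf_map_peek_elem(&my_queue, &val);		// read without removing
 *
 * From user space, the same push/pop/peek paths are reached (with no key)
 * through the BPF_MAP_UPDATE_ELEM, BPF_MAP_LOOKUP_AND_DELETE_ELEM and
 * BPF_MAP_LOOKUP_ELEM syscall commands of this kernel series.
 */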