Commit | Line | Data |
---|---|---|
4cf1bc1f KS |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * Copyright (c) 2020 Facebook | |
4 | * Copyright 2020 Google LLC. | |
5 | */ | |
6 | ||
7 | #include <linux/pid.h> | |
8 | #include <linux/sched.h> | |
9 | #include <linux/rculist.h> | |
10 | #include <linux/list.h> | |
11 | #include <linux/hash.h> | |
12 | #include <linux/types.h> | |
13 | #include <linux/spinlock.h> | |
14 | #include <linux/bpf.h> | |
15 | #include <linux/bpf_local_storage.h> | |
16 | #include <linux/filter.h> | |
17 | #include <uapi/linux/btf.h> | |
4cf1bc1f KS |
18 | #include <linux/btf_ids.h> |
19 | #include <linux/fdtable.h> | |
0fe4b381 | 20 | #include <linux/rcupdate_trace.h> |
4cf1bc1f KS |
21 | |
/* Cache-slot bookkeeping shared by all task-storage maps. */
DEFINE_BPF_STORAGE_CACHE(task_cache);

/*
 * Per-CPU recursion counter: non-zero while this CPU is inside a
 * task-storage operation (see bpf_task_storage_lock()/trylock() below).
 */
static DEFINE_PER_CPU(int, bpf_task_storage_busy);
bc235cdb SL |
25 | |
/*
 * Mark this CPU as busy with a task-storage operation.  Migration is
 * disabled first so the per-cpu busy counter and the work it guards stay
 * on the same CPU.  This variant is unconditional; paths that may recurse
 * use bpf_task_storage_trylock() instead.
 */
static void bpf_task_storage_lock(void)
{
	migrate_disable();
	__this_cpu_inc(bpf_task_storage_busy);
}
31 | ||
/*
 * Undo bpf_task_storage_lock()/trylock(): drop the per-cpu busy count,
 * then re-enable migration (reverse order of acquisition).
 */
static void bpf_task_storage_unlock(void)
{
	__this_cpu_dec(bpf_task_storage_busy);
	migrate_enable();
}
37 | ||
/*
 * Non-blocking variant of bpf_task_storage_lock().  Returns false (taking
 * nothing) if this CPU is already inside a task-storage operation, i.e.
 * the busy counter was non-zero before our increment.  This is how
 * recursive entry (e.g. a tracing prog firing inside a storage op) is
 * detected and refused rather than deadlocking.
 */
static bool bpf_task_storage_trylock(void)
{
	migrate_disable();
	if (unlikely(__this_cpu_inc_return(bpf_task_storage_busy) != 1)) {
		__this_cpu_dec(bpf_task_storage_busy);
		migrate_enable();
		return false;
	}
	return true;
}
48 | ||
4cf1bc1f KS |
49 | static struct bpf_local_storage __rcu **task_storage_ptr(void *owner) |
50 | { | |
51 | struct task_struct *task = owner; | |
4cf1bc1f | 52 | |
a10787e6 | 53 | return &task->bpf_storage; |
4cf1bc1f KS |
54 | } |
55 | ||
/*
 * Find @map's storage element for @task, or NULL if @task has no local
 * storage at all (or no element for this map).  @cacheit_lockit is passed
 * through to bpf_local_storage_lookup() to request caching of the result;
 * callers that set it must hold bpf_task_storage_lock().
 */
static struct bpf_local_storage_data *
task_storage_lookup(struct task_struct *task, struct bpf_map *map,
		    bool cacheit_lockit)
{
	struct bpf_local_storage *task_storage;
	struct bpf_local_storage_map *smap;

	/* Valid under normal RCU or RCU-tasks-trace (sleepable progs). */
	task_storage =
		rcu_dereference_check(task->bpf_storage, bpf_rcu_lock_held());
	if (!task_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(task_storage, smap, cacheit_lockit);
}
71 | ||
/*
 * Release every local-storage element attached to @task.  Called from the
 * task-exit path; no new elements can appear for this task, but it may
 * race with bpf_local_storage_map_free() (see comment below).
 */
void bpf_task_storage_free(struct task_struct *task)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage *local_storage;
	bool free_task_storage = false;
	struct hlist_node *n;
	unsigned long flags;

	rcu_read_lock();

	local_storage = rcu_dereference(task->bpf_storage);
	if (!local_storage) {
		rcu_read_unlock();
		return;
	}

	/* Neither the bpf_prog nor the bpf-map's syscall
	 * could be modifying the local_storage->list now.
	 * Thus, no elem can be added-to or deleted-from the
	 * local_storage->list by the bpf_prog or by the bpf-map's syscall.
	 *
	 * It is racing with bpf_local_storage_map_free() alone
	 * when unlinking elem from the local_storage->list and
	 * the map's bucket->list.
	 */
	bpf_task_storage_lock();
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
		/* Always unlink from map before unlinking from
		 * local_storage.
		 */
		bpf_selem_unlink_map(selem);
		free_task_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, false);
	}
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
	bpf_task_storage_unlock();
	rcu_read_unlock();

	/* free_task_storage should always be true as long as
	 * local_storage->list was non-empty.
	 */
	if (free_task_storage)
		kfree_rcu(local_storage, rcu);
}
117 | ||
118 | static void *bpf_pid_task_storage_lookup_elem(struct bpf_map *map, void *key) | |
119 | { | |
120 | struct bpf_local_storage_data *sdata; | |
121 | struct task_struct *task; | |
122 | unsigned int f_flags; | |
123 | struct pid *pid; | |
124 | int fd, err; | |
125 | ||
126 | fd = *(int *)key; | |
127 | pid = pidfd_get_pid(fd, &f_flags); | |
128 | if (IS_ERR(pid)) | |
129 | return ERR_CAST(pid); | |
130 | ||
131 | /* We should be in an RCU read side critical section, it should be safe | |
132 | * to call pid_task. | |
133 | */ | |
134 | WARN_ON_ONCE(!rcu_read_lock_held()); | |
135 | task = pid_task(pid, PIDTYPE_PID); | |
136 | if (!task) { | |
137 | err = -ENOENT; | |
138 | goto out; | |
139 | } | |
140 | ||
bc235cdb | 141 | bpf_task_storage_lock(); |
4cf1bc1f | 142 | sdata = task_storage_lookup(task, map, true); |
bc235cdb | 143 | bpf_task_storage_unlock(); |
4cf1bc1f KS |
144 | put_pid(pid); |
145 | return sdata ? sdata->data : NULL; | |
146 | out: | |
147 | put_pid(pid); | |
148 | return ERR_PTR(err); | |
149 | } | |
150 | ||
/*
 * Syscall (BPF_MAP_UPDATE_ELEM) path: @key is a pidfd identifying the
 * target task.  Creates or updates @map's element with @value; the
 * BPF_ANY/BPF_NOEXIST/BPF_EXIST semantics of @map_flags are enforced by
 * bpf_local_storage_update().
 */
static int bpf_pid_task_storage_update_elem(struct bpf_map *map, void *key,
					    void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct task_struct *task;
	unsigned int f_flags;
	struct pid *pid;
	int fd, err;

	fd = *(int *)key;
	pid = pidfd_get_pid(fd, &f_flags);
	if (IS_ERR(pid))
		return PTR_ERR(pid);

	/* We should be in an RCU read side critical section, it should be safe
	 * to call pid_task.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held());
	task = pid_task(pid, PIDTYPE_PID);
	if (!task) {
		err = -ENOENT;
		goto out;
	}

	bpf_task_storage_lock();
	sdata = bpf_local_storage_update(
		task, (struct bpf_local_storage_map *)map, value, map_flags);
	bpf_task_storage_unlock();

	err = PTR_ERR_OR_ZERO(sdata);
out:
	put_pid(pid);
	return err;
}
185 | ||
/*
 * Unlink (and thereby schedule freeing of) @map's element in @task's
 * storage.  Returns -ENOENT if no element exists.  Callers hold
 * bpf_task_storage_lock()/trylock().
 */
static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = task_storage_lookup(task, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata));

	return 0;
}
198 | ||
/*
 * Syscall (BPF_MAP_DELETE_ELEM) path: @key is a pidfd identifying the
 * target task.
 */
static int bpf_pid_task_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct task_struct *task;
	unsigned int f_flags;
	struct pid *pid;
	int fd, err;

	fd = *(int *)key;
	pid = pidfd_get_pid(fd, &f_flags);
	if (IS_ERR(pid))
		return PTR_ERR(pid);

	/* We should be in an RCU read side critical section, it should be safe
	 * to call pid_task.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held());
	task = pid_task(pid, PIDTYPE_PID);
	if (!task) {
		err = -ENOENT;
		goto out;
	}

	bpf_task_storage_lock();
	err = task_storage_delete(task, map);
	bpf_task_storage_unlock();
out:
	put_pid(pid);
	return err;
}
228 | ||
/*
 * bpf_task_storage_get() helper (prog-side lookup).  Returns a pointer to
 * @task's element for @map, optionally creating it from @value when
 * BPF_LOCAL_STORAGE_GET_F_CREATE is set in @flags.  Returns NULL on any
 * failure: bad flags, NULL task, recursion detected, or a failed create.
 */
BPF_CALL_4(bpf_task_storage_get, struct bpf_map *, map, struct task_struct *,
	   task, void *, value, u64, flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE))
		return (unsigned long)NULL;

	if (!task)
		return (unsigned long)NULL;

	/* A prog may fire recursively on this CPU; refuse instead of deadlocking. */
	if (!bpf_task_storage_trylock())
		return (unsigned long)NULL;

	sdata = task_storage_lookup(task, map, true);
	if (sdata)
		goto unlock;

	/* only allocate new storage, when the task is refcounted */
	if (refcount_read(&task->usage) &&
	    (flags & BPF_LOCAL_STORAGE_GET_F_CREATE))
		sdata = bpf_local_storage_update(
			task, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST);

unlock:
	bpf_task_storage_unlock();
	return IS_ERR_OR_NULL(sdata) ? (unsigned long)NULL :
		(unsigned long)sdata->data;
}
260 | ||
/*
 * bpf_task_storage_delete() helper (prog-side delete).  Returns 0 on
 * success, -EINVAL for a NULL task, -EBUSY when recursion is detected by
 * the trylock, or -ENOENT when @task has no element for @map.
 */
BPF_CALL_2(bpf_task_storage_delete, struct bpf_map *, map, struct task_struct *,
	   task)
{
	int ret;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!task)
		return -EINVAL;

	if (!bpf_task_storage_trylock())
		return -EBUSY;

	/* This helper must only be called from places where the lifetime of the task
	 * is guaranteed. Either by being refcounted or by being protected
	 * by an RCU read-side critical section.
	 */
	ret = task_storage_delete(task, map);
	bpf_task_storage_unlock();
	return ret;
}
281 | ||
/* Key iteration makes no sense for task-local storage: always refuse. */
static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	return -ENOTSUPP;
}
286 | ||
287 | static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr) | |
288 | { | |
289 | struct bpf_local_storage_map *smap; | |
290 | ||
291 | smap = bpf_local_storage_map_alloc(attr); | |
292 | if (IS_ERR(smap)) | |
293 | return ERR_CAST(smap); | |
294 | ||
295 | smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache); | |
296 | return &smap->map; | |
297 | } | |
298 | ||
299 | static void task_storage_map_free(struct bpf_map *map) | |
300 | { | |
301 | struct bpf_local_storage_map *smap; | |
302 | ||
303 | smap = (struct bpf_local_storage_map *)map; | |
304 | bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx); | |
bc235cdb | 305 | bpf_local_storage_map_free(smap, &bpf_task_storage_busy); |
4cf1bc1f KS |
306 | } |
307 | ||
static int task_storage_map_btf_id;
/* Map operations backing BPF task-storage maps. */
const struct bpf_map_ops task_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = task_storage_map_alloc,
	.map_free = task_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_pid_task_storage_lookup_elem,
	.map_update_elem = bpf_pid_task_storage_update_elem,
	.map_delete_elem = bpf_pid_task_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_name = "bpf_local_storage_map",
	.map_btf_id = &task_storage_map_btf_id,
	.map_owner_storage_ptr = task_storage_ptr,
};
323 | ||
4cf1bc1f KS |
/* Verifier-visible signature of the bpf_task_storage_get() helper. */
const struct bpf_func_proto bpf_task_storage_get_proto = {
	.func = bpf_task_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};
334 | ||
/* Verifier-visible signature of the bpf_task_storage_delete() helper. */
const struct bpf_func_proto bpf_task_storage_delete_proto = {
	.func = bpf_task_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID,
	.arg2_btf_id = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
};