// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */

#include <linux/init.h>
#include <linux/namei.h>
#include <linux/pid_namespace.h>
#include <linux/fs.h>
#include <linux/fdtable.h>
#include <linux/filter.h>
#include <linux/bpf_mem_alloc.h>
#include <linux/btf_ids.h>
#include <linux/mm_types.h>
#include "mmap_unlock_work.h"

static const char * const iter_task_type_names[] = {
        "ALL",
        "TID",
        "PID",
};

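/* State shared by the three task iterators below. @ns pins the pid
 * namespace the iteration runs in, @type selects ALL/TID/TGID mode,
 * @pid holds the target tid or tgid, and @pid_visiting tracks the
 * thread currently being visited in TGID mode.
 */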
struct bpf_iter_seq_task_common {
        struct pid_namespace *ns;
        enum bpf_iter_task_type type;
        u32 pid;
        u32 pid_visiting;
};

struct bpf_iter_seq_task_info {
        /* The first field must be struct bpf_iter_seq_task_common.
         * This is assumed by the {init, fini}_seq_pidns() callback functions.
         */
        struct bpf_iter_seq_task_common common;
        u32 tid;
};

static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
                                                   u32 *tid,
                                                   bool skip_if_dup_files)
{
        struct task_struct *task;
        struct pid *pid;
        u32 next_tid;

        if (!*tid) {
                /* This is the first time the iterator calls this function. */
                pid = find_pid_ns(common->pid, common->ns);
                task = get_pid_task(pid, PIDTYPE_TGID);
                if (!task)
                        return NULL;

                *tid = common->pid;
                common->pid_visiting = common->pid;

                return task;
        }

        /* If control returns to user space and comes back to the
         * kernel again, *tid and common->pid_visiting should be the
         * same for task_seq_start() to pick up the correct task.
         */
        if (*tid == common->pid_visiting) {
                pid = find_pid_ns(common->pid_visiting, common->ns);
                task = get_pid_task(pid, PIDTYPE_PID);

                return task;
        }

        task = find_task_by_pid_ns(common->pid_visiting, common->ns);
        if (!task)
                return NULL;

retry:
        task = __next_thread(task);
        if (!task)
                return NULL;

        next_tid = __task_pid_nr_ns(task, PIDTYPE_PID, common->ns);
        if (!next_tid)
                goto retry;

        if (skip_if_dup_files && task->files == task->group_leader->files)
                goto retry;

        *tid = common->pid_visiting = next_tid;
        get_task_struct(task);
        return task;
}

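/* Resolve the next task for the configured iteration mode: a single task
 * for BPF_TASK_ITER_TID, the threads of one process for BPF_TASK_ITER_TGID,
 * or a walk of all pids in the namespace via find_ge_pid() otherwise.
 * On success, a reference to the returned task is held.
 */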
static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
                                             u32 *tid,
                                             bool skip_if_dup_files)
{
        struct task_struct *task = NULL;
        struct pid *pid;

        if (common->type == BPF_TASK_ITER_TID) {
                if (*tid && *tid != common->pid)
                        return NULL;
                rcu_read_lock();
                pid = find_pid_ns(common->pid, common->ns);
                if (pid) {
                        task = get_pid_task(pid, PIDTYPE_TGID);
                        *tid = common->pid;
                }
                rcu_read_unlock();

                return task;
        }

        if (common->type == BPF_TASK_ITER_TGID) {
                rcu_read_lock();
                task = task_group_seq_get_next(common, tid, skip_if_dup_files);
                rcu_read_unlock();

                return task;
        }

        rcu_read_lock();
retry:
        pid = find_ge_pid(*tid, common->ns);
        if (pid) {
                *tid = pid_nr_ns(pid, common->ns);
                task = get_pid_task(pid, PIDTYPE_PID);
                if (!task) {
                        ++*tid;
                        goto retry;
                } else if (skip_if_dup_files && !thread_group_leader(task) &&
                           task->files == task->group_leader->files) {
                        put_task_struct(task);
                        task = NULL;
                        ++*tid;
                        goto retry;
                }
        }
        rcu_read_unlock();

        return task;
}

static void *task_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_task_info *info = seq->private;
        struct task_struct *task;

        task = task_seq_get_next(&info->common, &info->tid, false);
        if (!task)
                return NULL;

        if (*pos == 0)
                ++*pos;
        return task;
}

static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_task_info *info = seq->private;
        struct task_struct *task;

        ++*pos;
        ++info->tid;
        put_task_struct((struct task_struct *)v);
        task = task_seq_get_next(&info->common, &info->tid, false);
        if (!task)
                return NULL;

        return task;
}

struct bpf_iter__task {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct task_struct *, task);
};

DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)

static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
                           bool in_stop)
{
        struct bpf_iter_meta meta;
        struct bpf_iter__task ctx;
        struct bpf_prog *prog;

        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, in_stop);
        if (!prog)
                return 0;

        ctx.meta = &meta;
        ctx.task = task;
        return bpf_iter_run_prog(prog, &ctx);
}

static int task_seq_show(struct seq_file *seq, void *v)
{
        return __task_seq_show(seq, v, false);
}

static void task_seq_stop(struct seq_file *seq, void *v)
{
        if (!v)
                (void)__task_seq_show(seq, v, true);
        else
                put_task_struct((struct task_struct *)v);
}

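/* Attach-time validation: at most one of tid, pid, or pid_fd may be set.
 * tid selects a single task; pid and pid_fd both select a whole process
 * (pid_fd is resolved to a tgid in the caller's pid namespace).
 */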
static int bpf_iter_attach_task(struct bpf_prog *prog,
                                union bpf_iter_link_info *linfo,
                                struct bpf_iter_aux_info *aux)
{
        unsigned int flags;
        struct pid *pid;
        pid_t tgid;

        if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
                return -EINVAL;

        aux->task.type = BPF_TASK_ITER_ALL;
        if (linfo->task.tid != 0) {
                aux->task.type = BPF_TASK_ITER_TID;
                aux->task.pid = linfo->task.tid;
        }
        if (linfo->task.pid != 0) {
                aux->task.type = BPF_TASK_ITER_TGID;
                aux->task.pid = linfo->task.pid;
        }
        if (linfo->task.pid_fd != 0) {
                aux->task.type = BPF_TASK_ITER_TGID;

                pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
                if (IS_ERR(pid))
                        return PTR_ERR(pid);

                tgid = pid_nr_ns(pid, task_active_pid_ns(current));
                aux->task.pid = tgid;
                put_pid(pid);
        }

        return 0;
}

static const struct seq_operations task_seq_ops = {
        .start  = task_seq_start,
        .next   = task_seq_next,
        .stop   = task_seq_stop,
        .show   = task_seq_show,
};

struct bpf_iter_seq_task_file_info {
        /* The first field must be struct bpf_iter_seq_task_common.
         * This is assumed by the {init, fini}_seq_pidns() callback functions.
         */
        struct bpf_iter_seq_task_common common;
        struct task_struct *task;
        u32 tid;
        u32 fd;
};

static struct file *
task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
{
        u32 saved_tid = info->tid;
        struct task_struct *curr_task;
        unsigned int curr_fd = info->fd;
        struct file *f;

        /* If this function returns a non-NULL file object, it holds
         * references to both the task and the file. Otherwise, it
         * holds no reference.
         */
again:
        if (info->task) {
                curr_task = info->task;
                curr_fd = info->fd;
        } else {
                curr_task = task_seq_get_next(&info->common, &info->tid, true);
                if (!curr_task) {
                        info->task = NULL;
                        return NULL;
                }

                /* set info->task */
                info->task = curr_task;
                if (saved_tid == info->tid)
                        curr_fd = info->fd;
                else
                        curr_fd = 0;
        }

        rcu_read_lock();
        f = task_lookup_next_fdget_rcu(curr_task, &curr_fd);
        if (f) {
                /* set info->fd */
                info->fd = curr_fd;
                rcu_read_unlock();
                return f;
        }

        /* the current task is done, go to the next task */
        rcu_read_unlock();
        put_task_struct(curr_task);

        if (info->common.type == BPF_TASK_ITER_TID) {
                info->task = NULL;
                return NULL;
        }

        info->task = NULL;
        info->fd = 0;
        saved_tid = ++(info->tid);
        goto again;
}

static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;
        struct file *file;

        info->task = NULL;
        file = task_file_seq_get_next(info);
        if (file && *pos == 0)
                ++*pos;

        return file;
}

static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;

        ++*pos;
        ++info->fd;
        fput((struct file *)v);
        return task_file_seq_get_next(info);
}

struct bpf_iter__task_file {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct task_struct *, task);
        u32 fd __aligned(8);
        __bpf_md_ptr(struct file *, file);
};

DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
                     struct task_struct *task, u32 fd,
                     struct file *file)

static int __task_file_seq_show(struct seq_file *seq, struct file *file,
                                bool in_stop)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;
        struct bpf_iter__task_file ctx;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;

        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, in_stop);
        if (!prog)
                return 0;

        ctx.meta = &meta;
        ctx.task = info->task;
        ctx.fd = info->fd;
        ctx.file = file;
        return bpf_iter_run_prog(prog, &ctx);
}

static int task_file_seq_show(struct seq_file *seq, void *v)
{
        return __task_file_seq_show(seq, v, false);
}

static void task_file_seq_stop(struct seq_file *seq, void *v)
{
        struct bpf_iter_seq_task_file_info *info = seq->private;

        if (!v) {
                (void)__task_file_seq_show(seq, v, true);
        } else {
                fput((struct file *)v);
                put_task_struct(info->task);
                info->task = NULL;
        }
}

static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
{
        struct bpf_iter_seq_task_common *common = priv_data;

        common->ns = get_pid_ns(task_active_pid_ns(current));
        common->type = aux->task.type;
        common->pid = aux->task.pid;

        return 0;
}

static void fini_seq_pidns(void *priv_data)
{
        struct bpf_iter_seq_task_common *common = priv_data;

        put_pid_ns(common->ns);
}

static const struct seq_operations task_file_seq_ops = {
        .start  = task_file_seq_start,
        .next   = task_file_seq_next,
        .stop   = task_file_seq_stop,
        .show   = task_file_seq_show,
};

struct bpf_iter_seq_task_vma_info {
        /* The first field must be struct bpf_iter_seq_task_common.
         * This is assumed by the {init, fini}_seq_pidns() callback functions.
         */
        struct bpf_iter_seq_task_common common;
        struct task_struct *task;
        struct mm_struct *mm;
        struct vm_area_struct *vma;
        u32 tid;
        unsigned long prev_vm_start;
        unsigned long prev_vm_end;
};

enum bpf_task_vma_iter_find_op {
        task_vma_iter_first_vma,   /* use find_vma() with addr 0 */
        task_vma_iter_next_vma,    /* use find_vma() with curr_vma->vm_end */
        task_vma_iter_find_vma,    /* use find_vma() to find next vma */
};

static struct vm_area_struct *
task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
{
        enum bpf_task_vma_iter_find_op op;
        struct vm_area_struct *curr_vma;
        struct task_struct *curr_task;
        struct mm_struct *curr_mm;
        u32 saved_tid = info->tid;

        /* If this function returns a non-NULL vma, it holds a reference to
         * the task_struct, holds a refcount on mm->mm_users, and holds
         * read lock on vma->mm->mmap_lock.
         * If this function returns NULL, it does not hold any reference or
         * lock.
         */
        if (info->task) {
                curr_task = info->task;
                curr_vma = info->vma;
                curr_mm = info->mm;
                /* In case of lock contention, drop mmap_lock to unblock
                 * the writer.
                 *
                 * After relock, call find_vma(mm, prev_vm_end - 1) to find
                 * the new vma to process.
                 *
                 *   +------+------+-----------+
                 *   | VMA1 | VMA2 | VMA3      |
                 *   +------+------+-----------+
                 *   |      |      |           |
                 *  4k     8k     16k         400k
                 *
                 * For example, curr_vma == VMA2. Before unlock, we set
                 *
                 *    prev_vm_start = 8k
                 *    prev_vm_end   = 16k
                 *
                 * There are a few cases:
                 *
                 * 1) VMA2 is freed, but VMA3 exists.
                 *
                 *    find_vma() will return VMA3, just process VMA3.
                 *
                 * 2) VMA2 still exists.
                 *
                 *    find_vma() will return VMA2, process VMA2->next.
                 *
                 * 3) no more vma in this mm.
                 *
                 *    Process the next task.
                 *
                 * 4) find_vma() returns a different vma, VMA2'.
                 *
                 *    4.1) If VMA2 covers the same range as VMA2', skip VMA2',
                 *         because we already covered the range;
                 *    4.2) VMA2 and VMA2' cover different ranges, process
                 *         VMA2'.
                 */
                if (mmap_lock_is_contended(curr_mm)) {
                        info->prev_vm_start = curr_vma->vm_start;
                        info->prev_vm_end = curr_vma->vm_end;
                        op = task_vma_iter_find_vma;
                        mmap_read_unlock(curr_mm);
                        if (mmap_read_lock_killable(curr_mm)) {
                                mmput(curr_mm);
                                goto finish;
                        }
                } else {
                        op = task_vma_iter_next_vma;
                }
        } else {
again:
                curr_task = task_seq_get_next(&info->common, &info->tid, true);
                if (!curr_task) {
                        info->tid++;
                        goto finish;
                }

                if (saved_tid != info->tid) {
                        /* new task, process the first vma */
                        op = task_vma_iter_first_vma;
                } else {
                        /* Found the same tid, which means user space has
                         * finished the data in the previous buffer and is
                         * reading more. We dropped mmap_lock before
                         * returning to user space, so it is necessary to
                         * use find_vma() to find the next vma to process.
                         */
                        op = task_vma_iter_find_vma;
                }

                curr_mm = get_task_mm(curr_task);
                if (!curr_mm)
                        goto next_task;

                if (mmap_read_lock_killable(curr_mm)) {
                        mmput(curr_mm);
                        goto finish;
                }
        }

        switch (op) {
        case task_vma_iter_first_vma:
                curr_vma = find_vma(curr_mm, 0);
                break;
        case task_vma_iter_next_vma:
                curr_vma = find_vma(curr_mm, curr_vma->vm_end);
                break;
        case task_vma_iter_find_vma:
                /* We dropped mmap_lock so it is necessary to use find_vma()
                 * to find the next vma. This is similar to the mechanism
                 * in show_smaps_rollup().
                 */
                curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
                /* case 1) and 4.2) above just use curr_vma */

                /* check for case 2) or case 4.1) above */
                if (curr_vma &&
                    curr_vma->vm_start == info->prev_vm_start &&
                    curr_vma->vm_end == info->prev_vm_end)
                        curr_vma = find_vma(curr_mm, curr_vma->vm_end);
                break;
        }
        if (!curr_vma) {
                /* case 3) above, or case 2) 4.1) with vma->next == NULL */
                mmap_read_unlock(curr_mm);
                mmput(curr_mm);
                goto next_task;
        }
        info->task = curr_task;
        info->vma = curr_vma;
        info->mm = curr_mm;
        return curr_vma;

next_task:
        if (info->common.type == BPF_TASK_ITER_TID)
                goto finish;

        put_task_struct(curr_task);
        info->task = NULL;
        info->mm = NULL;
        info->tid++;
        goto again;

finish:
        if (curr_task)
                put_task_struct(curr_task);
        info->task = NULL;
        info->vma = NULL;
        info->mm = NULL;
        return NULL;
}

static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;
        struct vm_area_struct *vma;

        vma = task_vma_seq_get_next(info);
        if (vma && *pos == 0)
                ++*pos;

        return vma;
}

static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;

        ++*pos;
        return task_vma_seq_get_next(info);
}

struct bpf_iter__task_vma {
        __bpf_md_ptr(struct bpf_iter_meta *, meta);
        __bpf_md_ptr(struct task_struct *, task);
        __bpf_md_ptr(struct vm_area_struct *, vma);
};

DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
                     struct task_struct *task, struct vm_area_struct *vma)

static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;
        struct bpf_iter__task_vma ctx;
        struct bpf_iter_meta meta;
        struct bpf_prog *prog;

        meta.seq = seq;
        prog = bpf_iter_get_info(&meta, in_stop);
        if (!prog)
                return 0;

        ctx.meta = &meta;
        ctx.task = info->task;
        ctx.vma = info->vma;
        return bpf_iter_run_prog(prog, &ctx);
}

static int task_vma_seq_show(struct seq_file *seq, void *v)
{
        return __task_vma_seq_show(seq, false);
}

static void task_vma_seq_stop(struct seq_file *seq, void *v)
{
        struct bpf_iter_seq_task_vma_info *info = seq->private;

        if (!v) {
                (void)__task_vma_seq_show(seq, true);
        } else {
                /* info->vma has not been seen by the BPF program. If user
                 * space reads more, task_vma_seq_get_next() should return
                 * this vma again. Set prev_vm_start to ~0UL so that we
                 * don't skip the vma returned by the next find_vma()
                 * (case task_vma_iter_find_vma in task_vma_seq_get_next()).
                 */
                info->prev_vm_start = ~0UL;
                info->prev_vm_end = info->vma->vm_end;
                mmap_read_unlock(info->mm);
                mmput(info->mm);
                info->mm = NULL;
                put_task_struct(info->task);
                info->task = NULL;
        }
}

static const struct seq_operations task_vma_seq_ops = {
        .start  = task_vma_seq_start,
        .next   = task_vma_seq_next,
        .stop   = task_vma_seq_stop,
        .show   = task_vma_seq_show,
};

static const struct bpf_iter_seq_info task_seq_info = {
        .seq_ops                = &task_seq_ops,
        .init_seq_private       = init_seq_pidns,
        .fini_seq_private       = fini_seq_pidns,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_task_info),
};

static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
{
        switch (aux->task.type) {
        case BPF_TASK_ITER_TID:
                info->iter.task.tid = aux->task.pid;
                break;
        case BPF_TASK_ITER_TGID:
                info->iter.task.pid = aux->task.pid;
                break;
        default:
                break;
        }
        return 0;
}

static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
{
        seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
        if (aux->task.type == BPF_TASK_ITER_TID)
                seq_printf(seq, "tid:\t%u\n", aux->task.pid);
        else if (aux->task.type == BPF_TASK_ITER_TGID)
                seq_printf(seq, "pid:\t%u\n", aux->task.pid);
}

static struct bpf_iter_reg task_reg_info = {
        .target                 = "task",
        .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 1,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__task, task),
                  PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
        },
        .seq_info               = &task_seq_info,
        .fill_link_info         = bpf_iter_fill_link_info,
        .show_fdinfo            = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_file_seq_info = {
        .seq_ops                = &task_file_seq_ops,
        .init_seq_private       = init_seq_pidns,
        .fini_seq_private       = fini_seq_pidns,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_task_file_info),
};

static struct bpf_iter_reg task_file_reg_info = {
        .target                 = "task_file",
        .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__task_file, task),
                  PTR_TO_BTF_ID_OR_NULL },
                { offsetof(struct bpf_iter__task_file, file),
                  PTR_TO_BTF_ID_OR_NULL },
        },
        .seq_info               = &task_file_seq_info,
        .fill_link_info         = bpf_iter_fill_link_info,
        .show_fdinfo            = bpf_iter_task_show_fdinfo,
};

static const struct bpf_iter_seq_info task_vma_seq_info = {
        .seq_ops                = &task_vma_seq_ops,
        .init_seq_private       = init_seq_pidns,
        .fini_seq_private       = fini_seq_pidns,
        .seq_priv_size          = sizeof(struct bpf_iter_seq_task_vma_info),
};

static struct bpf_iter_reg task_vma_reg_info = {
        .target                 = "task_vma",
        .attach_target          = bpf_iter_attach_task,
        .feature                = BPF_ITER_RESCHED,
        .ctx_arg_info_size      = 2,
        .ctx_arg_info           = {
                { offsetof(struct bpf_iter__task_vma, task),
                  PTR_TO_BTF_ID_OR_NULL },
                { offsetof(struct bpf_iter__task_vma, vma),
                  PTR_TO_BTF_ID_OR_NULL },
        },
        .seq_info               = &task_vma_seq_info,
        .fill_link_info         = bpf_iter_fill_link_info,
        .show_fdinfo            = bpf_iter_task_show_fdinfo,
};

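/* A minimal sketch of how a BPF program might call this helper; check_vma,
 * addr, and data are hypothetical names, not part of this file:
 *
 *      static long check_vma(struct task_struct *task,
 *                            struct vm_area_struct *vma, void *data)
 *      {
 *              // inspect vma->vm_start / vma->vm_end here
 *              return 0;
 *      }
 *
 *      bpf_find_vma(task, addr, check_vma, &data, 0);
 */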
BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
           bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
        struct mmap_unlock_irq_work *work = NULL;
        struct vm_area_struct *vma;
        bool irq_work_busy = false;
        struct mm_struct *mm;
        int ret = -ENOENT;

        if (flags)
                return -EINVAL;

        if (!task)
                return -ENOENT;

        mm = task->mm;
        if (!mm)
                return -ENOENT;

        irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);

        if (irq_work_busy || !mmap_read_trylock(mm))
                return -EBUSY;

        vma = find_vma(mm, start);

        if (vma && vma->vm_start <= start && vma->vm_end > start) {
                callback_fn((u64)(long)task, (u64)(long)vma,
                            (u64)(long)callback_ctx, 0, 0);
                ret = 0;
        }
        bpf_mmap_unlock_mm(work, mm);
        return ret;
}

const struct bpf_func_proto bpf_find_vma_proto = {
        .func           = bpf_find_vma,
        .ret_type       = RET_INTEGER,
        .arg1_type      = ARG_PTR_TO_BTF_ID,
        .arg1_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
        .arg2_type      = ARG_ANYTHING,
        .arg3_type      = ARG_PTR_TO_FUNC,
        .arg4_type      = ARG_PTR_TO_STACK_OR_NULL,
        .arg5_type      = ARG_ANYTHING,
};

struct bpf_iter_task_vma_kern_data {
        struct task_struct *task;
        struct mm_struct *mm;
        struct mmap_unlock_irq_work *work;
        struct vma_iterator vmi;
};

struct bpf_iter_task_vma {
        /* Opaque iterator state; using __u64 here preserves the correct
         * alignment requirements in vmlinux.h, generated from BTF
         */
        __u64 __opaque[1];
} __attribute__((aligned(8)));

/* Non-opaque version of bpf_iter_task_vma */
struct bpf_iter_task_vma_kern {
        struct bpf_iter_task_vma_kern_data *data;
} __attribute__((aligned(8)));

__bpf_kfunc_start_defs();

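/* Open-coded task VMA iterator kfuncs. A minimal sketch of BPF-side usage,
 * assuming the kfunc declarations are available via vmlinux.h/libbpf:
 *
 *      struct bpf_iter_task_vma vma_it;
 *      struct vm_area_struct *vma;
 *
 *      if (!bpf_iter_task_vma_new(&vma_it, task, 0))
 *              while ((vma = bpf_iter_task_vma_next(&vma_it)))
 *                      ;       // inspect vma->vm_start / vma->vm_end
 *      bpf_iter_task_vma_destroy(&vma_it);
 */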
__bpf_kfunc int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
                                      struct task_struct *task, u64 addr)
{
        struct bpf_iter_task_vma_kern *kit = (void *)it;
        bool irq_work_busy = false;
        int err;

        BUILD_BUG_ON(sizeof(struct bpf_iter_task_vma_kern) != sizeof(struct bpf_iter_task_vma));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_task_vma_kern) != __alignof__(struct bpf_iter_task_vma));

        /* is_iter_reg_valid_uninit guarantees that kit hasn't been
         * initialized before, so a non-NULL kit->data cannot point to
         * previously bpf_mem_alloc'd bpf_iter_task_vma_kern_data
         */
        kit->data = bpf_mem_alloc(&bpf_global_ma, sizeof(struct bpf_iter_task_vma_kern_data));
        if (!kit->data)
                return -ENOMEM;

        kit->data->task = get_task_struct(task);
        kit->data->mm = task->mm;
        if (!kit->data->mm) {
                err = -ENOENT;
                goto err_cleanup_iter;
        }

        /* kit->data->work == NULL is valid after bpf_mmap_unlock_get_irq_work */
        irq_work_busy = bpf_mmap_unlock_get_irq_work(&kit->data->work);
        if (irq_work_busy || !mmap_read_trylock(kit->data->mm)) {
                err = -EBUSY;
                goto err_cleanup_iter;
        }

        vma_iter_init(&kit->data->vmi, kit->data->mm, addr);
        return 0;

err_cleanup_iter:
        if (kit->data->task)
                put_task_struct(kit->data->task);
        bpf_mem_free(&bpf_global_ma, kit->data);
        /* NULL kit->data signals failed bpf_iter_task_vma initialization */
        kit->data = NULL;
        return err;
}

__bpf_kfunc struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it)
{
        struct bpf_iter_task_vma_kern *kit = (void *)it;

        if (!kit->data) /* bpf_iter_task_vma_new failed */
                return NULL;
        return vma_next(&kit->data->vmi);
}

__bpf_kfunc void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it)
{
        struct bpf_iter_task_vma_kern *kit = (void *)it;

        if (kit->data) {
                bpf_mmap_unlock_mm(kit->data->work, kit->data->mm);
                put_task_struct(kit->data->task);
                bpf_mem_free(&bpf_global_ma, kit->data);
        }
}

__bpf_kfunc_end_defs();

#ifdef CONFIG_CGROUPS

struct bpf_iter_css_task {
        __u64 __opaque[1];
} __attribute__((aligned(8)));

struct bpf_iter_css_task_kern {
        struct css_task_iter *css_it;
} __attribute__((aligned(8)));

__bpf_kfunc_start_defs();

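/* Open-coded css task iterator kfuncs. A minimal sketch of BPF-side usage,
 * assuming the kfunc declarations are available via vmlinux.h/libbpf:
 *
 *      struct bpf_iter_css_task css_it;
 *      struct task_struct *t;
 *
 *      if (!bpf_iter_css_task_new(&css_it, css, CSS_TASK_ITER_PROCS))
 *              while ((t = bpf_iter_css_task_next(&css_it)))
 *                      ;       // one thread-group leader per iteration
 *      bpf_iter_css_task_destroy(&css_it);
 */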
__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
                struct cgroup_subsys_state *css, unsigned int flags)
{
        struct bpf_iter_css_task_kern *kit = (void *)it;

        BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != sizeof(struct bpf_iter_css_task));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) !=
                                        __alignof__(struct bpf_iter_css_task));
        kit->css_it = NULL;
        switch (flags) {
        case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED:
        case CSS_TASK_ITER_PROCS:
        case 0:
                break;
        default:
                return -EINVAL;
        }

        kit->css_it = bpf_mem_alloc(&bpf_global_ma, sizeof(struct css_task_iter));
        if (!kit->css_it)
                return -ENOMEM;
        css_task_iter_start(css, flags, kit->css_it);
        return 0;
}

__bpf_kfunc struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it)
{
        struct bpf_iter_css_task_kern *kit = (void *)it;

        if (!kit->css_it)
                return NULL;
        return css_task_iter_next(kit->css_it);
}

__bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it)
{
        struct bpf_iter_css_task_kern *kit = (void *)it;

        if (!kit->css_it)
                return;
        css_task_iter_end(kit->css_it);
        bpf_mem_free(&bpf_global_ma, kit->css_it);
}

__bpf_kfunc_end_defs();

#endif /* CONFIG_CGROUPS */

struct bpf_iter_task {
        __u64 __opaque[3];
} __attribute__((aligned(8)));

struct bpf_iter_task_kern {
        struct task_struct *task;
        struct task_struct *pos;
        unsigned int flags;
} __attribute__((aligned(8)));

enum {
        /* all processes in the system */
        BPF_TASK_ITER_ALL_PROCS,
        /* all threads in the system */
        BPF_TASK_ITER_ALL_THREADS,
        /* all threads of a specific process */
        BPF_TASK_ITER_PROC_THREADS
};

__bpf_kfunc_start_defs();

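/* Open-coded task iterator kfuncs. A minimal sketch of BPF-side usage,
 * assuming the kfunc declarations are available via vmlinux.h/libbpf:
 *
 *      struct bpf_iter_task task_it;
 *      struct task_struct *t;
 *
 *      if (!bpf_iter_task_new(&task_it, NULL, BPF_TASK_ITER_ALL_PROCS))
 *              while ((t = bpf_iter_task_next(&task_it)))
 *                      ;       // one group leader per process
 *      bpf_iter_task_destroy(&task_it);
 */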
__bpf_kfunc int bpf_iter_task_new(struct bpf_iter_task *it,
                struct task_struct *task__nullable, unsigned int flags)
{
        struct bpf_iter_task_kern *kit = (void *)it;

        BUILD_BUG_ON(sizeof(struct bpf_iter_task_kern) > sizeof(struct bpf_iter_task));
        BUILD_BUG_ON(__alignof__(struct bpf_iter_task_kern) !=
                                        __alignof__(struct bpf_iter_task));

        kit->pos = NULL;

        switch (flags) {
        case BPF_TASK_ITER_ALL_THREADS:
        case BPF_TASK_ITER_ALL_PROCS:
                break;
        case BPF_TASK_ITER_PROC_THREADS:
                if (!task__nullable)
                        return -EINVAL;
                break;
        default:
                return -EINVAL;
        }

        if (flags == BPF_TASK_ITER_PROC_THREADS)
                kit->task = task__nullable;
        else
                kit->task = &init_task;
        kit->pos = kit->task;
        kit->flags = flags;
        return 0;
}

__bpf_kfunc struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it)
{
        struct bpf_iter_task_kern *kit = (void *)it;
        struct task_struct *pos;
        unsigned int flags;

        flags = kit->flags;
        pos = kit->pos;

        if (!pos)
                return pos;

        if (flags == BPF_TASK_ITER_ALL_PROCS)
                goto get_next_task;

        kit->pos = __next_thread(kit->pos);
        if (kit->pos || flags == BPF_TASK_ITER_PROC_THREADS)
                return pos;

get_next_task:
        kit->task = next_task(kit->task);
        if (kit->task == &init_task)
                kit->pos = NULL;
        else
                kit->pos = kit->task;

        return pos;
}

__bpf_kfunc void bpf_iter_task_destroy(struct bpf_iter_task *it)
{
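        /* Nothing to release: this iterator takes no references and
         * relies on RCU protection for the task list walk.
         */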
}

__bpf_kfunc_end_defs();

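/* Per-CPU irq_work used to defer mmap_read_unlock() to a safe context
 * when the lock cannot be released directly (see mmap_unlock_work.h).
 */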
DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);

static void do_mmap_read_unlock(struct irq_work *entry)
{
        struct mmap_unlock_irq_work *work;

        if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
                return;

        work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
        mmap_read_unlock_non_owner(work->mm);
}

static int __init task_iter_init(void)
{
        struct mmap_unlock_irq_work *work;
        int ret, cpu;

        for_each_possible_cpu(cpu) {
                work = per_cpu_ptr(&mmap_unlock_work, cpu);
                init_irq_work(&work->irq_work, do_mmap_read_unlock);
        }

        task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
        ret = bpf_iter_reg_target(&task_reg_info);
        if (ret)
                return ret;

        task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
        task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
        ret = bpf_iter_reg_target(&task_file_reg_info);
        if (ret)
                return ret;

        task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
        task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
        return bpf_iter_reg_target(&task_vma_reg_info);
}
late_initcall(task_iter_init);