]>
Commit | Line | Data |
---|---|---|
b2197755 DB |
1 | /* |
2 | * Minimal file system backend for holding eBPF maps and programs, | |
3 | * used by bpf(2) object pinning. | |
4 | * | |
5 | * Authors: | |
6 | * | |
7 | * Daniel Borkmann <[email protected]> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | */ | |
13 | ||
a536a6e1 | 14 | #include <linux/init.h> |
b2197755 DB |
15 | #include <linux/magic.h> |
16 | #include <linux/major.h> | |
17 | #include <linux/mount.h> | |
18 | #include <linux/namei.h> | |
19 | #include <linux/fs.h> | |
20 | #include <linux/kdev_t.h> | |
a3af5f80 | 21 | #include <linux/parser.h> |
b2197755 DB |
22 | #include <linux/filter.h> |
23 | #include <linux/bpf.h> | |
a67edbf4 | 24 | #include <linux/bpf_trace.h> |
b2197755 DB |
25 | |
/* Kind of object stored behind a bpffs inode. */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,	/* neither a program nor a map */
	BPF_TYPE_PROG,		/* eBPF program */
	BPF_TYPE_MAP,		/* eBPF map */
};
31 | ||
32 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
33 | { | |
34 | switch (type) { | |
35 | case BPF_TYPE_PROG: | |
92117d84 | 36 | raw = bpf_prog_inc(raw); |
b2197755 DB |
37 | break; |
38 | case BPF_TYPE_MAP: | |
92117d84 | 39 | raw = bpf_map_inc(raw, true); |
b2197755 DB |
40 | break; |
41 | default: | |
42 | WARN_ON_ONCE(1); | |
43 | break; | |
44 | } | |
45 | ||
46 | return raw; | |
47 | } | |
48 | ||
49 | static void bpf_any_put(void *raw, enum bpf_type type) | |
50 | { | |
51 | switch (type) { | |
52 | case BPF_TYPE_PROG: | |
53 | bpf_prog_put(raw); | |
54 | break; | |
55 | case BPF_TYPE_MAP: | |
c9da161c | 56 | bpf_map_put_with_uref(raw); |
b2197755 DB |
57 | break; |
58 | default: | |
59 | WARN_ON_ONCE(1); | |
60 | break; | |
61 | } | |
62 | } | |
63 | ||
64 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
65 | { | |
66 | void *raw; | |
67 | ||
68 | *type = BPF_TYPE_MAP; | |
c9da161c | 69 | raw = bpf_map_get_with_uref(ufd); |
b2197755 DB |
70 | if (IS_ERR(raw)) { |
71 | *type = BPF_TYPE_PROG; | |
72 | raw = bpf_prog_get(ufd); | |
73 | } | |
74 | ||
75 | return raw; | |
76 | } | |
77 | ||
78 | static const struct inode_operations bpf_dir_iops; | |
79 | ||
80 | static const struct inode_operations bpf_prog_iops = { }; | |
81 | static const struct inode_operations bpf_map_iops = { }; | |
82 | ||
83 | static struct inode *bpf_get_inode(struct super_block *sb, | |
84 | const struct inode *dir, | |
85 | umode_t mode) | |
86 | { | |
87 | struct inode *inode; | |
88 | ||
89 | switch (mode & S_IFMT) { | |
90 | case S_IFDIR: | |
91 | case S_IFREG: | |
0f98621b | 92 | case S_IFLNK: |
b2197755 DB |
93 | break; |
94 | default: | |
95 | return ERR_PTR(-EINVAL); | |
96 | } | |
97 | ||
98 | inode = new_inode(sb); | |
99 | if (!inode) | |
100 | return ERR_PTR(-ENOSPC); | |
101 | ||
102 | inode->i_ino = get_next_ino(); | |
078cd827 | 103 | inode->i_atime = current_time(inode); |
b2197755 DB |
104 | inode->i_mtime = inode->i_atime; |
105 | inode->i_ctime = inode->i_atime; | |
106 | ||
107 | inode_init_owner(inode, dir, mode); | |
108 | ||
109 | return inode; | |
110 | } | |
111 | ||
112 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
113 | { | |
114 | *type = BPF_TYPE_UNSPEC; | |
115 | if (inode->i_op == &bpf_prog_iops) | |
116 | *type = BPF_TYPE_PROG; | |
117 | else if (inode->i_op == &bpf_map_iops) | |
118 | *type = BPF_TYPE_MAP; | |
119 | else | |
120 | return -EACCES; | |
121 | ||
122 | return 0; | |
123 | } | |
124 | ||
0f98621b DB |
125 | static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, |
126 | struct inode *dir) | |
127 | { | |
128 | d_instantiate(dentry, inode); | |
129 | dget(dentry); | |
130 | ||
131 | dir->i_mtime = current_time(dir); | |
132 | dir->i_ctime = dir->i_mtime; | |
133 | } | |
134 | ||
b2197755 DB |
135 | static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
136 | { | |
137 | struct inode *inode; | |
138 | ||
b2197755 DB |
139 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); |
140 | if (IS_ERR(inode)) | |
141 | return PTR_ERR(inode); | |
142 | ||
143 | inode->i_op = &bpf_dir_iops; | |
144 | inode->i_fop = &simple_dir_operations; | |
145 | ||
146 | inc_nlink(inode); | |
147 | inc_nlink(dir); | |
148 | ||
0f98621b | 149 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
150 | return 0; |
151 | } | |
152 | ||
153 | static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, | |
154 | umode_t mode, const struct inode_operations *iops) | |
155 | { | |
156 | struct inode *inode; | |
157 | ||
b2197755 DB |
158 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); |
159 | if (IS_ERR(inode)) | |
160 | return PTR_ERR(inode); | |
161 | ||
162 | inode->i_op = iops; | |
163 | inode->i_private = dentry->d_fsdata; | |
164 | ||
0f98621b | 165 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
166 | return 0; |
167 | } | |
168 | ||
169 | static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, | |
170 | dev_t devt) | |
171 | { | |
172 | enum bpf_type type = MINOR(devt); | |
173 | ||
174 | if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || | |
175 | dentry->d_fsdata == NULL) | |
176 | return -EPERM; | |
177 | ||
178 | switch (type) { | |
179 | case BPF_TYPE_PROG: | |
180 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); | |
181 | case BPF_TYPE_MAP: | |
182 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); | |
183 | default: | |
184 | return -EPERM; | |
185 | } | |
186 | } | |
187 | ||
0c93b7d8 AV |
188 | static struct dentry * |
189 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) | |
bb35a6ef | 190 | { |
0c93b7d8 AV |
191 | if (strchr(dentry->d_name.name, '.')) |
192 | return ERR_PTR(-EPERM); | |
0f98621b | 193 | |
0c93b7d8 | 194 | return simple_lookup(dir, dentry, flags); |
bb35a6ef DB |
195 | } |
196 | ||
0f98621b DB |
197 | static int bpf_symlink(struct inode *dir, struct dentry *dentry, |
198 | const char *target) | |
199 | { | |
200 | char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); | |
201 | struct inode *inode; | |
202 | ||
203 | if (!link) | |
204 | return -ENOMEM; | |
205 | ||
206 | inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); | |
207 | if (IS_ERR(inode)) { | |
208 | kfree(link); | |
209 | return PTR_ERR(inode); | |
210 | } | |
211 | ||
212 | inode->i_op = &simple_symlink_inode_operations; | |
213 | inode->i_link = link; | |
214 | ||
215 | bpf_dentry_finalize(dentry, inode, dir); | |
216 | return 0; | |
217 | } | |
218 | ||
b2197755 | 219 | static const struct inode_operations bpf_dir_iops = { |
0c93b7d8 | 220 | .lookup = bpf_lookup, |
b2197755 DB |
221 | .mknod = bpf_mkobj, |
222 | .mkdir = bpf_mkdir, | |
0f98621b | 223 | .symlink = bpf_symlink, |
b2197755 | 224 | .rmdir = simple_rmdir, |
0c93b7d8 AV |
225 | .rename = simple_rename, |
226 | .link = simple_link, | |
b2197755 DB |
227 | .unlink = simple_unlink, |
228 | }; | |
229 | ||
230 | static int bpf_obj_do_pin(const struct filename *pathname, void *raw, | |
231 | enum bpf_type type) | |
232 | { | |
233 | struct dentry *dentry; | |
234 | struct inode *dir; | |
235 | struct path path; | |
236 | umode_t mode; | |
237 | dev_t devt; | |
238 | int ret; | |
239 | ||
240 | dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); | |
241 | if (IS_ERR(dentry)) | |
242 | return PTR_ERR(dentry); | |
243 | ||
244 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); | |
245 | devt = MKDEV(UNNAMED_MAJOR, type); | |
246 | ||
247 | ret = security_path_mknod(&path, dentry, mode, devt); | |
248 | if (ret) | |
249 | goto out; | |
250 | ||
251 | dir = d_inode(path.dentry); | |
252 | if (dir->i_op != &bpf_dir_iops) { | |
253 | ret = -EPERM; | |
254 | goto out; | |
255 | } | |
256 | ||
257 | dentry->d_fsdata = raw; | |
258 | ret = vfs_mknod(dir, dentry, mode, devt); | |
259 | dentry->d_fsdata = NULL; | |
260 | out: | |
261 | done_path_create(&path, dentry); | |
262 | return ret; | |
263 | } | |
264 | ||
265 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |
266 | { | |
267 | struct filename *pname; | |
268 | enum bpf_type type; | |
269 | void *raw; | |
270 | int ret; | |
271 | ||
272 | pname = getname(pathname); | |
273 | if (IS_ERR(pname)) | |
274 | return PTR_ERR(pname); | |
275 | ||
276 | raw = bpf_fd_probe_obj(ufd, &type); | |
277 | if (IS_ERR(raw)) { | |
278 | ret = PTR_ERR(raw); | |
279 | goto out; | |
280 | } | |
281 | ||
282 | ret = bpf_obj_do_pin(pname, raw, type); | |
283 | if (ret != 0) | |
284 | bpf_any_put(raw, type); | |
a67edbf4 DB |
285 | if ((trace_bpf_obj_pin_prog_enabled() || |
286 | trace_bpf_obj_pin_map_enabled()) && !ret) { | |
287 | if (type == BPF_TYPE_PROG) | |
288 | trace_bpf_obj_pin_prog(raw, ufd, pname); | |
289 | if (type == BPF_TYPE_MAP) | |
290 | trace_bpf_obj_pin_map(raw, ufd, pname); | |
291 | } | |
b2197755 DB |
292 | out: |
293 | putname(pname); | |
294 | return ret; | |
295 | } | |
296 | ||
297 | static void *bpf_obj_do_get(const struct filename *pathname, | |
298 | enum bpf_type *type) | |
299 | { | |
300 | struct inode *inode; | |
301 | struct path path; | |
302 | void *raw; | |
303 | int ret; | |
304 | ||
305 | ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); | |
306 | if (ret) | |
307 | return ERR_PTR(ret); | |
308 | ||
309 | inode = d_backing_inode(path.dentry); | |
310 | ret = inode_permission(inode, MAY_WRITE); | |
311 | if (ret) | |
312 | goto out; | |
313 | ||
314 | ret = bpf_inode_type(inode, type); | |
315 | if (ret) | |
316 | goto out; | |
317 | ||
318 | raw = bpf_any_get(inode->i_private, *type); | |
92117d84 AS |
319 | if (!IS_ERR(raw)) |
320 | touch_atime(&path); | |
b2197755 DB |
321 | |
322 | path_put(&path); | |
323 | return raw; | |
324 | out: | |
325 | path_put(&path); | |
326 | return ERR_PTR(ret); | |
327 | } | |
328 | ||
329 | int bpf_obj_get_user(const char __user *pathname) | |
330 | { | |
331 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
332 | struct filename *pname; | |
333 | int ret = -ENOENT; | |
334 | void *raw; | |
335 | ||
336 | pname = getname(pathname); | |
337 | if (IS_ERR(pname)) | |
338 | return PTR_ERR(pname); | |
339 | ||
340 | raw = bpf_obj_do_get(pname, &type); | |
341 | if (IS_ERR(raw)) { | |
342 | ret = PTR_ERR(raw); | |
343 | goto out; | |
344 | } | |
345 | ||
346 | if (type == BPF_TYPE_PROG) | |
347 | ret = bpf_prog_new_fd(raw); | |
348 | else if (type == BPF_TYPE_MAP) | |
349 | ret = bpf_map_new_fd(raw); | |
350 | else | |
351 | goto out; | |
352 | ||
a67edbf4 | 353 | if (ret < 0) { |
b2197755 | 354 | bpf_any_put(raw, type); |
a67edbf4 DB |
355 | } else if (trace_bpf_obj_get_prog_enabled() || |
356 | trace_bpf_obj_get_map_enabled()) { | |
357 | if (type == BPF_TYPE_PROG) | |
358 | trace_bpf_obj_get_prog(raw, ret, pname); | |
359 | if (type == BPF_TYPE_MAP) | |
360 | trace_bpf_obj_get_map(raw, ret, pname); | |
361 | } | |
b2197755 DB |
362 | out: |
363 | putname(pname); | |
364 | return ret; | |
365 | } | |
366 | ||
367 | static void bpf_evict_inode(struct inode *inode) | |
368 | { | |
369 | enum bpf_type type; | |
370 | ||
371 | truncate_inode_pages_final(&inode->i_data); | |
372 | clear_inode(inode); | |
373 | ||
0f98621b DB |
374 | if (S_ISLNK(inode->i_mode)) |
375 | kfree(inode->i_link); | |
b2197755 DB |
376 | if (!bpf_inode_type(inode, &type)) |
377 | bpf_any_put(inode->i_private, type); | |
378 | } | |
379 | ||
4cc7c186 DH |
380 | /* |
381 | * Display the mount options in /proc/mounts. | |
382 | */ | |
383 | static int bpf_show_options(struct seq_file *m, struct dentry *root) | |
384 | { | |
385 | umode_t mode = d_inode(root)->i_mode & S_IALLUGO & ~S_ISVTX; | |
386 | ||
387 | if (mode != S_IRWXUGO) | |
388 | seq_printf(m, ",mode=%o", mode); | |
389 | return 0; | |
390 | } | |
391 | ||
b2197755 DB |
392 | static const struct super_operations bpf_super_ops = { |
393 | .statfs = simple_statfs, | |
394 | .drop_inode = generic_delete_inode, | |
4cc7c186 | 395 | .show_options = bpf_show_options, |
b2197755 DB |
396 | .evict_inode = bpf_evict_inode, |
397 | }; | |
398 | ||
a3af5f80 DB |
/* Token identifiers for the mount option parser. */
enum {
	OPT_MODE,	/* "mode=<octal>" */
	OPT_ERR,	/* table terminator; anything not recognised */
};
403 | ||
404 | static const match_table_t bpf_mount_tokens = { | |
405 | { OPT_MODE, "mode=%o" }, | |
406 | { OPT_ERR, NULL }, | |
407 | }; | |
408 | ||
409 | struct bpf_mount_opts { | |
410 | umode_t mode; | |
411 | }; | |
412 | ||
413 | static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) | |
414 | { | |
415 | substring_t args[MAX_OPT_ARGS]; | |
416 | int option, token; | |
417 | char *ptr; | |
418 | ||
419 | opts->mode = S_IRWXUGO; | |
420 | ||
421 | while ((ptr = strsep(&data, ",")) != NULL) { | |
422 | if (!*ptr) | |
423 | continue; | |
424 | ||
425 | token = match_token(ptr, bpf_mount_tokens, args); | |
426 | switch (token) { | |
427 | case OPT_MODE: | |
428 | if (match_octal(&args[0], &option)) | |
429 | return -EINVAL; | |
430 | opts->mode = option & S_IALLUGO; | |
431 | break; | |
432 | /* We might like to report bad mount options here, but | |
433 | * traditionally we've ignored all mount options, so we'd | |
434 | * better continue to ignore non-existing options for bpf. | |
435 | */ | |
436 | } | |
437 | } | |
438 | ||
439 | return 0; | |
440 | } | |
441 | ||
b2197755 DB |
442 | static int bpf_fill_super(struct super_block *sb, void *data, int silent) |
443 | { | |
cda37124 | 444 | static const struct tree_descr bpf_rfiles[] = { { "" } }; |
a3af5f80 | 445 | struct bpf_mount_opts opts; |
b2197755 DB |
446 | struct inode *inode; |
447 | int ret; | |
448 | ||
a3af5f80 DB |
449 | ret = bpf_parse_options(data, &opts); |
450 | if (ret) | |
451 | return ret; | |
452 | ||
b2197755 DB |
453 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); |
454 | if (ret) | |
455 | return ret; | |
456 | ||
457 | sb->s_op = &bpf_super_ops; | |
458 | ||
459 | inode = sb->s_root->d_inode; | |
460 | inode->i_op = &bpf_dir_iops; | |
461 | inode->i_mode &= ~S_IALLUGO; | |
a3af5f80 | 462 | inode->i_mode |= S_ISVTX | opts.mode; |
b2197755 DB |
463 | |
464 | return 0; | |
465 | } | |
466 | ||
467 | static struct dentry *bpf_mount(struct file_system_type *type, int flags, | |
468 | const char *dev_name, void *data) | |
469 | { | |
e27f4a94 | 470 | return mount_nodev(type, flags, data, bpf_fill_super); |
b2197755 DB |
471 | } |
472 | ||
473 | static struct file_system_type bpf_fs_type = { | |
474 | .owner = THIS_MODULE, | |
475 | .name = "bpf", | |
476 | .mount = bpf_mount, | |
477 | .kill_sb = kill_litter_super, | |
b2197755 DB |
478 | }; |
479 | ||
b2197755 DB |
480 | static int __init bpf_init(void) |
481 | { | |
482 | int ret; | |
483 | ||
484 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
485 | if (ret) | |
486 | return ret; | |
487 | ||
488 | ret = register_filesystem(&bpf_fs_type); | |
489 | if (ret) | |
490 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
491 | ||
492 | return ret; | |
493 | } | |
494 | fs_initcall(bpf_init); |