]>
Commit | Line | Data |
---|---|---|
b2197755 DB |
1 | /* |
2 | * Minimal file system backend for holding eBPF maps and programs, | |
3 | * used by bpf(2) object pinning. | |
4 | * | |
5 | * Authors: | |
6 | * | |
7 | * Daniel Borkmann <[email protected]> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | */ | |
13 | ||
14 | #include <linux/module.h> | |
15 | #include <linux/magic.h> | |
16 | #include <linux/major.h> | |
17 | #include <linux/mount.h> | |
18 | #include <linux/namei.h> | |
19 | #include <linux/fs.h> | |
20 | #include <linux/kdev_t.h> | |
21 | #include <linux/filter.h> | |
22 | #include <linux/bpf.h> | |
23 | ||
/*
 * Kind of BPF object an inode or file descriptor is associated with.
 * The numeric value is also used as the minor number when a pin request
 * is encoded into a dev_t (see MKDEV()/MINOR() users in this file).
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,
	BPF_TYPE_PROG	= 1,
	BPF_TYPE_MAP	= 2,
};
29 | ||
30 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
31 | { | |
32 | switch (type) { | |
33 | case BPF_TYPE_PROG: | |
34 | atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); | |
35 | break; | |
36 | case BPF_TYPE_MAP: | |
c9da161c | 37 | bpf_map_inc(raw, true); |
b2197755 DB |
38 | break; |
39 | default: | |
40 | WARN_ON_ONCE(1); | |
41 | break; | |
42 | } | |
43 | ||
44 | return raw; | |
45 | } | |
46 | ||
47 | static void bpf_any_put(void *raw, enum bpf_type type) | |
48 | { | |
49 | switch (type) { | |
50 | case BPF_TYPE_PROG: | |
51 | bpf_prog_put(raw); | |
52 | break; | |
53 | case BPF_TYPE_MAP: | |
c9da161c | 54 | bpf_map_put_with_uref(raw); |
b2197755 DB |
55 | break; |
56 | default: | |
57 | WARN_ON_ONCE(1); | |
58 | break; | |
59 | } | |
60 | } | |
61 | ||
62 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
63 | { | |
64 | void *raw; | |
65 | ||
66 | *type = BPF_TYPE_MAP; | |
c9da161c | 67 | raw = bpf_map_get_with_uref(ufd); |
b2197755 DB |
68 | if (IS_ERR(raw)) { |
69 | *type = BPF_TYPE_PROG; | |
70 | raw = bpf_prog_get(ufd); | |
71 | } | |
72 | ||
73 | return raw; | |
74 | } | |
75 | ||
76 | static const struct inode_operations bpf_dir_iops; | |
77 | ||
78 | static const struct inode_operations bpf_prog_iops = { }; | |
79 | static const struct inode_operations bpf_map_iops = { }; | |
80 | ||
81 | static struct inode *bpf_get_inode(struct super_block *sb, | |
82 | const struct inode *dir, | |
83 | umode_t mode) | |
84 | { | |
85 | struct inode *inode; | |
86 | ||
87 | switch (mode & S_IFMT) { | |
88 | case S_IFDIR: | |
89 | case S_IFREG: | |
90 | break; | |
91 | default: | |
92 | return ERR_PTR(-EINVAL); | |
93 | } | |
94 | ||
95 | inode = new_inode(sb); | |
96 | if (!inode) | |
97 | return ERR_PTR(-ENOSPC); | |
98 | ||
99 | inode->i_ino = get_next_ino(); | |
100 | inode->i_atime = CURRENT_TIME; | |
101 | inode->i_mtime = inode->i_atime; | |
102 | inode->i_ctime = inode->i_atime; | |
103 | ||
104 | inode_init_owner(inode, dir, mode); | |
105 | ||
106 | return inode; | |
107 | } | |
108 | ||
109 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
110 | { | |
111 | *type = BPF_TYPE_UNSPEC; | |
112 | if (inode->i_op == &bpf_prog_iops) | |
113 | *type = BPF_TYPE_PROG; | |
114 | else if (inode->i_op == &bpf_map_iops) | |
115 | *type = BPF_TYPE_MAP; | |
116 | else | |
117 | return -EACCES; | |
118 | ||
119 | return 0; | |
120 | } | |
121 | ||
122 | static bool bpf_dname_reserved(const struct dentry *dentry) | |
123 | { | |
124 | return strchr(dentry->d_name.name, '.'); | |
125 | } | |
126 | ||
127 | static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |
128 | { | |
129 | struct inode *inode; | |
130 | ||
131 | if (bpf_dname_reserved(dentry)) | |
132 | return -EPERM; | |
133 | ||
134 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); | |
135 | if (IS_ERR(inode)) | |
136 | return PTR_ERR(inode); | |
137 | ||
138 | inode->i_op = &bpf_dir_iops; | |
139 | inode->i_fop = &simple_dir_operations; | |
140 | ||
141 | inc_nlink(inode); | |
142 | inc_nlink(dir); | |
143 | ||
144 | d_instantiate(dentry, inode); | |
145 | dget(dentry); | |
146 | ||
147 | return 0; | |
148 | } | |
149 | ||
150 | static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, | |
151 | umode_t mode, const struct inode_operations *iops) | |
152 | { | |
153 | struct inode *inode; | |
154 | ||
155 | if (bpf_dname_reserved(dentry)) | |
156 | return -EPERM; | |
157 | ||
158 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); | |
159 | if (IS_ERR(inode)) | |
160 | return PTR_ERR(inode); | |
161 | ||
162 | inode->i_op = iops; | |
163 | inode->i_private = dentry->d_fsdata; | |
164 | ||
165 | d_instantiate(dentry, inode); | |
166 | dget(dentry); | |
167 | ||
168 | return 0; | |
169 | } | |
170 | ||
171 | static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, | |
172 | dev_t devt) | |
173 | { | |
174 | enum bpf_type type = MINOR(devt); | |
175 | ||
176 | if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || | |
177 | dentry->d_fsdata == NULL) | |
178 | return -EPERM; | |
179 | ||
180 | switch (type) { | |
181 | case BPF_TYPE_PROG: | |
182 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); | |
183 | case BPF_TYPE_MAP: | |
184 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); | |
185 | default: | |
186 | return -EPERM; | |
187 | } | |
188 | } | |
189 | ||
bb35a6ef DB |
190 | static int bpf_link(struct dentry *old_dentry, struct inode *dir, |
191 | struct dentry *new_dentry) | |
192 | { | |
193 | if (bpf_dname_reserved(new_dentry)) | |
194 | return -EPERM; | |
195 | ||
196 | return simple_link(old_dentry, dir, new_dentry); | |
197 | } | |
198 | ||
199 | static int bpf_rename(struct inode *old_dir, struct dentry *old_dentry, | |
200 | struct inode *new_dir, struct dentry *new_dentry) | |
201 | { | |
202 | if (bpf_dname_reserved(new_dentry)) | |
203 | return -EPERM; | |
204 | ||
205 | return simple_rename(old_dir, old_dentry, new_dir, new_dentry); | |
206 | } | |
207 | ||
b2197755 DB |
208 | static const struct inode_operations bpf_dir_iops = { |
209 | .lookup = simple_lookup, | |
210 | .mknod = bpf_mkobj, | |
211 | .mkdir = bpf_mkdir, | |
212 | .rmdir = simple_rmdir, | |
bb35a6ef DB |
213 | .rename = bpf_rename, |
214 | .link = bpf_link, | |
b2197755 DB |
215 | .unlink = simple_unlink, |
216 | }; | |
217 | ||
218 | static int bpf_obj_do_pin(const struct filename *pathname, void *raw, | |
219 | enum bpf_type type) | |
220 | { | |
221 | struct dentry *dentry; | |
222 | struct inode *dir; | |
223 | struct path path; | |
224 | umode_t mode; | |
225 | dev_t devt; | |
226 | int ret; | |
227 | ||
228 | dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); | |
229 | if (IS_ERR(dentry)) | |
230 | return PTR_ERR(dentry); | |
231 | ||
232 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); | |
233 | devt = MKDEV(UNNAMED_MAJOR, type); | |
234 | ||
235 | ret = security_path_mknod(&path, dentry, mode, devt); | |
236 | if (ret) | |
237 | goto out; | |
238 | ||
239 | dir = d_inode(path.dentry); | |
240 | if (dir->i_op != &bpf_dir_iops) { | |
241 | ret = -EPERM; | |
242 | goto out; | |
243 | } | |
244 | ||
245 | dentry->d_fsdata = raw; | |
246 | ret = vfs_mknod(dir, dentry, mode, devt); | |
247 | dentry->d_fsdata = NULL; | |
248 | out: | |
249 | done_path_create(&path, dentry); | |
250 | return ret; | |
251 | } | |
252 | ||
253 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |
254 | { | |
255 | struct filename *pname; | |
256 | enum bpf_type type; | |
257 | void *raw; | |
258 | int ret; | |
259 | ||
260 | pname = getname(pathname); | |
261 | if (IS_ERR(pname)) | |
262 | return PTR_ERR(pname); | |
263 | ||
264 | raw = bpf_fd_probe_obj(ufd, &type); | |
265 | if (IS_ERR(raw)) { | |
266 | ret = PTR_ERR(raw); | |
267 | goto out; | |
268 | } | |
269 | ||
270 | ret = bpf_obj_do_pin(pname, raw, type); | |
271 | if (ret != 0) | |
272 | bpf_any_put(raw, type); | |
273 | out: | |
274 | putname(pname); | |
275 | return ret; | |
276 | } | |
277 | ||
278 | static void *bpf_obj_do_get(const struct filename *pathname, | |
279 | enum bpf_type *type) | |
280 | { | |
281 | struct inode *inode; | |
282 | struct path path; | |
283 | void *raw; | |
284 | int ret; | |
285 | ||
286 | ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); | |
287 | if (ret) | |
288 | return ERR_PTR(ret); | |
289 | ||
290 | inode = d_backing_inode(path.dentry); | |
291 | ret = inode_permission(inode, MAY_WRITE); | |
292 | if (ret) | |
293 | goto out; | |
294 | ||
295 | ret = bpf_inode_type(inode, type); | |
296 | if (ret) | |
297 | goto out; | |
298 | ||
299 | raw = bpf_any_get(inode->i_private, *type); | |
300 | touch_atime(&path); | |
301 | ||
302 | path_put(&path); | |
303 | return raw; | |
304 | out: | |
305 | path_put(&path); | |
306 | return ERR_PTR(ret); | |
307 | } | |
308 | ||
309 | int bpf_obj_get_user(const char __user *pathname) | |
310 | { | |
311 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
312 | struct filename *pname; | |
313 | int ret = -ENOENT; | |
314 | void *raw; | |
315 | ||
316 | pname = getname(pathname); | |
317 | if (IS_ERR(pname)) | |
318 | return PTR_ERR(pname); | |
319 | ||
320 | raw = bpf_obj_do_get(pname, &type); | |
321 | if (IS_ERR(raw)) { | |
322 | ret = PTR_ERR(raw); | |
323 | goto out; | |
324 | } | |
325 | ||
326 | if (type == BPF_TYPE_PROG) | |
327 | ret = bpf_prog_new_fd(raw); | |
328 | else if (type == BPF_TYPE_MAP) | |
329 | ret = bpf_map_new_fd(raw); | |
330 | else | |
331 | goto out; | |
332 | ||
333 | if (ret < 0) | |
334 | bpf_any_put(raw, type); | |
335 | out: | |
336 | putname(pname); | |
337 | return ret; | |
338 | } | |
339 | ||
340 | static void bpf_evict_inode(struct inode *inode) | |
341 | { | |
342 | enum bpf_type type; | |
343 | ||
344 | truncate_inode_pages_final(&inode->i_data); | |
345 | clear_inode(inode); | |
346 | ||
347 | if (!bpf_inode_type(inode, &type)) | |
348 | bpf_any_put(inode->i_private, type); | |
349 | } | |
350 | ||
351 | static const struct super_operations bpf_super_ops = { | |
352 | .statfs = simple_statfs, | |
353 | .drop_inode = generic_delete_inode, | |
354 | .evict_inode = bpf_evict_inode, | |
355 | }; | |
356 | ||
357 | static int bpf_fill_super(struct super_block *sb, void *data, int silent) | |
358 | { | |
359 | static struct tree_descr bpf_rfiles[] = { { "" } }; | |
360 | struct inode *inode; | |
361 | int ret; | |
362 | ||
363 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); | |
364 | if (ret) | |
365 | return ret; | |
366 | ||
367 | sb->s_op = &bpf_super_ops; | |
368 | ||
369 | inode = sb->s_root->d_inode; | |
370 | inode->i_op = &bpf_dir_iops; | |
371 | inode->i_mode &= ~S_IALLUGO; | |
372 | inode->i_mode |= S_ISVTX | S_IRWXUGO; | |
373 | ||
374 | return 0; | |
375 | } | |
376 | ||
377 | static struct dentry *bpf_mount(struct file_system_type *type, int flags, | |
378 | const char *dev_name, void *data) | |
379 | { | |
380 | return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); | |
381 | } | |
382 | ||
383 | static struct file_system_type bpf_fs_type = { | |
384 | .owner = THIS_MODULE, | |
385 | .name = "bpf", | |
386 | .mount = bpf_mount, | |
387 | .kill_sb = kill_litter_super, | |
388 | .fs_flags = FS_USERNS_MOUNT, | |
389 | }; | |
390 | ||
391 | MODULE_ALIAS_FS("bpf"); | |
392 | ||
393 | static int __init bpf_init(void) | |
394 | { | |
395 | int ret; | |
396 | ||
397 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
398 | if (ret) | |
399 | return ret; | |
400 | ||
401 | ret = register_filesystem(&bpf_fs_type); | |
402 | if (ret) | |
403 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
404 | ||
405 | return ret; | |
406 | } | |
407 | fs_initcall(bpf_init); |