]>
Commit | Line | Data |
---|---|---|
b2197755 DB |
1 | /* |
2 | * Minimal file system backend for holding eBPF maps and programs, | |
3 | * used by bpf(2) object pinning. | |
4 | * | |
5 | * Authors: | |
6 | * | |
7 | * Daniel Borkmann <[email protected]> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | */ | |
13 | ||
14 | #include <linux/module.h> | |
15 | #include <linux/magic.h> | |
16 | #include <linux/major.h> | |
17 | #include <linux/mount.h> | |
18 | #include <linux/namei.h> | |
19 | #include <linux/fs.h> | |
20 | #include <linux/kdev_t.h> | |
21 | #include <linux/filter.h> | |
22 | #include <linux/bpf.h> | |
23 | ||
/*
 * Kind of object a bpf fs inode refers to.  The numeric value is also
 * carried in the minor number of the dev_t handed to ->mknod(), so the
 * values are spelled out explicitly.
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,	/* not a bpf program or map */
	BPF_TYPE_PROG	= 1,
	BPF_TYPE_MAP	= 2,
};
29 | ||
30 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
31 | { | |
32 | switch (type) { | |
33 | case BPF_TYPE_PROG: | |
34 | atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); | |
35 | break; | |
36 | case BPF_TYPE_MAP: | |
37 | atomic_inc(&((struct bpf_map *)raw)->refcnt); | |
38 | break; | |
39 | default: | |
40 | WARN_ON_ONCE(1); | |
41 | break; | |
42 | } | |
43 | ||
44 | return raw; | |
45 | } | |
46 | ||
47 | static void bpf_any_put(void *raw, enum bpf_type type) | |
48 | { | |
49 | switch (type) { | |
50 | case BPF_TYPE_PROG: | |
51 | bpf_prog_put(raw); | |
52 | break; | |
53 | case BPF_TYPE_MAP: | |
54 | bpf_map_put(raw); | |
55 | break; | |
56 | default: | |
57 | WARN_ON_ONCE(1); | |
58 | break; | |
59 | } | |
60 | } | |
61 | ||
62 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
63 | { | |
64 | void *raw; | |
65 | ||
66 | *type = BPF_TYPE_MAP; | |
67 | raw = bpf_map_get(ufd); | |
68 | if (IS_ERR(raw)) { | |
69 | *type = BPF_TYPE_PROG; | |
70 | raw = bpf_prog_get(ufd); | |
71 | } | |
72 | ||
73 | return raw; | |
74 | } | |
75 | ||
76 | static const struct inode_operations bpf_dir_iops; | |
77 | ||
78 | static const struct inode_operations bpf_prog_iops = { }; | |
79 | static const struct inode_operations bpf_map_iops = { }; | |
80 | ||
81 | static struct inode *bpf_get_inode(struct super_block *sb, | |
82 | const struct inode *dir, | |
83 | umode_t mode) | |
84 | { | |
85 | struct inode *inode; | |
86 | ||
87 | switch (mode & S_IFMT) { | |
88 | case S_IFDIR: | |
89 | case S_IFREG: | |
90 | break; | |
91 | default: | |
92 | return ERR_PTR(-EINVAL); | |
93 | } | |
94 | ||
95 | inode = new_inode(sb); | |
96 | if (!inode) | |
97 | return ERR_PTR(-ENOSPC); | |
98 | ||
99 | inode->i_ino = get_next_ino(); | |
100 | inode->i_atime = CURRENT_TIME; | |
101 | inode->i_mtime = inode->i_atime; | |
102 | inode->i_ctime = inode->i_atime; | |
103 | ||
104 | inode_init_owner(inode, dir, mode); | |
105 | ||
106 | return inode; | |
107 | } | |
108 | ||
109 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
110 | { | |
111 | *type = BPF_TYPE_UNSPEC; | |
112 | if (inode->i_op == &bpf_prog_iops) | |
113 | *type = BPF_TYPE_PROG; | |
114 | else if (inode->i_op == &bpf_map_iops) | |
115 | *type = BPF_TYPE_MAP; | |
116 | else | |
117 | return -EACCES; | |
118 | ||
119 | return 0; | |
120 | } | |
121 | ||
122 | static bool bpf_dname_reserved(const struct dentry *dentry) | |
123 | { | |
124 | return strchr(dentry->d_name.name, '.'); | |
125 | } | |
126 | ||
127 | static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |
128 | { | |
129 | struct inode *inode; | |
130 | ||
131 | if (bpf_dname_reserved(dentry)) | |
132 | return -EPERM; | |
133 | ||
134 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); | |
135 | if (IS_ERR(inode)) | |
136 | return PTR_ERR(inode); | |
137 | ||
138 | inode->i_op = &bpf_dir_iops; | |
139 | inode->i_fop = &simple_dir_operations; | |
140 | ||
141 | inc_nlink(inode); | |
142 | inc_nlink(dir); | |
143 | ||
144 | d_instantiate(dentry, inode); | |
145 | dget(dentry); | |
146 | ||
147 | return 0; | |
148 | } | |
149 | ||
150 | static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, | |
151 | umode_t mode, const struct inode_operations *iops) | |
152 | { | |
153 | struct inode *inode; | |
154 | ||
155 | if (bpf_dname_reserved(dentry)) | |
156 | return -EPERM; | |
157 | ||
158 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); | |
159 | if (IS_ERR(inode)) | |
160 | return PTR_ERR(inode); | |
161 | ||
162 | inode->i_op = iops; | |
163 | inode->i_private = dentry->d_fsdata; | |
164 | ||
165 | d_instantiate(dentry, inode); | |
166 | dget(dentry); | |
167 | ||
168 | return 0; | |
169 | } | |
170 | ||
171 | static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, | |
172 | dev_t devt) | |
173 | { | |
174 | enum bpf_type type = MINOR(devt); | |
175 | ||
176 | if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || | |
177 | dentry->d_fsdata == NULL) | |
178 | return -EPERM; | |
179 | ||
180 | switch (type) { | |
181 | case BPF_TYPE_PROG: | |
182 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); | |
183 | case BPF_TYPE_MAP: | |
184 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); | |
185 | default: | |
186 | return -EPERM; | |
187 | } | |
188 | } | |
189 | ||
190 | static const struct inode_operations bpf_dir_iops = { | |
191 | .lookup = simple_lookup, | |
192 | .mknod = bpf_mkobj, | |
193 | .mkdir = bpf_mkdir, | |
194 | .rmdir = simple_rmdir, | |
195 | .unlink = simple_unlink, | |
196 | }; | |
197 | ||
198 | static int bpf_obj_do_pin(const struct filename *pathname, void *raw, | |
199 | enum bpf_type type) | |
200 | { | |
201 | struct dentry *dentry; | |
202 | struct inode *dir; | |
203 | struct path path; | |
204 | umode_t mode; | |
205 | dev_t devt; | |
206 | int ret; | |
207 | ||
208 | dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); | |
209 | if (IS_ERR(dentry)) | |
210 | return PTR_ERR(dentry); | |
211 | ||
212 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); | |
213 | devt = MKDEV(UNNAMED_MAJOR, type); | |
214 | ||
215 | ret = security_path_mknod(&path, dentry, mode, devt); | |
216 | if (ret) | |
217 | goto out; | |
218 | ||
219 | dir = d_inode(path.dentry); | |
220 | if (dir->i_op != &bpf_dir_iops) { | |
221 | ret = -EPERM; | |
222 | goto out; | |
223 | } | |
224 | ||
225 | dentry->d_fsdata = raw; | |
226 | ret = vfs_mknod(dir, dentry, mode, devt); | |
227 | dentry->d_fsdata = NULL; | |
228 | out: | |
229 | done_path_create(&path, dentry); | |
230 | return ret; | |
231 | } | |
232 | ||
233 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |
234 | { | |
235 | struct filename *pname; | |
236 | enum bpf_type type; | |
237 | void *raw; | |
238 | int ret; | |
239 | ||
240 | pname = getname(pathname); | |
241 | if (IS_ERR(pname)) | |
242 | return PTR_ERR(pname); | |
243 | ||
244 | raw = bpf_fd_probe_obj(ufd, &type); | |
245 | if (IS_ERR(raw)) { | |
246 | ret = PTR_ERR(raw); | |
247 | goto out; | |
248 | } | |
249 | ||
250 | ret = bpf_obj_do_pin(pname, raw, type); | |
251 | if (ret != 0) | |
252 | bpf_any_put(raw, type); | |
253 | out: | |
254 | putname(pname); | |
255 | return ret; | |
256 | } | |
257 | ||
258 | static void *bpf_obj_do_get(const struct filename *pathname, | |
259 | enum bpf_type *type) | |
260 | { | |
261 | struct inode *inode; | |
262 | struct path path; | |
263 | void *raw; | |
264 | int ret; | |
265 | ||
266 | ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); | |
267 | if (ret) | |
268 | return ERR_PTR(ret); | |
269 | ||
270 | inode = d_backing_inode(path.dentry); | |
271 | ret = inode_permission(inode, MAY_WRITE); | |
272 | if (ret) | |
273 | goto out; | |
274 | ||
275 | ret = bpf_inode_type(inode, type); | |
276 | if (ret) | |
277 | goto out; | |
278 | ||
279 | raw = bpf_any_get(inode->i_private, *type); | |
280 | touch_atime(&path); | |
281 | ||
282 | path_put(&path); | |
283 | return raw; | |
284 | out: | |
285 | path_put(&path); | |
286 | return ERR_PTR(ret); | |
287 | } | |
288 | ||
289 | int bpf_obj_get_user(const char __user *pathname) | |
290 | { | |
291 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
292 | struct filename *pname; | |
293 | int ret = -ENOENT; | |
294 | void *raw; | |
295 | ||
296 | pname = getname(pathname); | |
297 | if (IS_ERR(pname)) | |
298 | return PTR_ERR(pname); | |
299 | ||
300 | raw = bpf_obj_do_get(pname, &type); | |
301 | if (IS_ERR(raw)) { | |
302 | ret = PTR_ERR(raw); | |
303 | goto out; | |
304 | } | |
305 | ||
306 | if (type == BPF_TYPE_PROG) | |
307 | ret = bpf_prog_new_fd(raw); | |
308 | else if (type == BPF_TYPE_MAP) | |
309 | ret = bpf_map_new_fd(raw); | |
310 | else | |
311 | goto out; | |
312 | ||
313 | if (ret < 0) | |
314 | bpf_any_put(raw, type); | |
315 | out: | |
316 | putname(pname); | |
317 | return ret; | |
318 | } | |
319 | ||
320 | static void bpf_evict_inode(struct inode *inode) | |
321 | { | |
322 | enum bpf_type type; | |
323 | ||
324 | truncate_inode_pages_final(&inode->i_data); | |
325 | clear_inode(inode); | |
326 | ||
327 | if (!bpf_inode_type(inode, &type)) | |
328 | bpf_any_put(inode->i_private, type); | |
329 | } | |
330 | ||
331 | static const struct super_operations bpf_super_ops = { | |
332 | .statfs = simple_statfs, | |
333 | .drop_inode = generic_delete_inode, | |
334 | .evict_inode = bpf_evict_inode, | |
335 | }; | |
336 | ||
337 | static int bpf_fill_super(struct super_block *sb, void *data, int silent) | |
338 | { | |
339 | static struct tree_descr bpf_rfiles[] = { { "" } }; | |
340 | struct inode *inode; | |
341 | int ret; | |
342 | ||
343 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); | |
344 | if (ret) | |
345 | return ret; | |
346 | ||
347 | sb->s_op = &bpf_super_ops; | |
348 | ||
349 | inode = sb->s_root->d_inode; | |
350 | inode->i_op = &bpf_dir_iops; | |
351 | inode->i_mode &= ~S_IALLUGO; | |
352 | inode->i_mode |= S_ISVTX | S_IRWXUGO; | |
353 | ||
354 | return 0; | |
355 | } | |
356 | ||
357 | static struct dentry *bpf_mount(struct file_system_type *type, int flags, | |
358 | const char *dev_name, void *data) | |
359 | { | |
360 | return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); | |
361 | } | |
362 | ||
363 | static struct file_system_type bpf_fs_type = { | |
364 | .owner = THIS_MODULE, | |
365 | .name = "bpf", | |
366 | .mount = bpf_mount, | |
367 | .kill_sb = kill_litter_super, | |
368 | .fs_flags = FS_USERNS_MOUNT, | |
369 | }; | |
370 | ||
371 | MODULE_ALIAS_FS("bpf"); | |
372 | ||
373 | static int __init bpf_init(void) | |
374 | { | |
375 | int ret; | |
376 | ||
377 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
378 | if (ret) | |
379 | return ret; | |
380 | ||
381 | ret = register_filesystem(&bpf_fs_type); | |
382 | if (ret) | |
383 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
384 | ||
385 | return ret; | |
386 | } | |
387 | fs_initcall(bpf_init); |