btrfs: tracepoints: end assignment with semicolon at btrfs_qgroup_extent event class
[linux.git] / fs / binfmt_misc.c
CommitLineData
// SPDX-License-Identifier: GPL-2.0-only
/*
 * binfmt_misc.c
 *
 * Copyright (C) 1997 Richard Günther
 *
 * binfmt_misc detects binaries via a magic or filename extension and invokes
 * a specified wrapper. See Documentation/admin-guide/binfmt-misc.rst for more details.
 */

6b899c4e
MF
11#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
12
6ceafb88 13#include <linux/kernel.h>
1da177e4
LT
14#include <linux/module.h>
15#include <linux/init.h>
589ee628 16#include <linux/sched/mm.h>
b502bd11 17#include <linux/magic.h>
1da177e4
LT
18#include <linux/binfmts.h>
19#include <linux/slab.h>
20#include <linux/ctype.h>
8d82e180 21#include <linux/string_helpers.h>
1da177e4
LT
22#include <linux/file.h>
23#include <linux/pagemap.h>
24#include <linux/namei.h>
25#include <linux/mount.h>
bc99a664 26#include <linux/fs_context.h>
1da177e4 27#include <linux/syscalls.h>
6e2c10a1 28#include <linux/fs.h>
6b899c4e 29#include <linux/uaccess.h>
1da177e4 30
948b701a
JB
31#include "internal.h"
32
/*
 * USE_DEBUG mirrors DEBUG as a 0/1 constant so that debug-only code can be
 * guarded with a plain if () and still be seen by the compiler.
 */
#ifdef DEBUG
# define USE_DEBUG 1
#else
# define USE_DEBUG 0
#endif

enum {
	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
};

/* Bit numbers (not masks) in Node.flags, used with test_bit() and friends. */
enum {Enabled, Magic};

/*
 * Mask-style flags kept in the upper bits of Node.flags. They correspond to
 * the 'P', 'O', 'C' and 'F' characters of the register string's flags field.
 */
#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31)
#define MISC_FMT_OPEN_BINARY (1UL << 30)
#define MISC_FMT_CREDENTIALS (1UL << 29)
#define MISC_FMT_OPEN_FILE (1UL << 28)

49typedef struct {
50 struct list_head list;
51 unsigned long flags; /* type, status, etc. */
52 int offset; /* offset of magic */
53 int size; /* size of magic/mask */
54 char *magic; /* magic or filename extension */
55 char *mask; /* mask, NULL for exact match */
50097f74 56 const char *interpreter; /* filename of interpreter */
1da177e4
LT
57 char *name;
58 struct dentry *dentry;
948b701a 59 struct file *interp_file;
1c5976ef 60 refcount_t users; /* sync removal with load_misc_binary() */
1da177e4
LT
61} Node;
62
1f5ce9e9 63static struct file_system_type bm_fs_type;
1da177e4 64
/*
 * Max length of the register string. Determined by:
 *  - 7 delimiters
 *  - name:   ~50 bytes
 *  - type:   1 byte
 *  - offset: 3 bytes (has to be smaller than BINPRM_BUF_SIZE)
 *  - magic:  128 bytes (512 in escaped form)
 *  - mask:   128 bytes (512 in escaped form)
 *  - interp: ~50 bytes
 *  - flags:  5 bytes
 * Round that up a bit, and then back off to hold the internal data
 * (like struct Node).
 */
#define MAX_REGISTER_LENGTH 1920

1c5976ef
CB
80/**
81 * search_binfmt_handler - search for a binary handler for @bprm
82 * @misc: handle to binfmt_misc instance
83 * @bprm: binary for which we are looking for a handler
84 *
85 * Search for a binary type handler for @bprm in the list of registered binary
86 * type handlers.
87 *
88 * Return: binary type list entry on success, NULL on failure
1da177e4 89 */
21ca59b3
CB
90static Node *search_binfmt_handler(struct binfmt_misc *misc,
91 struct linux_binprm *bprm)
1da177e4
LT
92{
93 char *p = strrchr(bprm->interp, '.');
1c5976ef 94 Node *e;
1da177e4 95
6b899c4e 96 /* Walk all the registered handlers. */
21ca59b3 97 list_for_each_entry(e, &misc->entries, list) {
1da177e4
LT
98 char *s;
99 int j;
100
6b899c4e 101 /* Make sure this one is currently enabled. */
1da177e4
LT
102 if (!test_bit(Enabled, &e->flags))
103 continue;
104
6b899c4e 105 /* Do matching based on extension if applicable. */
1da177e4
LT
106 if (!test_bit(Magic, &e->flags)) {
107 if (p && !strcmp(e->magic, p + 1))
108 return e;
109 continue;
110 }
111
6b899c4e 112 /* Do matching based on magic & mask. */
1da177e4
LT
113 s = bprm->buf + e->offset;
114 if (e->mask) {
115 for (j = 0; j < e->size; j++)
116 if ((*s++ ^ e->magic[j]) & e->mask[j])
117 break;
118 } else {
119 for (j = 0; j < e->size; j++)
120 if ((*s++ ^ e->magic[j]))
121 break;
122 }
123 if (j == e->size)
124 return e;
125 }
1c5976ef 126
1da177e4
LT
127 return NULL;
128}
129
1c5976ef
CB
130/**
131 * get_binfmt_handler - try to find a binary type handler
132 * @misc: handle to binfmt_misc instance
133 * @bprm: binary for which we are looking for a handler
134 *
135 * Try to find a binfmt handler for the binary type. If one is found take a
136 * reference to protect against removal via bm_{entry,status}_write().
137 *
138 * Return: binary type list entry on success, NULL on failure
139 */
21ca59b3
CB
140static Node *get_binfmt_handler(struct binfmt_misc *misc,
141 struct linux_binprm *bprm)
1c5976ef
CB
142{
143 Node *e;
144
21ca59b3
CB
145 read_lock(&misc->entries_lock);
146 e = search_binfmt_handler(misc, bprm);
1c5976ef
CB
147 if (e)
148 refcount_inc(&e->users);
21ca59b3 149 read_unlock(&misc->entries_lock);
1c5976ef
CB
150 return e;
151}
152
153/**
154 * put_binfmt_handler - put binary handler node
155 * @e: node to put
156 *
157 * Free node syncing with load_misc_binary() and defer final free to
158 * load_misc_binary() in case it is using the binary type handler we were
159 * requested to remove.
160 */
161static void put_binfmt_handler(Node *e)
162{
163 if (refcount_dec_and_test(&e->users)) {
164 if (e->flags & MISC_FMT_OPEN_FILE)
165 filp_close(e->interp_file, NULL);
166 kfree(e);
167 }
168}
169
21ca59b3
CB
170/**
171 * load_binfmt_misc - load the binfmt_misc of the caller's user namespace
172 *
173 * To be called in load_misc_binary() to load the relevant struct binfmt_misc.
174 * If a user namespace doesn't have its own binfmt_misc mount it can make use
175 * of its ancestor's binfmt_misc handlers. This mimicks the behavior of
176 * pre-namespaced binfmt_misc where all registered binfmt_misc handlers where
177 * available to all user and user namespaces on the system.
178 *
179 * Return: the binfmt_misc instance of the caller's user namespace
180 */
181static struct binfmt_misc *load_binfmt_misc(void)
182{
183 const struct user_namespace *user_ns;
184 struct binfmt_misc *misc;
185
186 user_ns = current_user_ns();
187 while (user_ns) {
188 /* Pairs with smp_store_release() in bm_fill_super(). */
189 misc = smp_load_acquire(&user_ns->binfmt_misc);
190 if (misc)
191 return misc;
192
193 user_ns = user_ns->parent;
194 }
195
196 return &init_binfmt_misc;
197}
198
1da177e4
LT
199/*
200 * the loader itself
201 */
71613c3b 202static int load_misc_binary(struct linux_binprm *bprm)
1da177e4
LT
203{
204 Node *fmt;
e6084d4a 205 struct file *interp_file = NULL;
21ca59b3
CB
206 int retval = -ENOEXEC;
207 struct binfmt_misc *misc;
1da177e4 208
21ca59b3
CB
209 misc = load_binfmt_misc();
210 if (!misc->enabled)
43a4f261 211 return retval;
1da177e4 212
21ca59b3 213 fmt = get_binfmt_handler(misc, bprm);
1da177e4 214 if (!fmt)
43a4f261 215 return retval;
1da177e4 216
51f39a1f 217 /* Need to be able to load the file after exec */
43a4f261 218 retval = -ENOENT;
51f39a1f 219 if (bprm->interp_flags & BINPRM_FLAGS_PATH_INACCESSIBLE)
43a4f261 220 goto ret;
51f39a1f 221
2347961b
LV
222 if (fmt->flags & MISC_FMT_PRESERVE_ARGV0) {
223 bprm->interp_flags |= BINPRM_FLAGS_PRESERVE_ARGV0;
224 } else {
b6a2fea3
OW
225 retval = remove_arg_zero(bprm);
226 if (retval)
e6084d4a 227 goto ret;
1da177e4
LT
228 }
229
bc2bf338 230 if (fmt->flags & MISC_FMT_OPEN_BINARY)
b8a61c9e 231 bprm->have_execfd = 1;
1da177e4 232
1da177e4 233 /* make argv[1] be the path to the binary */
986db2d1 234 retval = copy_string_kernel(bprm->interp, bprm);
1da177e4 235 if (retval < 0)
b8a61c9e 236 goto ret;
1da177e4
LT
237 bprm->argc++;
238
239 /* add the interp as argv[0] */
986db2d1 240 retval = copy_string_kernel(fmt->interpreter, bprm);
1da177e4 241 if (retval < 0)
b8a61c9e 242 goto ret;
e6084d4a 243 bprm->argc++;
1da177e4 244
b66c5984 245 /* Update interp in case binfmt_script needs it. */
50097f74 246 retval = bprm_change_interp(fmt->interpreter, bprm);
b66c5984 247 if (retval < 0)
b8a61c9e 248 goto ret;
1da177e4 249
2a010c41 250 if (fmt->flags & MISC_FMT_OPEN_FILE)
19f391eb 251 interp_file = file_clone_open(fmt->interp_file);
2a010c41 252 else
50097f74 253 interp_file = open_exec(fmt->interpreter);
e6084d4a
MF
254 retval = PTR_ERR(interp_file);
255 if (IS_ERR(interp_file))
b8a61c9e 256 goto ret;
1da177e4 257
bc2bf338 258 bprm->interpreter = interp_file;
a16b3357 259 if (fmt->flags & MISC_FMT_CREDENTIALS)
56305aa9 260 bprm->execfd_creds = 1;
1da177e4 261
bc2bf338 262 retval = 0;
e6084d4a 263ret:
1c5976ef
CB
264
265 /*
266 * If we actually put the node here all concurrent calls to
267 * load_misc_binary() will have finished. We also know
21ca59b3
CB
268 * that for the refcount to be zero someone must have concurently
269 * removed the binary type handler from the list and it's our job to
270 * free it.
1c5976ef
CB
271 */
272 put_binfmt_handler(fmt);
273
1da177e4 274 return retval;
1da177e4
LT
275}
276
277/* Command parsers */
278
279/*
280 * parses and copies one argument enclosed in del from *sp to *dp,
281 * recognising the \x special.
282 * returns pointer to the copied argument or NULL in case of an
283 * error (and sets err) or null argument length.
284 */
285static char *scanarg(char *s, char del)
286{
287 char c;
288
289 while ((c = *s++) != del) {
290 if (c == '\\' && *s == 'x') {
291 s++;
292 if (!isxdigit(*s++))
293 return NULL;
294 if (!isxdigit(*s++))
295 return NULL;
296 }
297 }
7d65cf10 298 s[-1] ='\0';
1da177e4
LT
299 return s;
300}
301
e6084d4a 302static char *check_special_flags(char *sfs, Node *e)
1da177e4 303{
e6084d4a 304 char *p = sfs;
1da177e4
LT
305 int cont = 1;
306
307 /* special flags */
308 while (cont) {
309 switch (*p) {
e6084d4a
MF
310 case 'P':
311 pr_debug("register: flag: P (preserve argv0)\n");
312 p++;
313 e->flags |= MISC_FMT_PRESERVE_ARGV0;
314 break;
315 case 'O':
316 pr_debug("register: flag: O (open binary)\n");
317 p++;
318 e->flags |= MISC_FMT_OPEN_BINARY;
319 break;
320 case 'C':
321 pr_debug("register: flag: C (preserve creds)\n");
322 p++;
323 /* this flags also implies the
324 open-binary flag */
325 e->flags |= (MISC_FMT_CREDENTIALS |
326 MISC_FMT_OPEN_BINARY);
327 break;
948b701a
JB
328 case 'F':
329 pr_debug("register: flag: F: open interpreter file now\n");
330 p++;
331 e->flags |= MISC_FMT_OPEN_FILE;
332 break;
e6084d4a
MF
333 default:
334 cont = 0;
1da177e4
LT
335 }
336 }
337
338 return p;
339}
e6084d4a 340
1da177e4
LT
341/*
342 * This registers a new binary format, it recognises the syntax
343 * ':name:type:offset:magic:mask:interpreter:flags'
344 * where the ':' is the IFS, that can be chosen with the first char
345 */
346static Node *create_entry(const char __user *buffer, size_t count)
347{
348 Node *e;
349 int memsize, err;
350 char *buf, *p;
351 char del;
352
6b899c4e
MF
353 pr_debug("register: received %zu bytes\n", count);
354
1da177e4
LT
355 /* some sanity checks */
356 err = -EINVAL;
bbaecc08 357 if ((count < 11) || (count > MAX_REGISTER_LENGTH))
1da177e4
LT
358 goto out;
359
360 err = -ENOMEM;
361 memsize = sizeof(Node) + count + 8;
21ca59b3 362 e = kmalloc(memsize, GFP_KERNEL_ACCOUNT);
1da177e4
LT
363 if (!e)
364 goto out;
365
366 p = buf = (char *)e + sizeof(Node);
367
368 memset(e, 0, sizeof(Node));
369 if (copy_from_user(buf, buffer, count))
e6084d4a 370 goto efault;
1da177e4
LT
371
372 del = *p++; /* delimeter */
373
6b899c4e
MF
374 pr_debug("register: delim: %#x {%c}\n", del, del);
375
376 /* Pad the buffer with the delim to simplify parsing below. */
e6084d4a 377 memset(buf + count, del, 8);
1da177e4 378
6b899c4e 379 /* Parse the 'name' field. */
1da177e4
LT
380 e->name = p;
381 p = strchr(p, del);
382 if (!p)
e6084d4a 383 goto einval;
1da177e4
LT
384 *p++ = '\0';
385 if (!e->name[0] ||
386 !strcmp(e->name, ".") ||
387 !strcmp(e->name, "..") ||
388 strchr(e->name, '/'))
e6084d4a 389 goto einval;
6b899c4e
MF
390
391 pr_debug("register: name: {%s}\n", e->name);
392
393 /* Parse the 'type' field. */
1da177e4 394 switch (*p++) {
6b899c4e
MF
395 case 'E':
396 pr_debug("register: type: E (extension)\n");
397 e->flags = 1 << Enabled;
398 break;
399 case 'M':
400 pr_debug("register: type: M (magic)\n");
401 e->flags = (1 << Enabled) | (1 << Magic);
402 break;
403 default:
e6084d4a 404 goto einval;
1da177e4
LT
405 }
406 if (*p++ != del)
e6084d4a 407 goto einval;
6b899c4e 408
1da177e4 409 if (test_bit(Magic, &e->flags)) {
6b899c4e
MF
410 /* Handle the 'M' (magic) format. */
411 char *s;
412
413 /* Parse the 'offset' field. */
414 s = strchr(p, del);
1da177e4 415 if (!s)
e6084d4a 416 goto einval;
5cc41e09
TLSC
417 *s = '\0';
418 if (p != s) {
419 int r = kstrtoint(p, 10, &e->offset);
420 if (r != 0 || e->offset < 0)
421 goto einval;
422 }
423 p = s;
1da177e4 424 if (*p++)
e6084d4a 425 goto einval;
6b899c4e
MF
426 pr_debug("register: offset: %#x\n", e->offset);
427
428 /* Parse the 'magic' field. */
1da177e4
LT
429 e->magic = p;
430 p = scanarg(p, del);
431 if (!p)
e6084d4a 432 goto einval;
7d65cf10 433 if (!e->magic[0])
e6084d4a 434 goto einval;
6b899c4e
MF
435 if (USE_DEBUG)
436 print_hex_dump_bytes(
437 KBUILD_MODNAME ": register: magic[raw]: ",
438 DUMP_PREFIX_NONE, e->magic, p - e->magic);
439
440 /* Parse the 'mask' field. */
1da177e4
LT
441 e->mask = p;
442 p = scanarg(p, del);
443 if (!p)
e6084d4a 444 goto einval;
7d65cf10 445 if (!e->mask[0]) {
1da177e4 446 e->mask = NULL;
6b899c4e
MF
447 pr_debug("register: mask[raw]: none\n");
448 } else if (USE_DEBUG)
449 print_hex_dump_bytes(
450 KBUILD_MODNAME ": register: mask[raw]: ",
451 DUMP_PREFIX_NONE, e->mask, p - e->mask);
452
453 /*
454 * Decode the magic & mask fields.
455 * Note: while we might have accepted embedded NUL bytes from
456 * above, the unescape helpers here will stop at the first one
457 * it encounters.
458 */
8d82e180
AS
459 e->size = string_unescape_inplace(e->magic, UNESCAPE_HEX);
460 if (e->mask &&
461 string_unescape_inplace(e->mask, UNESCAPE_HEX) != e->size)
e6084d4a 462 goto einval;
5cc41e09
TLSC
463 if (e->size > BINPRM_BUF_SIZE ||
464 BINPRM_BUF_SIZE - e->size < e->offset)
e6084d4a 465 goto einval;
6b899c4e
MF
466 pr_debug("register: magic/mask length: %i\n", e->size);
467 if (USE_DEBUG) {
468 print_hex_dump_bytes(
469 KBUILD_MODNAME ": register: magic[decoded]: ",
470 DUMP_PREFIX_NONE, e->magic, e->size);
471
472 if (e->mask) {
473 int i;
21ca59b3 474 char *masked = kmalloc(e->size, GFP_KERNEL_ACCOUNT);
6b899c4e
MF
475
476 print_hex_dump_bytes(
477 KBUILD_MODNAME ": register: mask[decoded]: ",
478 DUMP_PREFIX_NONE, e->mask, e->size);
479
480 if (masked) {
481 for (i = 0; i < e->size; ++i)
482 masked[i] = e->magic[i] & e->mask[i];
483 print_hex_dump_bytes(
484 KBUILD_MODNAME ": register: magic[masked]: ",
485 DUMP_PREFIX_NONE, masked, e->size);
486
487 kfree(masked);
488 }
489 }
490 }
1da177e4 491 } else {
6b899c4e
MF
492 /* Handle the 'E' (extension) format. */
493
494 /* Skip the 'offset' field. */
1da177e4
LT
495 p = strchr(p, del);
496 if (!p)
e6084d4a 497 goto einval;
1da177e4 498 *p++ = '\0';
6b899c4e
MF
499
500 /* Parse the 'magic' field. */
1da177e4
LT
501 e->magic = p;
502 p = strchr(p, del);
503 if (!p)
e6084d4a 504 goto einval;
1da177e4
LT
505 *p++ = '\0';
506 if (!e->magic[0] || strchr(e->magic, '/'))
e6084d4a 507 goto einval;
6b899c4e
MF
508 pr_debug("register: extension: {%s}\n", e->magic);
509
510 /* Skip the 'mask' field. */
1da177e4
LT
511 p = strchr(p, del);
512 if (!p)
e6084d4a 513 goto einval;
1da177e4
LT
514 *p++ = '\0';
515 }
6b899c4e
MF
516
517 /* Parse the 'interpreter' field. */
1da177e4
LT
518 e->interpreter = p;
519 p = strchr(p, del);
520 if (!p)
e6084d4a 521 goto einval;
1da177e4
LT
522 *p++ = '\0';
523 if (!e->interpreter[0])
e6084d4a 524 goto einval;
6b899c4e 525 pr_debug("register: interpreter: {%s}\n", e->interpreter);
1da177e4 526
6b899c4e 527 /* Parse the 'flags' field. */
e6084d4a 528 p = check_special_flags(p, e);
1da177e4
LT
529 if (*p == '\n')
530 p++;
531 if (p != buf + count)
e6084d4a
MF
532 goto einval;
533
1da177e4
LT
534 return e;
535
536out:
537 return ERR_PTR(err);
538
e6084d4a 539efault:
1da177e4
LT
540 kfree(e);
541 return ERR_PTR(-EFAULT);
e6084d4a 542einval:
1da177e4
LT
543 kfree(e);
544 return ERR_PTR(-EINVAL);
545}
546
547/*
548 * Set status of entry/binfmt_misc:
549 * '1' enables, '0' disables and '-1' clears entry/binfmt_misc
550 */
551static int parse_command(const char __user *buffer, size_t count)
552{
553 char s[4];
554
1da177e4
LT
555 if (count > 3)
556 return -EINVAL;
557 if (copy_from_user(s, buffer, count))
558 return -EFAULT;
de8288b1
AB
559 if (!count)
560 return 0;
e6084d4a 561 if (s[count - 1] == '\n')
1da177e4
LT
562 count--;
563 if (count == 1 && s[0] == '0')
564 return 1;
565 if (count == 1 && s[0] == '1')
566 return 2;
567 if (count == 2 && s[0] == '-' && s[1] == '1')
568 return 3;
569 return -EINVAL;
570}
571
572/* generic stuff */
573
574static void entry_status(Node *e, char *page)
575{
6ceafb88
RV
576 char *dp = page;
577 const char *status = "disabled";
1da177e4
LT
578
579 if (test_bit(Enabled, &e->flags))
580 status = "enabled";
581
582 if (!VERBOSE_STATUS) {
583 sprintf(page, "%s\n", status);
584 return;
585 }
586
6ceafb88 587 dp += sprintf(dp, "%s\ninterpreter %s\n", status, e->interpreter);
1da177e4
LT
588
589 /* print the special flags */
6ceafb88 590 dp += sprintf(dp, "flags: ");
e6084d4a
MF
591 if (e->flags & MISC_FMT_PRESERVE_ARGV0)
592 *dp++ = 'P';
593 if (e->flags & MISC_FMT_OPEN_BINARY)
594 *dp++ = 'O';
595 if (e->flags & MISC_FMT_CREDENTIALS)
596 *dp++ = 'C';
948b701a
JB
597 if (e->flags & MISC_FMT_OPEN_FILE)
598 *dp++ = 'F';
e6084d4a 599 *dp++ = '\n';
1da177e4
LT
600
601 if (!test_bit(Magic, &e->flags)) {
602 sprintf(dp, "extension .%s\n", e->magic);
603 } else {
6ceafb88
RV
604 dp += sprintf(dp, "offset %i\nmagic ", e->offset);
605 dp = bin2hex(dp, e->magic, e->size);
1da177e4 606 if (e->mask) {
6ceafb88
RV
607 dp += sprintf(dp, "\nmask ");
608 dp = bin2hex(dp, e->mask, e->size);
1da177e4
LT
609 }
610 *dp++ = '\n';
611 *dp = '\0';
612 }
613}
614
615static struct inode *bm_get_inode(struct super_block *sb, int mode)
616{
e6084d4a 617 struct inode *inode = new_inode(sb);
1da177e4
LT
618
619 if (inode) {
85fe4025 620 inode->i_ino = get_next_ino();
1da177e4 621 inode->i_mode = mode;
16a94965 622 simple_inode_init_ts(inode);
1da177e4
LT
623 }
624 return inode;
625}
626
21ca59b3
CB
627/**
628 * i_binfmt_misc - retrieve struct binfmt_misc from a binfmt_misc inode
629 * @inode: inode of the relevant binfmt_misc instance
630 *
631 * This helper retrieves struct binfmt_misc from a binfmt_misc inode. This can
632 * be done without any memory barriers because we are guaranteed that
633 * user_ns->binfmt_misc is fully initialized. It was fully initialized when the
634 * binfmt_misc mount was first created.
635 *
636 * Return: struct binfmt_misc of the relevant binfmt_misc instance
637 */
638static struct binfmt_misc *i_binfmt_misc(struct inode *inode)
639{
640 return inode->i_sb->s_user_ns->binfmt_misc;
641}
642
1c5976ef
CB
643/**
644 * bm_evict_inode - cleanup data associated with @inode
645 * @inode: inode to which the data is attached
646 *
647 * Cleanup the binary type handler data associated with @inode if a binary type
648 * entry is removed or the filesystem is unmounted and the super block is
649 * shutdown.
650 *
651 * If the ->evict call was not caused by a super block shutdown but by a write
652 * to remove the entry or all entries via bm_{entry,status}_write() the entry
653 * will have already been removed from the list. We keep the list_empty() check
654 * to make that explicit.
655*/
b57922d9 656static void bm_evict_inode(struct inode *inode)
1da177e4 657{
83f91827
ON
658 Node *e = inode->i_private;
659
dbd5768f 660 clear_inode(inode);
1c5976ef
CB
661
662 if (e) {
21ca59b3
CB
663 struct binfmt_misc *misc;
664
665 misc = i_binfmt_misc(inode);
666 write_lock(&misc->entries_lock);
1c5976ef
CB
667 if (!list_empty(&e->list))
668 list_del_init(&e->list);
21ca59b3 669 write_unlock(&misc->entries_lock);
1c5976ef
CB
670 put_binfmt_handler(e);
671 }
1da177e4
LT
672}
673
1c5976ef
CB
674/**
675 * unlink_binfmt_dentry - remove the dentry for the binary type handler
676 * @dentry: dentry associated with the binary type handler
677 *
678 * Do the actual filesystem work to remove a dentry for a registered binary
679 * type handler. Since binfmt_misc only allows simple files to be created
680 * directly under the root dentry of the filesystem we ensure that we are
681 * indeed passed a dentry directly beneath the root dentry, that the inode
682 * associated with the root dentry is locked, and that it is a regular file we
683 * are asked to remove.
684 */
685static void unlink_binfmt_dentry(struct dentry *dentry)
1da177e4 686{
1c5976ef
CB
687 struct dentry *parent = dentry->d_parent;
688 struct inode *inode, *parent_inode;
1da177e4 689
1c5976ef
CB
690 /* All entries are immediate descendants of the root dentry. */
691 if (WARN_ON_ONCE(dentry->d_sb->s_root != parent))
692 return;
1da177e4 693
1c5976ef
CB
694 /* We only expect to be called on regular files. */
695 inode = d_inode(dentry);
696 if (WARN_ON_ONCE(!S_ISREG(inode->i_mode)))
697 return;
698
699 /* The parent inode must be locked. */
700 parent_inode = d_inode(parent);
701 if (WARN_ON_ONCE(!inode_is_locked(parent_inode)))
702 return;
703
704 if (simple_positive(dentry)) {
705 dget(dentry);
706 simple_unlink(parent_inode, dentry);
707 d_delete(dentry);
708 dput(dentry);
709 }
710}
711
712/**
713 * remove_binfmt_handler - remove a binary type handler
714 * @misc: handle to binfmt_misc instance
715 * @e: binary type handler to remove
716 *
717 * Remove a binary type handler from the list of binary type handlers and
718 * remove its associated dentry. This is called from
719 * binfmt_{entry,status}_write(). In the future, we might want to think about
720 * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's
721 * to use writes to files in order to delete binary type handlers. But it has
722 * worked for so long that it's not a pressing issue.
723 */
21ca59b3 724static void remove_binfmt_handler(struct binfmt_misc *misc, Node *e)
1c5976ef 725{
21ca59b3 726 write_lock(&misc->entries_lock);
baba1b29 727 list_del_init(&e->list);
21ca59b3 728 write_unlock(&misc->entries_lock);
1c5976ef 729 unlink_binfmt_dentry(e->dentry);
1da177e4
LT
730}
731
732/* /<entry> */
733
734static ssize_t
e6084d4a 735bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
1da177e4 736{
496ad9aa 737 Node *e = file_inode(file)->i_private;
1da177e4
LT
738 ssize_t res;
739 char *page;
1da177e4 740
e6084d4a
MF
741 page = (char *) __get_free_page(GFP_KERNEL);
742 if (!page)
1da177e4
LT
743 return -ENOMEM;
744
745 entry_status(e, page);
1da177e4 746
6e2c10a1
AM
747 res = simple_read_from_buffer(buf, nbytes, ppos, page, strlen(page));
748
1da177e4
LT
749 free_page((unsigned long) page);
750 return res;
751}
752
753static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
754 size_t count, loff_t *ppos)
755{
1c5976ef
CB
756 struct inode *inode = file_inode(file);
757 Node *e = inode->i_private;
1da177e4
LT
758 int res = parse_command(buffer, count);
759
760 switch (res) {
e6084d4a
MF
761 case 1:
762 /* Disable this handler. */
763 clear_bit(Enabled, &e->flags);
764 break;
765 case 2:
766 /* Enable this handler. */
767 set_bit(Enabled, &e->flags);
768 break;
769 case 3:
770 /* Delete this handler. */
1c5976ef
CB
771 inode = d_inode(inode->i_sb->s_root);
772 inode_lock(inode);
1da177e4 773
1c5976ef
CB
774 /*
775 * In order to add new element or remove elements from the list
776 * via bm_{entry,register,status}_write() inode_lock() on the
777 * root inode must be held.
778 * The lock is exclusive ensuring that the list can't be
779 * modified. Only load_misc_binary() can access but does so
780 * read-only. So we only need to take the write lock when we
781 * actually remove the entry from the list.
782 */
baba1b29 783 if (!list_empty(&e->list))
21ca59b3 784 remove_binfmt_handler(i_binfmt_misc(inode), e);
1da177e4 785
1c5976ef 786 inode_unlock(inode);
e6084d4a
MF
787 break;
788 default:
789 return res;
1da177e4 790 }
e6084d4a 791
1da177e4
LT
792 return count;
793}
794
4b6f5d20 795static const struct file_operations bm_entry_operations = {
1da177e4
LT
796 .read = bm_entry_read,
797 .write = bm_entry_write,
6038f373 798 .llseek = default_llseek,
1da177e4
LT
799};
800
801/* /register */
802
803static ssize_t bm_register_write(struct file *file, const char __user *buffer,
804 size_t count, loff_t *ppos)
805{
806 Node *e;
807 struct inode *inode;
ea7d4c04
AV
808 struct super_block *sb = file_inode(file)->i_sb;
809 struct dentry *root = sb->s_root, *dentry;
21ca59b3 810 struct binfmt_misc *misc;
1da177e4 811 int err = 0;
e7850f4d 812 struct file *f = NULL;
1da177e4
LT
813
814 e = create_entry(buffer, count);
815
816 if (IS_ERR(e))
817 return PTR_ERR(e);
818
e7850f4d 819 if (e->flags & MISC_FMT_OPEN_FILE) {
21ca59b3
CB
820 const struct cred *old_cred;
821
822 /*
823 * Now that we support unprivileged binfmt_misc mounts make
824 * sure we use the credentials that the register @file was
825 * opened with to also open the interpreter. Before that this
826 * didn't matter much as only a privileged process could open
827 * the register file.
828 */
829 old_cred = override_creds(file->f_cred);
e7850f4d 830 f = open_exec(e->interpreter);
21ca59b3 831 revert_creds(old_cred);
e7850f4d
LR
832 if (IS_ERR(f)) {
833 pr_notice("register: failed to install interpreter file %s\n",
834 e->interpreter);
835 kfree(e);
836 return PTR_ERR(f);
837 }
838 e->interp_file = f;
839 }
840
5955102c 841 inode_lock(d_inode(root));
1da177e4
LT
842 dentry = lookup_one_len(e->name, root, strlen(e->name));
843 err = PTR_ERR(dentry);
844 if (IS_ERR(dentry))
845 goto out;
846
847 err = -EEXIST;
75c3cfa8 848 if (d_really_is_positive(dentry))
1da177e4
LT
849 goto out2;
850
851 inode = bm_get_inode(sb, S_IFREG | 0644);
852
853 err = -ENOMEM;
854 if (!inode)
855 goto out2;
856
1c5976ef 857 refcount_set(&e->users, 1);
1da177e4 858 e->dentry = dget(dentry);
8e18e294 859 inode->i_private = e;
1da177e4
LT
860 inode->i_fop = &bm_entry_operations;
861
862 d_instantiate(dentry, inode);
21ca59b3
CB
863 misc = i_binfmt_misc(inode);
864 write_lock(&misc->entries_lock);
865 list_add(&e->list, &misc->entries);
866 write_unlock(&misc->entries_lock);
1da177e4
LT
867
868 err = 0;
869out2:
870 dput(dentry);
871out:
5955102c 872 inode_unlock(d_inode(root));
1da177e4
LT
873
874 if (err) {
e7850f4d
LR
875 if (f)
876 filp_close(f, NULL);
1da177e4 877 kfree(e);
948b701a 878 return err;
1da177e4
LT
879 }
880 return count;
881}
882
4b6f5d20 883static const struct file_operations bm_register_operations = {
1da177e4 884 .write = bm_register_write,
6038f373 885 .llseek = noop_llseek,
1da177e4
LT
886};
887
888/* /status */
889
890static ssize_t
891bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
892{
21ca59b3
CB
893 struct binfmt_misc *misc;
894 char *s;
1da177e4 895
21ca59b3
CB
896 misc = i_binfmt_misc(file_inode(file));
897 s = misc->enabled ? "enabled\n" : "disabled\n";
92f4c701 898 return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
1da177e4
LT
899}
900
e6084d4a 901static ssize_t bm_status_write(struct file *file, const char __user *buffer,
1da177e4
LT
902 size_t count, loff_t *ppos)
903{
21ca59b3 904 struct binfmt_misc *misc;
1da177e4 905 int res = parse_command(buffer, count);
1c5976ef
CB
906 Node *e, *next;
907 struct inode *inode;
1da177e4 908
21ca59b3 909 misc = i_binfmt_misc(file_inode(file));
1da177e4 910 switch (res) {
e6084d4a
MF
911 case 1:
912 /* Disable all handlers. */
21ca59b3 913 misc->enabled = false;
e6084d4a
MF
914 break;
915 case 2:
916 /* Enable all handlers. */
21ca59b3 917 misc->enabled = true;
e6084d4a
MF
918 break;
919 case 3:
920 /* Delete all handlers. */
1c5976ef
CB
921 inode = d_inode(file_inode(file)->i_sb->s_root);
922 inode_lock(inode);
1da177e4 923
1c5976ef
CB
924 /*
925 * In order to add new element or remove elements from the list
926 * via bm_{entry,register,status}_write() inode_lock() on the
927 * root inode must be held.
928 * The lock is exclusive ensuring that the list can't be
929 * modified. Only load_misc_binary() can access but does so
930 * read-only. So we only need to take the write lock when we
931 * actually remove the entry from the list.
932 */
21ca59b3
CB
933 list_for_each_entry_safe(e, next, &misc->entries, list)
934 remove_binfmt_handler(misc, e);
1da177e4 935
1c5976ef 936 inode_unlock(inode);
e6084d4a
MF
937 break;
938 default:
939 return res;
1da177e4 940 }
e6084d4a 941
1da177e4
LT
942 return count;
943}
944
4b6f5d20 945static const struct file_operations bm_status_operations = {
1da177e4
LT
946 .read = bm_status_read,
947 .write = bm_status_write,
6038f373 948 .llseek = default_llseek,
1da177e4
LT
949};
950
951/* Superblock handling */
952
21ca59b3
CB
953static void bm_put_super(struct super_block *sb)
954{
955 struct user_namespace *user_ns = sb->s_fs_info;
956
957 sb->s_fs_info = NULL;
958 put_user_ns(user_ns);
959}
960
ee9b6d61 961static const struct super_operations s_ops = {
1da177e4 962 .statfs = simple_statfs,
b57922d9 963 .evict_inode = bm_evict_inode,
21ca59b3 964 .put_super = bm_put_super,
1da177e4
LT
965};
966
bc99a664 967static int bm_fill_super(struct super_block *sb, struct fs_context *fc)
1da177e4 968{
e6084d4a 969 int err;
21ca59b3
CB
970 struct user_namespace *user_ns = sb->s_user_ns;
971 struct binfmt_misc *misc;
cda37124 972 static const struct tree_descr bm_files[] = {
1a1c9bb4
JL
973 [2] = {"status", &bm_status_operations, S_IWUSR|S_IRUGO},
974 [3] = {"register", &bm_register_operations, S_IWUSR},
1da177e4
LT
975 /* last one */ {""}
976 };
e6084d4a 977
21ca59b3
CB
978 if (WARN_ON(user_ns != current_user_ns()))
979 return -EINVAL;
980
981 /*
982 * Lazily allocate a new binfmt_misc instance for this namespace, i.e.
983 * do it here during the first mount of binfmt_misc. We don't need to
984 * waste memory for every user namespace allocation. It's likely much
985 * more common to not mount a separate binfmt_misc instance than it is
986 * to mount one.
987 *
988 * While multiple superblocks can exist they are keyed by userns in
989 * s_fs_info for binfmt_misc. Hence, the vfs guarantees that
990 * bm_fill_super() is called exactly once whenever a binfmt_misc
991 * superblock for a userns is created. This in turn lets us conclude
992 * that when a binfmt_misc superblock is created for the first time for
993 * a userns there's no one racing us. Therefore we don't need any
994 * barriers when we dereference binfmt_misc.
995 */
996 misc = user_ns->binfmt_misc;
997 if (!misc) {
998 /*
999 * If it turns out that most user namespaces actually want to
1000 * register their own binary type handler and therefore all
1001 * create their own separate binfm_misc mounts we should
1002 * consider turning this into a kmem cache.
1003 */
1004 misc = kzalloc(sizeof(struct binfmt_misc), GFP_KERNEL);
1005 if (!misc)
1006 return -ENOMEM;
1007
1008 INIT_LIST_HEAD(&misc->entries);
1009 rwlock_init(&misc->entries_lock);
1010
1011 /* Pairs with smp_load_acquire() in load_binfmt_misc(). */
1012 smp_store_release(&user_ns->binfmt_misc, misc);
1013 }
1014
1015 /*
1016 * When the binfmt_misc superblock for this userns is shutdown
1017 * ->enabled might have been set to false and we don't reinitialize
1018 * ->enabled again in put_super() as someone might already be mounting
1019 * binfmt_misc again. It also would be pointless since by the time
1020 * ->put_super() is called we know that the binary type list for this
1021 * bintfmt_misc mount is empty making load_misc_binary() return
1022 * -ENOEXEC independent of whether ->enabled is true. Instead, if
1023 * someone mounts binfmt_misc for the first time or again we simply
1024 * reset ->enabled to true.
1025 */
1026 misc->enabled = true;
1027
e6084d4a 1028 err = simple_fill_super(sb, BINFMTFS_MAGIC, bm_files);
1da177e4
LT
1029 if (!err)
1030 sb->s_op = &s_ops;
1031 return err;
1032}
1033
21ca59b3
CB
1034static void bm_free(struct fs_context *fc)
1035{
1036 if (fc->s_fs_info)
1037 put_user_ns(fc->s_fs_info);
1038}
1039
bc99a664 1040static int bm_get_tree(struct fs_context *fc)
1da177e4 1041{
21ca59b3 1042 return get_tree_keyed(fc, bm_fill_super, get_user_ns(fc->user_ns));
bc99a664
DH
1043}
1044
1045static const struct fs_context_operations bm_context_ops = {
21ca59b3 1046 .free = bm_free,
bc99a664
DH
1047 .get_tree = bm_get_tree,
1048};
1049
1050static int bm_init_fs_context(struct fs_context *fc)
1051{
1052 fc->ops = &bm_context_ops;
1053 return 0;
1da177e4
LT
1054}
1055
1056static struct linux_binfmt misc_format = {
1057 .module = THIS_MODULE,
1058 .load_binary = load_misc_binary,
1059};
1060
1061static struct file_system_type bm_fs_type = {
1062 .owner = THIS_MODULE,
1063 .name = "binfmt_misc",
bc99a664 1064 .init_fs_context = bm_init_fs_context,
21ca59b3 1065 .fs_flags = FS_USERNS_MOUNT,
1da177e4
LT
1066 .kill_sb = kill_litter_super,
1067};
7f78e035 1068MODULE_ALIAS_FS("binfmt_misc");
1da177e4
LT
1069
1070static int __init init_misc_binfmt(void)
1071{
1072 int err = register_filesystem(&bm_fs_type);
8fc3dc5a
AV
1073 if (!err)
1074 insert_binfmt(&misc_format);
b42bc9a3 1075 return err;
1da177e4
LT
1076}
1077
1078static void __exit exit_misc_binfmt(void)
1079{
1080 unregister_binfmt(&misc_format);
1081 unregister_filesystem(&bm_fs_type);
1082}
1083
1084core_initcall(init_misc_binfmt);
1085module_exit(exit_misc_binfmt);
2c2a3f62 1086MODULE_DESCRIPTION("Kernel support for miscellaneous binaries");
1da177e4 1087MODULE_LICENSE("GPL");
This page took 1.264551 seconds and 4 git commands to generate.