1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef NO_BCACHEFS_CHARDEV
5 #include "bcachefs_ioctl.h"
10 #include "recovery_passes.h"
14 #include "thread_with_file.h"
16 #include <linux/cdev.h>
17 #include <linux/device.h>
19 #include <linux/ioctl.h>
20 #include <linux/major.h>
21 #include <linux/sched/task.h>
22 #include <linux/slab.h>
23 #include <linux/uaccess.h>
/*
 * Resolve the ioctl "dev" argument to a struct bch_dev.
 *
 * With BCH_BY_INDEX set in flags, dev is a member index: values
 * >= c->sb.nr_devices are rejected with -EINVAL, otherwise the device is
 * fetched with bch2_dev_tryget_noerror().  Without that flag, dev is cast to
 * a userspace pointer to a path string, which is copied in with
 * strndup_user() (bounded by PATH_MAX) and resolved with bch2_dev_lookup().
 *
 * Failures are reported as ERR_PTR() values, so callers have to test the
 * result with IS_ERR()/PTR_ERR_OR_ZERO() before using it.
 */
25 /* returns with ref on ca->ref */
26 static struct bch_dev *bch2_device_lookup(struct bch_fs *c, u64 dev,
31 if (flags & BCH_BY_INDEX) {
32 if (dev >= c->sb.nr_devices)
33 return ERR_PTR(-EINVAL);
35 ca = bch2_dev_tryget_noerror(c, dev);
/* presumably taken when the tryget above found no device - confirm */
37 return ERR_PTR(-EINVAL);
41 path = strndup_user((const char __user *)
42 (unsigned long) dev, PATH_MAX);
/* propagate the strndup_user() error pointer unchanged */
44 return ERR_CAST(path);
46 ca = bch2_dev_lookup(c, path);
/*
 * BCH_IOCTL_ASSEMBLE: open a filesystem from a userspace-supplied list of
 * device paths.
 *
 * The fixed-size header is copied from user_arg, then the arg.nr_devs u64
 * entries of user_arg->devs are copied into user_devs.  Each entry is
 * treated as a userspace string pointer and duplicated with strndup_user()
 * into devs[], which is then handed to bch2_fs_open() together with empty
 * mount options.
 *
 * arg.flags and arg.pad are tested for being non-zero; presumably a non-zero
 * value is rejected - the branch body is not visible here, confirm.
 */
54 static long bch2_ioctl_assemble(struct bch_ioctl_assemble __user *user_arg)
56 struct bch_ioctl_assemble arg;
58 u64 *user_devs = NULL;
63 if (copy_from_user(&arg, user_arg, sizeof(arg)))
66 if (arg.flags || arg.pad)
/* kmalloc_array()/kcalloc() take count and element size separately */
69 user_devs = kmalloc_array(arg.nr_devs, sizeof(u64), GFP_KERNEL);
73 devs = kcalloc(arg.nr_devs, sizeof(char *), GFP_KERNEL);
75 if (copy_from_user(user_devs, user_arg->devs,
76 sizeof(u64) * arg.nr_devs))
79 for (i = 0; i < arg.nr_devs; i++) {
80 devs[i] = strndup_user((const char __user *)(unsigned long)
83 ret= PTR_ERR_OR_ZERO(devs[i]);
88 c = bch2_fs_open(devs, arg.nr_devs, bch2_opts_empty());
89 ret = PTR_ERR_OR_ZERO(c);
/* second pass over devs[]; presumably releases the duplicated paths - confirm */
94 for (i = 0; i < arg.nr_devs; i++)
/*
 * BCH_IOCTL_INCREMENTAL: register a single device by path.
 *
 * Copies the argument struct from userspace, tests arg.flags/arg.pad,
 * duplicates the path that arg.dev points to (at most PATH_MAX bytes) and
 * passes it to bch2_fs_open_incremental().  That helper's result is used
 * as a message string: it is printed with pr_err() on the failure path.
 */
100 static long bch2_ioctl_incremental(struct bch_ioctl_incremental __user *user_arg)
102 struct bch_ioctl_incremental arg;
106 if (copy_from_user(&arg, user_arg, sizeof(arg)))
109 if (arg.flags || arg.pad)
112 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
113 ret = PTR_ERR_OR_ZERO(path);
117 err = bch2_fs_open_incremental(path);
121 pr_err("Could not register bcachefs devices: %s", err);
/*
 * Embedded thread_with_stdio; the fsck callbacks recover the enclosing
 * struct fsck_thread from it with container_of().
 */
130 struct thread_with_stdio thr;
/* options parsed from the ioctl's option string for this fsck run */
132 struct bch_opts opts;
/*
 * .exit callback shared by the offline and online fsck ops tables; also
 * called directly on the ioctl error paths.  Recovers the enclosing
 * struct fsck_thread from the embedded thread_with_stdio; the teardown
 * itself is not visible in this view.
 */
135 static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
137 struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
/*
 * Thread body for offline fsck.
 *
 * thr->c may hold an ERR_PTR from bch2_fs_open(), so it is converted to an
 * error code first.  The filesystem is then started with bch2_fs_start(),
 * and the BCH_FS_errors_fixed / BCH_FS_error flag bits are reported to the
 * caller as text on the redirected stdio stream.
 */
141 static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
143 struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
144 struct bch_fs *c = thr->c;
146 int ret = PTR_ERR_OR_ZERO(c);
150 ret = bch2_fs_start(thr->c);
154 if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
155 bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
158 if (test_bit(BCH_FS_error, &c->flags)) {
159 bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
/* thread_with_stdio callbacks used by bch2_ioctl_fsck_offline() */
167 static const struct thread_with_stdio_ops bch2_offline_fsck_ops = {
168 .exit = bch2_fsck_thread_exit,
169 .fn = bch2_fsck_offline_thread_fn,
/*
 * BCH_IOCTL_FSCK_OFFLINE: run fsck on a set of devices that are not
 * currently opened as a filesystem.
 *
 * Visible steps:
 *  - copy the argument header from userspace and test CAP_SYS_ADMIN;
 *  - for each of arg.nr_devs entries, copy the u64 from user_arg->devs[i],
 *    duplicate the path it points to (at most PATH_MAX bytes) and push it
 *    onto the devs darray;
 *  - allocate the fsck_thread and start from empty options, then parse the
 *    userspace option string (at most 1 << 16 bytes) into thr->opts;
 *  - record the address of the thread's stdio redirect in the "stdio"
 *    option and set "nostart", so bch2_fs_open() does not start the
 *    filesystem;
 *  - open the filesystem, downgrade errors=panic to errors=ro, and hand off
 *    to __bch2_run_thread_with_stdio().
 *
 * The tail of the function walks the devs darray, calls
 * bch2_fsck_thread_exit() and logs ret with pr_err(); the conditions
 * guarding those steps are not visible in this view.
 */
172 static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
174 struct bch_ioctl_fsck_offline arg;
175 struct fsck_thread *thr = NULL;
176 darray_str(devs) = {};
179 if (copy_from_user(&arg, user_arg, sizeof(arg)))
185 if (!capable(CAP_SYS_ADMIN))
188 for (size_t i = 0; i < arg.nr_devs; i++) {
190 ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
194 char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
195 ret = PTR_ERR_OR_ZERO(dev_str);
199 ret = darray_push(&devs, dev_str);
206 thr = kzalloc(sizeof(*thr), GFP_KERNEL);
212 thr->opts = bch2_opts_empty();
215 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
/* a strndup_user() failure short-circuits the option parsing */
217 ret = PTR_ERR_OR_ZERO(optstr) ?:
218 bch2_parse_mount_opts(NULL, &thr->opts, optstr);
226 opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
228 /* We need request_key() to be called before we punt to kthread: */
229 opt_set(thr->opts, nostart, true);
231 bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);
/* thr->c may be an ERR_PTR here; the thread function converts it to an error */
233 thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);
235 if (!IS_ERR(thr->c) &&
236 thr->c->opts.errors == BCH_ON_ERROR_panic)
237 thr->c->opts.errors = BCH_ON_ERROR_ro;
239 ret = __bch2_run_thread_with_stdio(&thr->thr);
241 darray_for_each(devs, i)
247 bch2_fsck_thread_exit(&thr->thr);
248 pr_err("ret %s", bch2_err_str(ret));
/*
 * Dispatcher for ioctls that do not act on an already-open filesystem;
 * bch2_chardev_ioctl() calls it when no struct bch_fs was found for the
 * minor.  ASSEMBLE and INCREMENTAL return their handler's result directly;
 * the FSCK_OFFLINE result is stored in ret, which is translated with
 * bch2_err_class().
 */
252 static long bch2_global_ioctl(unsigned cmd, void __user *arg)
258 case BCH_IOCTL_ASSEMBLE:
259 return bch2_ioctl_assemble(arg);
260 case BCH_IOCTL_INCREMENTAL:
261 return bch2_ioctl_incremental(arg);
263 case BCH_IOCTL_FSCK_OFFLINE: {
264 ret = bch2_ioctl_fsck_offline(arg);
273 ret = bch2_err_class(ret);
/*
 * BCH_IOCTL_QUERY_UUID: copy c->sb.user_uuid into user_arg->uuid.
 * Returns the result of copy_to_user_errcode().
 */
277 static long bch2_ioctl_query_uuid(struct bch_fs *c,
278 struct bch_ioctl_query_uuid __user *user_arg)
280 return copy_to_user_errcode(&user_arg->uuid, &c->sb.user_uuid,
281 sizeof(c->sb.user_uuid));
/*
 * BCH_IOCTL_START: start the filesystem via bch2_fs_start().
 * Tests CAP_SYS_ADMIN and the arg.flags/arg.pad fields first; on the
 * success path the return value is that of bch2_fs_start().
 */
285 static long bch2_ioctl_start(struct bch_fs *c, struct bch_ioctl_start arg)
287 if (!capable(CAP_SYS_ADMIN))
290 if (arg.flags || arg.pad)
293 return bch2_fs_start(c);
/*
 * BCH_IOCTL_STOP handler.  Tests CAP_SYS_ADMIN; the stop operation itself
 * is not visible in this view.
 */
296 static long bch2_ioctl_stop(struct bch_fs *c)
298 if (!capable(CAP_SYS_ADMIN))
/*
 * BCH_IOCTL_DISK_ADD: add the device whose path userspace supplied in
 * arg.dev.  Tests CAP_SYS_ADMIN and arg.flags/arg.pad, duplicates the path
 * (at most PATH_MAX bytes) and calls bch2_dev_add().
 */
306 static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
311 if (!capable(CAP_SYS_ADMIN))
314 if (arg.flags || arg.pad)
317 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
318 ret = PTR_ERR_OR_ZERO(path);
322 ret = bch2_dev_add(c, path);
/*
 * BCH_IOCTL_DISK_REMOVE: remove a member device.
 * Tests CAP_SYS_ADMIN, masks arg.flags against the permitted set (the
 * BCH_FORCE_IF_* bits shown plus whatever the hidden remainder of the
 * expression allows), resolves the device with bch2_device_lookup() and
 * returns the result of bch2_dev_remove().
 */
329 static long bch2_ioctl_disk_remove(struct bch_fs *c, struct bch_ioctl_disk arg)
333 if (!capable(CAP_SYS_ADMIN))
336 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
337 BCH_FORCE_IF_METADATA_LOST|
338 BCH_FORCE_IF_DEGRADED|
343 ca = bch2_device_lookup(c, arg.dev, arg.flags);
347 return bch2_dev_remove(c, ca, arg.flags);
/*
 * BCH_IOCTL_DISK_ONLINE: bring a device online by path.
 * Tests CAP_SYS_ADMIN and arg.flags/arg.pad, duplicates the userspace path
 * in arg.dev (at most PATH_MAX bytes) and calls bch2_dev_online().
 */
350 static long bch2_ioctl_disk_online(struct bch_fs *c, struct bch_ioctl_disk arg)
355 if (!capable(CAP_SYS_ADMIN))
358 if (arg.flags || arg.pad)
361 path = strndup_user((const char __user *)(unsigned long) arg.dev, PATH_MAX);
362 ret = PTR_ERR_OR_ZERO(path);
366 ret = bch2_dev_online(c, path);
/*
 * BCH_IOCTL_DISK_OFFLINE: take a member device offline.
 * Tests CAP_SYS_ADMIN, masks arg.flags against the permitted set, resolves
 * the device with bch2_device_lookup() and calls bch2_dev_offline() with
 * the caller's flags.
 */
371 static long bch2_ioctl_disk_offline(struct bch_fs *c, struct bch_ioctl_disk arg)
376 if (!capable(CAP_SYS_ADMIN))
379 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
380 BCH_FORCE_IF_METADATA_LOST|
381 BCH_FORCE_IF_DEGRADED|
386 ca = bch2_device_lookup(c, arg.dev, arg.flags);
390 ret = bch2_dev_offline(c, ca, arg.flags);
/*
 * BCH_IOCTL_DISK_SET_STATE: change a member device's state.
 * Tests CAP_SYS_ADMIN, then validates the request: flags outside the
 * permitted set, any non-zero pad byte, or a new_state value
 * >= BCH_MEMBER_STATE_NR all trip the check.  The device is resolved with
 * bch2_device_lookup() and the change applied with bch2_dev_set_state();
 * a failure is logged with bch_err().
 */
395 static long bch2_ioctl_disk_set_state(struct bch_fs *c,
396 struct bch_ioctl_disk_set_state arg)
401 if (!capable(CAP_SYS_ADMIN))
404 if ((arg.flags & ~(BCH_FORCE_IF_DATA_LOST|
405 BCH_FORCE_IF_METADATA_LOST|
406 BCH_FORCE_IF_DEGRADED|
408 arg.pad[0] || arg.pad[1] || arg.pad[2] ||
409 arg.new_state >= BCH_MEMBER_STATE_NR)
412 ca = bch2_device_lookup(c, arg.dev, arg.flags);
416 ret = bch2_dev_set_state(c, ca, arg.new_state, arg.flags);
418 bch_err(c, "Error setting device state: %s", bch2_err_str(ret));
/*
 * Per-job context for BCH_IOCTL_DATA.  The thread and file callbacks are
 * handed a pointer to the embedded thr member and recover this struct with
 * container_of().
 */
424 struct bch_data_ctx {
425 struct thread_with_file thr;
/* the request, passed by value to bch2_data_job() */
428 struct bch_ioctl_data arg;
/* progress counters, sampled by bch2_data_job_read() */
429 struct bch_move_stats stats;
/*
 * Thread body for a data job: runs bch2_data_job() and stores its result in
 * ctx->thr.ret.  Afterwards stats.data_type is set to U8_MAX; since
 * bch2_data_job_read() reports that field in its progress event, this is
 * presumably how userspace sees that the job has finished - confirm.
 */
432 static int bch2_data_thread(void *arg)
434 struct bch_data_ctx *ctx = container_of(arg, struct bch_data_ctx, thr);
436 ctx->thr.ret = bch2_data_job(ctx->c, &ctx->stats, ctx->arg);
437 ctx->stats.data_type = U8_MAX;
/*
 * .release handler for the data-job file: recovers the job context from
 * file->private_data and calls bch2_thread_with_file_exit() on its thread.
 */
441 static int bch2_data_job_release(struct inode *inode, struct file *file)
443 struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
445 bch2_thread_with_file_exit(&ctx->thr);
/*
 * .read handler for the data-job file: reports job progress.
 *
 * Builds a BCH_DATA_EVENT_PROGRESS event from the job's move stats (data
 * type, btree id, position, sectors seen so far) with the filesystem's
 * "used" figure as the total, and copies it to buf.
 *
 * Returns sizeof(e) on success, or the copy_to_user_errcode() error.
 */
450 static ssize_t bch2_data_job_read(struct file *file, char __user *buf,
451 size_t len, loff_t *ppos)
453 struct bch_data_ctx *ctx = container_of(file->private_data, struct bch_data_ctx, thr);
454 struct bch_fs *c = ctx->c;
455 struct bch_ioctl_data_event e = {
456 .type = BCH_DATA_EVENT_PROGRESS,
457 .p.data_type = ctx->stats.data_type,
458 .p.btree_id = ctx->stats.pos.btree,
459 .p.pos = ctx->stats.pos.pos,
460 .p.sectors_done = atomic64_read(&ctx->stats.sectors_seen),
461 .p.sectors_total = bch2_fs_usage_read_short(c).used,
/* GNU "?:" - a non-zero error from the copy wins, otherwise the byte count */
467 return copy_to_user_errcode(buf, &e, sizeof(e)) ?: sizeof(e);
/* file operations for the data-job file: progress reads and teardown */
470 static const struct file_operations bcachefs_data_ops = {
471 .release = bch2_data_job_release,
472 .read = bch2_data_job_read,
/*
 * BCH_IOCTL_DATA: start a background data job.
 * Tests CAP_SYS_ADMIN, tests arg.op against BCH_DATA_OP_NR and arg.flags
 * for being non-zero, allocates a zeroed bch_data_ctx and launches it with
 * bch2_run_thread_with_file().
 */
476 static long bch2_ioctl_data(struct bch_fs *c,
477 struct bch_ioctl_data arg)
479 struct bch_data_ctx *ctx;
482 if (!capable(CAP_SYS_ADMIN))
485 if (arg.op >= BCH_DATA_OP_NR || arg.flags)
488 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
495 ret = bch2_run_thread_with_file(&ctx->thr,
/*
 * BCH_IOCTL_FS_USAGE: report filesystem-wide usage, including one
 * variable-length entry per replicas entry.
 *
 * Userspace states the size of its replicas buffer in
 * user_arg->replica_entries_bytes.  A kernel buffer of sizeof(*arg) plus
 * that many bytes is allocated (size_add() is used for the sum), filled
 * from bch2_fs_usage_read(), and copied back.  On the way out
 * arg->replica_entries_bytes is rewritten to the number of bytes actually
 * filled in, and only that much is copied to userspace.
 *
 * Each replicas entry is bounds-checked twice against the end of the
 * buffer: once for the fixed part, and again via replicas_usage_next()
 * (see the "recheck after setting nr_devs" comment below).
 */
503 static long bch2_ioctl_fs_usage(struct bch_fs *c,
504 struct bch_ioctl_fs_usage __user *user_arg)
506 struct bch_ioctl_fs_usage *arg = NULL;
507 struct bch_replicas_usage *dst_e, *dst_end;
508 struct bch_fs_usage_online *src;
509 u32 replica_entries_bytes;
513 if (!test_bit(BCH_FS_started, &c->flags))
516 if (get_user(replica_entries_bytes, &user_arg->replica_entries_bytes))
519 arg = kzalloc(size_add(sizeof(*arg), replica_entries_bytes), GFP_KERNEL);
523 src = bch2_fs_usage_read(c);
529 arg->capacity = c->capacity;
530 arg->used = bch2_fs_sectors_used(c, src);
531 arg->online_reserved = src->online_reserved;
533 for (i = 0; i < BCH_REPLICAS_MAX; i++)
534 arg->persistent_reserved[i] = src->u.persistent_reserved[i];
/* dst_e walks the output entries; dst_end is the end of the caller-sized area */
536 dst_e = arg->replicas;
537 dst_end = (void *) arg->replicas + replica_entries_bytes;
539 for (i = 0; i < c->replicas.nr; i++) {
540 struct bch_replicas_entry_v1 *src_e =
541 cpu_replicas_entry(&c->replicas, i);
543 /* check that we have enough space for one replicas entry */
544 if (dst_e + 1 > dst_end) {
549 dst_e->sectors = src->u.replicas[i];
552 /* recheck after setting nr_devs: */
553 if (replicas_usage_next(dst_e) > dst_end) {
558 memcpy(dst_e->r.devs, src_e->devs, src_e->nr_devs);
560 dst_e = replicas_usage_next(dst_e);
/* report back how many bytes of replicas entries were written */
563 arg->replica_entries_bytes = (void *) dst_e - (void *) arg->replicas;
565 percpu_up_read(&c->mark_lock);
571 ret = copy_to_user_errcode(user_arg, arg,
572 sizeof(*arg) + arg->replica_entries_bytes);
/*
 * BCH_IOCTL_DEV_USAGE: report per-device usage in a fixed-size struct.
 *
 * Tests that the filesystem has started, copies the request in, checks
 * arg.flags against BCH_BY_INDEX, resolves the device and fills in its
 * member state, bucket size, usable bucket count (nbuckets minus
 * first_bucket) and per-data-type bucket/sector/fragmentation counters for
 * all BCH_DATA_NR types, then copies the struct back to userspace.
 */
578 /* obsolete, didn't allow for new data types: */
579 static long bch2_ioctl_dev_usage(struct bch_fs *c,
580 struct bch_ioctl_dev_usage __user *user_arg)
582 struct bch_ioctl_dev_usage arg;
583 struct bch_dev_usage src;
587 if (!test_bit(BCH_FS_started, &c->flags))
590 if (copy_from_user(&arg, user_arg, sizeof(arg)))
593 if ((arg.flags & ~BCH_BY_INDEX) ||
599 ca = bch2_device_lookup(c, arg.dev, arg.flags);
603 src = bch2_dev_usage_read(ca);
605 arg.state = ca->mi.state;
606 arg.bucket_size = ca->mi.bucket_size;
607 arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
609 for (i = 0; i < BCH_DATA_NR; i++) {
610 arg.d[i].buckets = src.d[i].buckets;
611 arg.d[i].sectors = src.d[i].sectors;
612 arg.d[i].fragmented = src.d[i].fragmented;
617 return copy_to_user_errcode(user_arg, &arg, sizeof(arg));
/*
 * BCH_IOCTL_DEV_USAGE_V2: per-device usage with a caller-sized array of
 * per-data-type entries.
 *
 * The caller's arg.nr_data_types is clamped to BCH_DATA_NR.  The fixed
 * header (with the clamped count) is copied back first, then one
 * bch_ioctl_dev_usage_type is copied to user_arg->d[i] for each reported
 * data type.
 */
620 static long bch2_ioctl_dev_usage_v2(struct bch_fs *c,
621 struct bch_ioctl_dev_usage_v2 __user *user_arg)
623 struct bch_ioctl_dev_usage_v2 arg;
624 struct bch_dev_usage src;
628 if (!test_bit(BCH_FS_started, &c->flags))
631 if (copy_from_user(&arg, user_arg, sizeof(arg)))
634 if ((arg.flags & ~BCH_BY_INDEX) ||
640 ca = bch2_device_lookup(c, arg.dev, arg.flags);
644 src = bch2_dev_usage_read(ca);
646 arg.state = ca->mi.state;
647 arg.bucket_size = ca->mi.bucket_size;
/* never report more types than the kernel knows or than the caller asked for */
648 arg.nr_data_types = min(arg.nr_data_types, BCH_DATA_NR);
649 arg.nr_buckets = ca->mi.nbuckets - ca->mi.first_bucket;
651 ret = copy_to_user_errcode(user_arg, &arg, sizeof(arg));
655 for (unsigned i = 0; i < arg.nr_data_types; i++) {
656 struct bch_ioctl_dev_usage_type t = {
657 .buckets = src.d[i].buckets,
658 .sectors = src.d[i].sectors,
659 .fragmented = src.d[i].fragmented,
662 ret = copy_to_user_errcode(&user_arg->d[i], &t, sizeof(t));
/*
 * BCH_IOCTL_READ_SUPER: copy a superblock to the userspace buffer at arg.sb.
 *
 * Tests CAP_SYS_ADMIN and checks arg.flags against
 * BCH_BY_INDEX|BCH_READ_DEV.  The work is done under c->sb_lock.  With
 * BCH_READ_DEV the device is resolved via bch2_device_lookup(); how sb is
 * chosen in each case is not visible in this view.  The superblock's size
 * (vstruct_bytes(sb)) is compared against the caller's arg.size before the
 * copy.
 */
671 static long bch2_ioctl_read_super(struct bch_fs *c,
672 struct bch_ioctl_read_super arg)
674 struct bch_dev *ca = NULL;
678 if (!capable(CAP_SYS_ADMIN))
681 if ((arg.flags & ~(BCH_BY_INDEX|BCH_READ_DEV)) ||
685 mutex_lock(&c->sb_lock);
687 if (arg.flags & BCH_READ_DEV) {
688 ca = bch2_device_lookup(c, arg.dev, arg.flags);
689 ret = PTR_ERR_OR_ZERO(ca);
/* taken when the superblock is larger than the caller's buffer */
698 if (vstruct_bytes(sb) > arg.size) {
703 ret = copy_to_user_errcode((void __user *)(unsigned long)arg.sb, sb,
708 mutex_unlock(&c->sb_lock);
/*
 * BCH_IOCTL_DISK_GET_IDX: find the online member whose dev_t matches
 * arg.dev (decoded with huge_decode_dev()).
 *
 * Tests CAP_SYS_ADMIN, then walks the online members.  On a match the
 * io_ref is dropped explicitly before leaving the loop early.  Falls
 * through to -BCH_ERR_ENOENT_dev_idx_not_found when nothing matches.
 */
712 static long bch2_ioctl_disk_get_idx(struct bch_fs *c,
713 struct bch_ioctl_disk_get_idx arg)
715 dev_t dev = huge_decode_dev(arg.dev);
717 if (!capable(CAP_SYS_ADMIN))
723 for_each_online_member(c, ca)
724 if (ca->dev == dev) {
725 percpu_ref_put(&ca->io_ref);
729 return -BCH_ERR_ENOENT_dev_idx_not_found;
/*
 * BCH_IOCTL_DISK_RESIZE: resize a member device to arg.nbuckets buckets.
 * Tests CAP_SYS_ADMIN, checks arg.flags against BCH_BY_INDEX, resolves the
 * device with bch2_device_lookup() and calls bch2_dev_resize().
 */
732 static long bch2_ioctl_disk_resize(struct bch_fs *c,
733 struct bch_ioctl_disk_resize arg)
738 if (!capable(CAP_SYS_ADMIN))
741 if ((arg.flags & ~BCH_BY_INDEX) ||
745 ca = bch2_device_lookup(c, arg.dev, arg.flags);
749 ret = bch2_dev_resize(c, ca, arg.nbuckets);
/*
 * BCH_IOCTL_DISK_RESIZE_JOURNAL: set the number of journal buckets on a
 * member device.  Tests CAP_SYS_ADMIN, checks arg.flags against
 * BCH_BY_INDEX, tests arg.nbuckets against U32_MAX, resolves the device and
 * calls bch2_set_nr_journal_buckets().
 */
755 static long bch2_ioctl_disk_resize_journal(struct bch_fs *c,
756 struct bch_ioctl_disk_resize_journal arg)
761 if (!capable(CAP_SYS_ADMIN))
764 if ((arg.flags & ~BCH_BY_INDEX) ||
/* nbuckets is range-checked against U32_MAX before being passed on */
768 if (arg.nbuckets > U32_MAX)
771 ca = bch2_device_lookup(c, arg.dev, arg.flags);
775 ret = bch2_set_nr_journal_buckets(c, ca, arg.nbuckets);
/*
 * Thread body for online fsck.
 *
 * Points c->stdio at this thread's redirect and records the current task
 * in c->stdio_filter.  c->opts.fix_errors is overridden for the run: the
 * caller's value is used if the option was set, and FSCK_FIX_ask is
 * assigned as well (the branch structure around it is not visible here).
 * The saved value is restored afterwards.
 *
 * The recovery passes are run from BCH_RECOVERY_PASS_check_alloc_info with
 * BCH_FS_fsck_running set for the duration.  On the way out
 * c->stdio_filter is cleared and c->online_fsck_mutex, which
 * bch2_ioctl_fsck_online() acquires with down_trylock(), is released.
 */
781 static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
783 struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
784 struct bch_fs *c = thr->c;
786 c->stdio_filter = current;
787 c->stdio = &thr->thr.stdio;
790 * XXX: can we figure out a way to do this without mucking with c->opts?
792 unsigned old_fix_errors = c->opts.fix_errors;
793 if (opt_defined(thr->opts, fix_errors))
794 c->opts.fix_errors = thr->opts.fix_errors;
796 c->opts.fix_errors = FSCK_FIX_ask;
799 set_bit(BCH_FS_fsck_running, &c->flags);
801 c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
802 int ret = bch2_run_online_recovery_passes(c);
804 clear_bit(BCH_FS_fsck_running, &c->flags);
808 c->stdio_filter = NULL;
809 c->opts.fix_errors = old_fix_errors;
811 up(&c->online_fsck_mutex);
/* thread_with_stdio callbacks used by bch2_ioctl_fsck_online() */
816 static const struct thread_with_stdio_ops bch2_online_fsck_ops = {
817 .exit = bch2_fsck_thread_exit,
818 .fn = bch2_fsck_online_thread_fn,
/*
 * BCH_IOCTL_FSCK_ONLINE: run fsck on a mounted filesystem.
 *
 * Tests CAP_SYS_ADMIN, tries to take a ro ref with bch2_ro_ref_tryget(),
 * and tries to acquire c->online_fsck_mutex without blocking.  The option
 * string from userspace (at most 1 << 16 bytes) is parsed into thr->opts
 * before the thread is started with bch2_run_thread_with_stdio().
 *
 * The tail calls bch2_fsck_thread_exit() and releases the mutex here;
 * presumably that is the failure path, since bch2_fsck_online_thread_fn()
 * also releases the mutex when it finishes - confirm.
 */
821 static long bch2_ioctl_fsck_online(struct bch_fs *c,
822 struct bch_ioctl_fsck_online arg)
824 struct fsck_thread *thr = NULL;
830 if (!capable(CAP_SYS_ADMIN))
833 if (!bch2_ro_ref_tryget(c))
/* down_trylock() returns non-zero when the semaphore could not be taken */
836 if (down_trylock(&c->online_fsck_mutex)) {
841 thr = kzalloc(sizeof(*thr), GFP_KERNEL);
848 thr->opts = bch2_opts_empty();
851 char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
853 ret = PTR_ERR_OR_ZERO(optstr) ?:
854 bch2_parse_mount_opts(c, &thr->opts, optstr);
862 ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops);
867 bch2_fsck_thread_exit(&thr->thr);
868 up(&c->online_fsck_mutex);
/*
 * Helper for bch2_fs_ioctl(): copies an argument struct of type _argtype
 * from the userspace pointer "arg" into a local "i" and calls
 * bch2_ioctl_<_name>(c, i) with it by value, storing the result in "ret".
 * Relies on "c", "arg" and "ret" being in scope at the expansion site.
 */
874 #define BCH_IOCTL(_name, _argtype) \
878 if (copy_from_user(&i, arg, sizeof(i))) \
880 ret = bch2_ioctl_##_name(c, i); \
/*
 * Dispatcher for ioctls that act on an open filesystem.
 *
 * The commands are handled in two groups separated by a BCH_FS_started
 * test: the query/usage, start/stop, read-super and get-idx commands come
 * before it, the device management, data job, resize and online fsck
 * commands after it.
 *
 * Handlers that take a user pointer are called directly and their value
 * returned as-is; handlers that take the argument struct by value go
 * through the BCH_IOCTL() macro, whose result ends up in ret and is
 * translated with bch2_err_class().
 */
884 long bch2_fs_ioctl(struct bch_fs *c, unsigned cmd, void __user *arg)
889 case BCH_IOCTL_QUERY_UUID:
890 return bch2_ioctl_query_uuid(c, arg);
891 case BCH_IOCTL_FS_USAGE:
892 return bch2_ioctl_fs_usage(c, arg);
893 case BCH_IOCTL_DEV_USAGE:
894 return bch2_ioctl_dev_usage(c, arg);
895 case BCH_IOCTL_DEV_USAGE_V2:
896 return bch2_ioctl_dev_usage_v2(c, arg);
898 case BCH_IOCTL_START:
899 BCH_IOCTL(start, struct bch_ioctl_start);
901 return bch2_ioctl_stop(c);
903 case BCH_IOCTL_READ_SUPER:
904 BCH_IOCTL(read_super, struct bch_ioctl_read_super);
905 case BCH_IOCTL_DISK_GET_IDX:
906 BCH_IOCTL(disk_get_idx, struct bch_ioctl_disk_get_idx);
/* the commands below are only reached after this started-state test */
909 if (!test_bit(BCH_FS_started, &c->flags))
913 case BCH_IOCTL_DISK_ADD:
914 BCH_IOCTL(disk_add, struct bch_ioctl_disk);
915 case BCH_IOCTL_DISK_REMOVE:
916 BCH_IOCTL(disk_remove, struct bch_ioctl_disk);
917 case BCH_IOCTL_DISK_ONLINE:
918 BCH_IOCTL(disk_online, struct bch_ioctl_disk);
919 case BCH_IOCTL_DISK_OFFLINE:
920 BCH_IOCTL(disk_offline, struct bch_ioctl_disk);
921 case BCH_IOCTL_DISK_SET_STATE:
922 BCH_IOCTL(disk_set_state, struct bch_ioctl_disk_set_state);
924 BCH_IOCTL(data, struct bch_ioctl_data);
925 case BCH_IOCTL_DISK_RESIZE:
926 BCH_IOCTL(disk_resize, struct bch_ioctl_disk_resize);
927 case BCH_IOCTL_DISK_RESIZE_JOURNAL:
928 BCH_IOCTL(disk_resize_journal, struct bch_ioctl_disk_resize_journal);
929 case BCH_IOCTL_FSCK_ONLINE:
930 BCH_IOCTL(fsck_online, struct bch_ioctl_fsck_online);
936 ret = bch2_err_class(ret);
/*
 * Maps a per-filesystem chardev minor number to its struct bch_fs: filled
 * by bch2_fs_chardev_init(), queried by bch2_chardev_ioctl(), emptied by
 * bch2_fs_chardev_exit().
 */
940 static DEFINE_IDR(bch_chardev_minor);
/*
 * unlocked_ioctl entry point for every bcachefs control node.
 *
 * The minor number of the opened inode selects the target: minors below
 * U8_MAX are looked up in bch_chardev_minor to find the filesystem, while
 * U8_MAX itself (the minor of the global "bcachefs-ctl" node created in
 * bch2_chardev_init()) is never looked up.  The call is routed to
 * bch2_fs_ioctl() or bch2_global_ioctl() accordingly.
 */
942 static long bch2_chardev_ioctl(struct file *filp, unsigned cmd, unsigned long v)
944 unsigned minor = iminor(file_inode(filp));
945 struct bch_fs *c = minor < U8_MAX ? idr_find(&bch_chardev_minor, minor) : NULL;
946 void __user *arg = (void __user *) v;
949 ? bch2_fs_ioctl(c, cmd, arg)
950 : bch2_global_ioctl(cmd, arg);
/*
 * File operations registered for the bcachefs chardev major: ioctl-only
 * nodes, opened with nonseekable_open().
 */
953 static const struct file_operations bch_chardev_fops = {
954 .owner = THIS_MODULE,
955 .unlocked_ioctl = bch2_chardev_ioctl,
956 .open = nonseekable_open,
/* major number returned by register_chrdev() in bch2_chardev_init() */
959 static int bch_chardev_major;
/* device class under which the global and per-filesystem nodes are created */
960 static const struct class bch_chardev_class = {
/* the global control device (minor U8_MAX) */
963 static struct device *bch_chardev;
/*
 * Tear down a filesystem's control node: unregister the device if
 * c->chardev holds a valid pointer (it may be NULL or an ERR_PTR when
 * creation never happened or failed), then remove c->minor from
 * bch_chardev_minor.
 */
965 void bch2_fs_chardev_exit(struct bch_fs *c)
967 if (!IS_ERR_OR_NULL(c->chardev))
968 device_unregister(c->chardev);
970 idr_remove(&bch_chardev_minor, c->minor);
/*
 * Create the per-filesystem control node "bcachefs<minor>-ctl".
 *
 * A minor number is allocated from bch_chardev_minor and mapped to c, then
 * the device is created under bch_chardev_class with c as its driver data.
 * Returns PTR_ERR(c->chardev) if device_create() fails.
 *
 * NOTE(review): idr_alloc() is called with end == 0, so nothing visible
 * here keeps the allocated id below U8_MAX, while bch2_chardev_ioctl() only
 * looks up minors < U8_MAX and U8_MAX is the global node's minor - confirm
 * whether an upper bound is intended.
 */
973 int bch2_fs_chardev_init(struct bch_fs *c)
975 c->minor = idr_alloc(&bch_chardev_minor, c, 0, 0, GFP_KERNEL);
979 c->chardev = device_create(&bch_chardev_class, NULL,
980 MKDEV(bch_chardev_major, c->minor), c,
981 "bcachefs%u-ctl", c->minor);
982 if (IS_ERR(c->chardev))
983 return PTR_ERR(c->chardev);
/*
 * Undo bch2_chardev_init(): destroy the global control device (minor
 * U8_MAX), unregister the class, and release the chardev major if one was
 * obtained.
 *
 * NOTE(review): the name passed to unregister_chrdev() here is "bcachefs",
 * whereas bch2_chardev_init() registers and unregisters with
 * "bcachefs-ctl" - confirm whether the strings should match.
 */
988 void bch2_chardev_exit(void)
990 device_destroy(&bch_chardev_class, MKDEV(bch_chardev_major, U8_MAX));
991 class_unregister(&bch_chardev_class);
992 if (bch_chardev_major > 0)
993 unregister_chrdev(bch_chardev_major, "bcachefs");
/*
 * Module-init setup of the bcachefs control interface.
 *
 * Registers a dynamically assigned char major (first argument 0) named
 * "bcachefs-ctl" with bch_chardev_fops, registers bch_chardev_class, and
 * creates the global "bcachefs-ctl" device at minor U8_MAX.
 *
 * A negative return from register_chrdev() is returned as-is.  The tail
 * unwinds in reverse order: the class is unregistered, then the major
 * released.
 */
996 int __init bch2_chardev_init(void)
1000 bch_chardev_major = register_chrdev(0, "bcachefs-ctl", &bch_chardev_fops);
1001 if (bch_chardev_major < 0)
1002 return bch_chardev_major;
1004 ret = class_register(&bch_chardev_class);
1008 bch_chardev = device_create(&bch_chardev_class, NULL,
1009 MKDEV(bch_chardev_major, U8_MAX),
1010 NULL, "bcachefs-ctl");
1011 if (IS_ERR(bch_chardev)) {
1012 ret = PTR_ERR(bch_chardev);
1019 class_unregister(&bch_chardev_class);
1021 unregister_chrdev(bch_chardev_major, "bcachefs-ctl");
1025 #endif /* NO_BCACHEFS_CHARDEV */