&dev_replace->scrub_progress, 0, 1);
ret = btrfs_dev_replace_finishing(fs_info, ret);
- if (ret == -EINPROGRESS) {
+ if (ret == -EINPROGRESS)
ret = BTRFS_IOCTL_DEV_REPLACE_RESULT_SCRUB_INPROGRESS;
- } else if (ret != -ECANCELED) {
- WARN_ON(ret);
- }
return ret;
/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
+ btrfs_sysfs_update_devid(tgt_device);
btrfs_rm_dev_replace_free_srcdev(src_device);
/* write back the superblocks */
#include <linux/sched/mm.h>
#include <crypto/hash.h>
#include "ctree.h"
+ #include "discard.h"
#include "volumes.h"
#include "disk-io.h"
#include "ordered-data.h"
* This can easily boost the amount of SYSTEM chunks if cleaner
* thread can't be triggered fast enough, and use up all space
* of btrfs_super_block::sys_chunk_array
+ *
+ * While for dev replace, we need to try our best to mark block
+ * group RO, to prevent race between:
+ * - Write duplication
+ * Contains latest data
+ * - Scrub copy
+ * Contains data from commit tree
+ *
+ * If target block group is not marked RO, nocow writes can
+ * be overwritten by scrub copy, causing data corruption.
+ * So for dev-replace, it's not allowed to continue if a block
+ * group is not RO.
*/
- ret = btrfs_inc_block_group_ro(cache, false);
- scrub_pause_off(fs_info);
-
+ ret = btrfs_inc_block_group_ro(cache, sctx->is_dev_replace);
if (ret == 0) {
ro_set = 1;
- } else if (ret == -ENOSPC) {
+ } else if (ret == -ENOSPC && !sctx->is_dev_replace) {
/*
* btrfs_inc_block_group_ro return -ENOSPC when it
* failed in creating new chunk for metadata.
- * It is not a problem for scrub/replace, because
+ * It is not a problem for scrub, because
* metadata are always cowed, and our scrub paused
* commit_transactions.
*/
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
btrfs_put_block_group(cache);
+ scrub_pause_off(fs_info);
break;
}
+ /*
+ * Now the target block is marked RO, wait for nocow writes to
+ * finish before dev-replace.
+ * COW is fine, as COW never overwrites extents in commit tree.
+ */
+ if (sctx->is_dev_replace) {
+ btrfs_wait_nocow_writers(cache);
+ btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
+ cache->length);
+ }
+
+ scrub_pause_off(fs_info);
down_write(&dev_replace->rwsem);
dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset;
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
cache->used == 0) {
spin_unlock(&cache->lock);
- btrfs_mark_bg_unused(cache);
+ if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
+ btrfs_discard_queue_work(&fs_info->discard_ctl,
+ cache);
+ else
+ btrfs_mark_bg_unused(cache);
} else {
spin_unlock(&cache->lock);
}
/* per-device scrub information */
struct scrub_ctx *scrub_ctx;
- struct btrfs_work work;
-
/* readahead state */
atomic_t reada_in_flight;
u64 reada_next;
atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
struct extent_io_tree alloc_state;
+
+ struct completion kobj_unregister;
+ /* For sysfs/FSID/devinfo/devid/ */
+ struct kobject devid_kobj;
};
/*
write_seqcount_end(&dev->data_seqcount); \
preempt_enable(); \
}
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
#define BTRFS_DEVICE_GETSET_FUNCS(name) \
static inline u64 \
btrfs_device_get_##name(const struct btrfs_device *dev) \
struct btrfs_fs_info *fs_info;
/* sysfs kobjects */
struct kobject fsid_kobj;
- struct kobject *device_dir_kobj;
+ struct kobject *devices_kobj;
struct completion kobj_unregister;
};
struct btrfs_bio **bbio_ret);
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
- int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
- u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);