* QEMU backup
*
* Copyright (C) 2013 Proxmox Server Solutions
+ * Copyright (c) 2019 Virtuozzo International GmbH.
*
* Authors:
#include "block/block_int.h"
#include "block/blockjob_int.h"
#include "block/block_backup.h"
+#include "block/block-copy.h"
#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
#include "qemu/error-report.h"
-#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
+#include "block/backup-top.h"
-typedef struct CowRequest {
- int64_t start_byte;
- int64_t end_byte;
- QLIST_ENTRY(CowRequest) list;
- CoQueue wait_queue; /* coroutines blocked on this request */
-} CowRequest;
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
+/*
+ * State of one backup block job. Job callbacks receive the embedded Job
+ * (common.job) and recover this struct with container_of().
+ */
typedef struct BackupBlockJob {
BlockJob common;
- BlockBackend *target;
+ BlockDriverState *backup_top; /* node returned by bdrv_backup_top_append(); dropped in backup_clean() */
+ BlockDriverState *source_bs; /* the bs argument given to backup_job_create() */
BdrvDirtyBitmap *sync_bitmap;
- BdrvDirtyBitmap *copy_bitmap;
MirrorSyncMode sync_mode;
BitmapSyncMode bitmap_mode;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
- CoRwlock flush_rwlock;
uint64_t len;
uint64_t bytes_read; /* fed to the rate limiter, then reset to 0; grown by the progress callback */
int64_t cluster_size;
- NotifierWithReturn before_write;
- QLIST_HEAD(, CowRequest) inflight_reqs;
-
- bool use_copy_range;
- int64_t copy_range_size;
- BdrvRequestFlags write_flags;
- bool initializing_bitmap;
+ BlockCopyState *bcs; /* block-copy state; the job reads copy_bitmap and skip_unallocated through it */
} BackupBlockJob;
static const BlockJobDriver backup_job_driver;
-/* See if in-flight requests overlap and wait for them to complete */
-static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
- int64_t start,
- int64_t end)
-{
- CowRequest *req;
- bool retry;
-
- do {
- retry = false;
- QLIST_FOREACH(req, &job->inflight_reqs, list) {
- if (end > req->start_byte && start < req->end_byte) {
- qemu_co_queue_wait(&req->wait_queue, NULL);
- retry = true;
- break;
- }
- }
- } while (retry);
-}
-
-/* Keep track of an in-flight request */
-static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
- int64_t start, int64_t end)
-{
- req->start_byte = start;
- req->end_byte = end;
- qemu_co_queue_init(&req->wait_queue);
- QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
-}
-
-/* Forget about a completed request */
-static void cow_request_end(CowRequest *req)
-{
- QLIST_REMOVE(req, list);
- qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-/* Copy range to target with a bounce buffer and return the bytes copied. If
- * error occurred, return a negative error number */
-static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
- int64_t start,
- int64_t end,
- bool is_write_notifier,
- bool *error_is_read,
- void **bounce_buffer)
-{
- int ret;
- BlockBackend *blk = job->common.blk;
- int nbytes;
- int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
-
- assert(QEMU_IS_ALIGNED(start, job->cluster_size));
- bdrv_reset_dirty_bitmap(job->copy_bitmap, start, job->cluster_size);
- nbytes = MIN(job->cluster_size, job->len - start);
- if (!*bounce_buffer) {
- *bounce_buffer = blk_blockalign(blk, job->cluster_size);
- }
-
- ret = blk_co_pread(blk, start, nbytes, *bounce_buffer, read_flags);
- if (ret < 0) {
- trace_backup_do_cow_read_fail(job, start, ret);
- if (error_is_read) {
- *error_is_read = true;
- }
- goto fail;
- }
-
- ret = blk_co_pwrite(job->target, start, nbytes, *bounce_buffer,
- job->write_flags);
- if (ret < 0) {
- trace_backup_do_cow_write_fail(job, start, ret);
- if (error_is_read) {
- *error_is_read = false;
- }
- goto fail;
- }
-
- return nbytes;
-fail:
- bdrv_set_dirty_bitmap(job->copy_bitmap, start, job->cluster_size);
- return ret;
-
-}
-
-/* Copy range to target and return the bytes copied. If error occurred, return a
- * negative error number. */
-static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
- int64_t start,
- int64_t end,
- bool is_write_notifier)
-{
- int ret;
- int nr_clusters;
- BlockBackend *blk = job->common.blk;
- int nbytes;
- int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
-
- assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
- assert(QEMU_IS_ALIGNED(start, job->cluster_size));
- nbytes = MIN(job->copy_range_size, MIN(end, job->len) - start);
- nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
- bdrv_reset_dirty_bitmap(job->copy_bitmap, start,
- job->cluster_size * nr_clusters);
- ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
- read_flags, job->write_flags);
- if (ret < 0) {
- trace_backup_do_cow_copy_range_fail(job, start, ret);
- bdrv_set_dirty_bitmap(job->copy_bitmap, start,
- job->cluster_size * nr_clusters);
- return ret;
- }
-
- return nbytes;
-}
-
-/*
- * Check if the cluster starting at offset is allocated or not.
- * return via pnum the number of contiguous clusters sharing this allocation.
- */
-static int backup_is_cluster_allocated(BackupBlockJob *s, int64_t offset,
- int64_t *pnum)
+/*
+ * Byte-progress callback handed to block_copy_set_callbacks().
+ * @bytes is added both to the job's rate-limit counter (bytes_read) and to
+ * the job progress; @opaque is the BackupBlockJob.
+ */
+static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
{
- BlockDriverState *bs = blk_bs(s->common.blk);
- int64_t count, total_count = 0;
- int64_t bytes = s->len - offset;
- int ret;
-
- assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-
- while (true) {
- ret = bdrv_is_allocated(bs, offset, bytes, &count);
- if (ret < 0) {
- return ret;
- }
-
- total_count += count;
-
- if (ret || count == 0) {
- /*
- * ret: partial segment(s) are considered allocated.
- * otherwise: unallocated tail is treated as an entire segment.
- */
- *pnum = DIV_ROUND_UP(total_count, s->cluster_size);
- return ret;
- }
+ BackupBlockJob *s = opaque;
- /* Unallocated segment(s) with uncertain following segment(s) */
- if (total_count >= s->cluster_size) {
- *pnum = total_count / s->cluster_size;
- return 0;
- }
-
- offset += count;
- bytes -= count;
- }
+ s->bytes_read += bytes;
+ job_progress_update(&s->common.job, bytes);
}
-/**
- * Reset bits in copy_bitmap starting at offset if they represent unallocated
- * data in the image. May reset subsequent contiguous bits.
- * @return 0 when the cluster at @offset was unallocated,
- * 1 otherwise, and -ret on error.
- */
-static int64_t backup_bitmap_reset_unallocated(BackupBlockJob *s,
- int64_t offset, int64_t *count)
+/*
+ * Progress-reset callback handed to block_copy_set_callbacks().
+ * Re-reads the dirty count of the block-copy bitmap and publishes it as the
+ * job's remaining work; @opaque is the BackupBlockJob.
+ */
+static void backup_progress_reset_callback(void *opaque)
{
- int ret;
- int64_t clusters, bytes, estimate;
-
- ret = backup_is_cluster_allocated(s, offset, &clusters);
- if (ret < 0) {
- return ret;
- }
-
- bytes = clusters * s->cluster_size;
+ BackupBlockJob *s = opaque;
+ uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap);
- if (!ret) {
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
- estimate = bdrv_get_dirty_count(s->copy_bitmap);
- job_progress_set_remaining(&s->common.job, estimate);
- }
-
- *count = bytes;
- return ret;
-}
-
-static int coroutine_fn backup_do_copy(BackupBlockJob *job,
- int64_t start, uint64_t bytes,
- bool *error_is_read,
- bool is_write_notifier)
-{
- int ret = 0;
- int64_t end = bytes + start; /* bytes */
- void *bounce_buffer = NULL;
- int64_t status_bytes;
-
- assert(QEMU_IS_ALIGNED(start, job->cluster_size));
- assert(QEMU_IS_ALIGNED(end, job->cluster_size));
-
- while (start < end) {
- int64_t dirty_end;
-
- if (!bdrv_dirty_bitmap_get(job->copy_bitmap, start)) {
- trace_backup_do_cow_skip(job, start);
- start += job->cluster_size;
- continue; /* already copied */
- }
-
- dirty_end = bdrv_dirty_bitmap_next_zero(job->copy_bitmap, start,
- (end - start));
- if (dirty_end < 0) {
- dirty_end = end;
- }
-
- if (job->initializing_bitmap) {
- ret = backup_bitmap_reset_unallocated(job, start, &status_bytes);
- if (ret == 0) {
- trace_backup_do_cow_skip_range(job, start, status_bytes);
- start += status_bytes;
- continue;
- }
- /* Clamp to known allocated region */
- dirty_end = MIN(dirty_end, start + status_bytes);
- }
-
- trace_backup_do_cow_process(job, start);
-
- if (job->use_copy_range) {
- ret = backup_cow_with_offload(job, start, dirty_end,
- is_write_notifier);
- if (ret < 0) {
- job->use_copy_range = false;
- }
- }
- if (!job->use_copy_range) {
- ret = backup_cow_with_bounce_buffer(job, start, dirty_end,
- is_write_notifier,
- error_is_read, &bounce_buffer);
- }
- if (ret < 0) {
- break;
- }
-
- /* Publish progress, guest I/O counts as progress too. Note that the
- * offset field is an opaque progress value, it is not a disk offset.
- */
- start += ret;
- job->bytes_read += ret;
- job_progress_update(&job->common.job, ret);
- ret = 0;
- }
-
- if (bounce_buffer) {
- qemu_vfree(bounce_buffer);
- }
-
- return ret;
+ job_progress_set_remaining(&s->common.job, estimate);
}
+/*
+ * Copy the clusters covering [@offset, @offset + @bytes) with block_copy().
+ * The range is first widened to cluster_size boundaries. @error_is_read is
+ * passed through to block_copy(), whose return value is returned unchanged.
+ */
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
int64_t offset, uint64_t bytes,
- bool *error_is_read,
- bool is_write_notifier)
+ bool *error_is_read)
{
- CowRequest cow_request;
int ret = 0;
int64_t start, end; /* bytes */
- qemu_co_rwlock_rdlock(&job->flush_rwlock);
-
start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
trace_backup_do_cow_enter(job, start, offset, bytes);
- wait_for_overlapping_requests(job, start, end);
- cow_request_begin(&cow_request, job, start, end);
-
- ret = backup_do_copy(job, start, end - start, error_is_read,
- is_write_notifier);
-
- cow_request_end(&cow_request);
+ ret = block_copy(job->bcs, start, end - start, error_is_read);
trace_backup_do_cow_return(job, offset, bytes, ret);
- qemu_co_rwlock_unlock(&job->flush_rwlock);
-
return ret;
}
-static int coroutine_fn backup_before_write_notify(
- NotifierWithReturn *notifier,
- void *opaque)
-{
- BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
- BdrvTrackedRequest *req = opaque;
-
- assert(req->bs == blk_bs(job->common.blk));
- assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
- assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));
-
- return backup_do_cow(job, req->offset, req->bytes, NULL, true);
-}
-
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
BdrvDirtyBitmap *bm;
- BlockDriverState *bs = blk_bs(job->common.blk);
bool sync = (((ret == 0) || (job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS)) \
&& (job->bitmap_mode != BITMAP_SYNC_MODE_NEVER));
* We succeeded, or we always intended to sync the bitmap.
* Delete this bitmap and install the child.
*/
- bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+ bm = bdrv_dirty_bitmap_abdicate(job->sync_bitmap, NULL);
} else {
/*
* We failed, or we never intended to sync the bitmap anyway.
* Merge the successor back into the parent, keeping all data.
*/
- bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+ bm = bdrv_reclaim_dirty_bitmap(job->sync_bitmap, NULL);
}
assert(bm);
if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
/* If we failed and synced, merge in the bits we didn't copy: */
- bdrv_dirty_bitmap_merge_internal(bm, job->copy_bitmap,
+ bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap,
NULL, true);
}
}
+/*
+ * Release the job's backup-top node with bdrv_backup_top_drop(), holding
+ * that node's AioContext for the duration of the call.
+ */
static void backup_clean(Job *job)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
- BlockDriverState *bs = blk_bs(s->common.blk);
+ AioContext *aio_context = bdrv_get_aio_context(s->backup_top);
- if (s->copy_bitmap) {
- bdrv_release_dirty_bitmap(bs, s->copy_bitmap);
- s->copy_bitmap = NULL;
- }
-
- assert(s->target);
- blk_unref(s->target);
- s->target = NULL;
+ aio_context_acquire(aio_context);
+ bdrv_backup_top_drop(s->backup_top);
+ aio_context_release(aio_context);
}
void backup_do_checkpoint(BlockJob *job, Error **errp)
return;
}
- bdrv_set_dirty_bitmap(backup_job->copy_bitmap, 0, backup_job->len);
+ bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0, backup_job->len);
}
static BlockErrorAction backup_error_action(BackupBlockJob *job,
return true;
}
- /* We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
- * return. Without a yield, the VM would not reboot. */
+ /*
+ * We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
+ * return. Without a yield, the VM would not reboot.
+ */
delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
job->bytes_read = 0;
job_sleep_ns(&job->common.job, delay_ns);
BdrvDirtyBitmapIter *bdbi;
int ret = 0;
- bdbi = bdrv_dirty_iter_new(job->copy_bitmap);
+ bdbi = bdrv_dirty_iter_new(job->bcs->copy_bitmap);
while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
do {
if (yield_and_check(job)) {
goto out;
}
- ret = backup_do_cow(job, offset,
- job->cluster_size, &error_is_read, false);
+ ret = backup_do_cow(job, offset, job->cluster_size, &error_is_read);
if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
BLOCK_ERROR_ACTION_REPORT)
{
uint64_t estimate;
if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
- ret = bdrv_dirty_bitmap_merge_internal(job->copy_bitmap,
+ ret = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap,
job->sync_bitmap,
NULL, true);
assert(ret);
* We can't hog the coroutine to initialize this thoroughly.
* Set a flag and resume work when we are able to yield safely.
*/
- job->initializing_bitmap = true;
+ job->bcs->skip_unallocated = true;
}
- bdrv_set_dirty_bitmap(job->copy_bitmap, 0, job->len);
+ bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len);
}
- estimate = bdrv_get_dirty_count(job->copy_bitmap);
+ estimate = bdrv_get_dirty_count(job->bcs->copy_bitmap);
job_progress_set_remaining(&job->common.job, estimate);
}
static int coroutine_fn backup_run(Job *job, Error **errp)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
- BlockDriverState *bs = blk_bs(s->common.blk);
int ret = 0;
- QLIST_INIT(&s->inflight_reqs);
- qemu_co_rwlock_init(&s->flush_rwlock);
-
backup_init_copy_bitmap(s);
- s->before_write.notify = backup_before_write_notify;
- bdrv_add_before_write_notifier(bs, &s->before_write);
-
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
goto out;
}
- ret = backup_bitmap_reset_unallocated(s, offset, &count);
+ ret = block_copy_reset_unallocated(s->bcs, offset, &count);
if (ret < 0) {
goto out;
}
offset += count;
}
- s->initializing_bitmap = false;
+ s->bcs->skip_unallocated = false;
}
if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
- /* All bits are set in copy_bitmap to allow any cluster to be copied.
- * This does not actually require them to be copied. */
+ /*
+ * All bits are set in copy_bitmap to allow any cluster to be copied.
+ * This does not actually require them to be copied.
+ */
while (!job_is_cancelled(job)) {
- /* Yield until the job is cancelled. We just let our before_write
- * notify callback service CoW requests. */
+ /*
+ * Yield until the job is cancelled. This job no longer installs a
+ * before_write notifier; CoW requests are serviced outside this loop
+ * (by the backup-top filter node).
+ */
job_yield(job);
}
} else {
}
out:
- notifier_with_return_remove(&s->before_write);
-
- /* wait until pending backup_do_cow() calls have completed */
- qemu_co_rwlock_wrlock(&s->flush_rwlock);
- qemu_co_rwlock_unlock(&s->flush_rwlock);
-
return ret;
}
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
bool compress,
+ const char *filter_node_name,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
int creation_flags,
{
int64_t len;
BackupBlockJob *job = NULL;
- int ret;
int64_t cluster_size;
- BdrvDirtyBitmap *copy_bitmap = NULL;
+ BdrvRequestFlags write_flags;
+ BlockDriverState *backup_top = NULL;
+ BlockCopyState *bcs = NULL;
assert(bs);
assert(target);
}
/* Create a new bitmap, and freeze/disable this one. */
- if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
+ if (bdrv_dirty_bitmap_create_successor(sync_bitmap, errp) < 0) {
return NULL;
}
}
goto error;
}
- copy_bitmap = bdrv_create_dirty_bitmap(bs, cluster_size, NULL, errp);
- if (!copy_bitmap) {
- goto error;
- }
- bdrv_disable_dirty_bitmap(copy_bitmap);
-
- /* job->len is fixed, so we can't allow resize */
- job = block_job_create(job_id, &backup_job_driver, txn, bs,
- BLK_PERM_CONSISTENT_READ,
- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
- BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
- speed, creation_flags, cb, opaque, errp);
- if (!job) {
- goto error;
- }
-
- /* The target must match the source in size, so no resize here either */
- job->target = blk_new(job->common.job.aio_context,
- BLK_PERM_WRITE,
- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
- BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
- ret = blk_insert_bs(job->target, target, errp);
- if (ret < 0) {
- goto error;
- }
- blk_set_disable_request_queuing(job->target, true);
-
- job->on_source_error = on_source_error;
- job->on_target_error = on_target_error;
- job->sync_mode = sync_mode;
- job->sync_bitmap = sync_bitmap;
- job->bitmap_mode = bitmap_mode;
-
/*
* If source is in backing chain of target assume that target is going to be
* used for "image fleecing", i.e. it should represent a kind of snapshot of
* For more information see commit f8d59dfb40bb and test
* tests/qemu-iotests/222
*/
- job->write_flags =
- (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
- (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+ write_flags = (bdrv_chain_contains(target, bs) ? BDRV_REQ_SERIALISING : 0) |
+ (compress ? BDRV_REQ_WRITE_COMPRESSED : 0);
+
+ /*
+ * &bcs is passed so the block-copy state can be stored in the job
+ * below; a NULL return takes the common error path.
+ */
+ backup_top = bdrv_backup_top_append(bs, target, filter_node_name,
+ cluster_size, write_flags, &bcs, errp);
+ if (!backup_top) {
+ goto error;
+ }
+ /* job->len is fixed, so we can't allow resize */
+ job = block_job_create(job_id, &backup_job_driver, txn, backup_top,
+ 0, BLK_PERM_ALL,
+ speed, creation_flags, cb, opaque, errp);
+ if (!job) {
+ goto error;
+ }
+
+ job->backup_top = backup_top;
+ job->source_bs = bs;
+ job->on_source_error = on_source_error;
+ job->on_target_error = on_target_error;
+ job->sync_mode = sync_mode;
+ job->sync_bitmap = sync_bitmap;
+ job->bitmap_mode = bitmap_mode;
+ job->bcs = bcs;
job->cluster_size = cluster_size;
- job->copy_bitmap = copy_bitmap;
- copy_bitmap = NULL;
- job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
- blk_get_max_transfer(job->target));
- job->copy_range_size = QEMU_ALIGN_DOWN(job->copy_range_size,
- job->cluster_size);
- /*
- * Set use_copy_range, consider the following:
- * 1. Compression is not supported for copy_range.
- * 2. copy_range does not respect max_transfer (it's a TODO), so we factor
- * that in here. If max_transfer is smaller than the job->cluster_size,
- * we do not use copy_range (in that case it's zero after aligning down
- * above).
- */
- job->use_copy_range = !compress && job->copy_range_size > 0;
+ job->len = len;
+
+ block_copy_set_callbacks(bcs, backup_progress_bytes_callback,
+ backup_progress_reset_callback, job);
- /* Required permissions are already taken with target's blk_new() */
+ /* Required permissions are already taken by backup-top target */
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
&error_abort);
- job->len = len;
return &job->common;
error:
- if (copy_bitmap) {
- assert(!job || !job->copy_bitmap);
- bdrv_release_dirty_bitmap(bs, copy_bitmap);
- }
if (sync_bitmap) {
- bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
+ bdrv_reclaim_dirty_bitmap(sync_bitmap, NULL);
}
- if (job) {
- backup_clean(&job->common.job);
- job_early_fail(&job->common.job);
+ if (backup_top) {
+ bdrv_backup_top_drop(backup_top);
}
return NULL;