Git Repo - qemu.git/blob - block/backup.c
block/backup: add 'never' policy to bitmap sync mode
[qemu.git] / block / backup.c
1 /*
2  * QEMU backup
3  *
4  * Copyright (C) 2013 Proxmox Server Solutions
5  *
6  * Authors:
7  *  Dietmar Maurer (dietmar@proxmox.com)
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  *
12  */
13
14 #include "qemu/osdep.h"
15
16 #include "trace.h"
17 #include "block/block.h"
18 #include "block/block_int.h"
19 #include "block/blockjob_int.h"
20 #include "block/block_backup.h"
21 #include "qapi/error.h"
22 #include "qapi/qmp/qerror.h"
23 #include "qemu/ratelimit.h"
24 #include "qemu/cutils.h"
25 #include "sysemu/block-backend.h"
26 #include "qemu/bitmap.h"
27 #include "qemu/error-report.h"
28
/*
 * Fallback copy granularity in bytes (64 KiB).  It is used when the target
 * does not report a cluster size, and as the lower bound for the backup
 * cluster size otherwise.
 */
#define BACKUP_CLUSTER_SIZE_DEFAULT (64 * 1024)
30
31 typedef struct CowRequest {
32     int64_t start_byte;
33     int64_t end_byte;
34     QLIST_ENTRY(CowRequest) list;
35     CoQueue wait_queue; /* coroutines blocked on this request */
36 } CowRequest;
37
38 typedef struct BackupBlockJob {
39     BlockJob common;
40     BlockBackend *target;
41     BdrvDirtyBitmap *sync_bitmap;
42     MirrorSyncMode sync_mode;
43     BitmapSyncMode bitmap_mode;
44     BlockdevOnError on_source_error;
45     BlockdevOnError on_target_error;
46     CoRwlock flush_rwlock;
47     uint64_t len;
48     uint64_t bytes_read;
49     int64_t cluster_size;
50     bool compress;
51     NotifierWithReturn before_write;
52     QLIST_HEAD(, CowRequest) inflight_reqs;
53
54     HBitmap *copy_bitmap;
55     bool use_copy_range;
56     int64_t copy_range_size;
57
58     bool serialize_target_writes;
59 } BackupBlockJob;
60
61 static const BlockJobDriver backup_job_driver;
62
63 /* See if in-flight requests overlap and wait for them to complete */
64 static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
65                                                        int64_t start,
66                                                        int64_t end)
67 {
68     CowRequest *req;
69     bool retry;
70
71     do {
72         retry = false;
73         QLIST_FOREACH(req, &job->inflight_reqs, list) {
74             if (end > req->start_byte && start < req->end_byte) {
75                 qemu_co_queue_wait(&req->wait_queue, NULL);
76                 retry = true;
77                 break;
78             }
79         }
80     } while (retry);
81 }
82
83 /* Keep track of an in-flight request */
84 static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
85                               int64_t start, int64_t end)
86 {
87     req->start_byte = start;
88     req->end_byte = end;
89     qemu_co_queue_init(&req->wait_queue);
90     QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
91 }
92
93 /* Forget about a completed request */
94 static void cow_request_end(CowRequest *req)
95 {
96     QLIST_REMOVE(req, list);
97     qemu_co_queue_restart_all(&req->wait_queue);
98 }
99
100 /* Copy range to target with a bounce buffer and return the bytes copied. If
101  * error occurred, return a negative error number */
102 static int coroutine_fn backup_cow_with_bounce_buffer(BackupBlockJob *job,
103                                                       int64_t start,
104                                                       int64_t end,
105                                                       bool is_write_notifier,
106                                                       bool *error_is_read,
107                                                       void **bounce_buffer)
108 {
109     int ret;
110     BlockBackend *blk = job->common.blk;
111     int nbytes;
112     int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
113     int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
114
115     assert(QEMU_IS_ALIGNED(start, job->cluster_size));
116     hbitmap_reset(job->copy_bitmap, start, job->cluster_size);
117     nbytes = MIN(job->cluster_size, job->len - start);
118     if (!*bounce_buffer) {
119         *bounce_buffer = blk_blockalign(blk, job->cluster_size);
120     }
121
122     ret = blk_co_pread(blk, start, nbytes, *bounce_buffer, read_flags);
123     if (ret < 0) {
124         trace_backup_do_cow_read_fail(job, start, ret);
125         if (error_is_read) {
126             *error_is_read = true;
127         }
128         goto fail;
129     }
130
131     if (buffer_is_zero(*bounce_buffer, nbytes)) {
132         ret = blk_co_pwrite_zeroes(job->target, start,
133                                    nbytes, write_flags | BDRV_REQ_MAY_UNMAP);
134     } else {
135         ret = blk_co_pwrite(job->target, start,
136                             nbytes, *bounce_buffer, write_flags |
137                             (job->compress ? BDRV_REQ_WRITE_COMPRESSED : 0));
138     }
139     if (ret < 0) {
140         trace_backup_do_cow_write_fail(job, start, ret);
141         if (error_is_read) {
142             *error_is_read = false;
143         }
144         goto fail;
145     }
146
147     return nbytes;
148 fail:
149     hbitmap_set(job->copy_bitmap, start, job->cluster_size);
150     return ret;
151
152 }
153
154 /* Copy range to target and return the bytes copied. If error occurred, return a
155  * negative error number. */
156 static int coroutine_fn backup_cow_with_offload(BackupBlockJob *job,
157                                                 int64_t start,
158                                                 int64_t end,
159                                                 bool is_write_notifier)
160 {
161     int ret;
162     int nr_clusters;
163     BlockBackend *blk = job->common.blk;
164     int nbytes;
165     int read_flags = is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0;
166     int write_flags = job->serialize_target_writes ? BDRV_REQ_SERIALISING : 0;
167
168     assert(QEMU_IS_ALIGNED(job->copy_range_size, job->cluster_size));
169     assert(QEMU_IS_ALIGNED(start, job->cluster_size));
170     nbytes = MIN(job->copy_range_size, end - start);
171     nr_clusters = DIV_ROUND_UP(nbytes, job->cluster_size);
172     hbitmap_reset(job->copy_bitmap, start, job->cluster_size * nr_clusters);
173     ret = blk_co_copy_range(blk, start, job->target, start, nbytes,
174                             read_flags, write_flags);
175     if (ret < 0) {
176         trace_backup_do_cow_copy_range_fail(job, start, ret);
177         hbitmap_set(job->copy_bitmap, start, job->cluster_size * nr_clusters);
178         return ret;
179     }
180
181     return nbytes;
182 }
183
184 static int coroutine_fn backup_do_cow(BackupBlockJob *job,
185                                       int64_t offset, uint64_t bytes,
186                                       bool *error_is_read,
187                                       bool is_write_notifier)
188 {
189     CowRequest cow_request;
190     int ret = 0;
191     int64_t start, end; /* bytes */
192     void *bounce_buffer = NULL;
193
194     qemu_co_rwlock_rdlock(&job->flush_rwlock);
195
196     start = QEMU_ALIGN_DOWN(offset, job->cluster_size);
197     end = QEMU_ALIGN_UP(bytes + offset, job->cluster_size);
198
199     trace_backup_do_cow_enter(job, start, offset, bytes);
200
201     wait_for_overlapping_requests(job, start, end);
202     cow_request_begin(&cow_request, job, start, end);
203
204     while (start < end) {
205         int64_t dirty_end;
206
207         if (!hbitmap_get(job->copy_bitmap, start)) {
208             trace_backup_do_cow_skip(job, start);
209             start += job->cluster_size;
210             continue; /* already copied */
211         }
212
213         dirty_end = hbitmap_next_zero(job->copy_bitmap, start, (end - start));
214         if (dirty_end < 0) {
215             dirty_end = end;
216         }
217
218         trace_backup_do_cow_process(job, start);
219
220         if (job->use_copy_range) {
221             ret = backup_cow_with_offload(job, start, dirty_end,
222                                           is_write_notifier);
223             if (ret < 0) {
224                 job->use_copy_range = false;
225             }
226         }
227         if (!job->use_copy_range) {
228             ret = backup_cow_with_bounce_buffer(job, start, dirty_end,
229                                                 is_write_notifier,
230                                                 error_is_read, &bounce_buffer);
231         }
232         if (ret < 0) {
233             break;
234         }
235
236         /* Publish progress, guest I/O counts as progress too.  Note that the
237          * offset field is an opaque progress value, it is not a disk offset.
238          */
239         start += ret;
240         job->bytes_read += ret;
241         job_progress_update(&job->common.job, ret);
242         ret = 0;
243     }
244
245     if (bounce_buffer) {
246         qemu_vfree(bounce_buffer);
247     }
248
249     cow_request_end(&cow_request);
250
251     trace_backup_do_cow_return(job, offset, bytes, ret);
252
253     qemu_co_rwlock_unlock(&job->flush_rwlock);
254
255     return ret;
256 }
257
258 static int coroutine_fn backup_before_write_notify(
259         NotifierWithReturn *notifier,
260         void *opaque)
261 {
262     BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
263     BdrvTrackedRequest *req = opaque;
264
265     assert(req->bs == blk_bs(job->common.blk));
266     assert(QEMU_IS_ALIGNED(req->offset, BDRV_SECTOR_SIZE));
267     assert(QEMU_IS_ALIGNED(req->bytes, BDRV_SECTOR_SIZE));
268
269     return backup_do_cow(job, req->offset, req->bytes, NULL, true);
270 }
271
272 static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
273 {
274     BdrvDirtyBitmap *bm;
275     BlockDriverState *bs = blk_bs(job->common.blk);
276
277     if (ret < 0 || job->bitmap_mode == BITMAP_SYNC_MODE_NEVER) {
278         /*
279          * Failure, or we don't want to synchronize the bitmap.
280          * Merge the successor back into the parent, delete nothing.
281          */
282         bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
283         assert(bm);
284     } else {
285         /* Everything is fine, delete this bitmap and install the backup. */
286         bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
287         assert(bm);
288     }
289 }
290
291 static void backup_commit(Job *job)
292 {
293     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
294     if (s->sync_bitmap) {
295         backup_cleanup_sync_bitmap(s, 0);
296     }
297 }
298
299 static void backup_abort(Job *job)
300 {
301     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
302     if (s->sync_bitmap) {
303         backup_cleanup_sync_bitmap(s, -1);
304     }
305 }
306
307 static void backup_clean(Job *job)
308 {
309     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
310     assert(s->target);
311     blk_unref(s->target);
312     s->target = NULL;
313
314     if (s->copy_bitmap) {
315         hbitmap_free(s->copy_bitmap);
316         s->copy_bitmap = NULL;
317     }
318 }
319
320 void backup_do_checkpoint(BlockJob *job, Error **errp)
321 {
322     BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
323
324     assert(block_job_driver(job) == &backup_job_driver);
325
326     if (backup_job->sync_mode != MIRROR_SYNC_MODE_NONE) {
327         error_setg(errp, "The backup job only supports block checkpoint in"
328                    " sync=none mode");
329         return;
330     }
331
332     hbitmap_set(backup_job->copy_bitmap, 0, backup_job->len);
333 }
334
335 static void backup_drain(BlockJob *job)
336 {
337     BackupBlockJob *s = container_of(job, BackupBlockJob, common);
338
339     /* Need to keep a reference in case blk_drain triggers execution
340      * of backup_complete...
341      */
342     if (s->target) {
343         BlockBackend *target = s->target;
344         blk_ref(target);
345         blk_drain(target);
346         blk_unref(target);
347     }
348 }
349
350 static BlockErrorAction backup_error_action(BackupBlockJob *job,
351                                             bool read, int error)
352 {
353     if (read) {
354         return block_job_error_action(&job->common, job->on_source_error,
355                                       true, error);
356     } else {
357         return block_job_error_action(&job->common, job->on_target_error,
358                                       false, error);
359     }
360 }
361
362 static bool coroutine_fn yield_and_check(BackupBlockJob *job)
363 {
364     uint64_t delay_ns;
365
366     if (job_is_cancelled(&job->common.job)) {
367         return true;
368     }
369
370     /* We need to yield even for delay_ns = 0 so that bdrv_drain_all() can
371      * return. Without a yield, the VM would not reboot. */
372     delay_ns = block_job_ratelimit_get_delay(&job->common, job->bytes_read);
373     job->bytes_read = 0;
374     job_sleep_ns(&job->common.job, delay_ns);
375
376     if (job_is_cancelled(&job->common.job)) {
377         return true;
378     }
379
380     return false;
381 }
382
383 static bool bdrv_is_unallocated_range(BlockDriverState *bs,
384                                       int64_t offset, int64_t bytes)
385 {
386     int64_t end = offset + bytes;
387
388     while (offset < end && !bdrv_is_allocated(bs, offset, bytes, &bytes)) {
389         if (bytes == 0) {
390             return true;
391         }
392         offset += bytes;
393         bytes = end - offset;
394     }
395
396     return offset >= end;
397 }
398
399 static int coroutine_fn backup_loop(BackupBlockJob *job)
400 {
401     int ret;
402     bool error_is_read;
403     int64_t offset;
404     HBitmapIter hbi;
405     BlockDriverState *bs = blk_bs(job->common.blk);
406
407     hbitmap_iter_init(&hbi, job->copy_bitmap, 0);
408     while ((offset = hbitmap_iter_next(&hbi)) != -1) {
409         if (job->sync_mode == MIRROR_SYNC_MODE_TOP &&
410             bdrv_is_unallocated_range(bs, offset, job->cluster_size))
411         {
412             hbitmap_reset(job->copy_bitmap, offset, job->cluster_size);
413             continue;
414         }
415
416         do {
417             if (yield_and_check(job)) {
418                 return 0;
419             }
420             ret = backup_do_cow(job, offset,
421                                 job->cluster_size, &error_is_read, false);
422             if (ret < 0 && backup_error_action(job, error_is_read, -ret) ==
423                            BLOCK_ERROR_ACTION_REPORT)
424             {
425                 return ret;
426             }
427         } while (ret < 0);
428     }
429
430     return 0;
431 }
432
433 /* init copy_bitmap from sync_bitmap */
434 static void backup_incremental_init_copy_bitmap(BackupBlockJob *job)
435 {
436     uint64_t offset = 0;
437     uint64_t bytes = job->len;
438
439     while (bdrv_dirty_bitmap_next_dirty_area(job->sync_bitmap,
440                                              &offset, &bytes))
441     {
442         hbitmap_set(job->copy_bitmap, offset, bytes);
443
444         offset += bytes;
445         if (offset >= job->len) {
446             break;
447         }
448         bytes = job->len - offset;
449     }
450
451     /* TODO job_progress_set_remaining() would make more sense */
452     job_progress_update(&job->common.job,
453         job->len - hbitmap_count(job->copy_bitmap));
454 }
455
456 static int coroutine_fn backup_run(Job *job, Error **errp)
457 {
458     BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
459     BlockDriverState *bs = blk_bs(s->common.blk);
460     int ret = 0;
461
462     QLIST_INIT(&s->inflight_reqs);
463     qemu_co_rwlock_init(&s->flush_rwlock);
464
465     job_progress_set_remaining(job, s->len);
466
467     if (s->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
468         backup_incremental_init_copy_bitmap(s);
469     } else {
470         hbitmap_set(s->copy_bitmap, 0, s->len);
471     }
472
473     s->before_write.notify = backup_before_write_notify;
474     bdrv_add_before_write_notifier(bs, &s->before_write);
475
476     if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
477         /* All bits are set in copy_bitmap to allow any cluster to be copied.
478          * This does not actually require them to be copied. */
479         while (!job_is_cancelled(job)) {
480             /* Yield until the job is cancelled.  We just let our before_write
481              * notify callback service CoW requests. */
482             job_yield(job);
483         }
484     } else {
485         ret = backup_loop(s);
486     }
487
488     notifier_with_return_remove(&s->before_write);
489
490     /* wait until pending backup_do_cow() calls have completed */
491     qemu_co_rwlock_wrlock(&s->flush_rwlock);
492     qemu_co_rwlock_unlock(&s->flush_rwlock);
493
494     return ret;
495 }
496
497 static const BlockJobDriver backup_job_driver = {
498     .job_driver = {
499         .instance_size          = sizeof(BackupBlockJob),
500         .job_type               = JOB_TYPE_BACKUP,
501         .free                   = block_job_free,
502         .user_resume            = block_job_user_resume,
503         .drain                  = block_job_drain,
504         .run                    = backup_run,
505         .commit                 = backup_commit,
506         .abort                  = backup_abort,
507         .clean                  = backup_clean,
508     },
509     .drain                  = backup_drain,
510 };
511
512 static int64_t backup_calculate_cluster_size(BlockDriverState *target,
513                                              Error **errp)
514 {
515     int ret;
516     BlockDriverInfo bdi;
517
518     /*
519      * If there is no backing file on the target, we cannot rely on COW if our
520      * backup cluster size is smaller than the target cluster size. Even for
521      * targets with a backing file, try to avoid COW if possible.
522      */
523     ret = bdrv_get_info(target, &bdi);
524     if (ret == -ENOTSUP && !target->backing) {
525         /* Cluster size is not defined */
526         warn_report("The target block device doesn't provide "
527                     "information about the block size and it doesn't have a "
528                     "backing file. The default block size of %u bytes is "
529                     "used. If the actual block size of the target exceeds "
530                     "this default, the backup may be unusable",
531                     BACKUP_CLUSTER_SIZE_DEFAULT);
532         return BACKUP_CLUSTER_SIZE_DEFAULT;
533     } else if (ret < 0 && !target->backing) {
534         error_setg_errno(errp, -ret,
535             "Couldn't determine the cluster size of the target image, "
536             "which has no backing file");
537         error_append_hint(errp,
538             "Aborting, since this may create an unusable destination image\n");
539         return ret;
540     } else if (ret < 0 && target->backing) {
541         /* Not fatal; just trudge on ahead. */
542         return BACKUP_CLUSTER_SIZE_DEFAULT;
543     }
544
545     return MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
546 }
547
548 BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
549                   BlockDriverState *target, int64_t speed,
550                   MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
551                   BitmapSyncMode bitmap_mode,
552                   bool compress,
553                   BlockdevOnError on_source_error,
554                   BlockdevOnError on_target_error,
555                   int creation_flags,
556                   BlockCompletionFunc *cb, void *opaque,
557                   JobTxn *txn, Error **errp)
558 {
559     int64_t len;
560     BackupBlockJob *job = NULL;
561     int ret;
562     int64_t cluster_size;
563     HBitmap *copy_bitmap = NULL;
564
565     assert(bs);
566     assert(target);
567
568     if (bs == target) {
569         error_setg(errp, "Source and target cannot be the same");
570         return NULL;
571     }
572
573     if (!bdrv_is_inserted(bs)) {
574         error_setg(errp, "Device is not inserted: %s",
575                    bdrv_get_device_name(bs));
576         return NULL;
577     }
578
579     if (!bdrv_is_inserted(target)) {
580         error_setg(errp, "Device is not inserted: %s",
581                    bdrv_get_device_name(target));
582         return NULL;
583     }
584
585     if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) {
586         error_setg(errp, "Compression is not supported for this drive %s",
587                    bdrv_get_device_name(target));
588         return NULL;
589     }
590
591     if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
592         return NULL;
593     }
594
595     if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
596         return NULL;
597     }
598
599     /* QMP interface should have handled translating this to bitmap mode */
600     assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);
601
602     if (sync_mode == MIRROR_SYNC_MODE_BITMAP) {
603         if (!sync_bitmap) {
604             error_setg(errp, "must provide a valid bitmap name for "
605                        "'%s' sync mode", MirrorSyncMode_str(sync_mode));
606             return NULL;
607         }
608
609         /* Create a new bitmap, and freeze/disable this one. */
610         if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
611             return NULL;
612         }
613     } else if (sync_bitmap) {
614         error_setg(errp,
615                    "a bitmap was given to backup_job_create, "
616                    "but it received an incompatible sync_mode (%s)",
617                    MirrorSyncMode_str(sync_mode));
618         return NULL;
619     }
620
621     len = bdrv_getlength(bs);
622     if (len < 0) {
623         error_setg_errno(errp, -len, "unable to get length for '%s'",
624                          bdrv_get_device_name(bs));
625         goto error;
626     }
627
628     cluster_size = backup_calculate_cluster_size(target, errp);
629     if (cluster_size < 0) {
630         goto error;
631     }
632
633     copy_bitmap = hbitmap_alloc(len, ctz32(cluster_size));
634
635     /* job->len is fixed, so we can't allow resize */
636     job = block_job_create(job_id, &backup_job_driver, txn, bs,
637                            BLK_PERM_CONSISTENT_READ,
638                            BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
639                            BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD,
640                            speed, creation_flags, cb, opaque, errp);
641     if (!job) {
642         goto error;
643     }
644
645     /* The target must match the source in size, so no resize here either */
646     job->target = blk_new(job->common.job.aio_context,
647                           BLK_PERM_WRITE,
648                           BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE |
649                           BLK_PERM_WRITE_UNCHANGED | BLK_PERM_GRAPH_MOD);
650     ret = blk_insert_bs(job->target, target, errp);
651     if (ret < 0) {
652         goto error;
653     }
654     blk_set_disable_request_queuing(job->target, true);
655
656     job->on_source_error = on_source_error;
657     job->on_target_error = on_target_error;
658     job->sync_mode = sync_mode;
659     job->sync_bitmap = sync_bitmap;
660     job->bitmap_mode = bitmap_mode;
661     job->compress = compress;
662
663     /* Detect image-fleecing (and similar) schemes */
664     job->serialize_target_writes = bdrv_chain_contains(target, bs);
665     job->cluster_size = cluster_size;
666     job->copy_bitmap = copy_bitmap;
667     copy_bitmap = NULL;
668     job->use_copy_range = !compress; /* compression isn't supported for it */
669     job->copy_range_size = MIN_NON_ZERO(blk_get_max_transfer(job->common.blk),
670                                         blk_get_max_transfer(job->target));
671     job->copy_range_size = MAX(job->cluster_size,
672                                QEMU_ALIGN_UP(job->copy_range_size,
673                                              job->cluster_size));
674
675     /* Required permissions are already taken with target's blk_new() */
676     block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
677                        &error_abort);
678     job->len = len;
679
680     return &job->common;
681
682  error:
683     if (copy_bitmap) {
684         assert(!job || !job->copy_bitmap);
685         hbitmap_free(copy_bitmap);
686     }
687     if (sync_bitmap) {
688         bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
689     }
690     if (job) {
691         backup_clean(&job->common.job);
692         job_early_fail(&job->common.job);
693     }
694
695     return NULL;
696 }
This page took 0.059861 seconds and 4 git commands to generate.