Git Repo - qemu.git/blob - block-migration.c
block migration: Add support for restore progress reporting
[qemu.git] / block-migration.c
1 /*
2  * QEMU live block migration
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Liran Schour   <[email protected]>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13
14 #include "qemu-common.h"
15 #include "block_int.h"
16 #include "hw/hw.h"
17 #include "qemu-queue.h"
18 #include "monitor.h"
19 #include "block-migration.h"
20 #include <assert.h>
21
/*
 * Size in bytes of the data buffer allocated and transmitted for every
 * block record (see blk_send() and the qemu_malloc(BLOCK_SIZE) call sites).
 */
#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

/*
 * Record type flags. Each record starts with a 64-bit header in which a
 * value shifted left by BDRV_SECTOR_BITS is OR-ed with one of these flags:
 *   DEVICE_BLOCK - header carries a sector number; device name and
 *                  BLOCK_SIZE bytes of data follow
 *   EOS          - end of the current section
 *   PROGRESS     - header carries a completion percentage
 */
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

/* Sector count passed to bdrv_is_allocated() when skipping unallocated areas */
#define MAX_IS_ALLOCATED_SEARCH 65536
/* NOTE(review): the three constants below are not referenced in this file */
#define MAX_BLOCKS_READ 10000
#define BLOCKS_READ_CHANGE 100
#define INITIAL_BLOCKS_READ 100

/* Uncomment to make dprintf() emit "blk_migration: " trace lines on stdout */
//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define dprintf(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
/* Tracing disabled: dprintf() expands to an empty statement */
#define dprintf(fmt, ...) \
    do { } while (0)
#endif
42
/* Per-device migration state; one entry per migrated block device. */
typedef struct BlkMigDevState {
    BlockDriverState *bs;          /* device being migrated */
    int bulk_completed;            /* set to 1 once the bulk phase is done */
    int shared_base;               /* copied from block_mig_state.shared_base;
                                      when set, unallocated ranges are skipped
                                      during the bulk phase */
    int64_t cur_sector;            /* next sector the bulk phase will read */
    int64_t completed_sectors;     /* bulk progress, used for reporting */
    int64_t total_sectors;         /* device length in sectors */
    int64_t dirty;                 /* NOTE(review): not referenced in this file */
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;   /* link in bmds_list */
} BlkMigDevState;
53
/* One chunk of device data that has been read (or is being read). */
typedef struct BlkMigBlock {
    uint8_t *buf;                  /* BLOCK_SIZE bytes of payload */
    BlkMigDevState *bmds;          /* device the chunk belongs to */
    int64_t sector;                /* first sector of the chunk */
    struct iovec iov;              /* single-element vector over buf ... */
    QEMUIOVector qiov;             /* ... wrapped for bdrv_aio_readv() */
    BlockDriverAIOCB *aiocb;       /* handle returned by bdrv_aio_readv() */
    int ret;                       /* completion status stored by
                                      blk_mig_read_cb(); < 0 means error */
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;      /* link in blk_list */
} BlkMigBlock;
64
/* Global block migration state. */
typedef struct BlkMigState {
    int blk_enable;                /* set by block_set_params(); block data is
                                      only migrated when this equals 1 */
    int shared_base;               /* set by block_set_params() */
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; /* migrated devices */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;      /* completed reads
                                                           waiting to be sent */
    int submitted;                 /* asynchronous reads still in flight */
    int read_done;                 /* blocks queued in blk_list */
    int transferred;               /* blocks sent by flush_blks() */
    int64_t total_sector_sum;      /* sum of total_sectors over all devices */
    int prev_progress;             /* last percentage reported, -1 initially */
} BlkMigState;

/* The single instance used by every function in this file. */
static BlkMigState block_mig_state;
78
79 static void blk_send(QEMUFile *f, BlkMigBlock * blk)
80 {
81     int len;
82
83     /* sector number and flags */
84     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
85                      | BLK_MIG_FLAG_DEVICE_BLOCK);
86
87     /* device name */
88     len = strlen(blk->bmds->bs->device_name);
89     qemu_put_byte(f, len);
90     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
91
92     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
93 }
94
95 int blk_mig_active(void)
96 {
97     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
98 }
99
100 uint64_t blk_mig_bytes_transferred(void)
101 {
102     BlkMigDevState *bmds;
103     uint64_t sum = 0;
104
105     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
106         sum += bmds->completed_sectors;
107     }
108     return sum << BDRV_SECTOR_BITS;
109 }
110
/* Return total bytes minus the bytes already accounted as completed. */
uint64_t blk_mig_bytes_remaining(void)
{
    uint64_t total = blk_mig_bytes_total();
    uint64_t done = blk_mig_bytes_transferred();

    return total - done;
}
115
116 uint64_t blk_mig_bytes_total(void)
117 {
118     BlkMigDevState *bmds;
119     uint64_t sum = 0;
120
121     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
122         sum += bmds->total_sectors;
123     }
124     return sum << BDRV_SECTOR_BITS;
125 }
126
127 static void blk_mig_read_cb(void *opaque, int ret)
128 {
129     BlkMigBlock *blk = opaque;
130
131     blk->ret = ret;
132
133     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
134
135     block_mig_state.submitted--;
136     block_mig_state.read_done++;
137     assert(block_mig_state.submitted >= 0);
138 }
139
140 static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
141                                 BlkMigDevState *bmds, int is_async)
142 {
143     int64_t total_sectors = bmds->total_sectors;
144     int64_t cur_sector = bmds->cur_sector;
145     BlockDriverState *bs = bmds->bs;
146     BlkMigBlock *blk;
147     int nr_sectors;
148
149     if (bmds->shared_base) {
150         while (cur_sector < total_sectors &&
151                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
152                                   &nr_sectors)) {
153             cur_sector += nr_sectors;
154         }
155     }
156
157     if (cur_sector >= total_sectors) {
158         bmds->cur_sector = bmds->completed_sectors = total_sectors;
159         return 1;
160     }
161
162     bmds->completed_sectors = cur_sector;
163
164     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
165
166     /* we are going to transfer a full block even if it is not allocated */
167     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
168
169     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
170         nr_sectors = total_sectors - cur_sector;
171     }
172
173     blk = qemu_malloc(sizeof(BlkMigBlock));
174     blk->buf = qemu_malloc(BLOCK_SIZE);
175     blk->bmds = bmds;
176     blk->sector = cur_sector;
177
178     if (is_async) {
179         blk->iov.iov_base = blk->buf;
180         blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
181         qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
182
183         blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
184                                     nr_sectors, blk_mig_read_cb, blk);
185         if (!blk->aiocb) {
186             goto error;
187         }
188         block_mig_state.submitted++;
189     } else {
190         if (bdrv_read(bs, cur_sector, blk->buf, nr_sectors) < 0) {
191             goto error;
192         }
193         blk_send(f, blk);
194
195         qemu_free(blk->buf);
196         qemu_free(blk);
197     }
198
199     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
200     bmds->cur_sector = cur_sector + nr_sectors;
201
202     return (bmds->cur_sector >= total_sectors);
203
204 error:
205     monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
206     qemu_file_set_error(f);
207     qemu_free(blk->buf);
208     qemu_free(blk);
209     return 0;
210 }
211
212 static void set_dirty_tracking(int enable)
213 {
214     BlkMigDevState *bmds;
215
216     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
217         bdrv_set_dirty_tracking(bmds->bs, enable);
218     }
219 }
220
221 static void init_blk_migration(Monitor *mon, QEMUFile *f)
222 {
223     BlkMigDevState *bmds;
224     BlockDriverState *bs;
225
226     block_mig_state.submitted = 0;
227     block_mig_state.read_done = 0;
228     block_mig_state.transferred = 0;
229     block_mig_state.total_sector_sum = 0;
230     block_mig_state.prev_progress = -1;
231
232     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
233         if (bs->type == BDRV_TYPE_HD) {
234             bmds = qemu_mallocz(sizeof(BlkMigDevState));
235             bmds->bs = bs;
236             bmds->bulk_completed = 0;
237             bmds->total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
238             bmds->completed_sectors = 0;
239             bmds->shared_base = block_mig_state.shared_base;
240
241             block_mig_state.total_sector_sum += bmds->total_sectors;
242
243             if (bmds->shared_base) {
244                 monitor_printf(mon, "Start migration for %s with shared base "
245                                     "image\n",
246                                bs->device_name);
247             } else {
248                 monitor_printf(mon, "Start full migration for %s\n",
249                                bs->device_name);
250             }
251
252             QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
253         }
254     }
255 }
256
257 static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f, int is_async)
258 {
259     int64_t completed_sector_sum = 0;
260     BlkMigDevState *bmds;
261     int progress;
262     int ret = 0;
263
264     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
265         if (bmds->bulk_completed == 0) {
266             if (mig_save_device_bulk(mon, f, bmds, is_async) == 1) {
267                 /* completed bulk section for this device */
268                 bmds->bulk_completed = 1;
269             }
270             completed_sector_sum += bmds->completed_sectors;
271             ret = 1;
272             break;
273         } else {
274             completed_sector_sum += bmds->completed_sectors;
275         }
276     }
277
278     progress = completed_sector_sum * 100 / block_mig_state.total_sector_sum;
279     if (progress != block_mig_state.prev_progress) {
280         block_mig_state.prev_progress = progress;
281         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
282                          | BLK_MIG_FLAG_PROGRESS);
283         monitor_printf(mon, "Completed %d %%\r", progress);
284         monitor_flush(mon);
285     }
286
287     return ret;
288 }
289
290 #define MAX_NUM_BLOCKS 4
291
292 static void blk_mig_save_dirty_blocks(Monitor *mon, QEMUFile *f)
293 {
294     BlkMigDevState *bmds;
295     BlkMigBlock blk;
296     int64_t sector;
297
298     blk.buf = qemu_malloc(BLOCK_SIZE);
299
300     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
301         for (sector = 0; sector < bmds->cur_sector;) {
302             if (bdrv_get_dirty(bmds->bs, sector)) {
303                 if (bdrv_read(bmds->bs, sector, blk.buf,
304                               BDRV_SECTORS_PER_DIRTY_CHUNK) < 0) {
305                     monitor_printf(mon, "Error reading sector %" PRId64 "\n",
306                                    sector);
307                     qemu_file_set_error(f);
308                     qemu_free(blk.buf);
309                     return;
310                 }
311                 blk.bmds = bmds;
312                 blk.sector = sector;
313                 blk_send(f, &blk);
314
315                 bdrv_reset_dirty(bmds->bs, sector,
316                                  BDRV_SECTORS_PER_DIRTY_CHUNK);
317             }
318             sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
319         }
320     }
321
322     qemu_free(blk.buf);
323 }
324
325 static void flush_blks(QEMUFile* f)
326 {
327     BlkMigBlock *blk;
328
329     dprintf("%s Enter submitted %d read_done %d transferred %d\n",
330             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
331             block_mig_state.transferred);
332
333     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
334         if (qemu_file_rate_limit(f)) {
335             break;
336         }
337         if (blk->ret < 0) {
338             qemu_file_set_error(f);
339             break;
340         }
341         blk_send(f, blk);
342
343         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
344         qemu_free(blk->buf);
345         qemu_free(blk);
346
347         block_mig_state.read_done--;
348         block_mig_state.transferred++;
349         assert(block_mig_state.read_done >= 0);
350     }
351
352     dprintf("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
353             block_mig_state.submitted, block_mig_state.read_done,
354             block_mig_state.transferred);
355 }
356
357 static int is_stage2_completed(void)
358 {
359     BlkMigDevState *bmds;
360
361     if (block_mig_state.submitted > 0) {
362         return 0;
363     }
364
365     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
366         if (bmds->bulk_completed == 0) {
367             return 0;
368         }
369     }
370
371     return 1;
372 }
373
374 static void blk_mig_cleanup(Monitor *mon)
375 {
376     BlkMigDevState *bmds;
377     BlkMigBlock *blk;
378
379     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
380         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
381         qemu_free(bmds);
382     }
383
384     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
385         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
386         qemu_free(blk->buf);
387         qemu_free(blk);
388     }
389
390     set_dirty_tracking(0);
391
392     monitor_printf(mon, "\n");
393 }
394
395 static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
396 {
397     dprintf("Enter save live stage %d submitted %d transferred %d\n",
398             stage, block_mig_state.submitted, block_mig_state.transferred);
399
400     if (stage < 0) {
401         blk_mig_cleanup(mon);
402         return 0;
403     }
404
405     if (block_mig_state.blk_enable != 1) {
406         /* no need to migrate storage */
407         qemu_put_be64(f, BLK_MIG_FLAG_EOS);
408         return 1;
409     }
410
411     if (stage == 1) {
412         init_blk_migration(mon, f);
413
414         /* start track dirty blocks */
415         set_dirty_tracking(1);
416     }
417
418     flush_blks(f);
419
420     if (qemu_file_has_error(f)) {
421         blk_mig_cleanup(mon);
422         return 0;
423     }
424
425     /* control the rate of transfer */
426     while ((block_mig_state.submitted +
427             block_mig_state.read_done) * BLOCK_SIZE <
428            qemu_file_get_rate_limit(f)) {
429         if (blk_mig_save_bulked_block(mon, f, 1) == 0) {
430             /* no more bulk blocks for now */
431             break;
432         }
433     }
434
435     flush_blks(f);
436
437     if (qemu_file_has_error(f)) {
438         blk_mig_cleanup(mon);
439         return 0;
440     }
441
442     if (stage == 3) {
443         while (blk_mig_save_bulked_block(mon, f, 0) != 0) {
444             /* empty */
445         }
446
447         blk_mig_save_dirty_blocks(mon, f);
448         blk_mig_cleanup(mon);
449
450         /* report completion */
451         qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
452
453         if (qemu_file_has_error(f)) {
454             return 0;
455         }
456
457         monitor_printf(mon, "Block migration completed\n");
458     }
459
460     qemu_put_be64(f, BLK_MIG_FLAG_EOS);
461
462     return ((stage == 2) && is_stage2_completed());
463 }
464
465 static int block_load(QEMUFile *f, void *opaque, int version_id)
466 {
467     static int banner_printed;
468     int len, flags;
469     char device_name[256];
470     int64_t addr;
471     BlockDriverState *bs;
472     uint8_t *buf;
473
474     do {
475         addr = qemu_get_be64(f);
476
477         flags = addr & ~BDRV_SECTOR_MASK;
478         addr >>= BDRV_SECTOR_BITS;
479
480         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
481             /* get device name */
482             len = qemu_get_byte(f);
483             qemu_get_buffer(f, (uint8_t *)device_name, len);
484             device_name[len] = '\0';
485
486             bs = bdrv_find(device_name);
487             if (!bs) {
488                 fprintf(stderr, "Error unknown block device %s\n",
489                         device_name);
490                 return -EINVAL;
491             }
492
493             buf = qemu_malloc(BLOCK_SIZE);
494
495             qemu_get_buffer(f, buf, BLOCK_SIZE);
496             bdrv_write(bs, addr, buf, BDRV_SECTORS_PER_DIRTY_CHUNK);
497
498             qemu_free(buf);
499         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
500             if (!banner_printed) {
501                 printf("Receiving block device images\n");
502                 banner_printed = 1;
503             }
504             printf("Completed %d %%%c", (int)addr,
505                    (addr == 100) ? '\n' : '\r');
506             fflush(stdout);
507         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
508             fprintf(stderr, "Unknown flags\n");
509             return -EINVAL;
510         }
511         if (qemu_file_has_error(f)) {
512             return -EIO;
513         }
514     } while (!(flags & BLK_MIG_FLAG_EOS));
515
516     return 0;
517 }
518
519 static void block_set_params(int blk_enable, int shared_base, void *opaque)
520 {
521     block_mig_state.blk_enable = blk_enable;
522     block_mig_state.shared_base = shared_base;
523
524     /* shared base means that blk_enable = 1 */
525     block_mig_state.blk_enable |= shared_base;
526 }
527
528 void blk_mig_init(void)
529 {
530     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
531     QSIMPLEQ_INIT(&block_mig_state.blk_list);
532
533     register_savevm_live("block", 0, 1, block_set_params, block_save_live,
534                          NULL, block_load, &block_mig_state);
535 }
This page took 0.053132 seconds and 4 git commands to generate.