block-migration.c
/*
 * QEMU live block migration
 *
 * Copyright IBM, Corp. 2009
 *
 * Authors:
 *  Liran Schour   <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu-common.h"
#include "block/block_int.h"
#include "hw/hw.h"
#include "qemu/queue.h"
#include "qemu/timer.h"
#include "migration/block.h"
#include "migration/migration.h"
#include "sysemu/blockdev.h"
#include <assert.h>

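/* Data is transferred in chunks of BLOCK_SIZE bytes, i.e.
 * BDRV_SECTORS_PER_DIRTY_CHUNK 512-byte sectors; the dirty bitmap and the
 * aio bitmap below track progress at this granularity. */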
#define BLOCK_SIZE                       (1 << 20)
#define BDRV_SECTORS_PER_DIRTY_CHUNK     (BLOCK_SIZE >> BDRV_SECTOR_BITS)

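/* Flags are carried in the low BDRV_SECTOR_BITS bits of the 64-bit sector
 * field of each record on the wire; see blk_send() and block_load(). */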
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04
#define BLK_MIG_FLAG_ZERO_BLOCK         0x08

#define MAX_IS_ALLOCATED_SEARCH 65536

//#define DEBUG_BLK_MIGRATION

#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif

typedef struct BlkMigDevState {
    /* Written during setup phase.  Can be read without a lock.  */
    BlockDriverState *bs;
    int shared_base;
    int64_t total_sectors;
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;

    /* Only used by migration thread.  Does not need a lock.  */
    int bulk_completed;
    int64_t cur_sector;
    int64_t cur_dirty;

    /* Protected by block migration lock.  */
    unsigned long *aio_bitmap;
    int64_t completed_sectors;
    BdrvDirtyBitmap *dirty_bitmap;
    Error *blocker;
} BlkMigDevState;

typedef struct BlkMigBlock {
    /* Only used by migration thread.  */
    uint8_t *buf;
    BlkMigDevState *bmds;
    int64_t sector;
    int nr_sectors;
    struct iovec iov;
    QEMUIOVector qiov;
    BlockDriverAIOCB *aiocb;

    /* Protected by block migration lock.  */
    int ret;
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;
} BlkMigBlock;

typedef struct BlkMigState {
    /* Written during setup phase.  Can be read without a lock.  */
    int blk_enable;
    int shared_base;
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
    int64_t total_sector_sum;
    bool zero_blocks;

    /* Protected by lock.  */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
    int submitted;
    int read_done;

    /* Only used by migration thread.  Does not need a lock.  */
    int transferred;
    int prev_progress;
    int bulk_completed;

    /* Lock must be taken _inside_ the iothread lock.  */
    QemuMutex lock;
} BlkMigState;

static BlkMigState block_mig_state;

static void blk_mig_lock(void)
{
    qemu_mutex_lock(&block_mig_state.lock);
}

static void blk_mig_unlock(void)
{
    qemu_mutex_unlock(&block_mig_state.lock);
}

/* Must run outside of the iothread lock during the bulk phase,
 * or the VM will stall.
 */

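/* Stream format of one device block, as produced below and parsed by
 * block_load():
 *   8 bytes   (sector << BDRV_SECTOR_BITS) | flags
 *   1 byte    length of the device name
 *   n bytes   device name
 *   BLOCK_SIZE bytes of data, omitted when BLK_MIG_FLAG_ZERO_BLOCK is set
 */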
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
    int len;
    uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;

    if (block_mig_state.zero_blocks &&
        buffer_is_zero(blk->buf, BLOCK_SIZE)) {
        flags |= BLK_MIG_FLAG_ZERO_BLOCK;
    }

    /* sector number and flags */
    qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
                     | flags);

    /* device name */
    len = strlen(blk->bmds->bs->device_name);
    qemu_put_byte(f, len);
    qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);

    /* If a block is zero we need to flush here, since the network
     * bandwidth is now much higher than the storage device bandwidth;
     * if we queued zero blocks we would slow down the migration. */
    if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
        qemu_fflush(f);
        return;
    }

    qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}

int blk_mig_active(void)
{
    return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
}

uint64_t blk_mig_bytes_transferred(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    blk_mig_lock();
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->completed_sectors;
    }
    blk_mig_unlock();
    return sum << BDRV_SECTOR_BITS;
}

uint64_t blk_mig_bytes_remaining(void)
{
    return blk_mig_bytes_total() - blk_mig_bytes_transferred();
}

uint64_t blk_mig_bytes_total(void)
{
    BlkMigDevState *bmds;
    uint64_t sum = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        sum += bmds->total_sectors;
    }
    return sum << BDRV_SECTOR_BITS;
}


/* Called with migration lock held.  */

static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
{
    int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (sector < bdrv_nb_sectors(bmds->bs)) {
        return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
            (1UL << (chunk % (sizeof(unsigned long) * 8))));
    } else {
        return 0;
    }
}

/* Called with migration lock held.  */

static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
                             int nb_sectors, int set)
{
    int64_t start, end;
    unsigned long val, idx, bit;

    start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
    end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;

    for (; start <= end; start++) {
        idx = start / (sizeof(unsigned long) * 8);
        bit = start % (sizeof(unsigned long) * 8);
        val = bmds->aio_bitmap[idx];
        if (set) {
            val |= 1UL << bit;
        } else {
            val &= ~(1UL << bit);
        }
        bmds->aio_bitmap[idx] = val;
    }
}

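/* One bit per BDRV_SECTORS_PER_DIRTY_CHUNK chunk of the device, rounded up
 * to whole bytes (8 chunks per byte). */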
static void alloc_aio_bitmap(BlkMigDevState *bmds)
{
    BlockDriverState *bs = bmds->bs;
    int64_t bitmap_size;

    bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
    bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;

    bmds->aio_bitmap = g_malloc0(bitmap_size);
}

/* Never hold migration lock when yielding to the main loop!  */

static void blk_mig_read_cb(void *opaque, int ret)
{
    BlkMigBlock *blk = opaque;

    blk_mig_lock();
    blk->ret = ret;

    QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
    bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);

    block_mig_state.submitted--;
    block_mig_state.read_done++;
    assert(block_mig_state.submitted >= 0);
    blk_mig_unlock();
}

/* Called with no lock taken.  */

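/* Submit an asynchronous read for the next bulk chunk of @bmds and advance
 * its bulk cursor.  Returns 1 once the whole device has been queued. */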
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
    int64_t total_sectors = bmds->total_sectors;
    int64_t cur_sector = bmds->cur_sector;
    BlockDriverState *bs = bmds->bs;
    BlkMigBlock *blk;
    int nr_sectors;

    if (bmds->shared_base) {
        qemu_mutex_lock_iothread();
        while (cur_sector < total_sectors &&
               !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
                                  &nr_sectors)) {
            cur_sector += nr_sectors;
        }
        qemu_mutex_unlock_iothread();
    }

    if (cur_sector >= total_sectors) {
        bmds->cur_sector = bmds->completed_sectors = total_sectors;
        return 1;
    }

    bmds->completed_sectors = cur_sector;

    cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);

    /* we are going to transfer a full block even if it is not allocated */
    nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;

    if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
        nr_sectors = total_sectors - cur_sector;
    }

    blk = g_new(BlkMigBlock, 1);
    blk->buf = g_malloc(BLOCK_SIZE);
    blk->bmds = bmds;
    blk->sector = cur_sector;
    blk->nr_sectors = nr_sectors;

    blk->iov.iov_base = blk->buf;
    blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
    qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

    blk_mig_lock();
    block_mig_state.submitted++;
    blk_mig_unlock();

    qemu_mutex_lock_iothread();
    blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
                                nr_sectors, blk_mig_read_cb, blk);

    bdrv_reset_dirty(bs, cur_sector, nr_sectors);
    qemu_mutex_unlock_iothread();

    bmds->cur_sector = cur_sector + nr_sectors;
    return (bmds->cur_sector >= total_sectors);
}

/* Called with iothread lock taken.  */

static int set_dirty_tracking(void)
{
    BlkMigDevState *bmds;
    int ret;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
                                                      NULL);
        if (!bmds->dirty_bitmap) {
            ret = -errno;
            goto fail;
        }
    }
    return 0;

fail:
    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->dirty_bitmap) {
            bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
        }
    }
    return ret;
}

static void unset_dirty_tracking(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
    }
}

static void init_blk_migration_it(void *opaque, BlockDriverState *bs)
{
    BlkMigDevState *bmds;
    int64_t sectors;

    if (!bdrv_is_read_only(bs)) {
        sectors = bdrv_nb_sectors(bs);
        if (sectors <= 0) {
            return;
        }

        bmds = g_new0(BlkMigDevState, 1);
        bmds->bs = bs;
        bmds->bulk_completed = 0;
        bmds->total_sectors = sectors;
        bmds->completed_sectors = 0;
        bmds->shared_base = block_mig_state.shared_base;
        alloc_aio_bitmap(bmds);
        error_setg(&bmds->blocker, "block device is in use by migration");
        bdrv_op_block_all(bs, bmds->blocker);
        bdrv_ref(bs);

        block_mig_state.total_sector_sum += sectors;

        if (bmds->shared_base) {
            DPRINTF("Start migration for %s with shared base image\n",
                    bs->device_name);
        } else {
            DPRINTF("Start full migration for %s\n", bs->device_name);
        }

        QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
    }
}

static void init_blk_migration(QEMUFile *f)
{
    block_mig_state.submitted = 0;
    block_mig_state.read_done = 0;
    block_mig_state.transferred = 0;
    block_mig_state.total_sector_sum = 0;
    block_mig_state.prev_progress = -1;
    block_mig_state.bulk_completed = 0;
    block_mig_state.zero_blocks = migrate_zero_blocks();

    bdrv_iterate(init_blk_migration_it, NULL);
}

/* Called with no lock taken.  */

static int blk_mig_save_bulked_block(QEMUFile *f)
{
    int64_t completed_sector_sum = 0;
    BlkMigDevState *bmds;
    int progress;
    int ret = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        if (bmds->bulk_completed == 0) {
            if (mig_save_device_bulk(f, bmds) == 1) {
                /* completed bulk section for this device */
                bmds->bulk_completed = 1;
            }
            completed_sector_sum += bmds->completed_sectors;
            ret = 1;
            break;
        } else {
            completed_sector_sum += bmds->completed_sectors;
        }
    }

    if (block_mig_state.total_sector_sum != 0) {
        progress = completed_sector_sum * 100 /
                   block_mig_state.total_sector_sum;
    } else {
        progress = 100;
    }
    if (progress != block_mig_state.prev_progress) {
        block_mig_state.prev_progress = progress;
        qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
                         | BLK_MIG_FLAG_PROGRESS);
        DPRINTF("Completed %d %%\r", progress);
    }

    return ret;
}

static void blk_mig_reset_dirty_cursor(void)
{
    BlkMigDevState *bmds;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        bmds->cur_dirty = 0;
    }
}

/* Called with iothread lock taken.  */

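/* Scan the dirty bitmap from @bmds->cur_dirty and transfer at most one dirty
 * chunk per call, asynchronously if @is_async.  Returns 1 when the cursor
 * has reached the end of the device, 0 otherwise, or a negative error. */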
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
                                 int is_async)
{
    BlkMigBlock *blk;
    int64_t total_sectors = bmds->total_sectors;
    int64_t sector;
    int nr_sectors;
    int ret = -EIO;

    for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
        blk_mig_lock();
        if (bmds_aio_inflight(bmds, sector)) {
            blk_mig_unlock();
            bdrv_drain_all();
        } else {
            blk_mig_unlock();
        }
        if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {

            if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - sector;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }
            blk = g_new(BlkMigBlock, 1);
            blk->buf = g_malloc(BLOCK_SIZE);
            blk->bmds = bmds;
            blk->sector = sector;
            blk->nr_sectors = nr_sectors;

            if (is_async) {
                blk->iov.iov_base = blk->buf;
                blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
                qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);

                blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
                                            nr_sectors, blk_mig_read_cb, blk);

                blk_mig_lock();
                block_mig_state.submitted++;
                bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
                blk_mig_unlock();
            } else {
                ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
                if (ret < 0) {
                    goto error;
                }
                blk_send(f, blk);

                g_free(blk->buf);
                g_free(blk);
            }

            bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
            break;
        }
        sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
        bmds->cur_dirty = sector;
    }

    return (bmds->cur_dirty >= bmds->total_sectors);

error:
    DPRINTF("Error reading sector %" PRId64 "\n", sector);
    g_free(blk->buf);
    g_free(blk);
    return ret;
}

/* Called with iothread lock taken.
 *
 * return value:
 * 0: too much data for max_downtime
 * 1: little enough data for max_downtime
 */
static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
{
    BlkMigDevState *bmds;
    int ret = 1;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        ret = mig_save_device_dirty(f, bmds, is_async);
        if (ret <= 0) {
            break;
        }
    }

    return ret;
}

/* Called with no locks taken.  */

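/* Drain completed reads from blk_list into the migration stream, stopping
 * when the stream's rate limit is hit.  Returns 0 or the first error seen
 * by a read callback. */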
static int flush_blks(QEMUFile *f)
{
    BlkMigBlock *blk;
    int ret = 0;

    DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
            __func__, block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);

    blk_mig_lock();
    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        if (qemu_file_rate_limit(f)) {
            break;
        }
        if (blk->ret < 0) {
            ret = blk->ret;
            break;
        }

        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        blk_mig_unlock();
        blk_send(f, blk);
        blk_mig_lock();

        g_free(blk->buf);
        g_free(blk);

        block_mig_state.read_done--;
        block_mig_state.transferred++;
        assert(block_mig_state.read_done >= 0);
    }
    blk_mig_unlock();

    DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __func__,
            block_mig_state.submitted, block_mig_state.read_done,
            block_mig_state.transferred);
    return ret;
}

/* Called with iothread lock taken.  */

static int64_t get_remaining_dirty(void)
{
    BlkMigDevState *bmds;
    int64_t dirty = 0;

    QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
        dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
    }

    return dirty << BDRV_SECTOR_BITS;
}

/* Called with iothread lock taken.  */

static void blk_mig_cleanup(void)
{
    BlkMigDevState *bmds;
    BlkMigBlock *blk;

    bdrv_drain_all();

    unset_dirty_tracking();

    blk_mig_lock();
    while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
        bdrv_op_unblock_all(bmds->bs, bmds->blocker);
        error_free(bmds->blocker);
        bdrv_unref(bmds->bs);
        g_free(bmds->aio_bitmap);
        g_free(bmds);
    }

    while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
        QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
        g_free(blk->buf);
        g_free(blk);
    }
    blk_mig_unlock();
}

static void block_migration_cancel(void *opaque)
{
    blk_mig_cleanup();
}

static int block_save_setup(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live setup submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    qemu_mutex_lock_iothread();
    init_blk_migration(f);

    /* start tracking dirty blocks */
    ret = set_dirty_tracking();

    if (ret) {
        qemu_mutex_unlock_iothread();
        return ret;
    }

    qemu_mutex_unlock_iothread();

    ret = flush_blks(f);
    blk_mig_reset_dirty_cursor();
    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ret;
}

static int block_save_iterate(QEMUFile *f, void *opaque)
{
    int ret;
    int64_t last_ftell = qemu_ftell(f);

    DPRINTF("Enter save live iterate submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* control the rate of transfer */
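    /* (submitted + read_done) * BLOCK_SIZE is the data read but not yet put
     * on the stream; stop queueing new reads once it reaches the rate limit
     * for this iteration. */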
    blk_mig_lock();
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f)) {
        blk_mig_unlock();
        if (block_mig_state.bulk_completed == 0) {
            /* first finish the bulk phase */
            if (blk_mig_save_bulked_block(f) == 0) {
                /* finished saving bulk on all devices */
                block_mig_state.bulk_completed = 1;
            }
            ret = 0;
        } else {
            /* Always called with iothread lock taken for
             * simplicity, block_save_complete also calls it.
             */
            qemu_mutex_lock_iothread();
            ret = blk_mig_save_dirty_block(f, 1);
            qemu_mutex_unlock_iothread();
        }
        if (ret < 0) {
            return ret;
        }
        blk_mig_lock();
        if (ret != 0) {
            /* no more dirty blocks */
            break;
        }
    }
    blk_mig_unlock();

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);
    return qemu_ftell(f) - last_ftell;
}

/* Called with iothread lock taken.  */

static int block_save_complete(QEMUFile *f, void *opaque)
{
    int ret;

    DPRINTF("Enter save live complete submitted %d transferred %d\n",
            block_mig_state.submitted, block_mig_state.transferred);

    ret = flush_blks(f);
    if (ret) {
        return ret;
    }

    blk_mig_reset_dirty_cursor();

    /* we know for sure that the bulk save is completed and
       all async reads have completed */
    blk_mig_lock();
    assert(block_mig_state.submitted == 0);
    blk_mig_unlock();

    do {
        ret = blk_mig_save_dirty_block(f, 0);
        if (ret < 0) {
            return ret;
        }
    } while (ret == 0);

    /* report completion */
    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

    DPRINTF("Block migration completed\n");

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    blk_mig_cleanup();
    return 0;
}

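/* Estimate of the bytes still to be transferred: remaining dirty data plus
 * reads in flight and reads completed but not yet put on the stream.  At
 * least one block is reported while the bulk phase is still in progress. */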
static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
{
    /* Estimate pending number of bytes to send */
    uint64_t pending;

    qemu_mutex_lock_iothread();
    blk_mig_lock();
    pending = get_remaining_dirty() +
                       block_mig_state.submitted * BLOCK_SIZE +
                       block_mig_state.read_done * BLOCK_SIZE;

    /* Report at least one block pending during bulk phase */
    if (pending == 0 && !block_mig_state.bulk_completed) {
        pending = BLOCK_SIZE;
    }
    blk_mig_unlock();
    qemu_mutex_unlock_iothread();

    DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
    return pending;
}

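/* Destination side: parse the records written by blk_send() until the EOS
 * flag is seen, writing each block (or zeroes, for BLK_MIG_FLAG_ZERO_BLOCK)
 * to the named block device. */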
static int block_load(QEMUFile *f, void *opaque, int version_id)
{
    static int banner_printed;
    int len, flags;
    char device_name[256];
    int64_t addr;
    BlockDriverState *bs, *bs_prev = NULL;
    uint8_t *buf;
    int64_t total_sectors = 0;
    int nr_sectors;
    int ret;

    do {
        addr = qemu_get_be64(f);

        flags = addr & ~BDRV_SECTOR_MASK;
        addr >>= BDRV_SECTOR_BITS;

        if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
            /* get device name */
            len = qemu_get_byte(f);
            qemu_get_buffer(f, (uint8_t *)device_name, len);
            device_name[len] = '\0';

            bs = bdrv_find(device_name);
            if (!bs) {
                fprintf(stderr, "Error unknown block device %s\n",
                        device_name);
                return -EINVAL;
            }

            if (bs != bs_prev) {
                bs_prev = bs;
                total_sectors = bdrv_nb_sectors(bs);
                if (total_sectors <= 0) {
                    error_report("Error getting length of block device %s",
                                 device_name);
                    return -EINVAL;
                }
            }

            if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
                nr_sectors = total_sectors - addr;
            } else {
                nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
            }

            if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
                ret = bdrv_write_zeroes(bs, addr, nr_sectors,
                                        BDRV_REQ_MAY_UNMAP);
            } else {
                buf = g_malloc(BLOCK_SIZE);
                qemu_get_buffer(f, buf, BLOCK_SIZE);
                ret = bdrv_write(bs, addr, buf, nr_sectors);
                g_free(buf);
            }

            if (ret < 0) {
                return ret;
            }
        } else if (flags & BLK_MIG_FLAG_PROGRESS) {
            if (!banner_printed) {
                printf("Receiving block device images\n");
                banner_printed = 1;
            }
            printf("Completed %d %%%c", (int)addr,
                   (addr == 100) ? '\n' : '\r');
            fflush(stdout);
        } else if (!(flags & BLK_MIG_FLAG_EOS)) {
            fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
            return -EINVAL;
        }
        ret = qemu_file_get_error(f);
        if (ret != 0) {
            return ret;
        }
    } while (!(flags & BLK_MIG_FLAG_EOS));

    return 0;
}

static void block_set_params(const MigrationParams *params, void *opaque)
{
    block_mig_state.blk_enable = params->blk;
    block_mig_state.shared_base = params->shared;

    /* shared base means that blk_enable = 1 */
    block_mig_state.blk_enable |= params->shared;
}

static bool block_is_active(void *opaque)
{
    return block_mig_state.blk_enable == 1;
}

static SaveVMHandlers savevm_block_handlers = {
    .set_params = block_set_params,
    .save_live_setup = block_save_setup,
    .save_live_iterate = block_save_iterate,
    .save_live_complete = block_save_complete,
    .save_live_pending = block_save_pending,
    .load_state = block_load,
    .cancel = block_migration_cancel,
    .is_active = block_is_active,
};

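/* Register block migration with the savevm framework; the handlers above
 * drive the actual transfer. */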
void blk_mig_init(void)
{
    QSIMPLEQ_INIT(&block_mig_state.bmds_list);
    QSIMPLEQ_INIT(&block_mig_state.blk_list);
    qemu_mutex_init(&block_mig_state.lock);

    register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
                         &block_mig_state);
}