Git Repo - qemu.git/blob - block-migration.c
Do not use dprintf
[qemu.git] / block-migration.c
1 /*
2  * QEMU live block migration
3  *
4  * Copyright IBM, Corp. 2009
5  *
6  * Authors:
7  *  Liran Schour   <[email protected]>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  */
13
14 #include "qemu-common.h"
15 #include "block_int.h"
16 #include "hw/hw.h"
17 #include "qemu-queue.h"
18 #include "monitor.h"
19 #include "block-migration.h"
20 #include <assert.h>
21
/* Size in bytes of one migration block: one dirty-tracking chunk of sectors. */
#define BLOCK_SIZE (BDRV_SECTORS_PER_DIRTY_CHUNK << BDRV_SECTOR_BITS)

/*
 * Record-type flags. They are or'ed into the bits below BDRV_SECTOR_BITS of
 * the 64-bit header that starts every record in the stream.
 */
#define BLK_MIG_FLAG_DEVICE_BLOCK       0x01
#define BLK_MIG_FLAG_EOS                0x02
#define BLK_MIG_FLAG_PROGRESS           0x04

/* Sector count passed to bdrv_is_allocated() when skipping unallocated runs. */
#define MAX_IS_ALLOCATED_SEARCH 65536
/* NOTE(review): the three constants below are not referenced in this file. */
#define MAX_BLOCKS_READ 10000
#define BLOCKS_READ_CHANGE 100
#define INITIAL_BLOCKS_READ 100
32
33 //#define DEBUG_BLK_MIGRATION
34
/*
 * Debug tracing. With DEBUG_BLK_MIGRATION defined, DPRINTF prints its message
 * to stdout prefixed with "blk_migration: "; otherwise it expands to an empty
 * statement and its arguments are not evaluated.
 */
#ifdef DEBUG_BLK_MIGRATION
#define DPRINTF(fmt, ...) \
    do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
42
/* Per-device migration state; one entry per block device being migrated. */
typedef struct BlkMigDevState {
    BlockDriverState *bs;           /* the device this entry tracks */
    int bulk_completed;             /* set to 1 once the bulk phase reached the end */
    int shared_base;                /* copied from block_mig_state.shared_base;
                                       when set, unallocated sectors are skipped */
    int64_t cur_sector;             /* next sector the bulk phase will look at */
    int64_t completed_sectors;      /* sectors accounted as done (progress) */
    int64_t total_sectors;          /* device length in sectors */
    int64_t dirty;                  /* NOTE(review): not referenced in this file */
    QSIMPLEQ_ENTRY(BlkMigDevState) entry;   /* link in block_mig_state.bmds_list */
} BlkMigDevState;
53
/* One block of device data read for migration, together with its read state. */
typedef struct BlkMigBlock {
    uint8_t *buf;                   /* data buffer; BLOCK_SIZE bytes are sent from it */
    BlkMigDevState *bmds;           /* device the block belongs to */
    int64_t sector;                 /* first sector of the block */
    struct iovec iov;               /* describes buf for the asynchronous read */
    QEMUIOVector qiov;              /* I/O vector wrapping iov */
    BlockDriverAIOCB *aiocb;        /* handle returned by bdrv_aio_readv() */
    int ret;                        /* completion status stored by blk_mig_read_cb() */
    QSIMPLEQ_ENTRY(BlkMigBlock) entry;      /* link in block_mig_state.blk_list */
} BlkMigBlock;
64
/* Global block-migration state. */
typedef struct BlkMigState {
    int blk_enable;                 /* set by block_set_params(); block data is
                                       only migrated when this equals 1 */
    int shared_base;                /* set by block_set_params(); copied into
                                       each device entry */
    QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;    /* devices to migrate */
    QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;  /* blocks whose read finished,
                                                       waiting to be sent */
    int submitted;                  /* asynchronous reads currently in flight */
    int read_done;                  /* blocks read but not yet sent */
    int transferred;                /* blocks sent by flush_blks() */
    int64_t total_sector_sum;       /* sum of total_sectors over all devices */
    int prev_progress;              /* last percentage reported, -1 at start */
} BlkMigState;

/* The single instance of the migration state used throughout this file. */
static BlkMigState block_mig_state;
78
79 static void blk_send(QEMUFile *f, BlkMigBlock * blk)
80 {
81     int len;
82
83     /* sector number and flags */
84     qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
85                      | BLK_MIG_FLAG_DEVICE_BLOCK);
86
87     /* device name */
88     len = strlen(blk->bmds->bs->device_name);
89     qemu_put_byte(f, len);
90     qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
91
92     qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
93 }
94
95 int blk_mig_active(void)
96 {
97     return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
98 }
99
100 uint64_t blk_mig_bytes_transferred(void)
101 {
102     BlkMigDevState *bmds;
103     uint64_t sum = 0;
104
105     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
106         sum += bmds->completed_sectors;
107     }
108     return sum << BDRV_SECTOR_BITS;
109 }
110
/*
 * Return the number of bytes still to be migrated: the total size of all
 * registered devices minus the bytes already accounted as completed.
 */
uint64_t blk_mig_bytes_remaining(void)
{
    uint64_t total = blk_mig_bytes_total();
    uint64_t done = blk_mig_bytes_transferred();

    return total - done;
}
115
116 uint64_t blk_mig_bytes_total(void)
117 {
118     BlkMigDevState *bmds;
119     uint64_t sum = 0;
120
121     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
122         sum += bmds->total_sectors;
123     }
124     return sum << BDRV_SECTOR_BITS;
125 }
126
127 static void blk_mig_read_cb(void *opaque, int ret)
128 {
129     BlkMigBlock *blk = opaque;
130
131     blk->ret = ret;
132
133     QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
134
135     block_mig_state.submitted--;
136     block_mig_state.read_done++;
137     assert(block_mig_state.submitted >= 0);
138 }
139
140 static int mig_save_device_bulk(Monitor *mon, QEMUFile *f,
141                                 BlkMigDevState *bmds, int is_async)
142 {
143     int64_t total_sectors = bmds->total_sectors;
144     int64_t cur_sector = bmds->cur_sector;
145     BlockDriverState *bs = bmds->bs;
146     BlkMigBlock *blk;
147     int nr_sectors;
148
149     if (bmds->shared_base) {
150         while (cur_sector < total_sectors &&
151                !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
152                                   &nr_sectors)) {
153             cur_sector += nr_sectors;
154         }
155     }
156
157     if (cur_sector >= total_sectors) {
158         bmds->cur_sector = bmds->completed_sectors = total_sectors;
159         return 1;
160     }
161
162     bmds->completed_sectors = cur_sector;
163
164     cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
165
166     /* we are going to transfer a full block even if it is not allocated */
167     nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
168
169     if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
170         nr_sectors = total_sectors - cur_sector;
171     }
172
173     blk = qemu_malloc(sizeof(BlkMigBlock));
174     blk->buf = qemu_malloc(BLOCK_SIZE);
175     blk->bmds = bmds;
176     blk->sector = cur_sector;
177
178     if (is_async) {
179         blk->iov.iov_base = blk->buf;
180         blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
181         qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
182
183         blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
184                                     nr_sectors, blk_mig_read_cb, blk);
185         if (!blk->aiocb) {
186             goto error;
187         }
188         block_mig_state.submitted++;
189     } else {
190         if (bdrv_read(bs, cur_sector, blk->buf, nr_sectors) < 0) {
191             goto error;
192         }
193         blk_send(f, blk);
194
195         qemu_free(blk->buf);
196         qemu_free(blk);
197     }
198
199     bdrv_reset_dirty(bs, cur_sector, nr_sectors);
200     bmds->cur_sector = cur_sector + nr_sectors;
201
202     return (bmds->cur_sector >= total_sectors);
203
204 error:
205     monitor_printf(mon, "Error reading sector %" PRId64 "\n", cur_sector);
206     qemu_file_set_error(f);
207     qemu_free(blk->buf);
208     qemu_free(blk);
209     return 0;
210 }
211
212 static void set_dirty_tracking(int enable)
213 {
214     BlkMigDevState *bmds;
215
216     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
217         bdrv_set_dirty_tracking(bmds->bs, enable);
218     }
219 }
220
221 static void init_blk_migration(Monitor *mon, QEMUFile *f)
222 {
223     BlkMigDevState *bmds;
224     BlockDriverState *bs;
225     int64_t sectors;
226
227     block_mig_state.submitted = 0;
228     block_mig_state.read_done = 0;
229     block_mig_state.transferred = 0;
230     block_mig_state.total_sector_sum = 0;
231     block_mig_state.prev_progress = -1;
232
233     for (bs = bdrv_first; bs != NULL; bs = bs->next) {
234         if (bs->type == BDRV_TYPE_HD) {
235             sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
236             if (sectors == 0) {
237                 continue;
238             }
239
240             bmds = qemu_mallocz(sizeof(BlkMigDevState));
241             bmds->bs = bs;
242             bmds->bulk_completed = 0;
243             bmds->total_sectors = sectors;
244             bmds->completed_sectors = 0;
245             bmds->shared_base = block_mig_state.shared_base;
246
247             block_mig_state.total_sector_sum += sectors;
248
249             if (bmds->shared_base) {
250                 monitor_printf(mon, "Start migration for %s with shared base "
251                                     "image\n",
252                                bs->device_name);
253             } else {
254                 monitor_printf(mon, "Start full migration for %s\n",
255                                bs->device_name);
256             }
257
258             QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
259         }
260     }
261 }
262
263 static int blk_mig_save_bulked_block(Monitor *mon, QEMUFile *f, int is_async)
264 {
265     int64_t completed_sector_sum = 0;
266     BlkMigDevState *bmds;
267     int progress;
268     int ret = 0;
269
270     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
271         if (bmds->bulk_completed == 0) {
272             if (mig_save_device_bulk(mon, f, bmds, is_async) == 1) {
273                 /* completed bulk section for this device */
274                 bmds->bulk_completed = 1;
275             }
276             completed_sector_sum += bmds->completed_sectors;
277             ret = 1;
278             break;
279         } else {
280             completed_sector_sum += bmds->completed_sectors;
281         }
282     }
283
284     progress = completed_sector_sum * 100 / block_mig_state.total_sector_sum;
285     if (progress != block_mig_state.prev_progress) {
286         block_mig_state.prev_progress = progress;
287         qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
288                          | BLK_MIG_FLAG_PROGRESS);
289         monitor_printf(mon, "Completed %d %%\r", progress);
290         monitor_flush(mon);
291     }
292
293     return ret;
294 }
295
/* NOTE(review): not referenced anywhere in this file - confirm it is still needed. */
#define MAX_NUM_BLOCKS 4
297
298 static void blk_mig_save_dirty_blocks(Monitor *mon, QEMUFile *f)
299 {
300     BlkMigDevState *bmds;
301     BlkMigBlock blk;
302     int64_t sector;
303
304     blk.buf = qemu_malloc(BLOCK_SIZE);
305
306     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
307         for (sector = 0; sector < bmds->cur_sector;) {
308             if (bdrv_get_dirty(bmds->bs, sector)) {
309                 if (bdrv_read(bmds->bs, sector, blk.buf,
310                               BDRV_SECTORS_PER_DIRTY_CHUNK) < 0) {
311                     monitor_printf(mon, "Error reading sector %" PRId64 "\n",
312                                    sector);
313                     qemu_file_set_error(f);
314                     qemu_free(blk.buf);
315                     return;
316                 }
317                 blk.bmds = bmds;
318                 blk.sector = sector;
319                 blk_send(f, &blk);
320
321                 bdrv_reset_dirty(bmds->bs, sector,
322                                  BDRV_SECTORS_PER_DIRTY_CHUNK);
323             }
324             sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
325         }
326     }
327
328     qemu_free(blk.buf);
329 }
330
331 static void flush_blks(QEMUFile* f)
332 {
333     BlkMigBlock *blk;
334
335     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
336             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
337             block_mig_state.transferred);
338
339     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
340         if (qemu_file_rate_limit(f)) {
341             break;
342         }
343         if (blk->ret < 0) {
344             qemu_file_set_error(f);
345             break;
346         }
347         blk_send(f, blk);
348
349         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
350         qemu_free(blk->buf);
351         qemu_free(blk);
352
353         block_mig_state.read_done--;
354         block_mig_state.transferred++;
355         assert(block_mig_state.read_done >= 0);
356     }
357
358     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
359             block_mig_state.submitted, block_mig_state.read_done,
360             block_mig_state.transferred);
361 }
362
363 static int is_stage2_completed(void)
364 {
365     BlkMigDevState *bmds;
366
367     if (block_mig_state.submitted > 0) {
368         return 0;
369     }
370
371     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
372         if (bmds->bulk_completed == 0) {
373             return 0;
374         }
375     }
376
377     return 1;
378 }
379
380 static void blk_mig_cleanup(Monitor *mon)
381 {
382     BlkMigDevState *bmds;
383     BlkMigBlock *blk;
384
385     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
386         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
387         qemu_free(bmds);
388     }
389
390     while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
391         QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
392         qemu_free(blk->buf);
393         qemu_free(blk);
394     }
395
396     set_dirty_tracking(0);
397
398     monitor_printf(mon, "\n");
399 }
400
/*
 * Live-save handler registered for the "block" section in blk_mig_init().
 *
 * mon    - monitor for progress and error output
 * f      - migration stream
 * stage  - negative: clean up only; 1: initialise and start dirty tracking;
 *          2: continue the bulk phase; 3: finish synchronously
 *          (NOTE(review): stage meanings are inferred from the branches below
 *          - confirm against the savevm caller)
 * opaque - unused
 *
 * Returns 1 when block migration is not enabled, 0 after cleanup or on a
 * stream error, and otherwise non-zero only in stage 2 once
 * is_stage2_completed() holds. Every non-error pass ends with an EOS record.
 */
static int block_save_live(Monitor *mon, QEMUFile *f, int stage, void *opaque)
{
    DPRINTF("Enter save live stage %d submitted %d transferred %d\n",
            stage, block_mig_state.submitted, block_mig_state.transferred);

    if (stage < 0) {
        blk_mig_cleanup(mon);
        return 0;
    }

    if (block_mig_state.blk_enable != 1) {
        /* no need to migrate storage */
        qemu_put_be64(f, BLK_MIG_FLAG_EOS);
        return 1;
    }

    if (stage == 1) {
        init_blk_migration(mon, f);

        /* start track dirty blocks */
        set_dirty_tracking(1);
    }

    /* send blocks whose asynchronous read finished since the last call */
    flush_blks(f);

    if (qemu_file_has_error(f)) {
        blk_mig_cleanup(mon);
        return 0;
    }

    /* control the rate of transfer */
    /* keep submitting reads while the queued data stays below the rate limit */
    while ((block_mig_state.submitted +
            block_mig_state.read_done) * BLOCK_SIZE <
           qemu_file_get_rate_limit(f)) {
        if (blk_mig_save_bulked_block(mon, f, 1) == 0) {
            /* no more bulk blocks for now */
            break;
        }
    }

    flush_blks(f);

    if (qemu_file_has_error(f)) {
        blk_mig_cleanup(mon);
        return 0;
    }

    if (stage == 3) {
        /* finish whatever is left of the bulk phase with synchronous reads */
        while (blk_mig_save_bulked_block(mon, f, 0) != 0) {
            /* empty */
        }

        /* then resend every chunk dirtied since it was transferred */
        blk_mig_save_dirty_blocks(mon, f);
        blk_mig_cleanup(mon);

        /* report completion */
        qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);

        if (qemu_file_has_error(f)) {
            return 0;
        }

        monitor_printf(mon, "Block migration completed\n");
    }

    qemu_put_be64(f, BLK_MIG_FLAG_EOS);

    return ((stage == 2) && is_stage2_completed());
}
470
471 static int block_load(QEMUFile *f, void *opaque, int version_id)
472 {
473     static int banner_printed;
474     int len, flags;
475     char device_name[256];
476     int64_t addr;
477     BlockDriverState *bs;
478     uint8_t *buf;
479
480     do {
481         addr = qemu_get_be64(f);
482
483         flags = addr & ~BDRV_SECTOR_MASK;
484         addr >>= BDRV_SECTOR_BITS;
485
486         if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
487             /* get device name */
488             len = qemu_get_byte(f);
489             qemu_get_buffer(f, (uint8_t *)device_name, len);
490             device_name[len] = '\0';
491
492             bs = bdrv_find(device_name);
493             if (!bs) {
494                 fprintf(stderr, "Error unknown block device %s\n",
495                         device_name);
496                 return -EINVAL;
497             }
498
499             buf = qemu_malloc(BLOCK_SIZE);
500
501             qemu_get_buffer(f, buf, BLOCK_SIZE);
502             bdrv_write(bs, addr, buf, BDRV_SECTORS_PER_DIRTY_CHUNK);
503
504             qemu_free(buf);
505         } else if (flags & BLK_MIG_FLAG_PROGRESS) {
506             if (!banner_printed) {
507                 printf("Receiving block device images\n");
508                 banner_printed = 1;
509             }
510             printf("Completed %d %%%c", (int)addr,
511                    (addr == 100) ? '\n' : '\r');
512             fflush(stdout);
513         } else if (!(flags & BLK_MIG_FLAG_EOS)) {
514             fprintf(stderr, "Unknown flags\n");
515             return -EINVAL;
516         }
517         if (qemu_file_has_error(f)) {
518             return -EIO;
519         }
520     } while (!(flags & BLK_MIG_FLAG_EOS));
521
522     return 0;
523 }
524
525 static void block_set_params(int blk_enable, int shared_base, void *opaque)
526 {
527     block_mig_state.blk_enable = blk_enable;
528     block_mig_state.shared_base = shared_base;
529
530     /* shared base means that blk_enable = 1 */
531     block_mig_state.blk_enable |= shared_base;
532 }
533
534 void blk_mig_init(void)
535 {
536     QSIMPLEQ_INIT(&block_mig_state.bmds_list);
537     QSIMPLEQ_INIT(&block_mig_state.blk_list);
538
539     register_savevm_live("block", 0, 1, block_set_params, block_save_live,
540                          NULL, block_load, &block_mig_state);
541 }
This page took 0.054185 seconds and 4 git commands to generate.