#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
#define BLK_MIG_FLAG_EOS 0x02
#define BLK_MIG_FLAG_PROGRESS 0x04
+#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
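+
+/* All BLK_MIG_FLAG_* values must fit below 1 << BDRV_SECTOR_BITS,
+ * since blk_send() packs them into the low bits of the 64-bit
+ * header: (sector << BDRV_SECTOR_BITS) | flags. */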
#define MAX_IS_ALLOCATED_SEARCH 65536
/* Protected by block migration lock. */
unsigned long *aio_bitmap;
int64_t completed_sectors;
+ BdrvDirtyBitmap *dirty_bitmap;
+ Error *blocker;
} BlkMigDevState;
typedef struct BlkMigBlock {
int shared_base;
QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
int64_t total_sector_sum;
+ bool zero_blocks;
/* Protected by lock. */
QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
qemu_mutex_unlock(&block_mig_state.lock);
}
+/* Must run outside of the iothread lock during the bulk phase,
+ * or the VM will stall.
+ */
+
static void blk_send(QEMUFile *f, BlkMigBlock * blk)
{
int len;
+ uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
+
+ if (block_mig_state.zero_blocks &&
+ buffer_is_zero(blk->buf, BLOCK_SIZE)) {
+ flags |= BLK_MIG_FLAG_ZERO_BLOCK;
+ }
/* sector number and flags */
qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
- | BLK_MIG_FLAG_DEVICE_BLOCK);
+ | flags);
/* device name */
len = strlen(blk->bmds->bs->device_name);
qemu_put_byte(f, len);
qemu_put_buffer(f, (uint8_t *)blk->bmds->bs->device_name, len);
+ /* If a block is zero we need to flush here, since the network
+ * bandwidth is now a lot higher than the storage device bandwidth.
+ * Thus, queueing zero blocks would slow down the migration. */
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ qemu_fflush(f);
+ return;
+ }
+
qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
}
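
/* For reference, a sketch of how the load side decodes this header
 * (block_load() in the excerpt below follows the same framing):
 *
 * addr = qemu_get_be64(f);
 * flags = addr & ~BDRV_SECTOR_MASK;
 * addr >>= BDRV_SECTOR_BITS;
 */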
blk_mig_unlock();
}
+/* Called with no lock taken. */
+
static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
{
int64_t total_sectors = bmds->total_sectors;
int nr_sectors;
if (bmds->shared_base) {
+ qemu_mutex_lock_iothread();
while (cur_sector < total_sectors &&
!bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
&nr_sectors)) {
cur_sector += nr_sectors;
}
+ qemu_mutex_unlock_iothread();
}
if (cur_sector >= total_sectors) {
block_mig_state.submitted++;
blk_mig_unlock();
+ qemu_mutex_lock_iothread();
blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
nr_sectors, blk_mig_read_cb, blk);
bdrv_reset_dirty(bs, cur_sector, nr_sectors);
- bmds->cur_sector = cur_sector + nr_sectors;
+ qemu_mutex_unlock_iothread();
+ bmds->cur_sector = cur_sector + nr_sectors;
return (bmds->cur_sector >= total_sectors);
}
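
/* The iothread lock is taken only around the calls that touch the
 * BlockDriverState (bdrv_is_allocated(), bdrv_aio_readv()): holding
 * it across the whole bulk loop would stall the VM, per the comment
 * above blk_send(). */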
-static void set_dirty_tracking(int enable)
+/* Called with iothread lock taken. */
+
+static int set_dirty_tracking(void)
+{
+ BlkMigDevState *bmds;
+ int ret;
+
+ QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+ bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
+ NULL);
+ if (!bmds->dirty_bitmap) {
+ ret = -errno;
+ goto fail;
+ }
+ }
+ return 0;
+
+fail:
+ QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
+ if (bmds->dirty_bitmap) {
+ bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
+ }
+ }
+ return ret;
+}
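+
+/* The fail path above releases only bitmaps that were actually
+ * created; this assumes BlkMigDevState is zero-allocated, leaving
+ * dirty_bitmap NULL for devices not yet visited. */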
+
+static void unset_dirty_tracking(void)
{
BlkMigDevState *bmds;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- bdrv_set_dirty_tracking(bmds->bs, enable ? BLOCK_SIZE : 0);
+ bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
}
}
bmds->completed_sectors = 0;
bmds->shared_base = block_mig_state.shared_base;
alloc_aio_bitmap(bmds);
- drive_get_ref(drive_get_by_blockdev(bs));
- bdrv_set_in_use(bs, 1);
+ error_setg(&bmds->blocker, "block device is in use by migration");
+ bdrv_op_block_all(bs, bmds->blocker);
+ bdrv_ref(bs);
block_mig_state.total_sector_sum += sectors;
block_mig_state.total_sector_sum = 0;
block_mig_state.prev_progress = -1;
block_mig_state.bulk_completed = 0;
+ block_mig_state.zero_blocks = migrate_zero_blocks();
bdrv_iterate(init_blk_migration_it, NULL);
}
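
/* zero_blocks is latched once per migration here, so blk_send() sees
 * a stable value for the whole run; migrate_zero_blocks() presumably
 * reflects the zero-blocks migration capability. */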
+/* Called with no lock taken. */
+
static int blk_mig_save_bulked_block(QEMUFile *f)
{
int64_t completed_sector_sum = 0;
}
}
+/* Called with iothread lock taken. */
+
static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
int is_async)
{
} else {
blk_mig_unlock();
}
- if (bdrv_get_dirty(bmds->bs, sector)) {
+ if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
nr_sectors = total_sectors - sector;
return ret;
}
-/* return value:
+/* Called with iothread lock taken.
+ *
+ * return value:
* 0: too much data for max_downtime
* 1: little enough data for max_downtime
*/
return ret;
}
+/* Called with no locks taken. */
+
static int flush_blks(QEMUFile *f)
{
BlkMigBlock *blk;
return ret;
}
+/* Called with iothread lock taken. */
+
static int64_t get_remaining_dirty(void)
{
BlkMigDevState *bmds;
int64_t dirty = 0;
QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- dirty += bdrv_get_dirty_count(bmds->bs);
+ dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
}
return dirty << BDRV_SECTOR_BITS;
}
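
/* The shift converts the bitmap's dirty count, a number of sectors,
 * into bytes for the pending estimate. */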
+/* Called with iothread lock taken. */
+
static void blk_mig_cleanup(void)
{
BlkMigDevState *bmds;
bdrv_drain_all();
- set_dirty_tracking(0);
+ unset_dirty_tracking();
blk_mig_lock();
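/* Each step mirrors init_blk_migration_it: remove the op blocker
 * before freeing its Error, and drop the reference taken by
 * bdrv_ref(). */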
while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
- bdrv_set_in_use(bmds->bs, 0);
- drive_put_ref(drive_get_by_blockdev(bmds->bs));
+ bdrv_op_unblock_all(bmds->bs, bmds->blocker);
+ error_free(bmds->blocker);
+ bdrv_unref(bmds->bs);
g_free(bmds->aio_bitmap);
g_free(bmds);
}
DPRINTF("Enter save live setup submitted %d transferred %d\n",
block_mig_state.submitted, block_mig_state.transferred);
+ qemu_mutex_lock_iothread();
init_blk_migration(f);
/* start tracking dirty blocks */
- set_dirty_tracking(1);
+ ret = set_dirty_tracking();
+
+ if (ret) {
+ qemu_mutex_unlock_iothread();
+ return ret;
+ }
+
+ qemu_mutex_unlock_iothread();
ret = flush_blks(f);
blk_mig_reset_dirty_cursor();
}
ret = 0;
} else {
+ /* Always called with the iothread lock taken for
+ * simplicity; block_save_complete also calls it.
+ */
+ qemu_mutex_lock_iothread();
ret = blk_mig_save_dirty_block(f, 1);
+ qemu_mutex_unlock_iothread();
}
if (ret < 0) {
return ret;
return qemu_ftell(f) - last_ftell;
}
+/* Called with iothread lock taken. */
+
static int block_save_complete(QEMUFile *f, void *opaque)
{
int ret;
/* Estimate pending number of bytes to send */
uint64_t pending;
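/* Both locks are needed below: get_remaining_dirty() walks the dirty
 * bitmaps under the iothread lock, while the in-flight block counters
 * are protected by the block migration lock. */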
+ qemu_mutex_lock_iothread();
blk_mig_lock();
pending = get_remaining_dirty() +
block_mig_state.submitted * BLOCK_SIZE +
pending = BLOCK_SIZE;
}
blk_mig_unlock();
+ qemu_mutex_unlock_iothread();
DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
return pending;
nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
}
- buf = g_malloc(BLOCK_SIZE);
-
- qemu_get_buffer(f, buf, BLOCK_SIZE);
- ret = bdrv_write(bs, addr, buf, nr_sectors);
- g_free(buf);
+ if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
+ ret = bdrv_write_zeroes(bs, addr, nr_sectors,
+ BDRV_REQ_MAY_UNMAP);
+ } else {
+ buf = g_malloc(BLOCK_SIZE);
+ qemu_get_buffer(f, buf, BLOCK_SIZE);
+ ret = bdrv_write(bs, addr, buf, nr_sectors);
+ g_free(buf);
+ }
if (ret < 0) {
return ret;
}
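
/* BDRV_REQ_MAY_UNMAP lets the destination discard or punch holes
 * instead of writing literal zeroes, so a sparse source image can
 * stay sparse on the target. */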