X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/6b19a7d91c8de9904c67b87203a46e55db4181ab..0781c1ed1cbe1361b45f8fddfc85d202a517a88c:/migration/migration.c

diff --git a/migration/migration.c b/migration/migration.c
index 76153914d1..44cbfb0ddd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -31,7 +31,6 @@
 #include "migration/vmstate.h"
 #include "block/block.h"
 #include "qapi/qmp/qerror.h"
-#include "qapi/util.h"
 #include "qemu/rcu.h"
 #include "block.h"
 #include "postcopy-ram.h"
@@ -72,12 +71,14 @@
 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10

 /* Migration XBZRLE default cache size */
-#define DEFAULT_MIGRATE_CACHE_SIZE (64 * 1024 * 1024)
+#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

 /* The delay time (in ms) between two COLO checkpoints
  * Note: Please change this default value to 10000 when we support hybrid mode.
  */
 #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY 200
+#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
+#define DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT 16

 static NotifierList migration_state_notifiers =
     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@@ -103,6 +104,9 @@ enum mig_rp_message_type {
 static MigrationState *current_migration;

 static bool migration_object_check(MigrationState *ms, Error **errp);
+static int migration_maybe_pause(MigrationState *s,
+                                 int *current_active_state,
+                                 int new_state);

 void migration_object_init(void)
 {
@@ -128,6 +132,11 @@ void migration_object_init(void)
     }
 }

+void migration_object_finalize(void)
+{
+    object_unref(OBJECT(current_migration));
+}
+
 /* For outgoing */
 MigrationState *migrate_get_current(void)
 {
@@ -167,7 +176,7 @@ void migration_incoming_state_destroy(void)
         mis->from_src_file = NULL;
     }

-    qemu_event_destroy(&mis->main_thread_load_event);
+    qemu_event_reset(&mis->main_thread_load_event);
 }

 static void migrate_generate_event(int new_state)
@@ -280,6 +289,10 @@ static void process_incoming_migration_bh(void *opaque)
      */
     qemu_announce_self();

+    if (multifd_load_cleanup(&local_err) != 0) {
+        error_report_err(local_err);
+        autostart = false;
+    }
     /* If global state section was not received or we are in running
        state, we need to obey autostart. Any other state is set with
        runstate_set. */
@@ -307,17 +320,16 @@ static void process_incoming_migration_bh(void *opaque)

 static void process_incoming_migration_co(void *opaque)
 {
-    QEMUFile *f = opaque;
     MigrationIncomingState *mis = migration_incoming_get_current();
     PostcopyState ps;
     int ret;

-    mis->from_src_file = f;
+    assert(mis->from_src_file);
     mis->largest_page_size = qemu_ram_pagesize_largest();
     postcopy_state_set(POSTCOPY_INCOMING_NONE);
     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                       MIGRATION_STATUS_ACTIVE);
-    ret = qemu_loadvm_state(f);
+    ret = qemu_loadvm_state(mis->from_src_file);

     ps = postcopy_state_get();
     trace_process_incoming_migration_co_end(ret, ps);
@@ -353,24 +365,71 @@ static void process_incoming_migration_co(void *opaque)
     }

     if (ret < 0) {
+        Error *local_err = NULL;
+
         migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                           MIGRATION_STATUS_FAILED);
         error_report("load of migration failed: %s", strerror(-ret));
         qemu_fclose(mis->from_src_file);
+        if (multifd_load_cleanup(&local_err) != 0) {
+            error_report_err(local_err);
+        }
         exit(EXIT_FAILURE);
     }
     mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
     qemu_bh_schedule(mis->bh);
 }

-void migration_fd_process_incoming(QEMUFile *f)
+static void migration_incoming_setup(QEMUFile *f)
 {
-    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, f);
+    MigrationIncomingState *mis = migration_incoming_get_current();

+    if (multifd_load_setup() != 0) {
+        /* We haven't been able to create multifd threads
+           nothing better to do */
+        exit(EXIT_FAILURE);
+    }
+    if (!mis->from_src_file) {
+        mis->from_src_file = f;
+    }
     qemu_file_set_blocking(f, false);
+}
+
+static void migration_incoming_process(void)
+{
+    Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
     qemu_coroutine_enter(co);
 }

+void migration_fd_process_incoming(QEMUFile *f)
+{
+    migration_incoming_setup(f);
+    migration_incoming_process();
+}
+
+void migration_ioc_process_incoming(QIOChannel *ioc)
+{
+    MigrationIncomingState *mis = migration_incoming_get_current();
+
+    if (!mis->from_src_file) {
+        QEMUFile *f = qemu_fopen_channel_input(ioc);
+        migration_fd_process_incoming(f);
+    }
+    /* We still only have a single channel.  Nothing to do here yet */
+}
+
+/**
+ * @migration_has_all_channels: We have received all channels that we need
+ *
+ * Returns true when we have got connections to all the channels that
+ * we need for migration.
+ */
+bool migration_has_all_channels(void)
+{
+    return true;
+}
+
 /*
  * Send a 'SHUT' message on the return channel with the given value
  * to indicate that we've finished with the RP.  Non-0 value indicates
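[Note] The hunk above splits the incoming path into migration_incoming_setup() and migration_incoming_process() and adds a QIOChannel entry point, preparing for multiple connections per migration. A minimal sketch of how a connection listener might drive these entry points — example_incoming_channel() is hypothetical, the two migration_* calls are the ones added above:

    static void example_incoming_channel(QIOChannel *ioc)
    {
        /* hand the connection to migration; the first one creates the
         * QEMUFile and starts the load coroutine */
        migration_ioc_process_incoming(ioc);

        if (!migration_has_all_channels()) {
            /* keep the listener open: more channels (e.g. multifd) are
             * still expected.  Today this branch is never taken because
             * migration_has_all_channels() always returns true. */
        }
    }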
@@ -433,6 +492,7 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     MigrationParameters *params;
     MigrationState *s = migrate_get_current();

+    /* TODO use QAPI_CLONE() instead of duplicating it inline */
     params = g_malloc0(sizeof(*params));
     params->has_compress_level = true;
     params->compress_level = s->parameters.compress_level;
@@ -444,9 +504,9 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
     params->has_cpu_throttle_increment = true;
     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
-    params->has_tls_creds = !!s->parameters.tls_creds;
+    params->has_tls_creds = true;
     params->tls_creds = g_strdup(s->parameters.tls_creds);
-    params->has_tls_hostname = !!s->parameters.tls_hostname;
+    params->has_tls_hostname = true;
     params->tls_hostname = g_strdup(s->parameters.tls_hostname);
     params->has_max_bandwidth = true;
     params->max_bandwidth = s->parameters.max_bandwidth;
@@ -456,6 +516,12 @@ MigrationParameters *qmp_query_migrate_parameters(Error **errp)
     params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
     params->has_block_incremental = true;
     params->block_incremental = s->parameters.block_incremental;
+    params->has_x_multifd_channels = true;
+    params->x_multifd_channels = s->parameters.x_multifd_channels;
+    params->has_x_multifd_page_count = true;
+    params->x_multifd_page_count = s->parameters.x_multifd_page_count;
+    params->has_xbzrle_cache_size = true;
+    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;

     return params;
 }
@@ -470,6 +536,8 @@ static bool migration_is_setup_or_active(int state)
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_SETUP:
+    case MIGRATION_STATUS_PRE_SWITCHOVER:
+    case MIGRATION_STATUS_DEVICE:
         return true;

     default:
@@ -544,11 +612,13 @@ MigrationInfo *qmp_query_migrate(Error **errp)
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_CANCELLING:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
+    case MIGRATION_STATUS_PRE_SWITCHOVER:
+    case MIGRATION_STATUS_DEVICE:
         /* TODO add some postcopy stats */
         info->has_status = true;
         info->has_total_time = true;
         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
-            - s->total_time;
+            - s->start_time;
         info->has_expected_downtime = true;
         info->expected_downtime = s->expected_downtime;
         info->has_setup_time = true;
@@ -603,6 +673,7 @@ static bool migrate_caps_check(bool *cap_list,
 {
     MigrationCapabilityStatusList *cap;
     bool old_postcopy_cap;
+    MigrationIncomingState *mis = migration_incoming_get_current();

     old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];

@@ -636,7 +707,7 @@ static bool migrate_caps_check(bool *cap_list,
          * special support.
          */
         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
-            !postcopy_ram_supported_by_host()) {
+            !postcopy_ram_supported_by_host(mis)) {
             /* postcopy_ram_supported_by_host will have emitted a more
              * detailed message
              */
@@ -675,22 +746,20 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
 static bool migrate_params_check(MigrationParameters *params, Error **errp)
 {
     if (params->has_compress_level &&
-        (params->compress_level < 0 || params->compress_level > 9)) {
+        (params->compress_level > 9)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
                    "is invalid, it should be in the range of 0 to 9");
         return false;
     }

-    if (params->has_compress_threads &&
-        (params->compress_threads < 1 || params->compress_threads > 255)) {
+    if (params->has_compress_threads && (params->compress_threads < 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "compress_threads",
                    "is invalid, it should be in the range of 1 to 255");
         return false;
     }

-    if (params->has_decompress_threads &&
-        (params->decompress_threads < 1 || params->decompress_threads > 255)) {
+    if (params->has_decompress_threads && (params->decompress_threads < 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
                    "decompress_threads",
                    "is invalid, it should be in the range of 1 to 255");
@@ -715,36 +784,119 @@ static bool migrate_params_check(MigrationParameters *params, Error **errp)
         return false;
     }

-    if (params->has_max_bandwidth &&
-        (params->max_bandwidth < 0 || params->max_bandwidth > SIZE_MAX)) {
+    if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
         error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
                          " range of 0 to %zu bytes/second", SIZE_MAX);
         return false;
     }

     if (params->has_downtime_limit &&
-        (params->downtime_limit < 0 ||
-         params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
+        (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
                          "the range of 0 to %d milliseconds",
                    MAX_MIGRATE_DOWNTIME);
         return false;
     }

-    if (params->has_x_checkpoint_delay && (params->x_checkpoint_delay < 0)) {
+    /* x_checkpoint_delay is now always positive */
+
+    if (params->has_x_multifd_channels && (params->x_multifd_channels < 1)) {
         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
-                    "x_checkpoint_delay",
-                    "is invalid, it should be positive");
+                   "multifd_channels",
+                   "is invalid, it should be in the range of 1 to 255");
+        return false;
+    }
+    if (params->has_x_multifd_page_count &&
+        (params->x_multifd_page_count < 1 ||
+         params->x_multifd_page_count > 10000)) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   "multifd_page_count",
+                   "is invalid, it should be in the range of 1 to 10000");
+        return false;
+    }
+
+    if (params->has_xbzrle_cache_size &&
+        (params->xbzrle_cache_size < qemu_target_page_size() ||
+         !is_power_of_2(params->xbzrle_cache_size))) {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                   "xbzrle_cache_size",
+                   "is invalid, it should be bigger than target page size"
+                   " and a power of two");
         return false;
     }

     return true;
 }

-static void migrate_params_apply(MigrationParameters *params)
+static void migrate_params_test_apply(MigrateSetParameters *params,
+                                      MigrationParameters *dest)
+{
+    *dest = migrate_get_current()->parameters;
+
+    /* TODO use QAPI_CLONE() instead of duplicating it inline */
+
+    if (params->has_compress_level) {
+        dest->compress_level = params->compress_level;
+    }
+
+    if (params->has_compress_threads) {
+        dest->compress_threads = params->compress_threads;
+    }
+
+    if (params->has_decompress_threads) {
+        dest->decompress_threads = params->decompress_threads;
+    }
+
+    if (params->has_cpu_throttle_initial) {
+        dest->cpu_throttle_initial = params->cpu_throttle_initial;
+    }
+
+    if (params->has_cpu_throttle_increment) {
+        dest->cpu_throttle_increment = params->cpu_throttle_increment;
+    }
+
+    if (params->has_tls_creds) {
+        assert(params->tls_creds->type == QTYPE_QSTRING);
+        dest->tls_creds = g_strdup(params->tls_creds->u.s);
+    }
+
+    if (params->has_tls_hostname) {
+        assert(params->tls_hostname->type == QTYPE_QSTRING);
+        dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
+    }
+
+    if (params->has_max_bandwidth) {
+        dest->max_bandwidth = params->max_bandwidth;
+    }
+
+    if (params->has_downtime_limit) {
+        dest->downtime_limit = params->downtime_limit;
+    }
+
+    if (params->has_x_checkpoint_delay) {
+        dest->x_checkpoint_delay = params->x_checkpoint_delay;
+    }
+
+    if (params->has_block_incremental) {
+        dest->block_incremental = params->block_incremental;
+    }
+    if (params->has_x_multifd_channels) {
+        dest->x_multifd_channels = params->x_multifd_channels;
+    }
+    if (params->has_x_multifd_page_count) {
+        dest->x_multifd_page_count = params->x_multifd_page_count;
+    }
+    if (params->has_xbzrle_cache_size) {
+        dest->xbzrle_cache_size = params->xbzrle_cache_size;
+    }
+}
+
+static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
 {
     MigrationState *s = migrate_get_current();

+    /* TODO use QAPI_CLONE() instead of duplicating it inline */
+
     if (params->has_compress_level) {
         s->parameters.compress_level = params->compress_level;
     }
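[Note] For reference, the new xbzrle_cache_size check in migrate_params_check() above boils down to this standalone predicate (a sketch; QEMU's is_power_of_2() helper uses the same bit trick shown here):

    #include <stdbool.h>
    #include <stdint.h>

    /* valid iff size is a power of two no smaller than the target page */
    static bool xbzrle_cache_size_ok(uint64_t size, uint64_t target_page_size)
    {
        bool pow2 = size && !(size & (size - 1));

        return size >= target_page_size && pow2;
    }

    /* xbzrle_cache_size_ok(64 << 20, 4096) -> true  (the default)
     * xbzrle_cache_size_ok(48 << 20, 4096) -> false (not a power of two)
     * xbzrle_cache_size_ok(2048, 4096)     -> false (smaller than a page) */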
@@ -767,12 +919,14 @@ static void migrate_params_apply(MigrationParameters *params)

     if (params->has_tls_creds) {
         g_free(s->parameters.tls_creds);
-        s->parameters.tls_creds = g_strdup(params->tls_creds);
+        assert(params->tls_creds->type == QTYPE_QSTRING);
+        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
     }

     if (params->has_tls_hostname) {
         g_free(s->parameters.tls_hostname);
-        s->parameters.tls_hostname = g_strdup(params->tls_hostname);
+        assert(params->tls_hostname->type == QTYPE_QSTRING);
+        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
     }

     if (params->has_max_bandwidth) {
@@ -797,16 +951,45 @@ static void migrate_params_apply(MigrationParameters *params)
     if (params->has_block_incremental) {
         s->parameters.block_incremental = params->block_incremental;
     }
+    if (params->has_x_multifd_channels) {
+        s->parameters.x_multifd_channels = params->x_multifd_channels;
+    }
+    if (params->has_x_multifd_page_count) {
+        s->parameters.x_multifd_page_count = params->x_multifd_page_count;
+    }
+    if (params->has_xbzrle_cache_size) {
+        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
+        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
+    }
 }

-void qmp_migrate_set_parameters(MigrationParameters *params, Error **errp)
+void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
 {
-    if (!migrate_params_check(params, errp)) {
+    MigrationParameters tmp;
+
+    /* TODO Rewrite "" to null instead */
+    if (params->has_tls_creds
+        && params->tls_creds->type == QTYPE_QNULL) {
+        QDECREF(params->tls_creds->u.n);
+        params->tls_creds->type = QTYPE_QSTRING;
+        params->tls_creds->u.s = strdup("");
+    }
+    /* TODO Rewrite "" to null instead */
+    if (params->has_tls_hostname
+        && params->tls_hostname->type == QTYPE_QNULL) {
+        QDECREF(params->tls_hostname->u.n);
+        params->tls_hostname->type = QTYPE_QSTRING;
+        params->tls_hostname->u.s = strdup("");
+    }
+
+    migrate_params_test_apply(params, &tmp);
+
+    if (!migrate_params_check(&tmp, errp)) {
         /* Invalid parameter */
         return;
     }

-    migrate_params_apply(params);
+    migrate_params_apply(params, errp);
 }

@@ -836,8 +1019,9 @@ void qmp_migrate_start_postcopy(Error **errp)

 void migrate_set_state(int *state, int old_state, int new_state)
 {
+    assert(new_state < MIGRATION_STATUS__MAX);
     if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
-        trace_migrate_set_state(new_state);
+        trace_migrate_set_state(MigrationStatus_str(new_state));
         migrate_generate_event(new_state);
     }
 }
@@ -889,7 +1073,11 @@ static void migrate_fd_cleanup(void *opaque)
     qemu_bh_delete(s->cleanup_bh);
     s->cleanup_bh = NULL;

+    qemu_savevm_state_cleanup();
+
     if (s->to_dst_file) {
+        Error *local_err = NULL;
+
         trace_migrate_fd_cleanup();
         qemu_mutex_unlock_iothread();
         if (s->migration_thread_running) {
@@ -898,6 +1086,9 @@ static void migrate_fd_cleanup(void *opaque)
         }
         qemu_mutex_lock_iothread();

+        if (multifd_save_cleanup(&local_err) != 0) {
+            error_report_err(local_err);
+        }
         qemu_fclose(s->to_dst_file);
         s->to_dst_file = NULL;
     }
@@ -910,21 +1101,30 @@ static void migrate_fd_cleanup(void *opaque)
                           MIGRATION_STATUS_CANCELLED);
     }

+    if (s->error) {
+        /* It is used on info migrate.  We can't free it */
+        error_report_err(error_copy(s->error));
+    }
     notifier_list_notify(&migration_state_notifiers, s);
     block_cleanup_parameters(s);
 }

+void migrate_set_error(MigrationState *s, const Error *error)
+{
+    qemu_mutex_lock(&s->error_mutex);
+    if (!s->error) {
+        s->error = error_copy(error);
+    }
+    qemu_mutex_unlock(&s->error_mutex);
+}
+
 void migrate_fd_error(MigrationState *s, const Error *error)
 {
     trace_migrate_fd_error(error_get_pretty(error));
     assert(s->to_dst_file == NULL);
     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                       MIGRATION_STATUS_FAILED);
-    if (!s->error) {
-        s->error = error_copy(error);
-    }
-    notifier_list_notify(&migration_state_notifiers, s);
-    block_cleanup_parameters(s);
+    migrate_set_error(s, error);
 }

 static void migrate_fd_cancel(MigrationState *s)
@@ -943,6 +1143,10 @@ static void migrate_fd_cancel(MigrationState *s)
         if (!migration_is_setup_or_active(old_state)) {
             break;
         }
+        /* If the migration is paused, kick it out of the pause */
+        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
+            qemu_sem_post(&s->pause_sem);
+        }
         migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
     } while (s->state != MIGRATION_STATUS_CANCELLING);

@@ -966,7 +1170,6 @@ static void migrate_fd_cancel(MigrationState *s)
             s->block_inactive = false;
         }
     }
-    block_cleanup_parameters(s);
 }

 void add_migration_state_change_notifier(Notifier *notify)
@@ -1022,6 +1225,8 @@ bool migration_is_idle(void)
     case MIGRATION_STATUS_ACTIVE:
     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
     case MIGRATION_STATUS_COLO:
+    case MIGRATION_STATUS_PRE_SWITCHOVER:
+    case MIGRATION_STATUS_DEVICE:
         return false;
     case MIGRATION_STATUS__MAX:
         g_assert_not_reached();
@@ -1058,7 +1263,11 @@ MigrationState *migrate_init(void)

     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

-    s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    s->total_time = 0;
+    s->vm_was_running = false;
+    s->iteration_initial_bytes = 0;
+    s->threshold_size = 0;
     return s;
 }

@@ -1201,33 +1410,25 @@ void qmp_migrate_cancel(Error **errp)
     migrate_fd_cancel(migrate_get_current());
 }

-void qmp_migrate_set_cache_size(int64_t value, Error **errp)
+void qmp_migrate_continue(MigrationStatus state, Error **errp)
 {
     MigrationState *s = migrate_get_current();
-    int64_t new_size;
-
-    /* Check for truncation */
-    if (value != (size_t)value) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
-                   "exceeding address space");
-        return;
-    }
-
-    /* Cache should not be larger than guest ram size */
-    if (value > ram_bytes_total()) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
-                   "exceeds guest ram size ");
+    if (s->state != state) {
+        error_setg(errp, "Migration not in expected state: %s",
+                   MigrationStatus_str(s->state));
         return;
     }
+    qemu_sem_post(&s->pause_sem);
+}

-    new_size = xbzrle_cache_resize(value);
-    if (new_size < 0) {
-        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
-                   "is smaller than page size");
-        return;
-    }
+
+void qmp_migrate_set_cache_size(int64_t value, Error **errp)
+{
+    MigrateSetParameters p = {
+        .has_xbzrle_cache_size = true,
+        .xbzrle_cache_size = value,
+    };

-    s->xbzrle_cache_size = new_size;
+    qmp_migrate_set_parameters(&p, errp);
 }

 int64_t qmp_query_migrate_cache_size(Error **errp)
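[Note] With pause-before-switchover enabled, the migration thread parks in the PRE_SWITCHOVER state and the new qmp_migrate_continue() above releases it. A hypothetical caller sketch (example_resume() is not part of the patch; the qmp_migrate_continue() call is):

    static void example_resume(void)
    {
        Error *err = NULL;

        /* only succeeds if migration really is parked in PRE_SWITCHOVER */
        qmp_migrate_continue(MIGRATION_STATUS_PRE_SWITCHOVER, &err);
        if (err) {
            error_report_err(err);
        }
    }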
@@ -1237,7 +1438,7 @@ int64_t qmp_query_migrate_cache_size(Error **errp)

 void qmp_migrate_set_speed(int64_t value, Error **errp)
 {
-    MigrationParameters p = {
+    MigrateSetParameters p = {
         .has_max_bandwidth = true,
         .max_bandwidth = value,
     };
@@ -1257,7 +1458,7 @@ void qmp_migrate_set_downtime(double value, Error **errp)
     value *= 1000; /* Convert to milliseconds */
     value = MAX(0, MIN(INT64_MAX, value));

-    MigrationParameters p = {
+    MigrateSetParameters p = {
         .has_downtime_limit = true,
         .downtime_limit = value,
     };
@@ -1283,6 +1484,11 @@ bool migrate_postcopy_ram(void)
     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
 }

+bool migrate_postcopy(void)
+{
+    return migrate_postcopy_ram();
+}
+
 bool migrate_auto_converge(void)
 {
     MigrationState *s;
@@ -1346,6 +1552,43 @@ bool migrate_use_events(void)
     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
 }

+bool migrate_use_multifd(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
+}
+
+bool migrate_pause_before_switchover(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->enabled_capabilities[
+        MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
+}
+
+int migrate_multifd_channels(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->parameters.x_multifd_channels;
+}
+
+int migrate_multifd_page_count(void)
+{
+    MigrationState *s;
+
+    s = migrate_get_current();
+
+    return s->parameters.x_multifd_page_count;
+}
+
 int migrate_use_xbzrle(void)
 {
     MigrationState *s;
@@ -1361,7 +1604,7 @@ int64_t migrate_xbzrle_cache_size(void)

     s = migrate_get_current();

-    return s->xbzrle_cache_size;
+    return s->parameters.xbzrle_cache_size;
 }

 bool migrate_use_block(void)
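[Note] The accessors above expose the multifd knobs to the RAM save path. A hypothetical illustration of what they control (example_multifd_batch_bytes() is not part of the patch; qemu_target_page_size() is the existing helper used elsewhere in this file):

    static uint64_t example_multifd_batch_bytes(void)
    {
        if (!migrate_use_multifd()) {
            return 0;
        }
        /* pages handed out per batch, across all channels */
        return (uint64_t)migrate_multifd_channels() *
               migrate_multifd_page_count() *
               qemu_target_page_size();
    }

    /* With the defaults added above (2 channels, 16 pages) and 4 KiB
     * target pages this comes to 128 KiB in flight per batch. */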
@@ -1599,28 +1842,36 @@ static int await_return_path_close_on_source(MigrationState *ms)
  * Switch from normal iteration to postcopy
  * Returns non-0 on error
  */
-static int postcopy_start(MigrationState *ms, bool *old_vm_running)
+static int postcopy_start(MigrationState *ms)
 {
     int ret;
     QIOChannelBuffer *bioc;
     QEMUFile *fb;
     int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     bool restart_block = false;
-    migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
-                      MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    int cur_state = MIGRATION_STATUS_ACTIVE;
+    if (!migrate_pause_before_switchover()) {
+        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
+                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    }

     trace_postcopy_start();
     qemu_mutex_lock_iothread();
     trace_postcopy_start_set_run();

     qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
-    *old_vm_running = runstate_is_running();
     global_state_store();
     ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
     if (ret < 0) {
         goto fail;
     }

+    ret = migration_maybe_pause(ms, &cur_state,
+                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    if (ret < 0) {
+        goto fail;
+    }
+
     ret = bdrv_inactivate_all();
     if (ret < 0) {
         goto fail;
@@ -1639,9 +1890,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
      * need to tell the destination to throw any pages it's already received
      * that are dirty
      */
-    if (ram_postcopy_send_discard_bitmap(ms)) {
-        error_report("postcopy send discard bitmap failed");
-        goto fail;
+    if (migrate_postcopy_ram()) {
+        if (ram_postcopy_send_discard_bitmap(ms)) {
+            error_report("postcopy send discard bitmap failed");
+            goto fail;
+        }
     }

     /*
@@ -1650,8 +1903,10 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
      * wrap their state up here
      */
     qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
-    /* Ping just for debugging, helps line traces up */
-    qemu_savevm_send_ping(ms->to_dst_file, 2);
+    if (migrate_postcopy_ram()) {
+        /* Ping just for debugging, helps line traces up */
+        qemu_savevm_send_ping(ms->to_dst_file, 2);
+    }

     /*
      * While loading the device state we may trigger page transfer
@@ -1676,7 +1931,9 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
     qemu_savevm_send_postcopy_listen(fb);

     qemu_savevm_state_complete_precopy(fb, false, false);
-    qemu_savevm_send_ping(fb, 3);
+    if (migrate_postcopy_ram()) {
+        qemu_savevm_send_ping(fb, 3);
+    }

     qemu_savevm_send_postcopy_run(fb);

@@ -1711,11 +1968,13 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)

     qemu_mutex_unlock_iothread();

-    /*
-     * Although this ping is just for debug, it could potentially be
-     * used for getting a better measurement of downtime at the source.
-     */
-    qemu_savevm_send_ping(ms->to_dst_file, 4);
+    if (migrate_postcopy_ram()) {
+        /*
+         * Although this ping is just for debug, it could potentially be
+         * used for getting a better measurement of downtime at the source.
+         */
+        qemu_savevm_send_ping(ms->to_dst_file, 4);
+    }

     if (migrate_release_ram()) {
         ram_postcopy_migrated_memory_release(ms);
@@ -1750,31 +2009,66 @@ fail:
     return -1;
 }

+/**
+ * migration_maybe_pause: Pause if required to by
+ * migrate_pause_before_switchover called with the iothread locked
+ * Returns: 0 on success
+ */
+static int migration_maybe_pause(MigrationState *s,
+                                 int *current_active_state,
+                                 int new_state)
+{
+    if (!migrate_pause_before_switchover()) {
+        return 0;
+    }
+
+    /* Since leaving this state is not atomic with posting the semaphore
+     * it's possible that someone could have issued multiple migrate_continue
+     * and the semaphore is incorrectly positive at this point;
+     * the docs say it's undefined to reinit a semaphore that's already
+     * init'd, so use timedwait to eat up any existing posts.
+     */
+    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
+        /* This block intentionally left blank */
+    }
+
+    qemu_mutex_unlock_iothread();
+    migrate_set_state(&s->state, *current_active_state,
+                      MIGRATION_STATUS_PRE_SWITCHOVER);
+    qemu_sem_wait(&s->pause_sem);
+    migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
+                      new_state);
+    *current_active_state = new_state;
+    qemu_mutex_lock_iothread();
+
+    return s->state == new_state ? 0 : -EINVAL;
+}
+
 /**
  * migration_completion: Used by migration_thread when there's not much left.
  *   The caller 'breaks' the loop when this returns.
  *
  * @s: Current migration state
- * @current_active_state: The migration state we expect to be in
- * @*old_vm_running: Pointer to old_vm_running flag
- * @*start_time: Pointer to time to update
 */
-static void migration_completion(MigrationState *s, int current_active_state,
-                                 bool *old_vm_running,
-                                 int64_t *start_time)
+static void migration_completion(MigrationState *s)
 {
     int ret;
+    int current_active_state = s->state;

     if (s->state == MIGRATION_STATUS_ACTIVE) {
         qemu_mutex_lock_iothread();
-        *start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
-        *old_vm_running = runstate_is_running();
+        s->vm_was_running = runstate_is_running();
         ret = global_state_store();

         if (!ret) {
             bool inactivate = !migrate_colo_enabled();
             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+            if (ret >= 0) {
+                ret = migration_maybe_pause(s, &current_active_state,
+                                            MIGRATION_STATUS_DEVICE);
+            }
             if (ret >= 0) {
                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
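[Note] migration_maybe_pause() above drains stale semaphore posts before blocking, so an earlier spurious migrate-continue cannot skip the pause. A self-contained model of that idiom using POSIX semaphores (the patch's qemu_sem_timedwait(&s->pause_sem, 1) plays the role of sem_trywait() here):

    #include <errno.h>
    #include <semaphore.h>

    static void drain_then_wait(sem_t *pause_sem)
    {
        /* eat any posts left over from earlier continue commands */
        while (sem_trywait(pause_sem) == 0) {
            /* intentionally empty */
        }
        /* now really block until the next post */
        while (sem_wait(pause_sem) == -1 && errno == EINTR) {
            /* retry if interrupted by a signal */
        }
    }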
@@ -1852,6 +2146,154 @@ bool migrate_colo_enabled(void)
     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
 }

+static void migration_calculate_complete(MigrationState *s)
+{
+    uint64_t bytes = qemu_ftell(s->to_dst_file);
+    int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
+    s->total_time = end_time - s->start_time;
+    if (!s->downtime) {
+        /*
+         * It's still not set, so we are precopy migration.  For
+         * postcopy, downtime is calculated during postcopy_start().
+         */
+        s->downtime = end_time - s->downtime_start;
+    }
+
+    if (s->total_time) {
+        s->mbps = ((double) bytes * 8.0) / s->total_time / 1000;
+    }
+}
+
+static void migration_update_counters(MigrationState *s,
+                                      int64_t current_time)
+{
+    uint64_t transferred, time_spent;
+    double bandwidth;
+
+    if (current_time < s->iteration_start_time + BUFFER_DELAY) {
+        return;
+    }
+
+    transferred = qemu_ftell(s->to_dst_file) - s->iteration_initial_bytes;
+    time_spent = current_time - s->iteration_start_time;
+    bandwidth = (double)transferred / time_spent;
+    s->threshold_size = bandwidth * s->parameters.downtime_limit;
+
+    s->mbps = (((double) transferred * 8.0) /
+               ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
+
+    /*
+     * if we haven't sent anything, we don't want to
+     * recalculate. 10000 is a small enough number for our purposes
+     */
+    if (ram_counters.dirty_pages_rate && transferred > 10000) {
+        s->expected_downtime = ram_counters.dirty_pages_rate *
+            qemu_target_page_size() / bandwidth;
+    }
+
+    qemu_file_reset_rate_limit(s->to_dst_file);
+
+    s->iteration_start_time = current_time;
+    s->iteration_initial_bytes = qemu_ftell(s->to_dst_file);
+
+    trace_migrate_transferred(transferred, time_spent,
+                              bandwidth, s->threshold_size);
+}
+
+/* Migration thread iteration status */
+typedef enum {
+    MIG_ITERATE_RESUME,         /* Resume current iteration */
+    MIG_ITERATE_SKIP,           /* Skip current iteration */
+    MIG_ITERATE_BREAK,          /* Break the loop */
+} MigIterateState;
+
+/*
+ * Return true if continue to the next iteration directly, false
+ * otherwise.
+ */
+static MigIterateState migration_iteration_run(MigrationState *s)
+{
+    uint64_t pending_size, pend_post, pend_nonpost;
+    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
+
+    qemu_savevm_state_pending(s->to_dst_file, s->threshold_size,
+                              &pend_nonpost, &pend_post);
+    pending_size = pend_nonpost + pend_post;
+
+    trace_migrate_pending(pending_size, s->threshold_size,
+                          pend_post, pend_nonpost);
+
+    if (pending_size && pending_size >= s->threshold_size) {
+        /* Still a significant amount to transfer */
+        if (migrate_postcopy() && !in_postcopy &&
+            pend_nonpost <= s->threshold_size &&
+            atomic_read(&s->start_postcopy)) {
+            if (postcopy_start(s)) {
+                error_report("%s: postcopy failed to start", __func__);
+            }
+            return MIG_ITERATE_SKIP;
+        }
+        /* Just another iteration step */
+        qemu_savevm_state_iterate(s->to_dst_file,
+            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
+    } else {
+        trace_migration_thread_low_pending(pending_size);
+        migration_completion(s);
+        return MIG_ITERATE_BREAK;
+    }
+
+    return MIG_ITERATE_RESUME;
+}
+
+static void migration_iteration_finish(MigrationState *s)
+{
+    /* If we enabled cpu throttling for auto-converge, turn it off. */
+    cpu_throttle_stop();
+
+    qemu_mutex_lock_iothread();
+    switch (s->state) {
+    case MIGRATION_STATUS_COMPLETED:
+        migration_calculate_complete(s);
+        runstate_set(RUN_STATE_POSTMIGRATE);
+        break;
+
+    case MIGRATION_STATUS_ACTIVE:
+        /*
+         * We should really assert here, but since it's during
+         * migration, let's try to reduce the usage of assertions.
+         */
+        if (!migrate_colo_enabled()) {
+            error_report("%s: critical error: calling COLO code without "
+                         "COLO enabled", __func__);
+        }
+        migrate_start_colo_process(s);
+        /*
+         * Fixme: we will run VM in COLO no matter its old running state.
+         * After exited COLO, we will keep running.
+         */
+        s->vm_was_running = true;
+        /* Fallthrough */
+    case MIGRATION_STATUS_FAILED:
+    case MIGRATION_STATUS_CANCELLED:
+        if (s->vm_was_running) {
+            vm_start();
+        } else {
+            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
+                runstate_set(RUN_STATE_POSTMIGRATE);
+            }
+        }
+        break;

+    default:
+        /* Should not reach here, but if so, forgive the VM. */
+        error_report("%s: Unknown ending state %d", __func__, s->state);
+        break;
+    }
+    qemu_bh_schedule(s->cleanup_bh);
+    qemu_mutex_unlock_iothread();
+}
+
 /*
  * Master migration thread on the source VM.
  * It drives the migration and pumps the data down the outgoing channel.
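[Note] A worked example of the migration_update_counters() math above (illustrative numbers only; bandwidth comes out in bytes per millisecond because time_spent is measured in ms):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t transferred    = 3000000000ULL; /* bytes sent this window */
        uint64_t time_spent     = 10000;         /* window length, ms      */
        uint64_t downtime_limit = 300;           /* allowed downtime, ms   */

        double bandwidth = (double)transferred / time_spent;  /* bytes/ms */
        uint64_t threshold_size = bandwidth * downtime_limit;

        /* prints "threshold_size = 90000000": once the remaining dirty
         * data fits in ~90 MB, migration_iteration_run() completes. */
        printf("threshold_size = %" PRIu64 "\n", threshold_size);
        return 0;
    }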
@@ -1859,26 +2301,12 @@ bool migrate_colo_enabled(void)
 static void *migration_thread(void *opaque)
 {
     MigrationState *s = opaque;
-    /* Used by the bandwidth calcs, updated later */
-    int64_t initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
-    int64_t initial_bytes = 0;
-    /*
-     * The final stage happens when the remaining data is smaller than
-     * this threshold; it's calculated from the requested downtime and
-     * measured bandwidth
-     */
-    int64_t threshold_size = 0;
-    int64_t start_time = initial_time;
-    int64_t end_time;
-    bool old_vm_running = false;
-    bool entered_postcopy = false;
-    /* The active state we expect to be in; ACTIVE or POSTCOPY_ACTIVE */
-    enum MigrationStatus current_active_state = MIGRATION_STATUS_ACTIVE;
-    bool enable_colo = migrate_colo_enabled();

     rcu_register_thread();

+    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+
     qemu_savevm_state_header(s->to_dst_file);

     /*
@@ -1893,7 +2321,7 @@ static void *migration_thread(void *opaque)
         qemu_savevm_send_ping(s->to_dst_file, 1);
     }

-    if (migrate_postcopy_ram()) {
+    if (migrate_postcopy()) {
         /*
          * Tell the destination that we *might* want to do postcopy later;
          * if the other end can't do postcopy it should fail now, nice and
@@ -1913,130 +2341,51 @@ static void *migration_thread(void *opaque)
     while (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
         int64_t current_time;
-        uint64_t pending_size;

         if (!qemu_file_rate_limit(s->to_dst_file)) {
-            uint64_t pend_post, pend_nonpost;
-
-            qemu_savevm_state_pending(s->to_dst_file, threshold_size,
-                                      &pend_nonpost, &pend_post);
-            pending_size = pend_nonpost + pend_post;
-            trace_migrate_pending(pending_size, threshold_size,
-                                  pend_post, pend_nonpost);
-            if (pending_size && pending_size >= threshold_size) {
-                /* Still a significant amount to transfer */
-
-                if (migrate_postcopy_ram() &&
-                    s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE &&
-                    pend_nonpost <= threshold_size &&
-                    atomic_read(&s->start_postcopy)) {
-
-                    if (!postcopy_start(s, &old_vm_running)) {
-                        current_active_state = MIGRATION_STATUS_POSTCOPY_ACTIVE;
-                        entered_postcopy = true;
-                    }
-
-                    continue;
-                }
-                /* Just another iteration step */
-                qemu_savevm_state_iterate(s->to_dst_file, entered_postcopy);
-            } else {
-                trace_migration_thread_low_pending(pending_size);
-                migration_completion(s, current_active_state,
-                                     &old_vm_running, &start_time);
+            MigIterateState iter_state = migration_iteration_run(s);
+            if (iter_state == MIG_ITERATE_SKIP) {
+                continue;
+            } else if (iter_state == MIG_ITERATE_BREAK) {
                 break;
             }
         }

         if (qemu_file_get_error(s->to_dst_file)) {
-            migrate_set_state(&s->state, current_active_state,
-                              MIGRATION_STATUS_FAILED);
+            if (migration_is_setup_or_active(s->state)) {
+                migrate_set_state(&s->state, s->state,
+                                  MIGRATION_STATUS_FAILED);
+            }
             trace_migration_thread_file_err();
             break;
         }
+
         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-        if (current_time >= initial_time + BUFFER_DELAY) {
-            uint64_t transferred_bytes = qemu_ftell(s->to_dst_file) -
-                                         initial_bytes;
-            uint64_t time_spent = current_time - initial_time;
-            double bandwidth = (double)transferred_bytes / time_spent;
-            threshold_size = bandwidth * s->parameters.downtime_limit;
-
-            s->mbps = (((double) transferred_bytes * 8.0) /
-                      ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
-
-            trace_migrate_transferred(transferred_bytes, time_spent,
-                                      bandwidth, threshold_size);
-            /* if we haven't sent anything, we don't want to recalculate
-               10000 is a small enough number for our purposes */
-            if (ram_counters.dirty_pages_rate && transferred_bytes > 10000) {
-                s->expected_downtime = ram_counters.dirty_pages_rate *
-                    qemu_target_page_size() / bandwidth;
-            }
-            qemu_file_reset_rate_limit(s->to_dst_file);
-            initial_time = current_time;
-            initial_bytes = qemu_ftell(s->to_dst_file);
-        }
+
+        migration_update_counters(s, current_time);
+
         if (qemu_file_rate_limit(s->to_dst_file)) {
             /* usleep expects microseconds */
-            g_usleep((initial_time + BUFFER_DELAY - current_time)*1000);
+            g_usleep((s->iteration_start_time + BUFFER_DELAY -
+                      current_time) * 1000);
         }
     }

     trace_migration_thread_after_loop();
-    /* If we enabled cpu throttling for auto-converge, turn it off. */
-    cpu_throttle_stop();
-    end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
-    qemu_mutex_lock_iothread();
-    /*
-     * The resource has been allocated by migration will be reused in COLO
-     * process, so don't release them.
-     */
-    if (!enable_colo) {
-        qemu_savevm_state_cleanup();
-    }
-    if (s->state == MIGRATION_STATUS_COMPLETED) {
-        uint64_t transferred_bytes = qemu_ftell(s->to_dst_file);
-        s->total_time = end_time - s->total_time;
-        if (!entered_postcopy) {
-            s->downtime = end_time - start_time;
-        }
-        if (s->total_time) {
-            s->mbps = (((double) transferred_bytes * 8.0) /
-                       ((double) s->total_time)) / 1000;
-        }
-        runstate_set(RUN_STATE_POSTMIGRATE);
-    } else {
-        if (s->state == MIGRATION_STATUS_ACTIVE && enable_colo) {
-            migrate_start_colo_process(s);
-            qemu_savevm_state_cleanup();
-            /*
-             * Fixme: we will run VM in COLO no matter its old running state.
-             * After exited COLO, we will keep running.
-             */
-            old_vm_running = true;
-        }
-        if (old_vm_running && !entered_postcopy) {
-            vm_start();
-        } else {
-            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
-                runstate_set(RUN_STATE_POSTMIGRATE);
-            }
-        }
-    }
-    qemu_bh_schedule(s->cleanup_bh);
-    qemu_mutex_unlock_iothread();
-
+    migration_iteration_finish(s);
     rcu_unregister_thread();
     return NULL;
 }

-void migrate_fd_connect(MigrationState *s)
+void migrate_fd_connect(MigrationState *s, Error *error_in)
 {
     s->expected_downtime = s->parameters.downtime_limit;
     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);
+    if (error_in) {
+        migrate_fd_error(s, error_in);
+        migrate_fd_cleanup(s);
+        return;
+    }

     qemu_file_set_blocking(s->to_dst_file, true);
     qemu_file_set_rate_limit(s->to_dst_file,
@@ -2060,6 +2409,12 @@ void migrate_fd_connect(MigrationState *s)
         }
     }

+    if (multifd_save_setup() != 0) {
+        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
+                          MIGRATION_STATUS_FAILED);
+        migrate_fd_cleanup(s);
+        return;
+    }
     qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
                        QEMU_THREAD_JOINABLE);
     s->migration_thread_running = true;
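[Note] migrate_fd_connect() now takes the connection result as a parameter, so channel code has a single call for both outcomes. A hypothetical caller sketch (example_channel_connected() is not in the patch; f is NULL when err is set):

    static void example_channel_connected(MigrationState *s, QEMUFile *f,
                                          Error *err)
    {
        s->to_dst_file = f;
        migrate_fd_connect(s, err);  /* err != NULL fails and cleans up;
                                      * err == NULL starts migration_thread */
    }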
"on" : "off"); } #define DEFINE_PROP_MIG_CAP(name, x) \ @@ -2088,29 +2448,38 @@ static Property migration_properties[] = { send_section_footer, true), /* Migration parameters */ - DEFINE_PROP_INT64("x-compress-level", MigrationState, + DEFINE_PROP_UINT8("x-compress-level", MigrationState, parameters.compress_level, DEFAULT_MIGRATE_COMPRESS_LEVEL), - DEFINE_PROP_INT64("x-compress-threads", MigrationState, + DEFINE_PROP_UINT8("x-compress-threads", MigrationState, parameters.compress_threads, DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT), - DEFINE_PROP_INT64("x-decompress-threads", MigrationState, + DEFINE_PROP_UINT8("x-decompress-threads", MigrationState, parameters.decompress_threads, DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT), - DEFINE_PROP_INT64("x-cpu-throttle-initial", MigrationState, + DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState, parameters.cpu_throttle_initial, DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL), - DEFINE_PROP_INT64("x-cpu-throttle-increment", MigrationState, + DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState, parameters.cpu_throttle_increment, DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT), - DEFINE_PROP_INT64("x-max-bandwidth", MigrationState, + DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState, parameters.max_bandwidth, MAX_THROTTLE), - DEFINE_PROP_INT64("x-downtime-limit", MigrationState, + DEFINE_PROP_UINT64("x-downtime-limit", MigrationState, parameters.downtime_limit, DEFAULT_MIGRATE_SET_DOWNTIME), - DEFINE_PROP_INT64("x-checkpoint-delay", MigrationState, + DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState, parameters.x_checkpoint_delay, DEFAULT_MIGRATE_X_CHECKPOINT_DELAY), + DEFINE_PROP_UINT8("x-multifd-channels", MigrationState, + parameters.x_multifd_channels, + DEFAULT_MIGRATE_MULTIFD_CHANNELS), + DEFINE_PROP_UINT32("x-multifd-page-count", MigrationState, + parameters.x_multifd_page_count, + DEFAULT_MIGRATE_MULTIFD_PAGE_COUNT), + DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState, + parameters.xbzrle_cache_size, + DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE), /* Migration capabilities */ DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE), @@ -2124,6 +2493,7 @@ static Property migration_properties[] = { DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM), DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK), DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH), + DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_X_MULTIFD), DEFINE_PROP_END_OF_LIST(), }; @@ -2136,14 +2506,26 @@ static void migration_class_init(ObjectClass *klass, void *data) dc->props = migration_properties; } +static void migration_instance_finalize(Object *obj) +{ + MigrationState *ms = MIGRATION_OBJ(obj); + MigrationParameters *params = &ms->parameters; + + qemu_mutex_destroy(&ms->error_mutex); + g_free(params->tls_hostname); + g_free(params->tls_creds); + qemu_sem_destroy(&ms->pause_sem); +} + static void migration_instance_init(Object *obj) { MigrationState *ms = MIGRATION_OBJ(obj); MigrationParameters *params = &ms->parameters; ms->state = MIGRATION_STATUS_NONE; - ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE; ms->mbps = -1; + qemu_sem_init(&ms->pause_sem, 0); + qemu_mutex_init(&ms->error_mutex); params->tls_hostname = g_strdup(""); params->tls_creds = g_strdup(""); @@ -2158,6 +2540,9 @@ static void migration_instance_init(Object *obj) params->has_downtime_limit = true; params->has_x_checkpoint_delay = true; params->has_block_incremental = true; + params->has_x_multifd_channels = true; + params->has_x_multifd_page_count = true; + 
params->has_xbzrle_cache_size = true; } /* @@ -2204,6 +2589,7 @@ static const TypeInfo migration_type = { .class_size = sizeof(MigrationClass), .instance_size = sizeof(MigrationState), .instance_init = migration_instance_init, + .instance_finalize = migration_instance_finalize, }; static void register_migration_types(void)