/*
 * migration/migration.c (qemu.git)
 * Snapshot at commit: "migration: Allow migrate-recover to run multiple times"
 */
/*
 * QEMU live migration
 *
 * Copyright IBM, Corp. 2008
 *
 * Authors:
 *  Anthony Liguori   <[email protected]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */
15
16 #include "qemu/osdep.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qemu/main-loop.h"
20 #include "migration/blocker.h"
21 #include "exec.h"
22 #include "fd.h"
23 #include "socket.h"
24 #include "sysemu/runstate.h"
25 #include "sysemu/sysemu.h"
26 #include "sysemu/cpu-throttle.h"
27 #include "rdma.h"
28 #include "ram.h"
29 #include "migration/global_state.h"
30 #include "migration/misc.h"
31 #include "migration.h"
32 #include "savevm.h"
33 #include "qemu-file-channel.h"
34 #include "qemu-file.h"
35 #include "migration/vmstate.h"
36 #include "block/block.h"
37 #include "qapi/error.h"
38 #include "qapi/clone-visitor.h"
39 #include "qapi/qapi-visit-migration.h"
40 #include "qapi/qapi-visit-sockets.h"
41 #include "qapi/qapi-commands-migration.h"
42 #include "qapi/qapi-events-migration.h"
43 #include "qapi/qmp/qerror.h"
44 #include "qapi/qmp/qnull.h"
45 #include "qemu/rcu.h"
46 #include "block.h"
47 #include "postcopy-ram.h"
48 #include "qemu/thread.h"
49 #include "trace.h"
50 #include "exec/target_page.h"
51 #include "io/channel-buffer.h"
52 #include "migration/colo.h"
53 #include "hw/boards.h"
54 #include "hw/qdev-properties.h"
55 #include "hw/qdev-properties-system.h"
56 #include "monitor/monitor.h"
57 #include "net/announce.h"
58 #include "qemu/queue.h"
59 #include "multifd.h"
60 #include "qemu/yank.h"
61 #include "sysemu/cpus.h"
62 #include "yank_functions.h"
63 #include "sysemu/qtest.h"
64
#define MAX_THROTTLE  (128 << 20)      /* Migration transfer speed throttling */

/* Amount of time to allocate to each "chunk" of bandwidth-throttled
 * data. */
#define BUFFER_DELAY     100
/* Number of BUFFER_DELAY-sized chunks per second */
#define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)

/* Time in milliseconds we are allowed to stop the source,
 * for sending the last part */
#define DEFAULT_MIGRATE_SET_DOWNTIME 300

/* Maximum migrate downtime set to 2000 seconds */
#define MAX_MIGRATE_DOWNTIME_SECONDS 2000
#define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)

/* Default compression thread count */
#define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
/* Default decompression thread count, usually decompression is at
 * least 4 times as fast as compression.*/
#define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
/*0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
/* Define default autoconverge cpu throttle migration parameters */
#define DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD 50
#define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
#define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
#define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99

/* Migration XBZRLE default cache size */
#define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)

/* The delay time (in ms) between two COLO checkpoints */
/* NOTE(review): 200 * 100 = 20000 ms; the factoring looks odd — confirm intent */
#define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
#define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
#define DEFAULT_MIGRATE_MULTIFD_COMPRESSION MULTIFD_COMPRESSION_NONE
/* 0: means nocompress, 1: best speed, ... 9: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL 1
/* 0: means nocompress, 1: best speed, ... 20: best compress ratio */
#define DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL 1

/* Background transfer rate for postcopy, 0 means unlimited, note
 * that page requests can still exceed this limit.
 */
#define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0

/*
 * Parameters for self_announce_delay giving a stream of RARP/ARP
 * packets after migration.
 */
#define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
#define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
#define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
#define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
/* Notifiers invoked on outgoing migration state transitions */
static NotifierList migration_state_notifiers =
    NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);

/* Messages sent on the return path from destination to source */
enum mig_rp_message_type {
    MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
    MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
    MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */

    MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
    MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
    MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
    MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */

    MIG_RP_MSG_MAX
};
135
/* Migration capabilities set */
struct MigrateCapsSet {
    int size;                       /* Capability set size */
    MigrationCapability caps[];     /* Variadic array of capabilities */
};
typedef struct MigrateCapsSet MigrateCapsSet;

/* Define and initialize MigrateCapsSet; size is derived from the
 * number of variadic arguments so the two can never drift apart. */
#define INITIALIZE_MIGRATE_CAPS_SET(_name, ...)   \
    MigrateCapsSet _name = {    \
        .size = sizeof((int []) { __VA_ARGS__ }) / sizeof(int), \
        .caps = { __VA_ARGS__ } \
    }
149
/* Background-snapshot compatibility check list: capabilities that must
 * NOT be enabled together with background snapshots. */
static const
INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
    MIGRATION_CAPABILITY_POSTCOPY_RAM,
    MIGRATION_CAPABILITY_DIRTY_BITMAPS,
    MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME,
    MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE,
    MIGRATION_CAPABILITY_RETURN_PATH,
    MIGRATION_CAPABILITY_MULTIFD,
    MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER,
    MIGRATION_CAPABILITY_AUTO_CONVERGE,
    MIGRATION_CAPABILITY_RELEASE_RAM,
    MIGRATION_CAPABILITY_RDMA_PIN_ALL,
    MIGRATION_CAPABILITY_COMPRESS,
    MIGRATION_CAPABILITY_XBZRLE,
    MIGRATION_CAPABILITY_X_COLO,
    MIGRATION_CAPABILITY_VALIDATE_UUID);
167
/* When we add fault tolerance, we could have several
   migrations at once.  For now we don't need to add
   dynamic creation of migration */

/* Singleton outgoing state, created in migration_object_init() */
static MigrationState *current_migration;
/* Singleton incoming state, created in migration_object_init() */
static MigrationIncomingState *current_incoming;

/* Registered blockers that prevent a migration from starting */
static GSList *migration_blockers;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state);
static void migrate_fd_cancel(MigrationState *s);
182
/*
 * Whether the transport in use may open multiple channels (multifd);
 * qemu_start_incoming_migration() enables it for socket-family URIs
 * (tcp/unix/vsock) only.
 */
static bool migrate_allow_multi_channels = true;

/* Record whether the current protocol supports multiple channels. */
void migrate_protocol_allow_multi_channels(bool allow)
{
    migrate_allow_multi_channels = allow;
}

/* Query the flag set by migrate_protocol_allow_multi_channels(). */
bool migrate_multi_channels_is_allowed(void)
{
    return migrate_allow_multi_channels;
}
194
195 static gint page_request_addr_cmp(gconstpointer ap, gconstpointer bp)
196 {
197     uintptr_t a = (uintptr_t) ap, b = (uintptr_t) bp;
198
199     return (a > b) - (a < b);
200 }
201
/*
 * Create the global outgoing and incoming migration state objects and
 * register the block/RAM/dirty-bitmap migration handlers.  Must be
 * called exactly once, early at startup.
 */
void migration_object_init(void)
{
    /* This can only be called once. */
    assert(!current_migration);
    current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));

    /*
     * Init the migrate incoming object as well no matter whether
     * we'll use it or not.
     */
    assert(!current_incoming);
    current_incoming = g_new0(MigrationIncomingState, 1);
    current_incoming->state = MIGRATION_STATUS_NONE;
    current_incoming->postcopy_remote_fds =
        g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
    qemu_mutex_init(&current_incoming->rp_mutex);
    qemu_event_init(&current_incoming->main_thread_load_event, false);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
    qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
    qemu_mutex_init(&current_incoming->page_request_mutex);
    /* Outstanding postcopy page requests, keyed by aligned host address */
    current_incoming->page_requested = g_tree_new(page_request_addr_cmp);

    /* Validate capabilities/parameters; &error_fatal aborts on failure */
    migration_object_check(current_migration, &error_fatal);

    blk_mig_init();
    ram_mig_init();
    dirty_bitmap_mig_init();
}
230
231 void migration_cancel(const Error *error)
232 {
233     if (error) {
234         migrate_set_error(current_migration, error);
235     }
236     migrate_fd_cancel(current_migration);
237 }
238
/* Tear down migration state on QEMU exit: wake COLO, cancel the
 * outgoing migration, then cancel dirty-bitmap transfers. */
void migration_shutdown(void)
{
    /*
     * When the QEMU main thread exit, the COLO thread
     * may wait a semaphore. So, we should wakeup the
     * COLO thread before migration shutdown.
     */
    colo_shutdown();
    /*
     * Cancel the current migration - that will (eventually)
     * stop the migration using this structure
     */
    migration_cancel(NULL);
    object_unref(OBJECT(current_migration));

    /*
     * Cancel outgoing migration of dirty bitmaps. It should
     * at least unref used block nodes.
     */
    dirty_bitmap_mig_cancel_outgoing();

    /*
     * Cancel incoming migration of dirty bitmaps. Dirty bitmaps
     * are non-critical data, and their loss never considered as
     * something serious.
     */
    dirty_bitmap_mig_cancel_incoming();
}
267
268 /* For outgoing */
269 MigrationState *migrate_get_current(void)
270 {
271     /* This can only be called after the object created. */
272     assert(current_migration);
273     return current_migration;
274 }
275
276 MigrationIncomingState *migration_incoming_get_current(void)
277 {
278     assert(current_incoming);
279     return current_incoming;
280 }
281
282 void migration_incoming_transport_cleanup(MigrationIncomingState *mis)
283 {
284     if (mis->socket_address_list) {
285         qapi_free_SocketAddressList(mis->socket_address_list);
286         mis->socket_address_list = NULL;
287     }
288
289     if (mis->transport_cleanup) {
290         mis->transport_cleanup(mis->transport_data);
291         mis->transport_data = mis->transport_cleanup = NULL;
292     }
293 }
294
/*
 * Free/reset everything attached to the singleton incoming state so a
 * later incoming migration can start from scratch.  Closes both
 * directions of the stream, the return path first (with a SHUT telling
 * the source whether we saw an error).
 */
void migration_incoming_state_destroy(void)
{
    struct MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->to_src_file) {
        /* Tell source that we are done */
        migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
        qemu_fclose(mis->to_src_file);
        mis->to_src_file = NULL;
    }

    if (mis->from_src_file) {
        /* Drop the yank handler registered for this channel first */
        migration_ioc_unregister_yank_from_file(mis->from_src_file);
        qemu_fclose(mis->from_src_file);
        mis->from_src_file = NULL;
    }
    if (mis->postcopy_remote_fds) {
        g_array_free(mis->postcopy_remote_fds, TRUE);
        mis->postcopy_remote_fds = NULL;
    }

    migration_incoming_transport_cleanup(mis);
    qemu_event_reset(&mis->main_thread_load_event);

    if (mis->page_requested) {
        /* Values are just (gpointer)1 markers, so a plain destroy suffices */
        g_tree_destroy(mis->page_requested);
        mis->page_requested = NULL;
    }

    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
326
/* Emit a MIGRATION QMP event for @new_state when events are enabled. */
static void migrate_generate_event(int new_state)
{
    if (!migrate_use_events()) {
        return;
    }
    qapi_event_send_migration(new_state);
}
333
334 static bool migrate_late_block_activate(void)
335 {
336     MigrationState *s;
337
338     s = migrate_get_current();
339
340     return s->enabled_capabilities[
341         MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
342 }
343
/*
 * Send a message on the return channel back to the source
 * of the migration.
 *
 * @mis: incoming state owning to_src_file and rp_mutex
 * @message_type: one of enum mig_rp_message_type
 * @len: payload length in bytes
 * @data: payload bytes, written verbatim after the (type, len) header
 *
 * Returns 0 on success or a negative errno value.
 */
static int migrate_send_rp_message(MigrationIncomingState *mis,
                                   enum mig_rp_message_type message_type,
                                   uint16_t len, void *data)
{
    int ret = 0;

    trace_migrate_send_rp_message((int)message_type, len);
    /* Serialize concurrent senders; released automatically at return */
    QEMU_LOCK_GUARD(&mis->rp_mutex);

    /*
     * It's possible that the file handle got lost due to network
     * failures.
     */
    if (!mis->to_src_file) {
        ret = -EIO;
        return ret;
    }

    qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
    qemu_put_be16(mis->to_src_file, len);
    qemu_put_buffer(mis->to_src_file, data, len);
    qemu_fflush(mis->to_src_file);

    /* It's possible that qemu file got error during sending */
    ret = qemu_file_get_error(mis->to_src_file);

    return ret;
}
376
377 /* Request one page from the source VM at the given start address.
378  *   rb: the RAMBlock to request the page in
379  *   Start: Address offset within the RB
380  *   Len: Length in bytes required - must be a multiple of pagesize
381  */
382 int migrate_send_rp_message_req_pages(MigrationIncomingState *mis,
383                                       RAMBlock *rb, ram_addr_t start)
384 {
385     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
386     size_t msglen = 12; /* start + len */
387     size_t len = qemu_ram_pagesize(rb);
388     enum mig_rp_message_type msg_type;
389     const char *rbname;
390     int rbname_len;
391
392     *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
393     *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
394
395     /*
396      * We maintain the last ramblock that we requested for page.  Note that we
397      * don't need locking because this function will only be called within the
398      * postcopy ram fault thread.
399      */
400     if (rb != mis->last_rb) {
401         mis->last_rb = rb;
402
403         rbname = qemu_ram_get_idstr(rb);
404         rbname_len = strlen(rbname);
405
406         assert(rbname_len < 256);
407
408         bufc[msglen++] = rbname_len;
409         memcpy(bufc + msglen, rbname, rbname_len);
410         msglen += rbname_len;
411         msg_type = MIG_RP_MSG_REQ_PAGES_ID;
412     } else {
413         msg_type = MIG_RP_MSG_REQ_PAGES;
414     }
415
416     return migrate_send_rp_message(mis, msg_type, msglen, bufc);
417 }
418
/*
 * Queue a postcopy page request for (rb, start) unless the page was
 * already received, then forward it to the source on the return path.
 *
 * @haddr: faulting host virtual address; rounded down to the RAMBlock
 *         page size to form the dedup key in mis->page_requested.
 *
 * Returns 0 on success (including the already-received case), negative
 * errno if sending the request failed.
 */
int migrate_send_rp_req_pages(MigrationIncomingState *mis,
                              RAMBlock *rb, ram_addr_t start, uint64_t haddr)
{
    void *aligned = (void *)(uintptr_t)ROUND_DOWN(haddr, qemu_ram_pagesize(rb));
    bool received = false;

    WITH_QEMU_LOCK_GUARD(&mis->page_request_mutex) {
        received = ramblock_recv_bitmap_test_byte_offset(rb, start);
        if (!received && !g_tree_lookup(mis->page_requested, aligned)) {
            /*
             * The page has not been received, and it's not yet in the page
             * request list.  Queue it.  Set the value of element to 1, so that
             * things like g_tree_lookup() will return TRUE (1) when found.
             */
            g_tree_insert(mis->page_requested, aligned, (gpointer)1);
            mis->page_requested_count++;
            trace_postcopy_page_req_add(aligned, mis->page_requested_count);
        }
    }

    /*
     * If the page is there, skip sending the message.  We don't even need the
     * lock because as long as the page arrived, it'll be there forever.
     */
    if (received) {
        return 0;
    }

    return migrate_send_rp_message_req_pages(mis, rb, start);
}
449
450 static bool migration_colo_enabled;
451 bool migration_incoming_colo_enabled(void)
452 {
453     return migration_colo_enabled;
454 }
455
456 void migration_incoming_disable_colo(void)
457 {
458     ram_block_discard_disable(false);
459     migration_colo_enabled = false;
460 }
461
462 int migration_incoming_enable_colo(void)
463 {
464     if (ram_block_discard_disable(true)) {
465         error_report("COLO: cannot disable RAM discard");
466         return -EBUSY;
467     }
468     migration_colo_enabled = true;
469     return 0;
470 }
471
472 void migrate_add_address(SocketAddress *address)
473 {
474     MigrationIncomingState *mis = migration_incoming_get_current();
475
476     QAPI_LIST_PREPEND(mis->socket_address_list,
477                       QAPI_CLONE(SocketAddress, address));
478 }
479
480 static void qemu_start_incoming_migration(const char *uri, Error **errp)
481 {
482     const char *p = NULL;
483
484     migrate_protocol_allow_multi_channels(false); /* reset it anyway */
485     qapi_event_send_migration(MIGRATION_STATUS_SETUP);
486     if (strstart(uri, "tcp:", &p) ||
487         strstart(uri, "unix:", NULL) ||
488         strstart(uri, "vsock:", NULL)) {
489         migrate_protocol_allow_multi_channels(true);
490         socket_start_incoming_migration(p ? p : uri, errp);
491 #ifdef CONFIG_RDMA
492     } else if (strstart(uri, "rdma:", &p)) {
493         rdma_start_incoming_migration(p, errp);
494 #endif
495     } else if (strstart(uri, "exec:", &p)) {
496         exec_start_incoming_migration(p, errp);
497     } else if (strstart(uri, "fd:", &p)) {
498         fd_start_incoming_migration(p, errp);
499     } else {
500         error_setg(errp, "unknown migration protocol: %s", uri);
501     }
502 }
503
/*
 * Bottom half run on the main loop once an incoming precopy stream has
 * been fully loaded: (re)activate block devices, announce ourselves on
 * the network, clean up multifd, then start or pause the VM according
 * to autostart and the runstate received from the source.
 */
static void process_incoming_migration_bh(void *opaque)
{
    Error *local_err = NULL;
    MigrationIncomingState *mis = opaque;

    /* If capability late_block_activate is set:
     * Only fire up the block code now if we're going to restart the
     * VM, else 'cont' will do it.
     * This causes file locking to happen; so we don't want it to happen
     * unless we really are starting the VM.
     */
    if (!migrate_late_block_activate() ||
         (autostart && (!global_state_received() ||
            global_state_get_runstate() == RUN_STATE_RUNNING))) {
        /* Make sure all file formats throw away their mutable metadata.
         * If we get an error here, just don't restart the VM yet. */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            local_err = NULL;
            autostart = false;
        }
    }

    /*
     * This must happen after all error conditions are dealt with and
     * we're sure the VM is going to be running on this host.
     */
    qemu_announce_self(&mis->announce_timer, migrate_announce_params());

    /* A multifd teardown failure also forces the VM to stay paused */
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
        autostart = false;
    }
    /* If global state section was not received or we are in running
       state, we need to obey autostart. Any other state is set with
       runstate_set. */

    dirty_bitmap_mig_before_vm_start();

    if (!global_state_received() ||
        global_state_get_runstate() == RUN_STATE_RUNNING) {
        if (autostart) {
            vm_start();
        } else {
            runstate_set(RUN_STATE_PAUSED);
        }
    } else if (migration_incoming_colo_enabled()) {
        migration_incoming_disable_colo();
        vm_start();
    } else {
        runstate_set(global_state_get_runstate());
    }
    /*
     * This must happen after any state changes since as soon as an external
     * observer sees this event they might start to prod at the VM assuming
     * it's ready to use.
     */
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_COMPLETED);
    qemu_bh_delete(mis->bh);
    migration_incoming_state_destroy();
}
567
/*
 * Incoming migration coroutine: runs qemu_loadvm_state() over the main
 * channel, then hands off either to the postcopy path, the COLO
 * incoming thread, or the completion bottom half.  A failed load is
 * fatal: the process exits.
 */
static void process_incoming_migration_co(void *opaque)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    PostcopyState ps;
    int ret;
    Error *local_err = NULL;

    assert(mis->from_src_file);
    mis->migration_incoming_co = qemu_coroutine_self();
    mis->largest_page_size = qemu_ram_pagesize_largest();
    postcopy_state_set(POSTCOPY_INCOMING_NONE);
    migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
                      MIGRATION_STATUS_ACTIVE);
    /* Blocks (yielding the coroutine) until the whole stream is consumed */
    ret = qemu_loadvm_state(mis->from_src_file);

    ps = postcopy_state_get();
    trace_process_incoming_migration_co_end(ret, ps);
    if (ps != POSTCOPY_INCOMING_NONE) {
        if (ps == POSTCOPY_INCOMING_ADVISE) {
            /*
             * Where a migration had postcopy enabled (and thus went to advise)
             * but managed to complete within the precopy period, we can use
             * the normal exit.
             */
            postcopy_ram_incoming_cleanup(mis);
        } else if (ret >= 0) {
            /*
             * Postcopy was started, cleanup should happen at the end of the
             * postcopy thread.
             */
            trace_process_incoming_migration_co_postcopy_end_main();
            return;
        }
        /* Else if something went wrong then just fall out of the normal exit */
    }

    /* we get COLO info, and know if we are in COLO mode */
    if (!ret && migration_incoming_colo_enabled()) {
        /* Make sure all file formats throw away their mutable metadata */
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
            goto fail;
        }

        qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
             colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
        mis->have_colo_incoming_thread = true;
        /* Yield until the COLO thread wakes this coroutine back up */
        qemu_coroutine_yield();

        qemu_mutex_unlock_iothread();
        /* Wait checkpoint incoming thread exit before free resource */
        qemu_thread_join(&mis->colo_incoming_thread);
        qemu_mutex_lock_iothread();
        /* We hold the global iothread lock, so it is safe here */
        colo_release_ram_cache();
    }

    if (ret < 0) {
        error_report("load of migration failed: %s", strerror(-ret));
        goto fail;
    }
    mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
    qemu_bh_schedule(mis->bh);
    mis->migration_incoming_co = NULL;
    return;
fail:
    /* local_err was already reported (or never set) on every goto path */
    local_err = NULL;
    migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
                      MIGRATION_STATUS_FAILED);
    qemu_fclose(mis->from_src_file);
    if (multifd_load_cleanup(&local_err) != 0) {
        error_report_err(local_err);
    }
    exit(EXIT_FAILURE);
}
644
645 /**
646  * migration_incoming_setup: Setup incoming migration
647  * @f: file for main migration channel
648  * @errp: where to put errors
649  *
650  * Returns: %true on success, %false on error.
651  */
652 static bool migration_incoming_setup(QEMUFile *f, Error **errp)
653 {
654     MigrationIncomingState *mis = migration_incoming_get_current();
655
656     if (multifd_load_setup(errp) != 0) {
657         return false;
658     }
659
660     if (!mis->from_src_file) {
661         mis->from_src_file = f;
662     }
663     qemu_file_set_blocking(f, false);
664     return true;
665 }
666
667 void migration_incoming_process(void)
668 {
669     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
670     qemu_coroutine_enter(co);
671 }
672
/* Returns true if recovered from a paused migration, otherwise false */
static bool postcopy_try_recover(void)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
        /* Resumed from a paused postcopy migration */

        /* This should be set already in migration_incoming_setup() */
        assert(mis->from_src_file);
        /* Postcopy has standalone thread to do vm load */
        qemu_file_set_blocking(mis->from_src_file, true);

        /* Re-configure the return path */
        mis->to_src_file = qemu_file_get_return_path(mis->from_src_file);

        migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);

        /*
         * Here, we only wake up the main loading thread (while the
         * fault thread will still be waiting), so that we can receive
         * commands from source now, and answer it if needed. The
         * fault thread will be woken up afterwards until we are sure
         * that source is ready to reply to page requests.
         */
        qemu_sem_post(&mis->postcopy_pause_sem_dst);
        return true;
    }

    return false;
}
705
706 void migration_fd_process_incoming(QEMUFile *f, Error **errp)
707 {
708     if (!migration_incoming_setup(f, errp)) {
709         return;
710     }
711     if (postcopy_try_recover()) {
712         return;
713     }
714     migration_incoming_process();
715 }
716
/*
 * Handle a newly-accepted incoming channel.  The first channel becomes
 * the main migration stream; later channels belong to multifd.  Once
 * enough channels exist, start (or recover) the incoming migration.
 */
void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();
    Error *local_err = NULL;
    bool start_migration;

    if (!mis->from_src_file) {
        /* The first connection (multifd may have multiple) */
        QEMUFile *f = qemu_fopen_channel_input(ioc);

        if (!migration_incoming_setup(f, errp)) {
            return;
        }

        /*
         * Common migration only needs one channel, so we can start
         * right now.  Multifd needs more than one channel, we wait.
         */
        start_migration = !migrate_use_multifd();
    } else {
        /* Multiple connections */
        assert(migrate_use_multifd());
        /* Returns true once the last expected multifd channel arrives */
        start_migration = multifd_recv_new_channel(ioc, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    if (start_migration) {
        /* If it's a recovery, we're done */
        if (postcopy_try_recover()) {
            return;
        }
        migration_incoming_process();
    }
}
754
755 /**
756  * @migration_has_all_channels: We have received all channels that we need
757  *
758  * Returns true when we have got connections to all the channels that
759  * we need for migration.
760  */
761 bool migration_has_all_channels(void)
762 {
763     MigrationIncomingState *mis = migration_incoming_get_current();
764     bool all_channels;
765
766     all_channels = multifd_recv_all_channels_created();
767
768     return all_channels && mis->from_src_file != NULL;
769 }
770
771 /*
772  * Send a 'SHUT' message on the return channel with the given value
773  * to indicate that we've finished with the RP.  Non-0 value indicates
774  * error.
775  */
776 void migrate_send_rp_shut(MigrationIncomingState *mis,
777                           uint32_t value)
778 {
779     uint32_t buf;
780
781     buf = cpu_to_be32(value);
782     migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
783 }
784
785 /*
786  * Send a 'PONG' message on the return channel with the given value
787  * (normally in response to a 'PING')
788  */
789 void migrate_send_rp_pong(MigrationIncomingState *mis,
790                           uint32_t value)
791 {
792     uint32_t buf;
793
794     buf = cpu_to_be32(value);
795     migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
796 }
797
798 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
799                                  char *block_name)
800 {
801     char buf[512];
802     int len;
803     int64_t res;
804
805     /*
806      * First, we send the header part. It contains only the len of
807      * idstr, and the idstr itself.
808      */
809     len = strlen(block_name);
810     buf[0] = len;
811     memcpy(buf + 1, block_name, len);
812
813     if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
814         error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
815                      __func__);
816         return;
817     }
818
819     migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);
820
821     /*
822      * Next, we dump the received bitmap to the stream.
823      *
824      * TODO: currently we are safe since we are the only one that is
825      * using the to_src_file handle (fault thread is still paused),
826      * and it's ok even not taking the mutex. However the best way is
827      * to take the lock before sending the message header, and release
828      * the lock after sending the bitmap.
829      */
830     qemu_mutex_lock(&mis->rp_mutex);
831     res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
832     qemu_mutex_unlock(&mis->rp_mutex);
833
834     trace_migrate_send_rp_recv_bitmap(block_name, res);
835 }
836
837 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
838 {
839     uint32_t buf;
840
841     buf = cpu_to_be32(value);
842     migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
843 }
844
845 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
846 {
847     MigrationCapabilityStatusList *head = NULL, **tail = &head;
848     MigrationCapabilityStatus *caps;
849     MigrationState *s = migrate_get_current();
850     int i;
851
852     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
853 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
854         if (i == MIGRATION_CAPABILITY_BLOCK) {
855             continue;
856         }
857 #endif
858         caps = g_malloc0(sizeof(*caps));
859         caps->capability = i;
860         caps->state = s->enabled_capabilities[i];
861         QAPI_LIST_APPEND(tail, caps);
862     }
863
864     return head;
865 }
866
/*
 * QMP handler for query-migrate-parameters: return a deep copy of the
 * current global migration parameters.  The caller owns the result and
 * must release it with qapi_free_MigrationParameters().
 */
MigrationParameters *qmp_query_migrate_parameters(Error **errp)
{
    MigrationParameters *params;
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */
    params = g_malloc0(sizeof(*params));
    params->has_compress_level = true;
    params->compress_level = s->parameters.compress_level;
    params->has_compress_threads = true;
    params->compress_threads = s->parameters.compress_threads;
    params->has_compress_wait_thread = true;
    params->compress_wait_thread = s->parameters.compress_wait_thread;
    params->has_decompress_threads = true;
    params->decompress_threads = s->parameters.decompress_threads;
    params->has_throttle_trigger_threshold = true;
    params->throttle_trigger_threshold = s->parameters.throttle_trigger_threshold;
    params->has_cpu_throttle_initial = true;
    params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
    params->has_cpu_throttle_increment = true;
    params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
    params->has_cpu_throttle_tailslow = true;
    params->cpu_throttle_tailslow = s->parameters.cpu_throttle_tailslow;
    params->has_tls_creds = true;
    params->tls_creds = g_strdup(s->parameters.tls_creds);
    params->has_tls_hostname = true;
    params->tls_hostname = g_strdup(s->parameters.tls_hostname);
    params->has_tls_authz = true;
    /* tls_authz may still be NULL; report it as "" to the client */
    params->tls_authz = g_strdup(s->parameters.tls_authz ?
                                 s->parameters.tls_authz : "");
    params->has_max_bandwidth = true;
    params->max_bandwidth = s->parameters.max_bandwidth;
    params->has_downtime_limit = true;
    params->downtime_limit = s->parameters.downtime_limit;
    params->has_x_checkpoint_delay = true;
    params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
    params->has_block_incremental = true;
    params->block_incremental = s->parameters.block_incremental;
    params->has_multifd_channels = true;
    params->multifd_channels = s->parameters.multifd_channels;
    params->has_multifd_compression = true;
    params->multifd_compression = s->parameters.multifd_compression;
    params->has_multifd_zlib_level = true;
    params->multifd_zlib_level = s->parameters.multifd_zlib_level;
    params->has_multifd_zstd_level = true;
    params->multifd_zstd_level = s->parameters.multifd_zstd_level;
    params->has_xbzrle_cache_size = true;
    params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
    params->has_max_postcopy_bandwidth = true;
    params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
    params->has_max_cpu_throttle = true;
    params->max_cpu_throttle = s->parameters.max_cpu_throttle;
    params->has_announce_initial = true;
    params->announce_initial = s->parameters.announce_initial;
    params->has_announce_max = true;
    params->announce_max = s->parameters.announce_max;
    params->has_announce_rounds = true;
    params->announce_rounds = s->parameters.announce_rounds;
    params->has_announce_step = true;
    params->announce_step = s->parameters.announce_step;

    /* Optional field: only cloned when a mapping has been configured */
    if (s->parameters.has_block_bitmap_mapping) {
        params->has_block_bitmap_mapping = true;
        params->block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       s->parameters.block_bitmap_mapping);
    }

    return params;
}
937
938 AnnounceParameters *migrate_announce_params(void)
939 {
940     static AnnounceParameters ap;
941
942     MigrationState *s = migrate_get_current();
943
944     ap.initial = s->parameters.announce_initial;
945     ap.max = s->parameters.announce_max;
946     ap.rounds = s->parameters.announce_rounds;
947     ap.step = s->parameters.announce_step;
948
949     return &ap;
950 }
951
952 /*
953  * Return true if we're already in the middle of a migration
954  * (i.e. any of the active or setup states)
955  */
956 bool migration_is_setup_or_active(int state)
957 {
958     switch (state) {
959     case MIGRATION_STATUS_ACTIVE:
960     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
961     case MIGRATION_STATUS_POSTCOPY_PAUSED:
962     case MIGRATION_STATUS_POSTCOPY_RECOVER:
963     case MIGRATION_STATUS_SETUP:
964     case MIGRATION_STATUS_PRE_SWITCHOVER:
965     case MIGRATION_STATUS_DEVICE:
966     case MIGRATION_STATUS_WAIT_UNPLUG:
967     case MIGRATION_STATUS_COLO:
968         return true;
969
970     default:
971         return false;
972
973     }
974 }
975
976 bool migration_is_running(int state)
977 {
978     switch (state) {
979     case MIGRATION_STATUS_ACTIVE:
980     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
981     case MIGRATION_STATUS_POSTCOPY_PAUSED:
982     case MIGRATION_STATUS_POSTCOPY_RECOVER:
983     case MIGRATION_STATUS_SETUP:
984     case MIGRATION_STATUS_PRE_SWITCHOVER:
985     case MIGRATION_STATUS_DEVICE:
986     case MIGRATION_STATUS_WAIT_UNPLUG:
987     case MIGRATION_STATUS_CANCELLING:
988         return true;
989
990     default:
991         return false;
992
993     }
994 }
995
996 static void populate_time_info(MigrationInfo *info, MigrationState *s)
997 {
998     info->has_status = true;
999     info->has_setup_time = true;
1000     info->setup_time = s->setup_time;
1001     if (s->state == MIGRATION_STATUS_COMPLETED) {
1002         info->has_total_time = true;
1003         info->total_time = s->total_time;
1004         info->has_downtime = true;
1005         info->downtime = s->downtime;
1006     } else {
1007         info->has_total_time = true;
1008         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
1009                            s->start_time;
1010         info->has_expected_downtime = true;
1011         info->expected_downtime = s->expected_downtime;
1012     }
1013 }
1014
/*
 * Fill in the RAM-transfer statistics of @info from the global
 * ram_counters/xbzrle_counters/compression_counters plus per-migration
 * data in @s.  Optional sub-structures (XBZRLE, compression, CPU
 * throttle) are only reported when the corresponding feature is in use.
 */
static void populate_ram_info(MigrationInfo *info, MigrationState *s)
{
    size_t page_size = qemu_target_page_size();

    info->has_ram = true;
    info->ram = g_malloc0(sizeof(*info->ram));
    info->ram->transferred = ram_counters.transferred;
    info->ram->total = ram_bytes_total();
    info->ram->duplicate = ram_counters.duplicate;
    /* legacy value.  It is not used anymore */
    info->ram->skipped = 0;
    info->ram->normal = ram_counters.normal;
    info->ram->normal_bytes = ram_counters.normal * page_size;
    info->ram->mbps = s->mbps;
    info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
    info->ram->postcopy_requests = ram_counters.postcopy_requests;
    info->ram->page_size = page_size;
    info->ram->multifd_bytes = ram_counters.multifd_bytes;
    info->ram->pages_per_second = s->pages_per_second;
    info->ram->precopy_bytes = ram_counters.precopy_bytes;
    info->ram->downtime_bytes = ram_counters.downtime_bytes;
    info->ram->postcopy_bytes = ram_counters.postcopy_bytes;

    if (migrate_use_xbzrle()) {
        info->has_xbzrle_cache = true;
        info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
        info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
        info->xbzrle_cache->bytes = xbzrle_counters.bytes;
        info->xbzrle_cache->pages = xbzrle_counters.pages;
        info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
        info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
        info->xbzrle_cache->encoding_rate = xbzrle_counters.encoding_rate;
        info->xbzrle_cache->overflow = xbzrle_counters.overflow;
    }

    if (migrate_use_compression()) {
        info->has_compression = true;
        info->compression = g_malloc0(sizeof(*info->compression));
        info->compression->pages = compression_counters.pages;
        info->compression->busy = compression_counters.busy;
        info->compression->busy_rate = compression_counters.busy_rate;
        info->compression->compressed_size =
                                    compression_counters.compressed_size;
        info->compression->compression_rate =
                                    compression_counters.compression_rate;
    }

    if (cpu_throttle_active()) {
        info->has_cpu_throttle_percentage = true;
        info->cpu_throttle_percentage = cpu_throttle_get_percentage();
    }

    /* remaining/dirty-rate only make sense while migration is in flight */
    if (s->state != MIGRATION_STATUS_COMPLETED) {
        info->ram->remaining = ram_bytes_remaining();
        info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
    }
}
1072
1073 static void populate_disk_info(MigrationInfo *info)
1074 {
1075     if (blk_mig_active()) {
1076         info->has_disk = true;
1077         info->disk = g_malloc0(sizeof(*info->disk));
1078         info->disk->transferred = blk_mig_bytes_transferred();
1079         info->disk->remaining = blk_mig_bytes_remaining();
1080         info->disk->total = blk_mig_bytes_total();
1081     }
1082 }
1083
/*
 * Fill in @info with the outgoing (source-side) migration state:
 * the list of reasons migration is currently blocked, plus status and
 * statistics appropriate to the current state machine state.  Leaves
 * info->status untouched when no outgoing migration ever ran, so a
 * destination-side status filled in earlier is preserved.
 */
static void fill_source_migration_info(MigrationInfo *info)
{
    MigrationState *s = migrate_get_current();
    GSList *cur_blocker = migration_blockers;

    info->blocked_reasons = NULL;

    /*
     * There are two types of reasons a migration might be blocked;
     * a) devices marked in VMState as non-migratable, and
     * b) Explicit migration blockers
     * We need to add both of them here.
     */
    qemu_savevm_non_migratable_list(&info->blocked_reasons);

    while (cur_blocker) {
        QAPI_LIST_PREPEND(info->blocked_reasons,
                          g_strdup(error_get_pretty(cur_blocker->data)));
        cur_blocker = g_slist_next(cur_blocker);
    }
    info->has_blocked_reasons = info->blocked_reasons != NULL;

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
        /* no migration has happened ever */
        /* do not overwrite destination migration status */
        return;
    case MIGRATION_STATUS_SETUP:
        info->has_status = true;
        info->has_total_time = false;
        break;
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
        /* TODO add some postcopy stats */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_disk_info(info);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        /* TODO: display COLO specific information (checkpoint info etc.) */
        break;
    case MIGRATION_STATUS_COMPLETED:
        /* no disk info: block migration is over once we complete */
        populate_time_info(info, s);
        populate_ram_info(info, s);
        populate_vfio_info(info);
        break;
    case MIGRATION_STATUS_FAILED:
        info->has_status = true;
        if (s->error) {
            info->has_error_desc = true;
            info->error_desc = g_strdup(error_get_pretty(s->error));
        }
        break;
    case MIGRATION_STATUS_CANCELLED:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_WAIT_UNPLUG:
        info->has_status = true;
        break;
    }
    info->status = s->state;
}
1153
/*
 * Degree of UFFD write-tracking support, as probed by
 * migrate_query_write_tracking() for the background-snapshot
 * capability.  The values are ordered, so callers compare with '<'.
 */
typedef enum WriteTrackingSupport {
    WT_SUPPORT_UNKNOWN = 0,
    WT_SUPPORT_ABSENT,      /* kernel lacks the required UFFD features */
    WT_SUPPORT_AVAILABLE,   /* kernel OK, memory configuration is not */
    WT_SUPPORT_COMPATIBLE   /* kernel and memory configuration both OK */
} WriteTrackingSupport;
1160
1161 static
1162 WriteTrackingSupport migrate_query_write_tracking(void)
1163 {
1164     /* Check if kernel supports required UFFD features */
1165     if (!ram_write_tracking_available()) {
1166         return WT_SUPPORT_ABSENT;
1167     }
1168     /*
1169      * Check if current memory configuration is
1170      * compatible with required UFFD features.
1171      */
1172     if (!ram_write_tracking_compatible()) {
1173         return WT_SUPPORT_AVAILABLE;
1174     }
1175
1176     return WT_SUPPORT_COMPATIBLE;
1177 }
1178
1179 /**
1180  * @migration_caps_check - check capability validity
1181  *
1182  * @cap_list: old capability list, array of bool
1183  * @params: new capabilities to be applied soon
1184  * @errp: set *errp if the check failed, with reason
1185  *
1186  * Returns true if check passed, otherwise false.
1187  */
1188 static bool migrate_caps_check(bool *cap_list,
1189                                MigrationCapabilityStatusList *params,
1190                                Error **errp)
1191 {
1192     MigrationCapabilityStatusList *cap;
1193     bool old_postcopy_cap;
1194     MigrationIncomingState *mis = migration_incoming_get_current();
1195
1196     old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
1197
1198     for (cap = params; cap; cap = cap->next) {
1199         cap_list[cap->value->capability] = cap->value->state;
1200     }
1201
1202 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
1203     if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
1204         error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
1205                    "block migration");
1206         error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
1207         return false;
1208     }
1209 #endif
1210
1211 #ifndef CONFIG_REPLICATION
1212     if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
1213         error_setg(errp, "QEMU compiled without replication module"
1214                    " can't enable COLO");
1215         error_append_hint(errp, "Please enable replication before COLO.\n");
1216         return false;
1217     }
1218 #endif
1219
1220     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
1221         /* This check is reasonably expensive, so only when it's being
1222          * set the first time, also it's only the destination that needs
1223          * special support.
1224          */
1225         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
1226             !postcopy_ram_supported_by_host(mis)) {
1227             /* postcopy_ram_supported_by_host will have emitted a more
1228              * detailed message
1229              */
1230             error_setg(errp, "Postcopy is not supported");
1231             return false;
1232         }
1233
1234         if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
1235             error_setg(errp, "Postcopy is not compatible with ignore-shared");
1236             return false;
1237         }
1238     }
1239
1240     if (cap_list[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT]) {
1241         WriteTrackingSupport wt_support;
1242         int idx;
1243         /*
1244          * Check if 'background-snapshot' capability is supported by
1245          * host kernel and compatible with guest memory configuration.
1246          */
1247         wt_support = migrate_query_write_tracking();
1248         if (wt_support < WT_SUPPORT_AVAILABLE) {
1249             error_setg(errp, "Background-snapshot is not supported by host kernel");
1250             return false;
1251         }
1252         if (wt_support < WT_SUPPORT_COMPATIBLE) {
1253             error_setg(errp, "Background-snapshot is not compatible "
1254                     "with guest memory configuration");
1255             return false;
1256         }
1257
1258         /*
1259          * Check if there are any migration capabilities
1260          * incompatible with 'background-snapshot'.
1261          */
1262         for (idx = 0; idx < check_caps_background_snapshot.size; idx++) {
1263             int incomp_cap = check_caps_background_snapshot.caps[idx];
1264             if (cap_list[incomp_cap]) {
1265                 error_setg(errp,
1266                         "Background-snapshot is not compatible with %s",
1267                         MigrationCapability_str(incomp_cap));
1268                 return false;
1269             }
1270         }
1271     }
1272
1273     /* incoming side only */
1274     if (runstate_check(RUN_STATE_INMIGRATE) &&
1275         !migrate_multi_channels_is_allowed() &&
1276         cap_list[MIGRATION_CAPABILITY_MULTIFD]) {
1277         error_setg(errp, "multifd is not supported by current protocol");
1278         return false;
1279     }
1280
1281     return true;
1282 }
1283
/*
 * Fill in @info with the incoming (destination-side) migration state:
 * the listening socket addresses (if any) and the incoming status.
 * Returns without setting a status when no incoming migration exists,
 * so fill_source_migration_info() can report the source side instead.
 */
static void fill_destination_migration_info(MigrationInfo *info)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    if (mis->socket_address_list) {
        info->has_socket_address = true;
        info->socket_address =
            QAPI_CLONE(SocketAddressList, mis->socket_address_list);
    }

    switch (mis->state) {
    case MIGRATION_STATUS_NONE:
        return;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_PAUSED:
    case MIGRATION_STATUS_POSTCOPY_RECOVER:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_COLO:
        info->has_status = true;
        break;
    case MIGRATION_STATUS_COMPLETED:
        info->has_status = true;
        /* postcopy blocktime stats are only available once completed */
        fill_destination_postcopy_migration_info(info);
        break;
    }
    info->status = mis->state;
}
1315
1316 MigrationInfo *qmp_query_migrate(Error **errp)
1317 {
1318     MigrationInfo *info = g_malloc0(sizeof(*info));
1319
1320     fill_destination_migration_info(info);
1321     fill_source_migration_info(info);
1322
1323     return info;
1324 }
1325
1326 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
1327                                   Error **errp)
1328 {
1329     MigrationState *s = migrate_get_current();
1330     MigrationCapabilityStatusList *cap;
1331     bool cap_list[MIGRATION_CAPABILITY__MAX];
1332
1333     if (migration_is_running(s->state)) {
1334         error_setg(errp, QERR_MIGRATION_ACTIVE);
1335         return;
1336     }
1337
1338     memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
1339     if (!migrate_caps_check(cap_list, params, errp)) {
1340         return;
1341     }
1342
1343     for (cap = params; cap; cap = cap->next) {
1344         s->enabled_capabilities[cap->value->capability] = cap->value->state;
1345     }
1346 }
1347
1348 /*
1349  * Check whether the parameters are valid. Error will be put into errp
1350  * (if provided). Return true if valid, otherwise false.
1351  */
1352 static bool migrate_params_check(MigrationParameters *params, Error **errp)
1353 {
1354     if (params->has_compress_level &&
1355         (params->compress_level > 9)) {
1356         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
1357                    "a value between 0 and 9");
1358         return false;
1359     }
1360
1361     if (params->has_compress_threads && (params->compress_threads < 1)) {
1362         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1363                    "compress_threads",
1364                    "a value between 1 and 255");
1365         return false;
1366     }
1367
1368     if (params->has_decompress_threads && (params->decompress_threads < 1)) {
1369         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1370                    "decompress_threads",
1371                    "a value between 1 and 255");
1372         return false;
1373     }
1374
1375     if (params->has_throttle_trigger_threshold &&
1376         (params->throttle_trigger_threshold < 1 ||
1377          params->throttle_trigger_threshold > 100)) {
1378         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1379                    "throttle_trigger_threshold",
1380                    "an integer in the range of 1 to 100");
1381         return false;
1382     }
1383
1384     if (params->has_cpu_throttle_initial &&
1385         (params->cpu_throttle_initial < 1 ||
1386          params->cpu_throttle_initial > 99)) {
1387         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1388                    "cpu_throttle_initial",
1389                    "an integer in the range of 1 to 99");
1390         return false;
1391     }
1392
1393     if (params->has_cpu_throttle_increment &&
1394         (params->cpu_throttle_increment < 1 ||
1395          params->cpu_throttle_increment > 99)) {
1396         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1397                    "cpu_throttle_increment",
1398                    "an integer in the range of 1 to 99");
1399         return false;
1400     }
1401
1402     if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
1403         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1404                    "max_bandwidth",
1405                    "an integer in the range of 0 to "stringify(SIZE_MAX)
1406                    " bytes/second");
1407         return false;
1408     }
1409
1410     if (params->has_downtime_limit &&
1411         (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
1412         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1413                    "downtime_limit",
1414                    "an integer in the range of 0 to "
1415                     stringify(MAX_MIGRATE_DOWNTIME)" ms");
1416         return false;
1417     }
1418
1419     /* x_checkpoint_delay is now always positive */
1420
1421     if (params->has_multifd_channels && (params->multifd_channels < 1)) {
1422         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1423                    "multifd_channels",
1424                    "a value between 1 and 255");
1425         return false;
1426     }
1427
1428     if (params->has_multifd_zlib_level &&
1429         (params->multifd_zlib_level > 9)) {
1430         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zlib_level",
1431                    "a value between 0 and 9");
1432         return false;
1433     }
1434
1435     if (params->has_multifd_zstd_level &&
1436         (params->multifd_zstd_level > 20)) {
1437         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "multifd_zstd_level",
1438                    "a value between 0 and 20");
1439         return false;
1440     }
1441
1442     if (params->has_xbzrle_cache_size &&
1443         (params->xbzrle_cache_size < qemu_target_page_size() ||
1444          !is_power_of_2(params->xbzrle_cache_size))) {
1445         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1446                    "xbzrle_cache_size",
1447                    "a power of two no less than the target page size");
1448         return false;
1449     }
1450
1451     if (params->has_max_cpu_throttle &&
1452         (params->max_cpu_throttle < params->cpu_throttle_initial ||
1453          params->max_cpu_throttle > 99)) {
1454         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1455                    "max_cpu_throttle",
1456                    "an integer in the range of cpu_throttle_initial to 99");
1457         return false;
1458     }
1459
1460     if (params->has_announce_initial &&
1461         params->announce_initial > 100000) {
1462         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1463                    "announce_initial",
1464                    "a value between 0 and 100000");
1465         return false;
1466     }
1467     if (params->has_announce_max &&
1468         params->announce_max > 100000) {
1469         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1470                    "announce_max",
1471                    "a value between 0 and 100000");
1472        return false;
1473     }
1474     if (params->has_announce_rounds &&
1475         params->announce_rounds > 1000) {
1476         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1477                    "announce_rounds",
1478                    "a value between 0 and 1000");
1479        return false;
1480     }
1481     if (params->has_announce_step &&
1482         (params->announce_step < 1 ||
1483         params->announce_step > 10000)) {
1484         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1485                    "announce_step",
1486                    "a value between 0 and 10000");
1487        return false;
1488     }
1489
1490     if (params->has_block_bitmap_mapping &&
1491         !check_dirty_bitmap_mig_alias_map(params->block_bitmap_mapping, errp)) {
1492         error_prepend(errp, "Invalid mapping given for block-bitmap-mapping: ");
1493         return false;
1494     }
1495
1496     return true;
1497 }
1498
/*
 * Build in @dest the parameter set that *would* result from applying
 * @params: start from a copy of the current global parameters, then
 * overlay every field @params explicitly sets.
 *
 * NOTE: @dest is a validation scratch copy (see
 * qmp_migrate_set_parameters()).  String and list fields are borrowed
 * from @params rather than duplicated, so @dest must not outlive
 * @params and must never be freed with the QAPI free function.
 */
static void migrate_params_test_apply(MigrateSetParameters *params,
                                      MigrationParameters *dest)
{
    *dest = migrate_get_current()->parameters;

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        dest->compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        dest->compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        dest->compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        dest->decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        dest->throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        dest->cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        dest->cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        dest->cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    /* QNULL was rewritten to "" by the caller, so QSTRING must hold */
    if (params->has_tls_creds) {
        assert(params->tls_creds->type == QTYPE_QSTRING);
        dest->tls_creds = params->tls_creds->u.s;
    }

    if (params->has_tls_hostname) {
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        dest->tls_hostname = params->tls_hostname->u.s;
    }

    if (params->has_max_bandwidth) {
        dest->max_bandwidth = params->max_bandwidth;
    }

    if (params->has_downtime_limit) {
        dest->downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        dest->x_checkpoint_delay = params->x_checkpoint_delay;
    }

    if (params->has_block_incremental) {
        dest->block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        dest->multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        dest->multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        dest->xbzrle_cache_size = params->xbzrle_cache_size;
    }
    if (params->has_max_postcopy_bandwidth) {
        dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
    }
    if (params->has_max_cpu_throttle) {
        dest->max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        dest->announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        dest->announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        dest->announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        dest->announce_step = params->announce_step;
    }

    /* shallow reference, not a clone — validation only reads it */
    if (params->has_block_bitmap_mapping) {
        dest->has_block_bitmap_mapping = true;
        dest->block_bitmap_mapping = params->block_bitmap_mapping;
    }
}
1596
/*
 * Commit @params into the global migration state.  Values are assumed
 * to have passed migrate_params_check() already.  String and list
 * fields are deep-copied; a few parameters additionally take effect
 * immediately (bandwidth limits are pushed to the outgoing stream, a
 * COLO checkpoint may be notified, the XBZRLE cache is resized).
 */
static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
{
    MigrationState *s = migrate_get_current();

    /* TODO use QAPI_CLONE() instead of duplicating it inline */

    if (params->has_compress_level) {
        s->parameters.compress_level = params->compress_level;
    }

    if (params->has_compress_threads) {
        s->parameters.compress_threads = params->compress_threads;
    }

    if (params->has_compress_wait_thread) {
        s->parameters.compress_wait_thread = params->compress_wait_thread;
    }

    if (params->has_decompress_threads) {
        s->parameters.decompress_threads = params->decompress_threads;
    }

    if (params->has_throttle_trigger_threshold) {
        s->parameters.throttle_trigger_threshold = params->throttle_trigger_threshold;
    }

    if (params->has_cpu_throttle_initial) {
        s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
    }

    if (params->has_cpu_throttle_increment) {
        s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
    }

    if (params->has_cpu_throttle_tailslow) {
        s->parameters.cpu_throttle_tailslow = params->cpu_throttle_tailslow;
    }

    /* free old copy first; QNULL was rewritten to "" by the caller */
    if (params->has_tls_creds) {
        g_free(s->parameters.tls_creds);
        assert(params->tls_creds->type == QTYPE_QSTRING);
        s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
    }

    if (params->has_tls_hostname) {
        g_free(s->parameters.tls_hostname);
        assert(params->tls_hostname->type == QTYPE_QSTRING);
        s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
    }

    if (params->has_tls_authz) {
        g_free(s->parameters.tls_authz);
        assert(params->tls_authz->type == QTYPE_QSTRING);
        s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
    }

    if (params->has_max_bandwidth) {
        s->parameters.max_bandwidth = params->max_bandwidth;
        /* apply immediately to the outgoing stream, except in postcopy
         * where max_postcopy_bandwidth governs the rate instead */
        if (s->to_dst_file && !migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                                s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
        }
    }

    if (params->has_downtime_limit) {
        s->parameters.downtime_limit = params->downtime_limit;
    }

    if (params->has_x_checkpoint_delay) {
        s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
        /* wake the COLO loop so the new delay is picked up right away */
        if (migration_in_colo_state()) {
            colo_checkpoint_notify(s);
        }
    }

    if (params->has_block_incremental) {
        s->parameters.block_incremental = params->block_incremental;
    }
    if (params->has_multifd_channels) {
        s->parameters.multifd_channels = params->multifd_channels;
    }
    if (params->has_multifd_compression) {
        s->parameters.multifd_compression = params->multifd_compression;
    }
    if (params->has_xbzrle_cache_size) {
        s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
        /* resizing the live cache may fail and set *errp */
        xbzrle_cache_resize(params->xbzrle_cache_size, errp);
    }
    if (params->has_max_postcopy_bandwidth) {
        s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
        /* apply immediately if we are already in the postcopy phase */
        if (s->to_dst_file && migration_in_postcopy()) {
            qemu_file_set_rate_limit(s->to_dst_file,
                    s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
        }
    }
    if (params->has_max_cpu_throttle) {
        s->parameters.max_cpu_throttle = params->max_cpu_throttle;
    }
    if (params->has_announce_initial) {
        s->parameters.announce_initial = params->announce_initial;
    }
    if (params->has_announce_max) {
        s->parameters.announce_max = params->announce_max;
    }
    if (params->has_announce_rounds) {
        s->parameters.announce_rounds = params->announce_rounds;
    }
    if (params->has_announce_step) {
        s->parameters.announce_step = params->announce_step;
    }

    /* replace the old mapping wholesale with a deep clone */
    if (params->has_block_bitmap_mapping) {
        qapi_free_BitmapMigrationNodeAliasList(
            s->parameters.block_bitmap_mapping);

        s->parameters.has_block_bitmap_mapping = true;
        s->parameters.block_bitmap_mapping =
            QAPI_CLONE(BitmapMigrationNodeAliasList,
                       params->block_bitmap_mapping);
    }
}
1718
1719 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1720 {
1721     MigrationParameters tmp;
1722
1723     /* TODO Rewrite "" to null instead */
1724     if (params->has_tls_creds
1725         && params->tls_creds->type == QTYPE_QNULL) {
1726         qobject_unref(params->tls_creds->u.n);
1727         params->tls_creds->type = QTYPE_QSTRING;
1728         params->tls_creds->u.s = strdup("");
1729     }
1730     /* TODO Rewrite "" to null instead */
1731     if (params->has_tls_hostname
1732         && params->tls_hostname->type == QTYPE_QNULL) {
1733         qobject_unref(params->tls_hostname->u.n);
1734         params->tls_hostname->type = QTYPE_QSTRING;
1735         params->tls_hostname->u.s = strdup("");
1736     }
1737
1738     migrate_params_test_apply(params, &tmp);
1739
1740     if (!migrate_params_check(&tmp, errp)) {
1741         /* Invalid parameter */
1742         return;
1743     }
1744
1745     migrate_params_apply(params, errp);
1746 }
1747
1748
1749 void qmp_migrate_start_postcopy(Error **errp)
1750 {
1751     MigrationState *s = migrate_get_current();
1752
1753     if (!migrate_postcopy()) {
1754         error_setg(errp, "Enable postcopy with migrate_set_capability before"
1755                          " the start of migration");
1756         return;
1757     }
1758
1759     if (s->state == MIGRATION_STATUS_NONE) {
1760         error_setg(errp, "Postcopy must be started after migration has been"
1761                          " started");
1762         return;
1763     }
1764     /*
1765      * we don't error if migration has finished since that would be racy
1766      * with issuing this command.
1767      */
1768     qatomic_set(&s->start_postcopy, true);
1769 }
1770
1771 /* shared migration helpers */
1772
1773 void migrate_set_state(int *state, int old_state, int new_state)
1774 {
1775     assert(new_state < MIGRATION_STATUS__MAX);
1776     if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
1777         trace_migrate_set_state(MigrationStatus_str(new_state));
1778         migrate_generate_event(new_state);
1779     }
1780 }
1781
1782 static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
1783                                                   bool state)
1784 {
1785     MigrationCapabilityStatus *cap;
1786
1787     cap = g_new0(MigrationCapabilityStatus, 1);
1788     cap->capability = index;
1789     cap->state = state;
1790
1791     return cap;
1792 }
1793
1794 void migrate_set_block_enabled(bool value, Error **errp)
1795 {
1796     MigrationCapabilityStatusList *cap = NULL;
1797
1798     QAPI_LIST_PREPEND(cap, migrate_cap_add(MIGRATION_CAPABILITY_BLOCK, value));
1799     qmp_migrate_set_capabilities(cap, errp);
1800     qapi_free_MigrationCapabilityStatusList(cap);
1801 }
1802
/* Set the block-incremental migration parameter directly. */
static void migrate_set_block_incremental(MigrationState *s, bool value)
{
    s->parameters.block_incremental = value;
}
1807
1808 static void block_cleanup_parameters(MigrationState *s)
1809 {
1810     if (s->must_remove_block_options) {
1811         /* setting to false can never fail */
1812         migrate_set_block_enabled(false, &error_abort);
1813         migrate_set_block_incremental(s, false);
1814         s->must_remove_block_options = false;
1815     }
1816 }
1817
/*
 * Tear down the source side of a finished/failed/cancelled migration:
 * join the migration thread, close the outgoing file, finalise the
 * CANCELLING->CANCELLED transition and notify listeners.
 * Runs from a bottom half with the iothread lock held.
 */
static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    g_free(s->hostname);
    s->hostname = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        /*
         * Drop the iothread lock while joining: the migration thread
         * may itself need the lock to finish.
         */
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        /* Detach to_dst_file under the lock, close it outside */
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        migration_ioc_unregister_yank_from_file(tmp);
        qemu_fclose(tmp);
    }

    assert(!migration_is_active(s));

    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
    yank_unregister_instance(MIGRATION_YANK_INSTANCE);
}
1867
/*
 * Schedule migrate_fd_cleanup() to run from the cleanup bottom half.
 * The matching object_unref() happens in migrate_fd_cleanup_bh().
 */
static void migrate_fd_cleanup_schedule(MigrationState *s)
{
    /*
     * Ref the state for bh, because it may be called when
     * there're already no other refs
     */
    object_ref(OBJECT(s));
    qemu_bh_schedule(s->cleanup_bh);
}
1877
1878 static void migrate_fd_cleanup_bh(void *opaque)
1879 {
1880     MigrationState *s = opaque;
1881     migrate_fd_cleanup(s);
1882     object_unref(OBJECT(s));
1883 }
1884
1885 void migrate_set_error(MigrationState *s, const Error *error)
1886 {
1887     QEMU_LOCK_GUARD(&s->error_mutex);
1888     if (!s->error) {
1889         s->error = error_copy(error);
1890     }
1891 }
1892
1893 static void migrate_error_free(MigrationState *s)
1894 {
1895     QEMU_LOCK_GUARD(&s->error_mutex);
1896     if (s->error) {
1897         error_free(s->error);
1898         s->error = NULL;
1899     }
1900 }
1901
/*
 * Fail a migration that has not yet opened its outgoing file:
 * transition SETUP -> FAILED and record @error for "info migrate".
 */
void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    /* Only valid before the outgoing channel was established */
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    migrate_set_error(s, error);
}
1910
/*
 * Cancel an in-progress outgoing migration: force the state machine
 * into CANCELLING, wake a paused migration, and shut down the
 * transport so senders blocked in write() bail out.
 */
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state ;
    /*
     * NOTE(review): to_dst_file is sampled here without taking
     * s->qemu_file_lock; looks safe because the file is only closed
     * from the cleanup bottom half (see comment below) — confirm.
     */
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) {
        if (s->rp_state.from_dst_file) {
            /* shutdown the rp socket, so causing the rp thread to shutdown */
            qemu_file_shutdown(s->rp_state.from_dst_file);
        }
    }

    /* CAS loop: other threads may change s->state concurrently */
    do {
        old_state = s->state;
        if (!migration_is_running(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    /* Re-activate block devices that were inactivated for switchover */
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
}
1957
/* Register @notify to be invoked on every migration state change. */
void add_migration_state_change_notifier(Notifier *notify)
{
    notifier_list_add(&migration_state_notifiers, notify);
}
1962
/*
 * Unregister a notifier previously registered with
 * add_migration_state_change_notifier().
 */
void remove_migration_state_change_notifier(Notifier *notify)
{
    notifier_remove(notify);
}
1967
/* True while the migration is still in the SETUP state. */
bool migration_in_setup(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_SETUP;
}
1972
/* True once the migration completed successfully. */
bool migration_has_finished(MigrationState *s)
{
    return s->state == MIGRATION_STATUS_COMPLETED;
}
1977
/* True if the migration ended without success (cancelled or failed). */
bool migration_has_failed(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_CANCELLED ||
            s->state == MIGRATION_STATUS_FAILED);
}
1983
1984 bool migration_in_postcopy(void)
1985 {
1986     MigrationState *s = migrate_get_current();
1987
1988     switch (s->state) {
1989     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1990     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1991     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1992         return true;
1993     default:
1994         return false;
1995     }
1996 }
1997
/* True in postcopy once the device state has already been sent. */
bool migration_in_postcopy_after_devices(MigrationState *s)
{
    return migration_in_postcopy() && s->postcopy_after_devices;
}
2002
/*
 * True on the destination while incoming postcopy is in progress,
 * i.e. from the DISCARD phase up to (but not including) END.
 */
bool migration_in_incoming_postcopy(void)
{
    PostcopyState ps = postcopy_state_get();

    return ps >= POSTCOPY_INCOMING_DISCARD && ps < POSTCOPY_INCOMING_END;
}
2009
/* True while a background snapshot is being set up or running. */
bool migration_in_bg_snapshot(void)
{
    MigrationState *s = migrate_get_current();

    return migrate_background_snapshot() &&
            migration_is_setup_or_active(s->state);
}
2017
/*
 * True when no migration is in progress, i.e. it is safe to register
 * migration blockers or to start a new migration.
 */
bool migration_is_idle(void)
{
    MigrationState *s = current_migration;

    /* Migration object not created yet: trivially idle */
    if (!s) {
        return true;
    }

    switch (s->state) {
    case MIGRATION_STATUS_NONE:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_COMPLETED:
    case MIGRATION_STATUS_FAILED:
        return true;
    case MIGRATION_STATUS_SETUP:
    case MIGRATION_STATUS_CANCELLING:
    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_POSTCOPY_ACTIVE:
    case MIGRATION_STATUS_COLO:
    case MIGRATION_STATUS_PRE_SWITCHOVER:
    case MIGRATION_STATUS_DEVICE:
    case MIGRATION_STATUS_WAIT_UNPLUG:
        return false;
    case MIGRATION_STATUS__MAX:
        /* __MAX is a bound marker, never a real state */
        g_assert_not_reached();
    }

    return false;
}
2047
/* True while RAM/device state is actively being transferred. */
bool migration_is_active(MigrationState *s)
{
    return (s->state == MIGRATION_STATUS_ACTIVE ||
            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
}
2053
/*
 * Reset per-run migration state before starting a new outgoing
 * migration.  User-set parameters/capabilities and locks survive.
 */
void migrate_init(MigrationState *s)
{
    /*
     * Reinitialise all migration state, except
     * parameters/capabilities that the user set, and
     * locks.
     */
    s->cleanup_bh = 0;
    s->vm_start_bh = 0;
    s->to_dst_file = NULL;
    /* Set NONE first so the transition to SETUP below fires an event */
    s->state = MIGRATION_STATUS_NONE;
    s->rp_state.from_dst_file = NULL;
    s->rp_state.error = false;
    s->mbps = 0.0;
    s->pages_per_second = 0.0;
    s->downtime = 0;
    s->expected_downtime = 0;
    s->setup_time = 0;
    s->start_postcopy = false;
    s->postcopy_after_devices = false;
    s->migration_thread_running = false;
    /* Any error from a previous run is stale now */
    error_free(s->error);
    s->error = NULL;
    s->hostname = NULL;

    migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);

    s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->total_time = 0;
    s->vm_was_running = false;
    s->iteration_initial_bytes = 0;
    s->threshold_size = 0;
}
2087
2088 int migrate_add_blocker_internal(Error *reason, Error **errp)
2089 {
2090     /* Snapshots are similar to migrations, so check RUN_STATE_SAVE_VM too. */
2091     if (runstate_check(RUN_STATE_SAVE_VM) || !migration_is_idle()) {
2092         error_propagate_prepend(errp, error_copy(reason),
2093                                 "disallowing migration blocker "
2094                                 "(migration/snapshot in progress) for: ");
2095         return -EBUSY;
2096     }
2097
2098     migration_blockers = g_slist_prepend(migration_blockers, reason);
2099     return 0;
2100 }
2101
2102 int migrate_add_blocker(Error *reason, Error **errp)
2103 {
2104     if (only_migratable) {
2105         error_propagate_prepend(errp, error_copy(reason),
2106                                 "disallowing migration blocker "
2107                                 "(--only-migratable) for: ");
2108         return -EACCES;
2109     }
2110
2111     return migrate_add_blocker_internal(reason, errp);
2112 }
2113
/* Remove @reason from the blocker list; the caller keeps ownership. */
void migrate_del_blocker(Error *reason)
{
    migration_blockers = g_slist_remove(migration_blockers, reason);
}
2118
/*
 * QMP handler for migrate-incoming.  Only usable once per QEMU run,
 * and only when started with "-incoming defer".
 */
void qmp_migrate_incoming(const char *uri, Error **errp)
{
    Error *local_err = NULL;
    static bool once = true; /* true until a start attempt succeeds */

    if (!once) {
        error_setg(errp, "The incoming migration has already been started");
        return;
    }
    if (!runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "'-incoming' was not specified on the command line");
        return;
    }

    if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
        return;
    }

    qemu_start_incoming_migration(uri, &local_err);

    if (local_err) {
        /* Failed start: undo the yank registration and stay retryable */
        yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        error_propagate(errp, local_err);
        return;
    }

    once = false;
}
2147
/*
 * QMP handler for migrate-recover: re-establish the incoming channel
 * of a paused postcopy migration.  May be invoked multiple times --
 * each call releases any transport left over from a previous attempt.
 */
void qmp_migrate_recover(const char *uri, Error **errp)
{
    MigrationIncomingState *mis = migration_incoming_get_current();

    /*
     * Don't even bother to use ERRP_GUARD() as it _must_ always be set by
     * callers (no one should ignore a recover failure); if there is, it's a
     * programming error.
     */
    assert(errp);

    if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
        error_setg(errp, "Migrate recover can only be run "
                   "when postcopy is paused.");
        return;
    }

    /* If there's an existing transport, release it */
    migration_incoming_transport_cleanup(mis);

    /*
     * Note that this call will never start a real migration; it will
     * only re-setup the migration stream and poke existing migration
     * to continue using that newly established channel.
     */
    qemu_start_incoming_migration(uri, errp);
}
2175
/*
 * QMP handler for migrate-pause: force a postcopy-active migration
 * into the paused state by shutting down its stream.  Works on
 * whichever side (source or destination) this QEMU is playing.
 */
void qmp_migrate_pause(Error **errp)
{
    MigrationState *ms = migrate_get_current();
    MigrationIncomingState *mis = migration_incoming_get_current();
    int ret;

    if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /* Source side, during postcopy */
        qemu_mutex_lock(&ms->qemu_file_lock);
        ret = qemu_file_shutdown(ms->to_dst_file);
        qemu_mutex_unlock(&ms->qemu_file_lock);
        if (ret) {
            error_setg(errp, "Failed to pause source migration");
        }
        return;
    }

    if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        /*
         * NOTE(review): unlike the source path above, from_src_file is
         * shut down without holding a file lock here -- confirm no
         * concurrent closer exists on the destination side.
         */
        ret = qemu_file_shutdown(mis->from_src_file);
        if (ret) {
            error_setg(errp, "Failed to pause destination migration");
        }
        return;
    }

    error_setg(errp, "migrate-pause is currently only supported "
               "during postcopy-active state");
}
2204
2205 bool migration_is_blocked(Error **errp)
2206 {
2207     if (qemu_savevm_state_blocked(errp)) {
2208         return true;
2209     }
2210
2211     if (migration_blockers) {
2212         error_propagate(errp, error_copy(migration_blockers->data));
2213         return true;
2214     }
2215
2216     return false;
2217 }
2218
/*
 * Validate and initialise state before starting (or resuming) an
 * outgoing migration.
 *
 * @blk:     full block migration requested (migrate -b)
 * @blk_inc: incremental block migration requested (migrate -i)
 * @resume:  this is a postcopy recovery resume, not a fresh start
 *
 * Returns true to continue to migrate, or false if an error was
 * detected (reported through @errp).
 */
static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
                            bool resume, Error **errp)
{
    Error *local_err = NULL;

    if (resume) {
        /* Resuming is only legal from the postcopy-paused state */
        if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
            error_setg(errp, "Cannot resume if there is no "
                       "paused migration");
            return false;
        }

        /*
         * Postcopy recovery won't work well with release-ram
         * capability since release-ram will drop the page buffer as
         * long as the page is put into the send buffer.  So if there
         * is a network failure happened, any page buffers that have
         * not yet reached the destination VM but have already been
         * sent from the source VM will be lost forever.  Let's refuse
         * the client from resuming such a postcopy migration.
         * Luckily release-ram was designed to only be used when src
         * and destination VMs are on the same host, so it should be
         * fine.
         */
        if (migrate_release_ram()) {
            error_setg(errp, "Postcopy recovery cannot work "
                       "when release-ram capability is set");
            return false;
        }

        /* This is a resume, skip init status */
        return true;
    }

    if (migration_is_running(s->state)) {
        error_setg(errp, QERR_MIGRATION_ACTIVE);
        return false;
    }

    if (runstate_check(RUN_STATE_INMIGRATE)) {
        error_setg(errp, "Guest is waiting for an incoming migration");
        return false;
    }

    if (runstate_check(RUN_STATE_POSTMIGRATE)) {
        error_setg(errp, "Can't migrate the vm that was paused due to "
                   "previous migration");
        return false;
    }

    if (migration_is_blocked(errp)) {
        return false;
    }

    /* Handle the -b / -i block migration options */
    if (blk || blk_inc) {
        if (migrate_colo_enabled()) {
            error_setg(errp, "No disk migration is required in COLO mode");
            return false;
        }
        if (migrate_use_block() || migrate_use_block_incremental()) {
            error_setg(errp, "Command options are incompatible with "
                       "current migration capabilities");
            return false;
        }
        migrate_set_block_enabled(true, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return false;
        }
        /* Remember to undo this when the migration ends */
        s->must_remove_block_options = true;
    }

    if (blk_inc) {
        migrate_set_block_incremental(s, true);
    }

    migrate_init(s);
    /*
     * set ram_counters compression_counters memory to zero for a
     * new migration
     */
    memset(&ram_counters, 0, sizeof(ram_counters));
    memset(&compression_counters, 0, sizeof(compression_counters));

    return true;
}
2306
/*
 * QMP handler for the migrate command: validate, pick the transport
 * matching @uri's scheme, and start (or resume) the outgoing
 * migration.  On failure every side effect (yank instance, state) is
 * unwound before returning.
 */
void qmp_migrate(const char *uri, bool has_blk, bool blk,
                 bool has_inc, bool inc, bool has_detach, bool detach,
                 bool has_resume, bool resume, Error **errp)
{
    Error *local_err = NULL;
    MigrationState *s = migrate_get_current();
    const char *p = NULL;

    if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
                         has_resume && resume, errp)) {
        /* Error detected, put into errp */
        return;
    }

    /* A fresh migration (not a resume) needs its own yank instance */
    if (!(has_resume && resume)) {
        if (!yank_register_instance(MIGRATION_YANK_INSTANCE, errp)) {
            return;
        }
    }

    /* Only socket-based transports may use multiple channels */
    migrate_protocol_allow_multi_channels(false);
    if (strstart(uri, "tcp:", &p) ||
        strstart(uri, "unix:", NULL) ||
        strstart(uri, "vsock:", NULL)) {
        migrate_protocol_allow_multi_channels(true);
        socket_start_outgoing_migration(s, p ? p : uri, &local_err);
#ifdef CONFIG_RDMA
    } else if (strstart(uri, "rdma:", &p)) {
        rdma_start_outgoing_migration(s, p, &local_err);
#endif
    } else if (strstart(uri, "exec:", &p)) {
        exec_start_outgoing_migration(s, p, &local_err);
    } else if (strstart(uri, "fd:", &p)) {
        fd_start_outgoing_migration(s, p, &local_err);
    } else {
        /* Unknown scheme: undo the yank registration and fail SETUP */
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
                   "a valid migration protocol");
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        block_cleanup_parameters(s);
        return;
    }

    if (local_err) {
        if (!(has_resume && resume)) {
            yank_unregister_instance(MIGRATION_YANK_INSTANCE);
        }
        migrate_fd_error(s, local_err);
        error_propagate(errp, local_err);
        return;
    }
}
2362
/* QMP handler: cancel the current outgoing migration, if any. */
void qmp_migrate_cancel(Error **errp)
{
    migration_cancel(NULL);
}
2367
/*
 * QMP handler for migrate-continue: let a migration waiting in
 * @state (e.g. pre-switchover) proceed by posting its pause
 * semaphore.
 */
void qmp_migrate_continue(MigrationStatus state, Error **errp)
{
    MigrationState *s = migrate_get_current();
    if (s->state != state) {
        error_setg(errp,  "Migration not in expected state: %s",
                   MigrationStatus_str(s->state));
        return;
    }
    qemu_sem_post(&s->pause_sem);
}
2378
2379 bool migrate_release_ram(void)
2380 {
2381     MigrationState *s;
2382
2383     s = migrate_get_current();
2384
2385     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
2386 }
2387
2388 bool migrate_postcopy_ram(void)
2389 {
2390     MigrationState *s;
2391
2392     s = migrate_get_current();
2393
2394     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
2395 }
2396
/* Postcopy is in use if either RAM or dirty-bitmap postcopy is enabled. */
bool migrate_postcopy(void)
{
    return migrate_postcopy_ram() || migrate_dirty_bitmaps();
}
2401
2402 bool migrate_auto_converge(void)
2403 {
2404     MigrationState *s;
2405
2406     s = migrate_get_current();
2407
2408     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
2409 }
2410
2411 bool migrate_zero_blocks(void)
2412 {
2413     MigrationState *s;
2414
2415     s = migrate_get_current();
2416
2417     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
2418 }
2419
2420 bool migrate_postcopy_blocktime(void)
2421 {
2422     MigrationState *s;
2423
2424     s = migrate_get_current();
2425
2426     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
2427 }
2428
2429 bool migrate_use_compression(void)
2430 {
2431     MigrationState *s;
2432
2433     s = migrate_get_current();
2434
2435     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
2436 }
2437
2438 int migrate_compress_level(void)
2439 {
2440     MigrationState *s;
2441
2442     s = migrate_get_current();
2443
2444     return s->parameters.compress_level;
2445 }
2446
2447 int migrate_compress_threads(void)
2448 {
2449     MigrationState *s;
2450
2451     s = migrate_get_current();
2452
2453     return s->parameters.compress_threads;
2454 }
2455
2456 int migrate_compress_wait_thread(void)
2457 {
2458     MigrationState *s;
2459
2460     s = migrate_get_current();
2461
2462     return s->parameters.compress_wait_thread;
2463 }
2464
2465 int migrate_decompress_threads(void)
2466 {
2467     MigrationState *s;
2468
2469     s = migrate_get_current();
2470
2471     return s->parameters.decompress_threads;
2472 }
2473
2474 bool migrate_dirty_bitmaps(void)
2475 {
2476     MigrationState *s;
2477
2478     s = migrate_get_current();
2479
2480     return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
2481 }
2482
2483 bool migrate_ignore_shared(void)
2484 {
2485     MigrationState *s;
2486
2487     s = migrate_get_current();
2488
2489     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
2490 }
2491
2492 bool migrate_validate_uuid(void)
2493 {
2494     MigrationState *s;
2495
2496     s = migrate_get_current();
2497
2498     return s->enabled_capabilities[MIGRATION_CAPABILITY_VALIDATE_UUID];
2499 }
2500
2501 bool migrate_use_events(void)
2502 {
2503     MigrationState *s;
2504
2505     s = migrate_get_current();
2506
2507     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
2508 }
2509
2510 bool migrate_use_multifd(void)
2511 {
2512     MigrationState *s;
2513
2514     s = migrate_get_current();
2515
2516     return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
2517 }
2518
2519 bool migrate_pause_before_switchover(void)
2520 {
2521     MigrationState *s;
2522
2523     s = migrate_get_current();
2524
2525     return s->enabled_capabilities[
2526         MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
2527 }
2528
2529 int migrate_multifd_channels(void)
2530 {
2531     MigrationState *s;
2532
2533     s = migrate_get_current();
2534
2535     return s->parameters.multifd_channels;
2536 }
2537
2538 MultiFDCompression migrate_multifd_compression(void)
2539 {
2540     MigrationState *s;
2541
2542     s = migrate_get_current();
2543
2544     return s->parameters.multifd_compression;
2545 }
2546
2547 int migrate_multifd_zlib_level(void)
2548 {
2549     MigrationState *s;
2550
2551     s = migrate_get_current();
2552
2553     return s->parameters.multifd_zlib_level;
2554 }
2555
2556 int migrate_multifd_zstd_level(void)
2557 {
2558     MigrationState *s;
2559
2560     s = migrate_get_current();
2561
2562     return s->parameters.multifd_zstd_level;
2563 }
2564
2565 int migrate_use_xbzrle(void)
2566 {
2567     MigrationState *s;
2568
2569     s = migrate_get_current();
2570
2571     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
2572 }
2573
2574 uint64_t migrate_xbzrle_cache_size(void)
2575 {
2576     MigrationState *s;
2577
2578     s = migrate_get_current();
2579
2580     return s->parameters.xbzrle_cache_size;
2581 }
2582
2583 static int64_t migrate_max_postcopy_bandwidth(void)
2584 {
2585     MigrationState *s;
2586
2587     s = migrate_get_current();
2588
2589     return s->parameters.max_postcopy_bandwidth;
2590 }
2591
2592 bool migrate_use_block(void)
2593 {
2594     MigrationState *s;
2595
2596     s = migrate_get_current();
2597
2598     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
2599 }
2600
2601 bool migrate_use_return_path(void)
2602 {
2603     MigrationState *s;
2604
2605     s = migrate_get_current();
2606
2607     return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
2608 }
2609
2610 bool migrate_use_block_incremental(void)
2611 {
2612     MigrationState *s;
2613
2614     s = migrate_get_current();
2615
2616     return s->parameters.block_incremental;
2617 }
2618
2619 bool migrate_background_snapshot(void)
2620 {
2621     MigrationState *s;
2622
2623     s = migrate_get_current();
2624
2625     return s->enabled_capabilities[MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT];
2626 }
2627
2628 /* migration thread support */
/*
 * Something bad happened to the RP stream, mark an error
 * The caller shall print or trace something to indicate why
 */
static void mark_source_rp_bad(MigrationState *s)
{
    /* Checked by the return-path thread loop to stop processing */
    s->rp_state.error = true;
}
2637
/*
 * Expected payload length for each return-path message type, indexed
 * by MIG_RP_MSG_* value; -1 means the length is variable and is
 * validated by the individual handler.
 */
static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};
2651
/*
 * Process a request for pages received on the return path,
 * We're allowed to send more than requested (e.g. to round to our page size)
 * and we don't need to send pages that have already been sent.
 *
 * @rbname: RAMBlock name, or NULL for the last-used block
 * @start/@len: requested range, must be host-page aligned
 */
static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
                                       ram_addr_t start, size_t len)
{
    long our_host_ps = qemu_real_host_page_size();

    trace_migrate_handle_rp_req_pages(rbname, start, len);

    /*
     * Since we currently insist on matching page sizes, just sanity check
     * we're being asked for whole host pages.
     */
    if (!QEMU_IS_ALIGNED(start, our_host_ps) ||
        !QEMU_IS_ALIGNED(len, our_host_ps)) {
        error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
                     " len: %zd", __func__, start, len);
        mark_source_rp_bad(ms);
        return;
    }

    if (ram_save_queue_pages(rbname, start, len)) {
        mark_source_rp_bad(ms);
    }
}
2680
/* Return true to retry, false to quit */
static bool postcopy_pause_return_path_thread(MigrationState *s)
{
    trace_postcopy_pause_return_path();

    /* Block until a recovery (or shutdown) posts the semaphore */
    qemu_sem_wait(&s->postcopy_pause_rp_sem);

    trace_postcopy_pause_return_path_continued();

    /* Currently the thread always retries after being woken */
    return true;
}
2692
2693 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
2694 {
2695     RAMBlock *block = qemu_ram_block_by_name(block_name);
2696
2697     if (!block) {
2698         error_report("%s: invalid block name '%s'", __func__, block_name);
2699         return -EINVAL;
2700     }
2701
2702     /* Fetch the received bitmap and refresh the dirty bitmap */
2703     return ram_dirty_bitmap_reload(s, block);
2704 }
2705
/*
 * Handle a RESUME_ACK return-path message during postcopy recovery:
 * validate the magic value, flip RECOVER -> POSTCOPY_ACTIVE and wake
 * the migration thread.  Returns 0 on success, -1 on a bad ack.
 */
static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
{
    trace_source_return_path_thread_resume_ack(value);

    if (value != MIGRATION_RESUME_ACK_VALUE) {
        error_report("%s: illegal resume_ack value %"PRIu32,
                     __func__, value);
        return -1;
    }

    /* Now both sides are active. */
    migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
                      MIGRATION_STATUS_POSTCOPY_ACTIVE);

    /* Notify send thread that time to continue send pages */
    qemu_sem_post(&s->rp_state.rp_sem);

    return 0;
}
2725
2726 /* Release ms->rp_state.from_dst_file in a safe way */
2727 static void migration_release_from_dst_file(MigrationState *ms)
2728 {
2729     QEMUFile *file;
2730
2731     WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) {
2732         /*
2733          * Reset the from_dst_file pointer first before releasing it, as we
2734          * can't block within lock section
2735          */
2736         file = ms->rp_state.from_dst_file;
2737         ms->rp_state.from_dst_file = NULL;
2738     }
2739
2740     qemu_fclose(file);
2741 }
2742
/*
 * Handles messages sent on the return path towards the source VM
 *
 * Runs as a dedicated thread: repeatedly reads a (type, length, payload)
 * message from the destination on ms->rp_state.from_dst_file and
 * dispatches it.  On an IO error while in postcopy it releases the
 * channel and parks in postcopy_pause_return_path_thread(); if a
 * recovery supplies a new channel it loops back to 'retry'.
 */
static void *source_return_path_thread(void *opaque)
{
    MigrationState *ms = opaque;
    QEMUFile *rp = ms->rp_state.from_dst_file;
    uint16_t header_len, header_type;
    uint8_t buf[512];
    uint32_t tmp32, sibling_error;
    ram_addr_t start = 0; /* =0 to silence warning */
    size_t  len = 0, expected_len;
    int res;

    trace_source_return_path_thread_entry();
    rcu_register_thread();

retry:
    /* Re-entered here after a successful postcopy recovery with a new rp */
    while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
           migration_is_setup_or_active(ms->state)) {
        trace_source_return_path_thread_loop_top();
        /* Message header: 16-bit type followed by 16-bit payload length */
        header_type = qemu_get_be16(rp);
        header_len = qemu_get_be16(rp);

        if (qemu_file_get_error(rp)) {
            mark_source_rp_bad(ms);
            goto out;
        }

        if (header_type >= MIG_RP_MSG_MAX ||
            header_type == MIG_RP_MSG_INVALID) {
            error_report("RP: Received invalid message 0x%04x length 0x%04x",
                         header_type, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /*
         * Validate the length against the per-message expectation
         * (rp_cmd_args[].len == -1 means variable length) and against
         * our receive buffer.
         */
        if ((rp_cmd_args[header_type].len != -1 &&
            header_len != rp_cmd_args[header_type].len) ||
            header_len > sizeof(buf)) {
            error_report("RP: Received '%s' message (0x%04x) with"
                         "incorrect length %d expecting %zu",
                         rp_cmd_args[header_type].name, header_type, header_len,
                         (size_t)rp_cmd_args[header_type].len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* We know we've got a valid header by this point */
        res = qemu_get_buffer(rp, buf, header_len);
        if (res != header_len) {
            error_report("RP: Failed reading data for message 0x%04x"
                         " read %d expected %d",
                         header_type, res, header_len);
            mark_source_rp_bad(ms);
            goto out;
        }

        /* OK, we have the message and the data */
        switch (header_type) {
        case MIG_RP_MSG_SHUT:
            /* Destination is shutting down the return path */
            sibling_error = ldl_be_p(buf);
            trace_source_return_path_thread_shut(sibling_error);
            if (sibling_error) {
                error_report("RP: Sibling indicated error %d", sibling_error);
                mark_source_rp_bad(ms);
            }
            /*
             * We'll let the main thread deal with closing the RP
             * we could do a shutdown(2) on it, but we're the only user
             * anyway, so there's nothing gained.
             */
            goto out;

        case MIG_RP_MSG_PONG:
            tmp32 = ldl_be_p(buf);
            trace_source_return_path_thread_pong(tmp32);
            break;

        case MIG_RP_MSG_REQ_PAGES:
            /* Page request within the RAMBlock of the last request */
            start = ldq_be_p(buf);
            len = ldl_be_p(buf + 8);
            migrate_handle_rp_req_pages(ms, NULL, start, len);
            break;

        case MIG_RP_MSG_REQ_PAGES_ID:
            /* Page request naming an explicit RAMBlock idstr */
            expected_len = 12 + 1; /* header + termination */

            if (header_len >= expected_len) {
                start = ldq_be_p(buf);
                len = ldl_be_p(buf + 8);
                /* Now we expect an idstr */
                tmp32 = buf[12]; /* Length of the following idstr */
                buf[13 + tmp32] = '\0';
                expected_len += tmp32;
            }
            if (header_len != expected_len) {
                error_report("RP: Req_Page_id with length %d expecting %zd",
                             header_len, expected_len);
                mark_source_rp_bad(ms);
                goto out;
            }
            migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
            break;

        case MIG_RP_MSG_RECV_BITMAP:
            if (header_len < 1) {
                error_report("%s: missing block name", __func__);
                mark_source_rp_bad(ms);
                goto out;
            }
            /* Format: len (1B) + idstr (<255B). This ends the idstr. */
            buf[buf[0] + 1] = '\0';
            if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
                mark_source_rp_bad(ms);
                goto out;
            }
            break;

        case MIG_RP_MSG_RESUME_ACK:
            tmp32 = ldl_be_p(buf);
            if (migrate_handle_rp_resume_ack(ms, tmp32)) {
                mark_source_rp_bad(ms);
                goto out;
            }
            break;

        default:
            break;
        }
    }

out:
    res = qemu_file_get_error(rp);
    if (res) {
        if (res && migration_in_postcopy()) {
            /*
             * Maybe there is something we can do: it looks like a
             * network down issue, and we pause for a recovery.
             */
            migration_release_from_dst_file(ms);
            rp = NULL;
            if (postcopy_pause_return_path_thread(ms)) {
                /*
                 * Reload rp, reset the rest.  Referencing it is safe since
                 * it's reset only by us above, or when migration completes
                 */
                rp = ms->rp_state.from_dst_file;
                ms->rp_state.error = false;
                goto retry;
            }
        }

        trace_source_return_path_thread_bad_end();
        mark_source_rp_bad(ms);
    }

    trace_source_return_path_thread_end();
    /* No-op if the channel was already released on the pause path above */
    migration_release_from_dst_file(ms);
    rcu_unregister_thread();
    return NULL;
}
2906
2907 static int open_return_path_on_source(MigrationState *ms,
2908                                       bool create_thread)
2909 {
2910     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
2911     if (!ms->rp_state.from_dst_file) {
2912         return -1;
2913     }
2914
2915     trace_open_return_path_on_source();
2916
2917     if (!create_thread) {
2918         /* We're done */
2919         return 0;
2920     }
2921
2922     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
2923                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
2924     ms->rp_state.rp_thread_created = true;
2925
2926     trace_open_return_path_on_source_continue();
2927
2928     return 0;
2929 }
2930
2931 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
2932 static int await_return_path_close_on_source(MigrationState *ms)
2933 {
2934     /*
2935      * If this is a normal exit then the destination will send a SHUT and the
2936      * rp_thread will exit, however if there's an error we need to cause
2937      * it to exit.
2938      */
2939     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
2940         /*
2941          * shutdown(2), if we have it, will cause it to unblock if it's stuck
2942          * waiting for the destination.
2943          */
2944         qemu_file_shutdown(ms->rp_state.from_dst_file);
2945         mark_source_rp_bad(ms);
2946     }
2947     trace_await_return_path_close_on_source_joining();
2948     qemu_thread_join(&ms->rp_state.rp_thread);
2949     ms->rp_state.rp_thread_created = false;
2950     trace_await_return_path_close_on_source_close();
2951     return ms->rp_state.error;
2952 }
2953
/*
 * Switch from normal iteration to postcopy
 * Returns non-0 on error
 *
 * Stops the guest, flushes the final precopy state of non-postcopiable
 * devices, and sends the remaining device state as one length-prefixed
 * package so the destination can start running while RAM pages are
 * served on demand.  Takes and releases the iothread lock internally;
 * on failure the migration state is set to MIGRATION_STATUS_FAILED.
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;
    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    /* May block in PRE_SWITCHOVER until the user issues migrate-continue */
    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    /* From here on the failure path must reactivate block devices */
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        ram_postcopy_send_discard_bitmap(ms);
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    /* fb keeps its own reference; drop ours */
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /* Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Lets just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /* Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    /* Downtime for postcopy is measured here, not in migration_calculate_complete() */
    ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
    }

    return ret;

fail_closefb:
    qemu_fclose(fb);
fail:
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /* A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
3127
/**
 * migration_maybe_pause: Pause if required to by
 * migrate_pause_before_switchover called with the iothread locked
 *
 * @s: Current migration state
 * @current_active_state: in/out - the active state we entered with;
 *     updated to @new_state once the pause (if any) has completed
 * @new_state: state to switch to when the user continues the migration
 *
 * Returns: 0 on success, -EINVAL if we ended up in a state other than
 * @new_state (e.g. the migration was cancelled while paused).
 */
static int migration_maybe_pause(MigrationState *s,
                                 int *current_active_state,
                                 int new_state)
{
    if (!migrate_pause_before_switchover()) {
        return 0;
    }

    /* Since leaving this state is not atomic with posting the semaphore
     * it's possible that someone could have issued multiple migrate_continue
     * and the semaphore is incorrectly positive at this point;
     * the docs say it's undefined to reinit a semaphore that's already
     * init'd, so use timedwait to eat up any existing posts.
     */
    while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
        /* This block intentionally left blank */
    }

    /*
     * If the migration is cancelled when it is in the completion phase,
     * the migration state is set to MIGRATION_STATUS_CANCELLING.
     * So we don't need to wait a semaphore, otherwise we would always
     * wait for the 'pause_sem' semaphore.
     */
    if (s->state != MIGRATION_STATUS_CANCELLING) {
        /* Drop the iothread lock while the guest-visible pause lasts */
        qemu_mutex_unlock_iothread();
        migrate_set_state(&s->state, *current_active_state,
                          MIGRATION_STATUS_PRE_SWITCHOVER);
        qemu_sem_wait(&s->pause_sem);
        migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
                          new_state);
        *current_active_state = new_state;
        qemu_mutex_lock_iothread();
    }

    return s->state == new_state ? 0 : -EINVAL;
}
3170
/**
 * migration_completion: Used by migration_thread when there's not much left.
 *   The caller 'breaks' the loop when this returns.
 *
 * @s: Current migration state
 *
 * Handles both the precopy (ACTIVE) and postcopy (POSTCOPY_ACTIVE)
 * completion paths, then waits for the return-path thread (if any) and
 * moves the state machine to COMPLETED, COLO or FAILED.
 */
static void migration_completion(MigrationState *s)
{
    int ret;
    int current_active_state = s->state;

    if (s->state == MIGRATION_STATUS_ACTIVE) {
        /* Precopy completion: stop the guest and send everything left */
        qemu_mutex_lock_iothread();
        s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
        qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
        s->vm_was_running = runstate_is_running();
        ret = global_state_store();

        if (!ret) {
            /* COLO keeps the disks active on the source */
            bool inactivate = !migrate_colo_enabled();
            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
            trace_migration_completion_vm_stop(ret);
            if (ret >= 0) {
                ret = migration_maybe_pause(s, &current_active_state,
                                            MIGRATION_STATUS_DEVICE);
            }
            if (ret >= 0) {
                /* The guest is stopped; no point throttling the last pass */
                qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
                ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
                                                         inactivate);
            }
            if (inactivate && ret >= 0) {
                s->block_inactive = true;
            }
        }
        qemu_mutex_unlock_iothread();

        if (ret < 0) {
            goto fail;
        }
    } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
        trace_migration_completion_postcopy_end();

        qemu_mutex_lock_iothread();
        qemu_savevm_state_complete_postcopy(s->to_dst_file);
        qemu_mutex_unlock_iothread();

        trace_migration_completion_postcopy_end_after_complete();
    } else {
        /* Any other state here means something went wrong */
        goto fail;
    }

    /*
     * If rp was opened we must clean up the thread before
     * cleaning everything else up (since if there are no failures
     * it will wait for the destination to send it's status in
     * a SHUT command).
     */
    if (s->rp_state.rp_thread_created) {
        int rp_error;
        trace_migration_return_path_end_before();
        rp_error = await_return_path_close_on_source(s);
        trace_migration_return_path_end_after(rp_error);
        if (rp_error) {
            goto fail_invalidate;
        }
    }

    if (qemu_file_get_error(s->to_dst_file)) {
        trace_migration_completion_file_err();
        goto fail_invalidate;
    }

    if (migrate_colo_enabled() && s->state == MIGRATION_STATUS_ACTIVE) {
        /* COLO does not support postcopy */
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_COLO);
    } else {
        migrate_set_state(&s->state, current_active_state,
                          MIGRATION_STATUS_COMPLETED);
    }

    return;

fail_invalidate:
    /* If not doing postcopy, vm_start() will be called: let's regain
     * control on images.
     */
    if (s->state == MIGRATION_STATUS_ACTIVE ||
        s->state == MIGRATION_STATUS_DEVICE) {
        Error *local_err = NULL;

        qemu_mutex_lock_iothread();
        bdrv_activate_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
        qemu_mutex_unlock_iothread();
    }

fail:
    migrate_set_state(&s->state, current_active_state,
                      MIGRATION_STATUS_FAILED);
}
3277
3278 /**
3279  * bg_migration_completion: Used by bg_migration_thread when after all the
3280  *   RAM has been saved. The caller 'breaks' the loop when this returns.
3281  *
3282  * @s: Current migration state
3283  */
3284 static void bg_migration_completion(MigrationState *s)
3285 {
3286     int current_active_state = s->state;
3287
3288     /*
3289      * Stop tracking RAM writes - un-protect memory, un-register UFFD
3290      * memory ranges, flush kernel wait queues and wake up threads
3291      * waiting for write fault to be resolved.
3292      */
3293     ram_write_tracking_stop();
3294
3295     if (s->state == MIGRATION_STATUS_ACTIVE) {
3296         /*
3297          * By this moment we have RAM content saved into the migration stream.
3298          * The next step is to flush the non-RAM content (device state)
3299          * right after the ram content. The device state has been stored into
3300          * the temporary buffer before RAM saving started.
3301          */
3302         qemu_put_buffer(s->to_dst_file, s->bioc->data, s->bioc->usage);
3303         qemu_fflush(s->to_dst_file);
3304     } else if (s->state == MIGRATION_STATUS_CANCELLING) {
3305         goto fail;
3306     }
3307
3308     if (qemu_file_get_error(s->to_dst_file)) {
3309         trace_migration_completion_file_err();
3310         goto fail;
3311     }
3312
3313     migrate_set_state(&s->state, current_active_state,
3314                       MIGRATION_STATUS_COMPLETED);
3315     return;
3316
3317 fail:
3318     migrate_set_state(&s->state, current_active_state,
3319                       MIGRATION_STATUS_FAILED);
3320 }
3321
3322 bool migrate_colo_enabled(void)
3323 {
3324     MigrationState *s = migrate_get_current();
3325     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
3326 }
3327
/*
 * Error severity returned by migration_detect_error() to the
 * migration thread.
 */
typedef enum MigThrError {
    /* No error detected */
    MIG_THR_ERR_NONE = 0,
    /* Detected error, but resumed successfully */
    MIG_THR_ERR_RECOVERED = 1,
    /* Detected fatal error, need to exit */
    MIG_THR_ERR_FATAL = 2,
} MigThrError;
3336
3337 static int postcopy_resume_handshake(MigrationState *s)
3338 {
3339     qemu_savevm_send_postcopy_resume(s->to_dst_file);
3340
3341     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
3342         qemu_sem_wait(&s->rp_state.rp_sem);
3343     }
3344
3345     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3346         return 0;
3347     }
3348
3349     return -1;
3350 }
3351
3352 /* Return zero if success, or <0 for error */
3353 static int postcopy_do_resume(MigrationState *s)
3354 {
3355     int ret;
3356
3357     /*
3358      * Call all the resume_prepare() hooks, so that modules can be
3359      * ready for the migration resume.
3360      */
3361     ret = qemu_savevm_state_resume_prepare(s);
3362     if (ret) {
3363         error_report("%s: resume_prepare() failure detected: %d",
3364                      __func__, ret);
3365         return ret;
3366     }
3367
3368     /*
3369      * Last handshake with destination on the resume (destination will
3370      * switch to postcopy-active afterwards)
3371      */
3372     ret = postcopy_resume_handshake(s);
3373     if (ret) {
3374         error_report("%s: handshake failed: %d", __func__, ret);
3375         return ret;
3376     }
3377
3378     return 0;
3379 }
3380
/*
 * We don't return until we are in a safe state to continue current
 * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
 * MIG_THR_ERR_FATAL if unrecovery failure happened.
 *
 * Each loop iteration: tear down the broken channel, sit in
 * POSTCOPY_PAUSED until a recovery command wakes us, then attempt the
 * resume; on resume failure we pause again rather than give up.
 */
static MigThrError postcopy_pause(MigrationState *s)
{
    assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);

    while (true) {
        QEMUFile *file;

        /*
         * Current channel is possibly broken. Release it.  Note that this is
         * guaranteed even without lock because to_dst_file should only be
         * modified by the migration thread.  That also guarantees that the
         * unregister of yank is safe too without the lock.  It should be safe
         * even to be within the qemu_file_lock, but we didn't do that to avoid
         * taking more mutex (yank_lock) within qemu_file_lock.  TL;DR: we make
         * the qemu_file_lock critical section as small as possible.
         */
        assert(s->to_dst_file);
        migration_ioc_unregister_yank_from_file(s->to_dst_file);
        qemu_mutex_lock(&s->qemu_file_lock);
        file = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);

        qemu_file_shutdown(file);
        qemu_fclose(file);

        migrate_set_state(&s->state, s->state,
                          MIGRATION_STATUS_POSTCOPY_PAUSED);

        error_report("Detected IO failure for postcopy. "
                     "Migration paused.");

        /*
         * We wait until things fixed up. Then someone will setup the
         * status back for us.
         */
        while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
            qemu_sem_wait(&s->postcopy_pause_sem);
        }

        if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
            /* Woken up by a recover procedure. Give it a shot */

            /*
             * Firstly, let's wake up the return path now, with a new
             * return path channel.
             */
            qemu_sem_post(&s->postcopy_pause_rp_sem);

            /* Do the resume logic */
            if (postcopy_do_resume(s) == 0) {
                /* Let's continue! */
                trace_postcopy_pause_continued();
                return MIG_THR_ERR_RECOVERED;
            } else {
                /*
                 * Something wrong happened during the recovery, let's
                 * pause again. Pause is always better than throwing
                 * data away.
                 */
                continue;
            }
        } else {
            /* This is not right... Time to quit. */
            return MIG_THR_ERR_FATAL;
        }
    }
}
3454
3455 static MigThrError migration_detect_error(MigrationState *s)
3456 {
3457     int ret;
3458     int state = s->state;
3459     Error *local_error = NULL;
3460
3461     if (state == MIGRATION_STATUS_CANCELLING ||
3462         state == MIGRATION_STATUS_CANCELLED) {
3463         /* End the migration, but don't set the state to failed */
3464         return MIG_THR_ERR_FATAL;
3465     }
3466
3467     /* Try to detect any file errors */
3468     ret = qemu_file_get_error_obj(s->to_dst_file, &local_error);
3469     if (!ret) {
3470         /* Everything is fine */
3471         assert(!local_error);
3472         return MIG_THR_ERR_NONE;
3473     }
3474
3475     if (local_error) {
3476         migrate_set_error(s, local_error);
3477         error_free(local_error);
3478     }
3479
3480     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret) {
3481         /*
3482          * For postcopy, we allow the network to be down for a
3483          * while. After that, it can be continued by a
3484          * recovery phase.
3485          */
3486         return postcopy_pause(s);
3487     } else {
3488         /*
3489          * For precopy (or postcopy with error outside IO), we fail
3490          * with no time.
3491          */
3492         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
3493         trace_migration_thread_file_err();
3494
3495         /* Time to stop the migration, now. */
3496         return MIG_THR_ERR_FATAL;
3497     }
3498 }
3499
3500 /* How many bytes have we transferred since the beginning of the migration */
3501 static uint64_t migration_total_bytes(MigrationState *s)
3502 {
3503     return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes;
3504 }
3505
3506 static void migration_calculate_complete(MigrationState *s)
3507 {
3508     uint64_t bytes = migration_total_bytes(s);
3509     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3510     int64_t transfer_time;
3511
3512     s->total_time = end_time - s->start_time;
3513     if (!s->downtime) {
3514         /*
3515          * It's still not set, so we are precopy migration.  For
3516          * postcopy, downtime is calculated during postcopy_start().
3517          */
3518         s->downtime = end_time - s->downtime_start;
3519     }
3520
3521     transfer_time = s->total_time - s->setup_time;
3522     if (transfer_time) {
3523         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3524     }
3525 }
3526
/* Snapshot the per-iteration baseline (time, bytes, pages) to "now" */
static void update_iteration_initial_status(MigrationState *s)
{
    /*
     * Update these three fields at the same time to avoid mismatch info lead
     * wrong speed calculation; migration_update_counters() derives bandwidth
     * and pages/sec from the deltas against this baseline.
     */
    s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    s->iteration_initial_bytes = migration_total_bytes(s);
    s->iteration_initial_pages = ram_get_total_transferred_pages();
}
3537
3538 static void migration_update_counters(MigrationState *s,
3539                                       int64_t current_time)
3540 {
3541     uint64_t transferred, transferred_pages, time_spent;
3542     uint64_t current_bytes; /* bytes transferred since the beginning */
3543     double bandwidth;
3544
3545     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3546         return;
3547     }
3548
3549     current_bytes = migration_total_bytes(s);
3550     transferred = current_bytes - s->iteration_initial_bytes;
3551     time_spent = current_time - s->iteration_start_time;
3552     bandwidth = (double)transferred / time_spent;
3553     s->threshold_size = bandwidth * s->parameters.downtime_limit;
3554
3555     s->mbps = (((double) transferred * 8.0) /
3556                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3557
3558     transferred_pages = ram_get_total_transferred_pages() -
3559                             s->iteration_initial_pages;
3560     s->pages_per_second = (double) transferred_pages /
3561                              (((double) time_spent / 1000.0));
3562
3563     /*
3564      * if we haven't sent anything, we don't want to
3565      * recalculate. 10000 is a small enough number for our purposes
3566      */
3567     if (ram_counters.dirty_pages_rate && transferred > 10000) {
3568         s->expected_downtime = ram_counters.remaining / bandwidth;
3569     }
3570
3571     qemu_file_reset_rate_limit(s->to_dst_file);
3572
3573     update_iteration_initial_status(s);
3574
3575     trace_migrate_transferred(transferred, time_spent,
3576                               bandwidth, s->threshold_size);
3577 }
3578
/*
 * Migration thread iteration status, returned by
 * migration_iteration_run() to tell the thread loop what to do next.
 */
typedef enum {
    MIG_ITERATE_RESUME,         /* Resume current iteration */
    MIG_ITERATE_SKIP,           /* Skip current iteration */
    MIG_ITERATE_BREAK,          /* Break the loop */
} MigIterateState;
3585
/*
 * Perform one iteration of the migration: either send another chunk of
 * data, switch over to postcopy, or finish up.
 *
 * Returns a MigIterateState telling the caller whether to resume the
 * loop, skip this iteration (postcopy switchover attempted), or break
 * out (completion was run).
 */
static MigIterateState migration_iteration_run(MigrationState *s)
{
    uint64_t pending_size, pend_pre, pend_compat, pend_post;
    bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;

    qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
                              &pend_compat, &pend_post);
    pending_size = pend_pre + pend_compat + pend_post;

    trace_migrate_pending(pending_size, s->threshold_size,
                          pend_pre, pend_compat, pend_post);

    if (pending_size && pending_size >= s->threshold_size) {
        /* Still a significant amount to transfer */
        if (!in_postcopy && pend_pre <= s->threshold_size &&
            qatomic_read(&s->start_postcopy)) {
            /* User requested postcopy and we're low enough to switch */
            if (postcopy_start(s)) {
                error_report("%s: postcopy failed to start", __func__);
            }
            return MIG_ITERATE_SKIP;
        }
        /* Just another iteration step */
        qemu_savevm_state_iterate(s->to_dst_file, in_postcopy);
    } else {
        /* Little enough left that we can complete within downtime_limit */
        trace_migration_thread_low_pending(pending_size);
        migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}
3621
/*
 * Final bookkeeping once the migration thread leaves its main loop:
 * stop auto-converge throttling, set the runstate matching the final
 * migration status, and schedule the fd cleanup bottom half.
 * Takes the iothread lock (BQL) internally.
 */
static void migration_iteration_finish(MigrationState *s)
{
    /* If we enabled cpu throttling for auto-converge, turn it off. */
    cpu_throttle_stop();

    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        runstate_set(RUN_STATE_POSTMIGRATE);
        break;
    case MIGRATION_STATUS_COLO:
        if (!migrate_colo_enabled()) {
            error_report("%s: critical error: calling COLO code without "
                         "COLO enabled", __func__);
        }
        migrate_start_colo_process(s);
        /* COLO keeps the source VM running, so treat it as "was running" */
        s->vm_was_running = true;
        /* Fallthrough */
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        if (s->vm_was_running) {
            /* Restart the guest unless a shutdown arrived meanwhile */
            if (!runstate_check(RUN_STATE_SHUTDOWN)) {
                vm_start();
            }
        } else {
            if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
                runstate_set(RUN_STATE_POSTMIGRATE);
            }
        }
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }
    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
3663
/*
 * Counterpart of migration_iteration_finish() for background snapshots:
 * account completion statistics and schedule the fd cleanup bottom half.
 * The VM keeps running throughout a background snapshot, so unlike the
 * live-migration path no runstate changes are performed here.
 * Takes the iothread lock (BQL) internally.
 */
static void bg_migration_iteration_finish(MigrationState *s)
{
    qemu_mutex_lock_iothread();
    switch (s->state) {
    case MIGRATION_STATUS_COMPLETED:
        migration_calculate_complete(s);
        break;

    case MIGRATION_STATUS_ACTIVE:
    case MIGRATION_STATUS_FAILED:
    case MIGRATION_STATUS_CANCELLED:
    case MIGRATION_STATUS_CANCELLING:
        break;

    default:
        /* Should not reach here, but if so, forgive the VM. */
        error_report("%s: Unknown ending state %d", __func__, s->state);
        break;
    }

    migrate_fd_cleanup_schedule(s);
    qemu_mutex_unlock_iothread();
}
3687
/*
 * Run one iteration of the background snapshot loop.
 *
 * Returns MIG_ITERATE_RESUME to continue with the next iteration, or
 * MIG_ITERATE_BREAK once the iterable (RAM) state has been fully
 * written and completion has been triggered.
 */
static MigIterateState bg_migration_iteration_run(MigrationState *s)
{
    int res;

    /* A positive result means there is nothing iterable left to send */
    res = qemu_savevm_state_iterate(s->to_dst_file, false);
    if (res > 0) {
        bg_migration_completion(s);
        return MIG_ITERATE_BREAK;
    }

    return MIG_ITERATE_RESUME;
}
3704
3705 void migration_make_urgent_request(void)
3706 {
3707     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3708 }
3709
3710 void migration_consume_urgent_request(void)
3711 {
3712     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3713 }
3714
/*
 * Throttle the migration thread to the configured bandwidth by sleeping
 * away the rest of the current BUFFER_DELAY window when the rate limit
 * has been reached.  The sleep can be cut short by an urgent request
 * posted via migration_make_urgent_request().
 *
 * Returns true if the rate limiting was broken by an urgent request.
 */
bool migration_rate_limit(void)
{
    int64_t now = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    MigrationState *s = migrate_get_current();

    bool urgent = false;
    migration_update_counters(s, now);
    if (qemu_file_rate_limit(s->to_dst_file)) {

        /* A broken stream means there is no point in waiting */
        if (qemu_file_get_error(s->to_dst_file)) {
            return false;
        }
        /*
         * Wait for a delay to do rate limiting OR
         * something urgent to post the semaphore.
         * NOTE(review): ms can in principle be <= 0 if we are already
         * past the window end -- assumes qemu_sem_timedwait() copes
         * with a non-positive timeout; confirm.
         */
        int ms = s->iteration_start_time + BUFFER_DELAY - now;
        trace_migration_rate_limit_pre(ms);
        if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
            /*
             * We were woken by one or more urgent things but
             * the timedwait will have consumed one of them.
             * The service routine for the urgent wake will dec
             * the semaphore itself for each item it consumes,
             * so add this one we just eat back.
             */
            qemu_sem_post(&s->rate_limit_sem);
            urgent = true;
        }
        trace_migration_rate_limit_post(urgent);
    }
    return urgent;
}
3749
/*
 * If failover (primary) devices are present, wait until they are
 * completely unplugged before moving from @old_state to @new_state.
 * While waiting, the migration state is temporarily set to
 * MIGRATION_STATUS_WAIT_UNPLUG.
 */

static void qemu_savevm_wait_unplug(MigrationState *s, int old_state,
                                    int new_state)
{
    if (qemu_savevm_state_guest_unplug_pending()) {
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_WAIT_UNPLUG);

        /* Poll every 250ms until the guest finished the unplug */
        while (s->state == MIGRATION_STATUS_WAIT_UNPLUG &&
               qemu_savevm_state_guest_unplug_pending()) {
            qemu_sem_timedwait(&s->wait_unplug_sem, 250);
        }
        if (s->state != MIGRATION_STATUS_WAIT_UNPLUG) {
            int timeout = 120; /* 30 seconds */
            /*
             * Migration has been canceled, but since we have already
             * started an unplug we must wait for it to finish so that
             * the card can be plugged back in.
             */
            while (timeout-- && qemu_savevm_state_guest_unplug_pending()) {
                qemu_sem_timedwait(&s->wait_unplug_sem, 250);
            }
            if (qemu_savevm_state_guest_unplug_pending() &&
                !qtest_enabled()) {
                warn_report("migration: partially unplugged device on "
                            "failure");
            }
        }

        migrate_set_state(&s->state, MIGRATION_STATUS_WAIT_UNPLUG, new_state);
    } else {
        migrate_set_state(&s->state, old_state, new_state);
    }
}
3787
/*
 * Master migration thread on the source VM.
 * It drives the migration and pumps the data down the outgoing channel.
 *
 * Sends the stream header, optionally negotiates return-path/postcopy/
 * COLO support with the destination, then loops over
 * migration_iteration_run() under bandwidth limiting until migration
 * completes, fails, or is cancelled.
 */
static void *migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    MigThrError thr_error;
    bool urgent = false;

    rcu_register_thread();

    /* Keep MigrationState alive for the lifetime of this thread */
    object_ref(OBJECT(s));
    update_iteration_initial_status(s);

    qemu_savevm_state_header(s->to_dst_file);

    /*
     * If we opened the return path, we need to make sure dst has it
     * opened as well.
     */
    if (s->rp_state.rp_thread_created) {
        /* Now tell the dest that it should open its end so it can reply */
        qemu_savevm_send_open_return_path(s->to_dst_file);

        /* And do a ping that will make stuff easier to debug */
        qemu_savevm_send_ping(s->to_dst_file, 1);
    }

    if (migrate_postcopy()) {
        /*
         * Tell the destination that we *might* want to do postcopy later;
         * if the other end can't do postcopy it should fail now, nice and
         * early.
         */
        qemu_savevm_send_postcopy_advise(s->to_dst_file);
    }

    if (migrate_colo_enabled()) {
        /* Notify migration destination that we enable COLO */
        qemu_savevm_send_colo_enable(s->to_dst_file);
    }

    qemu_savevm_state_setup(s->to_dst_file);

    /* Block here until any pending failover device unplug has finished */
    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                               MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();

    while (migration_is_active(s)) {
        /* Only iterate when we are allowed to send (or were woken urgently) */
        if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
            MigIterateState iter_state = migration_iteration_run(s);
            if (iter_state == MIG_ITERATE_SKIP) {
                continue;
            } else if (iter_state == MIG_ITERATE_BREAK) {
                break;
            }
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        } else if (thr_error == MIG_THR_ERR_RECOVERED) {
            /*
             * Just recovered from a e.g. network failure, reset all
             * the local variables. This is important to avoid
             * breaking transferred_bytes and bandwidth calculation
             */
            update_iteration_initial_status(s);
        }

        /* Honour the configured bandwidth limit between iterations */
        urgent = migration_rate_limit();
    }

    trace_migration_thread_after_loop();
    migration_iteration_finish(s);
    object_unref(OBJECT(s));
    rcu_unregister_thread();
    return NULL;
}
3877
3878 static void bg_migration_vm_start_bh(void *opaque)
3879 {
3880     MigrationState *s = opaque;
3881
3882     qemu_bh_delete(s->vm_start_bh);
3883     s->vm_start_bh = NULL;
3884
3885     vm_start();
3886     s->downtime = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - s->downtime_start;
3887 }
3888
/**
 * Background snapshot thread, based on live migration code.
 * This is an alternative implementation of live migration mechanism
 * introduced specifically to support background snapshots.
 *
 * It takes advantage of the userfault_fd write protection mechanism
 * introduced in the v5.7 kernel. Compared to existing dirty page logging
 * migration much less stream traffic is produced, resulting in smaller
 * snapshot images, simply because no page duplicates can get into the
 * stream.
 *
 * Another key point is that the generated vmstate stream reflects the
 * machine state 'frozen' at the beginning of snapshot creation, whereas
 * the dirty page logging mechanism effectively saves the state of the VM
 * at the end of the process.
 */
static void *bg_migration_thread(void *opaque)
{
    MigrationState *s = opaque;
    int64_t setup_start;
    MigThrError thr_error;
    QEMUFile *fb;
    bool early_fail = true;

    rcu_register_thread();
    object_ref(OBJECT(s));

    /* Snapshots are written locally; do not throttle the stream */
    qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);

    setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
    /*
     * We want to save vmstate for the moment when migration has been
     * initiated but also we want to save RAM content while VM is running.
     * The RAM content should appear first in the vmstate. So, we first
     * stash the non-RAM part of the vmstate to the temporary buffer,
     * then write RAM part of the vmstate to the migration stream
     * with vCPUs running and, finally, write stashed non-RAM part of
     * the vmstate from the buffer to the migration stream.
     */
    s->bioc = qio_channel_buffer_new(512 * 1024);
    qio_channel_set_name(QIO_CHANNEL(s->bioc), "vmstate-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(s->bioc));
    /* fb now holds the only reference we need to the buffer channel */
    object_unref(OBJECT(s->bioc));

    update_iteration_initial_status(s);

    /*
     * Prepare for tracking memory writes with UFFD-WP - populate
     * RAM pages before protecting.
     */
#ifdef __linux__
    ram_write_tracking_prepare();
#endif

    qemu_savevm_state_header(s->to_dst_file);
    qemu_savevm_state_setup(s->to_dst_file);

    /* Block here until any pending failover device unplug has finished */
    qemu_savevm_wait_unplug(s, MIGRATION_STATUS_SETUP,
                               MIGRATION_STATUS_ACTIVE);

    s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;

    trace_migration_thread_setup_complete();
    s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    qemu_mutex_lock_iothread();

    /*
     * If the VM is currently in a suspended state, then, to make a valid
     * runstate transition in vm_stop_force_state(), we need to wake it
     * up first.
     */
    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    s->vm_was_running = runstate_is_running();

    if (global_state_store()) {
        goto fail;
    }
    /* Forcibly stop VM before saving state of vCPUs and devices */
    if (vm_stop_force_state(RUN_STATE_PAUSED)) {
        goto fail;
    }
    /*
     * Put vCPUs in sync with shadow context structures, then
     * save their state to channel-buffer along with devices.
     */
    cpu_synchronize_all_states();
    if (qemu_savevm_state_complete_precopy_non_iterable(fb, false, false)) {
        goto fail;
    }
    /*
     * Since we are going to get non-iterable state data directly
     * from s->bioc->data, explicit flush is needed here.
     */
    qemu_fflush(fb);

    /* Now initialize UFFD context and start tracking RAM writes */
    if (ram_write_tracking_start()) {
        goto fail;
    }
    early_fail = false;

    /*
     * Start VM from BH handler to avoid write-fault lock here.
     * UFFD-WP protection for the whole RAM is already enabled so
     * calling VM state change notifiers from vm_start() would initiate
     * writes to virtio VQs memory which is in write-protected region.
     */
    s->vm_start_bh = qemu_bh_new(bg_migration_vm_start_bh, s);
    qemu_bh_schedule(s->vm_start_bh);

    qemu_mutex_unlock_iothread();

    while (migration_is_active(s)) {
        MigIterateState iter_state = bg_migration_iteration_run(s);
        if (iter_state == MIG_ITERATE_SKIP) {
            continue;
        } else if (iter_state == MIG_ITERATE_BREAK) {
            break;
        }

        /*
         * Try to detect any kind of failures, and see whether we
         * should stop the migration now.
         */
        thr_error = migration_detect_error(s);
        if (thr_error == MIG_THR_ERR_FATAL) {
            /* Stop migration */
            break;
        }

        migration_update_counters(s, qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
    }

    trace_migration_thread_after_loop();

fail:
    /* early_fail means we still hold the iothread lock from above */
    if (early_fail) {
        migrate_set_state(&s->state, MIGRATION_STATUS_ACTIVE,
                MIGRATION_STATUS_FAILED);
        qemu_mutex_unlock_iothread();
    }

    bg_migration_iteration_finish(s);

    qemu_fclose(fb);
    object_unref(OBJECT(s));
    rcu_unregister_thread();

    return NULL;
}
4038
/*
 * Start (or resume) an outgoing migration on an already-connected
 * channel.  @error_in carries any error that occurred while setting up
 * the channel; if set, migration is failed (or, for a postcopy resume,
 * the error is only reported and we keep waiting for a new channel).
 * Depending on configuration this spawns either the live migration
 * thread or the background snapshot thread.
 */
void migrate_fd_connect(MigrationState *s, Error *error_in)
{
    Error *local_err = NULL;
    int64_t rate_limit;
    bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;

    /*
     * If there's a previous error, free it and prepare for another one.
     * Meanwhile if migration completes successfully, there won't have an error
     * dumped when calling migrate_fd_cleanup().
     */
    migrate_error_free(s);

    s->expected_downtime = s->parameters.downtime_limit;
    if (resume) {
        /* A resumed migration reuses the cleanup BH of the original run */
        assert(s->cleanup_bh);
    } else {
        assert(!s->cleanup_bh);
        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
    }
    if (error_in) {
        migrate_fd_error(s, error_in);
        if (resume) {
            /*
             * Don't do cleanup for resume if channel is invalid, but only dump
             * the error.  We wait for another channel connect from the user.
             * The error_report still gives HMP user a hint on what failed.
             * It's normally done in migrate_fd_cleanup(), but call it here
             * explicitly.
             */
            error_report_err(error_copy(s->error));
        } else {
            migrate_fd_cleanup(s);
        }
        return;
    }

    if (resume) {
        /* This is a resumed migration */
        rate_limit = s->parameters.max_postcopy_bandwidth /
            XFER_LIMIT_RATIO;
    } else {
        /* This is a fresh new migration */
        rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;

        /* Notify before starting migration thread */
        notifier_list_notify(&migration_state_notifiers, s);
    }

    qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
    qemu_file_set_blocking(s->to_dst_file, true);

    /*
     * Open the return path. For postcopy, it is used exclusively. For
     * precopy, only if user specified "return-path" capability would
     * QEMU uses the return path.
     */
    if (migrate_postcopy_ram() || migrate_use_return_path()) {
        if (open_return_path_on_source(s, !resume)) {
            error_report("Unable to open return-path for postcopy");
            migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
            migrate_fd_cleanup(s);
            return;
        }
    }

    if (resume) {
        /* Wakeup the main migration thread to do the recovery */
        migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
                          MIGRATION_STATUS_POSTCOPY_RECOVER);
        qemu_sem_post(&s->postcopy_pause_sem);
        return;
    }

    if (multifd_save_setup(&local_err) != 0) {
        error_report_err(local_err);
        migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                          MIGRATION_STATUS_FAILED);
        migrate_fd_cleanup(s);
        return;
    }

    if (migrate_background_snapshot()) {
        qemu_thread_create(&s->thread, "bg_snapshot",
                bg_migration_thread, s, QEMU_THREAD_JOINABLE);
    } else {
        qemu_thread_create(&s->thread, "live_migration",
                migration_thread, s, QEMU_THREAD_JOINABLE);
    }
    s->migration_thread_running = true;
}
4130
/*
 * Dump the global migration properties of the current MigrationState
 * to the given monitor (used by HMP "info migrate").
 */
void migration_global_dump(Monitor *mon)
{
    MigrationState *ms = migrate_get_current();

    monitor_printf(mon, "globals:\n");
    monitor_printf(mon, "store-global-state: %s\n",
                   ms->store_global_state ? "on" : "off");
    monitor_printf(mon, "only-migratable: %s\n",
                   only_migratable ? "on" : "off");
    monitor_printf(mon, "send-configuration: %s\n",
                   ms->send_configuration ? "on" : "off");
    monitor_printf(mon, "send-section-footer: %s\n",
                   ms->send_section_footer ? "on" : "off");
    monitor_printf(mon, "decompress-error-check: %s\n",
                   ms->decompress_error_check ? "on" : "off");
    monitor_printf(mon, "clear-bitmap-shift: %u\n",
                   ms->clear_bitmap_shift);
}
4149
/*
 * Define a migration capability as a qdev bool property backed by the
 * corresponding slot of MigrationState::enabled_capabilities.
 */
#define DEFINE_PROP_MIG_CAP(name, x)             \
    DEFINE_PROP_BOOL(name, MigrationState, enabled_capabilities[x], false)
4152
/*
 * qdev properties of the migration object: global behaviour switches,
 * migration parameters (the "x-" prefix marks experimental ones) and
 * capability flags.
 */
static Property migration_properties[] = {
    DEFINE_PROP_BOOL("store-global-state", MigrationState,
                     store_global_state, true),
    DEFINE_PROP_BOOL("send-configuration", MigrationState,
                     send_configuration, true),
    DEFINE_PROP_BOOL("send-section-footer", MigrationState,
                     send_section_footer, true),
    DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
                      decompress_error_check, true),
    DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
                      clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),

    /* Migration parameters */
    DEFINE_PROP_UINT8("x-compress-level", MigrationState,
                      parameters.compress_level,
                      DEFAULT_MIGRATE_COMPRESS_LEVEL),
    DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
                      parameters.compress_threads,
                      DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
    DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
                      parameters.compress_wait_thread, true),
    DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
                      parameters.decompress_threads,
                      DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
    DEFINE_PROP_UINT8("x-throttle-trigger-threshold", MigrationState,
                      parameters.throttle_trigger_threshold,
                      DEFAULT_MIGRATE_THROTTLE_TRIGGER_THRESHOLD),
    DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
                      parameters.cpu_throttle_initial,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
    DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
                      parameters.cpu_throttle_increment,
                      DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
    DEFINE_PROP_BOOL("x-cpu-throttle-tailslow", MigrationState,
                      parameters.cpu_throttle_tailslow, false),
    DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
                      parameters.max_bandwidth, MAX_THROTTLE),
    DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
                      parameters.downtime_limit,
                      DEFAULT_MIGRATE_SET_DOWNTIME),
    DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
                      parameters.x_checkpoint_delay,
                      DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
    DEFINE_PROP_UINT8("multifd-channels", MigrationState,
                      parameters.multifd_channels,
                      DEFAULT_MIGRATE_MULTIFD_CHANNELS),
    DEFINE_PROP_MULTIFD_COMPRESSION("multifd-compression", MigrationState,
                      parameters.multifd_compression,
                      DEFAULT_MIGRATE_MULTIFD_COMPRESSION),
    DEFINE_PROP_UINT8("multifd-zlib-level", MigrationState,
                      parameters.multifd_zlib_level,
                      DEFAULT_MIGRATE_MULTIFD_ZLIB_LEVEL),
    DEFINE_PROP_UINT8("multifd-zstd-level", MigrationState,
                      parameters.multifd_zstd_level,
                      DEFAULT_MIGRATE_MULTIFD_ZSTD_LEVEL),
    DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
                      parameters.xbzrle_cache_size,
                      DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
    DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
                      parameters.max_postcopy_bandwidth,
                      DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
    DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
                      parameters.max_cpu_throttle,
                      DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
    DEFINE_PROP_SIZE("announce-initial", MigrationState,
                      parameters.announce_initial,
                      DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
    DEFINE_PROP_SIZE("announce-max", MigrationState,
                      parameters.announce_max,
                      DEFAULT_MIGRATE_ANNOUNCE_MAX),
    DEFINE_PROP_SIZE("announce-rounds", MigrationState,
                      parameters.announce_rounds,
                      DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
    DEFINE_PROP_SIZE("announce-step", MigrationState,
                      parameters.announce_step,
                      DEFAULT_MIGRATE_ANNOUNCE_STEP),

    /* Migration capabilities */
    DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
    DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
    DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
    DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
    DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
    DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
    DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
    DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
    DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
    DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
    DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
    DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
    DEFINE_PROP_MIG_CAP("x-background-snapshot",
            MIGRATION_CAPABILITY_BACKGROUND_SNAPSHOT),

    DEFINE_PROP_END_OF_LIST(),
};
4248
4249 static void migration_class_init(ObjectClass *klass, void *data)
4250 {
4251     DeviceClass *dc = DEVICE_CLASS(klass);
4252
4253     dc->user_creatable = false;
4254     device_class_set_props(dc, migration_properties);
4255 }
4256
4257 static void migration_instance_finalize(Object *obj)
4258 {
4259     MigrationState *ms = MIGRATION_OBJ(obj);
4260     MigrationParameters *params = &ms->parameters;
4261
4262     qemu_mutex_destroy(&ms->error_mutex);
4263     qemu_mutex_destroy(&ms->qemu_file_lock);
4264     g_free(params->tls_hostname);
4265     g_free(params->tls_creds);
4266     qemu_sem_destroy(&ms->wait_unplug_sem);
4267     qemu_sem_destroy(&ms->rate_limit_sem);
4268     qemu_sem_destroy(&ms->pause_sem);
4269     qemu_sem_destroy(&ms->postcopy_pause_sem);
4270     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
4271     qemu_sem_destroy(&ms->rp_state.rp_sem);
4272     error_free(ms->error);
4273 }
4274
4275 static void migration_instance_init(Object *obj)
4276 {
4277     MigrationState *ms = MIGRATION_OBJ(obj);
4278     MigrationParameters *params = &ms->parameters;
4279
4280     ms->state = MIGRATION_STATUS_NONE;
4281     ms->mbps = -1;
4282     ms->pages_per_second = -1;
4283     qemu_sem_init(&ms->pause_sem, 0);
4284     qemu_mutex_init(&ms->error_mutex);
4285
4286     params->tls_hostname = g_strdup("");
4287     params->tls_creds = g_strdup("");
4288
4289     /* Set has_* up only for parameter checks */
4290     params->has_compress_level = true;
4291     params->has_compress_threads = true;
4292     params->has_decompress_threads = true;
4293     params->has_throttle_trigger_threshold = true;
4294     params->has_cpu_throttle_initial = true;
4295     params->has_cpu_throttle_increment = true;
4296     params->has_cpu_throttle_tailslow = true;
4297     params->has_max_bandwidth = true;
4298     params->has_downtime_limit = true;
4299     params->has_x_checkpoint_delay = true;
4300     params->has_block_incremental = true;
4301     params->has_multifd_channels = true;
4302     params->has_multifd_compression = true;
4303     params->has_multifd_zlib_level = true;
4304     params->has_multifd_zstd_level = true;
4305     params->has_xbzrle_cache_size = true;
4306     params->has_max_postcopy_bandwidth = true;
4307     params->has_max_cpu_throttle = true;
4308     params->has_announce_initial = true;
4309     params->has_announce_max = true;
4310     params->has_announce_rounds = true;
4311     params->has_announce_step = true;
4312
4313     qemu_sem_init(&ms->postcopy_pause_sem, 0);
4314     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
4315     qemu_sem_init(&ms->rp_state.rp_sem, 0);
4316     qemu_sem_init(&ms->rate_limit_sem, 0);
4317     qemu_sem_init(&ms->wait_unplug_sem, 0);
4318     qemu_mutex_init(&ms->qemu_file_lock);
4319 }
4320
4321 /*
4322  * Return true if check pass, false otherwise. Error will be put
4323  * inside errp if provided.
4324  */
4325 static bool migration_object_check(MigrationState *ms, Error **errp)
4326 {
4327     MigrationCapabilityStatusList *head = NULL;
4328     /* Assuming all off */
4329     bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
4330     int i;
4331
4332     if (!migrate_params_check(&ms->parameters, errp)) {
4333         return false;
4334     }
4335
4336     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
4337         if (ms->enabled_capabilities[i]) {
4338             QAPI_LIST_PREPEND(head, migrate_cap_add(i, true));
4339         }
4340     }
4341
4342     ret = migrate_caps_check(cap_list, head, errp);
4343
4344     /* It works with head == NULL */
4345     qapi_free_MigrationCapabilityStatusList(head);
4346
4347     return ret;
4348 }
4349
/* QOM type descriptor for the singleton migration object. */
static const TypeInfo migration_type = {
    .name = TYPE_MIGRATION,
    /*
     * NOTE: TYPE_MIGRATION is not really a device, as the object is
     * not created using qdev_new(), it is not attached to the qdev
     * device tree, and it is never realized.
     *
     * TODO: Make this TYPE_OBJECT once QOM provides something like
     * TYPE_DEVICE's "-global" properties.
     */
    .parent = TYPE_DEVICE,
    .class_init = migration_class_init,
    .class_size = sizeof(MigrationClass),
    .instance_size = sizeof(MigrationState),
    .instance_init = migration_instance_init,
    .instance_finalize = migration_instance_finalize,
};
4367
/* Register TYPE_MIGRATION with QOM during module initialization. */
static void register_migration_types(void)
{
    type_register_static(&migration_type);
}

type_init(register_migration_types);
/* (web-scraper footer artifact, not part of the source: "This page took 0.275863 seconds and 4 git commands to generate.") */