]> Git Repo - qemu.git/blob - migration/migration.c
Include qemu/main-loop.h less
[qemu.git] / migration / migration.c
1 /*
2  * QEMU live migration
3  *
4  * Copyright IBM, Corp. 2008
5  *
6  * Authors:
7  *  Anthony Liguori   <[email protected]>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2.  See
10  * the COPYING file in the top-level directory.
11  *
12  * Contributions after 2012-01-13 are licensed under the terms of the
13  * GNU GPL, version 2 or (at your option) any later version.
14  */
15
16 #include "qemu/osdep.h"
17 #include "qemu/cutils.h"
18 #include "qemu/error-report.h"
19 #include "qemu/main-loop.h"
20 #include "migration/blocker.h"
21 #include "exec.h"
22 #include "fd.h"
23 #include "socket.h"
24 #include "rdma.h"
25 #include "ram.h"
26 #include "migration/global_state.h"
27 #include "migration/misc.h"
28 #include "migration.h"
29 #include "savevm.h"
30 #include "qemu-file-channel.h"
31 #include "qemu-file.h"
32 #include "migration/vmstate.h"
33 #include "block/block.h"
34 #include "qapi/error.h"
35 #include "qapi/clone-visitor.h"
36 #include "qapi/qapi-visit-sockets.h"
37 #include "qapi/qapi-commands-migration.h"
38 #include "qapi/qapi-events-migration.h"
39 #include "qapi/qmp/qerror.h"
40 #include "qapi/qmp/qnull.h"
41 #include "qemu/rcu.h"
42 #include "block.h"
43 #include "postcopy-ram.h"
44 #include "qemu/thread.h"
45 #include "trace.h"
46 #include "exec/target_page.h"
47 #include "io/channel-buffer.h"
48 #include "migration/colo.h"
49 #include "hw/boards.h"
50 #include "monitor/monitor.h"
51 #include "net/announce.h"
52
53 #define MAX_THROTTLE  (32 << 20)      /* Migration transfer speed throttling */
54
55 /* Amount of time to allocate to each "chunk" of bandwidth-throttled
56  * data. */
57 #define BUFFER_DELAY     100
58 #define XFER_LIMIT_RATIO (1000 / BUFFER_DELAY)
59
60 /* Time in milliseconds we are allowed to stop the source,
61  * for sending the last part */
62 #define DEFAULT_MIGRATE_SET_DOWNTIME 300
63
64 /* Maximum migrate downtime set to 2000 seconds */
65 #define MAX_MIGRATE_DOWNTIME_SECONDS 2000
66 #define MAX_MIGRATE_DOWNTIME (MAX_MIGRATE_DOWNTIME_SECONDS * 1000)
67
68 /* Default compression thread count */
69 #define DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT 8
70 /* Default decompression thread count, usually decompression is at
71  * least 4 times as fast as compression.*/
72 #define DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT 2
73 /*0: means nocompress, 1: best speed, ... 9: best compress ratio */
74 #define DEFAULT_MIGRATE_COMPRESS_LEVEL 1
75 /* Define default autoconverge cpu throttle migration parameters */
76 #define DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL 20
77 #define DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT 10
78 #define DEFAULT_MIGRATE_MAX_CPU_THROTTLE 99
79
80 /* Migration XBZRLE default cache size */
81 #define DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE (64 * 1024 * 1024)
82
83 /* The delay time (in ms) between two COLO checkpoints */
84 #define DEFAULT_MIGRATE_X_CHECKPOINT_DELAY (200 * 100)
85 #define DEFAULT_MIGRATE_MULTIFD_CHANNELS 2
86
87 /* Background transfer rate for postcopy, 0 means unlimited, note
88  * that page requests can still exceed this limit.
89  */
90 #define DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH 0
91
92 /*
93  * Parameters for self_announce_delay giving a stream of RARP/ARP
94  * packets after migration.
95  */
96 #define DEFAULT_MIGRATE_ANNOUNCE_INITIAL  50
97 #define DEFAULT_MIGRATE_ANNOUNCE_MAX     550
98 #define DEFAULT_MIGRATE_ANNOUNCE_ROUNDS    5
99 #define DEFAULT_MIGRATE_ANNOUNCE_STEP    100
100
101 static NotifierList migration_state_notifiers =
102     NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
103
104 static bool deferred_incoming;
105
106 /* Messages sent on the return path from destination to source */
107 enum mig_rp_message_type {
108     MIG_RP_MSG_INVALID = 0,  /* Must be 0 */
109     MIG_RP_MSG_SHUT,         /* sibling will not send any more RP messages */
110     MIG_RP_MSG_PONG,         /* Response to a PING; data (seq: be32 ) */
111
112     MIG_RP_MSG_REQ_PAGES_ID, /* data (start: be64, len: be32, id: string) */
113     MIG_RP_MSG_REQ_PAGES,    /* data (start: be64, len: be32) */
114     MIG_RP_MSG_RECV_BITMAP,  /* send recved_bitmap back to source */
115     MIG_RP_MSG_RESUME_ACK,   /* tell source that we are ready to resume */
116
117     MIG_RP_MSG_MAX
118 };
119
120 /* When we add fault tolerance, we could have several
121    migrations at once.  For now we don't need to add
122    dynamic creation of migration */
123
124 static MigrationState *current_migration;
125 static MigrationIncomingState *current_incoming;
126
127 static bool migration_object_check(MigrationState *ms, Error **errp);
128 static int migration_maybe_pause(MigrationState *s,
129                                  int *current_active_state,
130                                  int new_state);
131 static void migrate_fd_cancel(MigrationState *s);
132
133 void migration_object_init(void)
134 {
135     MachineState *ms = MACHINE(qdev_get_machine());
136     Error *err = NULL;
137
138     /* This can only be called once. */
139     assert(!current_migration);
140     current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
141
142     /*
143      * Init the migrate incoming object as well no matter whether
144      * we'll use it or not.
145      */
146     assert(!current_incoming);
147     current_incoming = g_new0(MigrationIncomingState, 1);
148     current_incoming->state = MIGRATION_STATUS_NONE;
149     current_incoming->postcopy_remote_fds =
150         g_array_new(FALSE, TRUE, sizeof(struct PostCopyFD));
151     qemu_mutex_init(&current_incoming->rp_mutex);
152     qemu_event_init(&current_incoming->main_thread_load_event, false);
153     qemu_sem_init(&current_incoming->postcopy_pause_sem_dst, 0);
154     qemu_sem_init(&current_incoming->postcopy_pause_sem_fault, 0);
155
156     init_dirty_bitmap_incoming_migration();
157
158     if (!migration_object_check(current_migration, &err)) {
159         error_report_err(err);
160         exit(1);
161     }
162
163     /*
164      * We cannot really do this in migration_instance_init() since at
165      * that time global properties are not yet applied, then this
166      * value will be definitely replaced by something else.
167      */
168     if (ms->enforce_config_section) {
169         current_migration->send_configuration = true;
170     }
171 }
172
173 void migration_shutdown(void)
174 {
175     /*
176      * Cancel the current migration - that will (eventually)
177      * stop the migration using this structure
178      */
179     migrate_fd_cancel(current_migration);
180     object_unref(OBJECT(current_migration));
181 }
182
183 /* For outgoing */
184 MigrationState *migrate_get_current(void)
185 {
186     /* This can only be called after the object created. */
187     assert(current_migration);
188     return current_migration;
189 }
190
191 MigrationIncomingState *migration_incoming_get_current(void)
192 {
193     assert(current_incoming);
194     return current_incoming;
195 }
196
197 void migration_incoming_state_destroy(void)
198 {
199     struct MigrationIncomingState *mis = migration_incoming_get_current();
200
201     if (mis->to_src_file) {
202         /* Tell source that we are done */
203         migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
204         qemu_fclose(mis->to_src_file);
205         mis->to_src_file = NULL;
206     }
207
208     if (mis->from_src_file) {
209         qemu_fclose(mis->from_src_file);
210         mis->from_src_file = NULL;
211     }
212     if (mis->postcopy_remote_fds) {
213         g_array_free(mis->postcopy_remote_fds, TRUE);
214         mis->postcopy_remote_fds = NULL;
215     }
216
217     qemu_event_reset(&mis->main_thread_load_event);
218
219     if (mis->socket_address_list) {
220         qapi_free_SocketAddressList(mis->socket_address_list);
221         mis->socket_address_list = NULL;
222     }
223 }
224
/* Emit a QAPI migration event for @new_state, if events are enabled. */
static void migrate_generate_event(int new_state)
{
    if (!migrate_use_events()) {
        return;
    }

    qapi_event_send_migration(new_state);
}
231
232 static bool migrate_late_block_activate(void)
233 {
234     MigrationState *s;
235
236     s = migrate_get_current();
237
238     return s->enabled_capabilities[
239         MIGRATION_CAPABILITY_LATE_BLOCK_ACTIVATE];
240 }
241
242 /*
243  * Called on -incoming with a defer: uri.
244  * The migration can be started later after any parameters have been
245  * changed.
246  */
247 static void deferred_incoming_migration(Error **errp)
248 {
249     if (deferred_incoming) {
250         error_setg(errp, "Incoming migration already deferred");
251     }
252     deferred_incoming = true;
253 }
254
255 /*
256  * Send a message on the return channel back to the source
257  * of the migration.
258  */
259 static int migrate_send_rp_message(MigrationIncomingState *mis,
260                                    enum mig_rp_message_type message_type,
261                                    uint16_t len, void *data)
262 {
263     int ret = 0;
264
265     trace_migrate_send_rp_message((int)message_type, len);
266     qemu_mutex_lock(&mis->rp_mutex);
267
268     /*
269      * It's possible that the file handle got lost due to network
270      * failures.
271      */
272     if (!mis->to_src_file) {
273         ret = -EIO;
274         goto error;
275     }
276
277     qemu_put_be16(mis->to_src_file, (unsigned int)message_type);
278     qemu_put_be16(mis->to_src_file, len);
279     qemu_put_buffer(mis->to_src_file, data, len);
280     qemu_fflush(mis->to_src_file);
281
282     /* It's possible that qemu file got error during sending */
283     ret = qemu_file_get_error(mis->to_src_file);
284
285 error:
286     qemu_mutex_unlock(&mis->rp_mutex);
287     return ret;
288 }
289
290 /* Request a range of pages from the source VM at the given
291  * start address.
292  *   rbname: Name of the RAMBlock to request the page in, if NULL it's the same
293  *           as the last request (a name must have been given previously)
294  *   Start: Address offset within the RB
295  *   Len: Length in bytes required - must be a multiple of pagesize
296  */
297 int migrate_send_rp_req_pages(MigrationIncomingState *mis, const char *rbname,
298                               ram_addr_t start, size_t len)
299 {
300     uint8_t bufc[12 + 1 + 255]; /* start (8), len (4), rbname up to 256 */
301     size_t msglen = 12; /* start + len */
302     enum mig_rp_message_type msg_type;
303
304     *(uint64_t *)bufc = cpu_to_be64((uint64_t)start);
305     *(uint32_t *)(bufc + 8) = cpu_to_be32((uint32_t)len);
306
307     if (rbname) {
308         int rbname_len = strlen(rbname);
309         assert(rbname_len < 256);
310
311         bufc[msglen++] = rbname_len;
312         memcpy(bufc + msglen, rbname, rbname_len);
313         msglen += rbname_len;
314         msg_type = MIG_RP_MSG_REQ_PAGES_ID;
315     } else {
316         msg_type = MIG_RP_MSG_REQ_PAGES;
317     }
318
319     return migrate_send_rp_message(mis, msg_type, msglen, bufc);
320 }
321
/* Set while COLO is enabled for the incoming side. */
static bool migration_colo_enabled;

/* Report whether COLO is currently enabled for the incoming side. */
bool migration_incoming_colo_enabled(void)
{
    bool enabled = migration_colo_enabled;

    return enabled;
}
327
328 void migration_incoming_disable_colo(void)
329 {
330     migration_colo_enabled = false;
331 }
332
333 void migration_incoming_enable_colo(void)
334 {
335     migration_colo_enabled = true;
336 }
337
338 void migrate_add_address(SocketAddress *address)
339 {
340     MigrationIncomingState *mis = migration_incoming_get_current();
341     SocketAddressList *addrs;
342
343     addrs = g_new0(SocketAddressList, 1);
344     addrs->next = mis->socket_address_list;
345     mis->socket_address_list = addrs;
346     addrs->value = QAPI_CLONE(SocketAddress, address);
347 }
348
349 void qemu_start_incoming_migration(const char *uri, Error **errp)
350 {
351     const char *p;
352
353     qapi_event_send_migration(MIGRATION_STATUS_SETUP);
354     if (!strcmp(uri, "defer")) {
355         deferred_incoming_migration(errp);
356     } else if (strstart(uri, "tcp:", &p)) {
357         tcp_start_incoming_migration(p, errp);
358 #ifdef CONFIG_RDMA
359     } else if (strstart(uri, "rdma:", &p)) {
360         rdma_start_incoming_migration(p, errp);
361 #endif
362     } else if (strstart(uri, "exec:", &p)) {
363         exec_start_incoming_migration(p, errp);
364     } else if (strstart(uri, "unix:", &p)) {
365         unix_start_incoming_migration(p, errp);
366     } else if (strstart(uri, "fd:", &p)) {
367         fd_start_incoming_migration(p, errp);
368     } else {
369         error_setg(errp, "unknown migration protocol: %s", uri);
370     }
371 }
372
373 static void process_incoming_migration_bh(void *opaque)
374 {
375     Error *local_err = NULL;
376     MigrationIncomingState *mis = opaque;
377
378     /* If capability late_block_activate is set:
379      * Only fire up the block code now if we're going to restart the
380      * VM, else 'cont' will do it.
381      * This causes file locking to happen; so we don't want it to happen
382      * unless we really are starting the VM.
383      */
384     if (!migrate_late_block_activate() ||
385          (autostart && (!global_state_received() ||
386             global_state_get_runstate() == RUN_STATE_RUNNING))) {
387         /* Make sure all file formats flush their mutable metadata.
388          * If we get an error here, just don't restart the VM yet. */
389         bdrv_invalidate_cache_all(&local_err);
390         if (local_err) {
391             error_report_err(local_err);
392             local_err = NULL;
393             autostart = false;
394         }
395     }
396
397     /*
398      * This must happen after all error conditions are dealt with and
399      * we're sure the VM is going to be running on this host.
400      */
401     qemu_announce_self(&mis->announce_timer, migrate_announce_params());
402
403     if (multifd_load_cleanup(&local_err) != 0) {
404         error_report_err(local_err);
405         autostart = false;
406     }
407     /* If global state section was not received or we are in running
408        state, we need to obey autostart. Any other state is set with
409        runstate_set. */
410
411     dirty_bitmap_mig_before_vm_start();
412
413     if (!global_state_received() ||
414         global_state_get_runstate() == RUN_STATE_RUNNING) {
415         if (autostart) {
416             vm_start();
417         } else {
418             runstate_set(RUN_STATE_PAUSED);
419         }
420     } else if (migration_incoming_colo_enabled()) {
421         migration_incoming_disable_colo();
422         vm_start();
423     } else {
424         runstate_set(global_state_get_runstate());
425     }
426     /*
427      * This must happen after any state changes since as soon as an external
428      * observer sees this event they might start to prod at the VM assuming
429      * it's ready to use.
430      */
431     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
432                       MIGRATION_STATUS_COMPLETED);
433     qemu_bh_delete(mis->bh);
434     migration_incoming_state_destroy();
435 }
436
437 static void process_incoming_migration_co(void *opaque)
438 {
439     MigrationIncomingState *mis = migration_incoming_get_current();
440     PostcopyState ps;
441     int ret;
442     Error *local_err = NULL;
443
444     assert(mis->from_src_file);
445     mis->migration_incoming_co = qemu_coroutine_self();
446     mis->largest_page_size = qemu_ram_pagesize_largest();
447     postcopy_state_set(POSTCOPY_INCOMING_NONE);
448     migrate_set_state(&mis->state, MIGRATION_STATUS_NONE,
449                       MIGRATION_STATUS_ACTIVE);
450     ret = qemu_loadvm_state(mis->from_src_file);
451
452     ps = postcopy_state_get();
453     trace_process_incoming_migration_co_end(ret, ps);
454     if (ps != POSTCOPY_INCOMING_NONE) {
455         if (ps == POSTCOPY_INCOMING_ADVISE) {
456             /*
457              * Where a migration had postcopy enabled (and thus went to advise)
458              * but managed to complete within the precopy period, we can use
459              * the normal exit.
460              */
461             postcopy_ram_incoming_cleanup(mis);
462         } else if (ret >= 0) {
463             /*
464              * Postcopy was started, cleanup should happen at the end of the
465              * postcopy thread.
466              */
467             trace_process_incoming_migration_co_postcopy_end_main();
468             return;
469         }
470         /* Else if something went wrong then just fall out of the normal exit */
471     }
472
473     /* we get COLO info, and know if we are in COLO mode */
474     if (!ret && migration_incoming_colo_enabled()) {
475         /* Make sure all file formats flush their mutable metadata */
476         bdrv_invalidate_cache_all(&local_err);
477         if (local_err) {
478             error_report_err(local_err);
479             goto fail;
480         }
481
482         if (colo_init_ram_cache() < 0) {
483             error_report("Init ram cache failed");
484             goto fail;
485         }
486
487         qemu_thread_create(&mis->colo_incoming_thread, "COLO incoming",
488              colo_process_incoming_thread, mis, QEMU_THREAD_JOINABLE);
489         mis->have_colo_incoming_thread = true;
490         qemu_coroutine_yield();
491
492         /* Wait checkpoint incoming thread exit before free resource */
493         qemu_thread_join(&mis->colo_incoming_thread);
494         /* We hold the global iothread lock, so it is safe here */
495         colo_release_ram_cache();
496     }
497
498     if (ret < 0) {
499         error_report("load of migration failed: %s", strerror(-ret));
500         goto fail;
501     }
502     mis->bh = qemu_bh_new(process_incoming_migration_bh, mis);
503     qemu_bh_schedule(mis->bh);
504     mis->migration_incoming_co = NULL;
505     return;
506 fail:
507     local_err = NULL;
508     migrate_set_state(&mis->state, MIGRATION_STATUS_ACTIVE,
509                       MIGRATION_STATUS_FAILED);
510     qemu_fclose(mis->from_src_file);
511     if (multifd_load_cleanup(&local_err) != 0) {
512         error_report_err(local_err);
513     }
514     exit(EXIT_FAILURE);
515 }
516
517 static void migration_incoming_setup(QEMUFile *f)
518 {
519     MigrationIncomingState *mis = migration_incoming_get_current();
520
521     if (multifd_load_setup() != 0) {
522         /* We haven't been able to create multifd threads
523            nothing better to do */
524         exit(EXIT_FAILURE);
525     }
526
527     if (!mis->from_src_file) {
528         mis->from_src_file = f;
529     }
530     qemu_file_set_blocking(f, false);
531 }
532
533 void migration_incoming_process(void)
534 {
535     Coroutine *co = qemu_coroutine_create(process_incoming_migration_co, NULL);
536     qemu_coroutine_enter(co);
537 }
538
539 /* Returns true if recovered from a paused migration, otherwise false */
540 static bool postcopy_try_recover(QEMUFile *f)
541 {
542     MigrationIncomingState *mis = migration_incoming_get_current();
543
544     if (mis->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
545         /* Resumed from a paused postcopy migration */
546
547         mis->from_src_file = f;
548         /* Postcopy has standalone thread to do vm load */
549         qemu_file_set_blocking(f, true);
550
551         /* Re-configure the return path */
552         mis->to_src_file = qemu_file_get_return_path(f);
553
554         migrate_set_state(&mis->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
555                           MIGRATION_STATUS_POSTCOPY_RECOVER);
556
557         /*
558          * Here, we only wake up the main loading thread (while the
559          * fault thread will still be waiting), so that we can receive
560          * commands from source now, and answer it if needed. The
561          * fault thread will be woken up afterwards until we are sure
562          * that source is ready to reply to page requests.
563          */
564         qemu_sem_post(&mis->postcopy_pause_sem_dst);
565         return true;
566     }
567
568     return false;
569 }
570
571 void migration_fd_process_incoming(QEMUFile *f)
572 {
573     if (postcopy_try_recover(f)) {
574         return;
575     }
576
577     migration_incoming_setup(f);
578     migration_incoming_process();
579 }
580
581 void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
582 {
583     MigrationIncomingState *mis = migration_incoming_get_current();
584     bool start_migration;
585
586     if (!mis->from_src_file) {
587         /* The first connection (multifd may have multiple) */
588         QEMUFile *f = qemu_fopen_channel_input(ioc);
589
590         /* If it's a recovery, we're done */
591         if (postcopy_try_recover(f)) {
592             return;
593         }
594
595         migration_incoming_setup(f);
596
597         /*
598          * Common migration only needs one channel, so we can start
599          * right now.  Multifd needs more than one channel, we wait.
600          */
601         start_migration = !migrate_use_multifd();
602     } else {
603         Error *local_err = NULL;
604         /* Multiple connections */
605         assert(migrate_use_multifd());
606         start_migration = multifd_recv_new_channel(ioc, &local_err);
607         if (local_err) {
608             error_propagate(errp, local_err);
609             return;
610         }
611     }
612
613     if (start_migration) {
614         migration_incoming_process();
615     }
616 }
617
618 /**
619  * @migration_has_all_channels: We have received all channels that we need
620  *
621  * Returns true when we have got connections to all the channels that
622  * we need for migration.
623  */
624 bool migration_has_all_channels(void)
625 {
626     MigrationIncomingState *mis = migration_incoming_get_current();
627     bool all_channels;
628
629     all_channels = multifd_recv_all_channels_created();
630
631     return all_channels && mis->from_src_file != NULL;
632 }
633
634 /*
635  * Send a 'SHUT' message on the return channel with the given value
636  * to indicate that we've finished with the RP.  Non-0 value indicates
637  * error.
638  */
639 void migrate_send_rp_shut(MigrationIncomingState *mis,
640                           uint32_t value)
641 {
642     uint32_t buf;
643
644     buf = cpu_to_be32(value);
645     migrate_send_rp_message(mis, MIG_RP_MSG_SHUT, sizeof(buf), &buf);
646 }
647
648 /*
649  * Send a 'PONG' message on the return channel with the given value
650  * (normally in response to a 'PING')
651  */
652 void migrate_send_rp_pong(MigrationIncomingState *mis,
653                           uint32_t value)
654 {
655     uint32_t buf;
656
657     buf = cpu_to_be32(value);
658     migrate_send_rp_message(mis, MIG_RP_MSG_PONG, sizeof(buf), &buf);
659 }
660
661 void migrate_send_rp_recv_bitmap(MigrationIncomingState *mis,
662                                  char *block_name)
663 {
664     char buf[512];
665     int len;
666     int64_t res;
667
668     /*
669      * First, we send the header part. It contains only the len of
670      * idstr, and the idstr itself.
671      */
672     len = strlen(block_name);
673     buf[0] = len;
674     memcpy(buf + 1, block_name, len);
675
676     if (mis->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
677         error_report("%s: MSG_RP_RECV_BITMAP only used for recovery",
678                      __func__);
679         return;
680     }
681
682     migrate_send_rp_message(mis, MIG_RP_MSG_RECV_BITMAP, len + 1, buf);
683
684     /*
685      * Next, we dump the received bitmap to the stream.
686      *
687      * TODO: currently we are safe since we are the only one that is
688      * using the to_src_file handle (fault thread is still paused),
689      * and it's ok even not taking the mutex. However the best way is
690      * to take the lock before sending the message header, and release
691      * the lock after sending the bitmap.
692      */
693     qemu_mutex_lock(&mis->rp_mutex);
694     res = ramblock_recv_bitmap_send(mis->to_src_file, block_name);
695     qemu_mutex_unlock(&mis->rp_mutex);
696
697     trace_migrate_send_rp_recv_bitmap(block_name, res);
698 }
699
700 void migrate_send_rp_resume_ack(MigrationIncomingState *mis, uint32_t value)
701 {
702     uint32_t buf;
703
704     buf = cpu_to_be32(value);
705     migrate_send_rp_message(mis, MIG_RP_MSG_RESUME_ACK, sizeof(buf), &buf);
706 }
707
708 MigrationCapabilityStatusList *qmp_query_migrate_capabilities(Error **errp)
709 {
710     MigrationCapabilityStatusList *head = NULL;
711     MigrationCapabilityStatusList *caps;
712     MigrationState *s = migrate_get_current();
713     int i;
714
715     caps = NULL; /* silence compiler warning */
716     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
717 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
718         if (i == MIGRATION_CAPABILITY_BLOCK) {
719             continue;
720         }
721 #endif
722         if (head == NULL) {
723             head = g_malloc0(sizeof(*caps));
724             caps = head;
725         } else {
726             caps->next = g_malloc0(sizeof(*caps));
727             caps = caps->next;
728         }
729         caps->value =
730             g_malloc(sizeof(*caps->value));
731         caps->value->capability = i;
732         caps->value->state = s->enabled_capabilities[i];
733     }
734
735     return head;
736 }
737
738 MigrationParameters *qmp_query_migrate_parameters(Error **errp)
739 {
740     MigrationParameters *params;
741     MigrationState *s = migrate_get_current();
742
743     /* TODO use QAPI_CLONE() instead of duplicating it inline */
744     params = g_malloc0(sizeof(*params));
745     params->has_compress_level = true;
746     params->compress_level = s->parameters.compress_level;
747     params->has_compress_threads = true;
748     params->compress_threads = s->parameters.compress_threads;
749     params->has_compress_wait_thread = true;
750     params->compress_wait_thread = s->parameters.compress_wait_thread;
751     params->has_decompress_threads = true;
752     params->decompress_threads = s->parameters.decompress_threads;
753     params->has_cpu_throttle_initial = true;
754     params->cpu_throttle_initial = s->parameters.cpu_throttle_initial;
755     params->has_cpu_throttle_increment = true;
756     params->cpu_throttle_increment = s->parameters.cpu_throttle_increment;
757     params->has_tls_creds = true;
758     params->tls_creds = g_strdup(s->parameters.tls_creds);
759     params->has_tls_hostname = true;
760     params->tls_hostname = g_strdup(s->parameters.tls_hostname);
761     params->has_tls_authz = true;
762     params->tls_authz = g_strdup(s->parameters.tls_authz);
763     params->has_max_bandwidth = true;
764     params->max_bandwidth = s->parameters.max_bandwidth;
765     params->has_downtime_limit = true;
766     params->downtime_limit = s->parameters.downtime_limit;
767     params->has_x_checkpoint_delay = true;
768     params->x_checkpoint_delay = s->parameters.x_checkpoint_delay;
769     params->has_block_incremental = true;
770     params->block_incremental = s->parameters.block_incremental;
771     params->has_multifd_channels = true;
772     params->multifd_channels = s->parameters.multifd_channels;
773     params->has_xbzrle_cache_size = true;
774     params->xbzrle_cache_size = s->parameters.xbzrle_cache_size;
775     params->has_max_postcopy_bandwidth = true;
776     params->max_postcopy_bandwidth = s->parameters.max_postcopy_bandwidth;
777     params->has_max_cpu_throttle = true;
778     params->max_cpu_throttle = s->parameters.max_cpu_throttle;
779     params->has_announce_initial = true;
780     params->announce_initial = s->parameters.announce_initial;
781     params->has_announce_max = true;
782     params->announce_max = s->parameters.announce_max;
783     params->has_announce_rounds = true;
784     params->announce_rounds = s->parameters.announce_rounds;
785     params->has_announce_step = true;
786     params->announce_step = s->parameters.announce_step;
787
788     return params;
789 }
790
791 AnnounceParameters *migrate_announce_params(void)
792 {
793     static AnnounceParameters ap;
794
795     MigrationState *s = migrate_get_current();
796
797     ap.initial = s->parameters.announce_initial;
798     ap.max = s->parameters.announce_max;
799     ap.rounds = s->parameters.announce_rounds;
800     ap.step = s->parameters.announce_step;
801
802     return &ap;
803 }
804
805 /*
806  * Return true if we're already in the middle of a migration
807  * (i.e. any of the active or setup states)
808  */
809 bool migration_is_setup_or_active(int state)
810 {
811     switch (state) {
812     case MIGRATION_STATUS_ACTIVE:
813     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
814     case MIGRATION_STATUS_POSTCOPY_PAUSED:
815     case MIGRATION_STATUS_POSTCOPY_RECOVER:
816     case MIGRATION_STATUS_SETUP:
817     case MIGRATION_STATUS_PRE_SWITCHOVER:
818     case MIGRATION_STATUS_DEVICE:
819         return true;
820
821     default:
822         return false;
823
824     }
825 }
826
827 static void populate_ram_info(MigrationInfo *info, MigrationState *s)
828 {
829     info->has_ram = true;
830     info->ram = g_malloc0(sizeof(*info->ram));
831     info->ram->transferred = ram_counters.transferred;
832     info->ram->total = ram_bytes_total();
833     info->ram->duplicate = ram_counters.duplicate;
834     /* legacy value.  It is not used anymore */
835     info->ram->skipped = 0;
836     info->ram->normal = ram_counters.normal;
837     info->ram->normal_bytes = ram_counters.normal *
838         qemu_target_page_size();
839     info->ram->mbps = s->mbps;
840     info->ram->dirty_sync_count = ram_counters.dirty_sync_count;
841     info->ram->postcopy_requests = ram_counters.postcopy_requests;
842     info->ram->page_size = qemu_target_page_size();
843     info->ram->multifd_bytes = ram_counters.multifd_bytes;
844     info->ram->pages_per_second = s->pages_per_second;
845
846     if (migrate_use_xbzrle()) {
847         info->has_xbzrle_cache = true;
848         info->xbzrle_cache = g_malloc0(sizeof(*info->xbzrle_cache));
849         info->xbzrle_cache->cache_size = migrate_xbzrle_cache_size();
850         info->xbzrle_cache->bytes = xbzrle_counters.bytes;
851         info->xbzrle_cache->pages = xbzrle_counters.pages;
852         info->xbzrle_cache->cache_miss = xbzrle_counters.cache_miss;
853         info->xbzrle_cache->cache_miss_rate = xbzrle_counters.cache_miss_rate;
854         info->xbzrle_cache->overflow = xbzrle_counters.overflow;
855     }
856
857     if (migrate_use_compression()) {
858         info->has_compression = true;
859         info->compression = g_malloc0(sizeof(*info->compression));
860         info->compression->pages = compression_counters.pages;
861         info->compression->busy = compression_counters.busy;
862         info->compression->busy_rate = compression_counters.busy_rate;
863         info->compression->compressed_size =
864                                     compression_counters.compressed_size;
865         info->compression->compression_rate =
866                                     compression_counters.compression_rate;
867     }
868
869     if (cpu_throttle_active()) {
870         info->has_cpu_throttle_percentage = true;
871         info->cpu_throttle_percentage = cpu_throttle_get_percentage();
872     }
873
874     if (s->state != MIGRATION_STATUS_COMPLETED) {
875         info->ram->remaining = ram_bytes_remaining();
876         info->ram->dirty_pages_rate = ram_counters.dirty_pages_rate;
877     }
878 }
879
880 static void populate_disk_info(MigrationInfo *info)
881 {
882     if (blk_mig_active()) {
883         info->has_disk = true;
884         info->disk = g_malloc0(sizeof(*info->disk));
885         info->disk->transferred = blk_mig_bytes_transferred();
886         info->disk->remaining = blk_mig_bytes_remaining();
887         info->disk->total = blk_mig_bytes_total();
888     }
889 }
890
891 static void fill_source_migration_info(MigrationInfo *info)
892 {
893     MigrationState *s = migrate_get_current();
894
895     switch (s->state) {
896     case MIGRATION_STATUS_NONE:
897         /* no migration has happened ever */
898         /* do not overwrite destination migration status */
899         return;
900         break;
901     case MIGRATION_STATUS_SETUP:
902         info->has_status = true;
903         info->has_total_time = false;
904         break;
905     case MIGRATION_STATUS_ACTIVE:
906     case MIGRATION_STATUS_CANCELLING:
907     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
908     case MIGRATION_STATUS_PRE_SWITCHOVER:
909     case MIGRATION_STATUS_DEVICE:
910     case MIGRATION_STATUS_POSTCOPY_PAUSED:
911     case MIGRATION_STATUS_POSTCOPY_RECOVER:
912          /* TODO add some postcopy stats */
913         info->has_status = true;
914         info->has_total_time = true;
915         info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
916             - s->start_time;
917         info->has_expected_downtime = true;
918         info->expected_downtime = s->expected_downtime;
919         info->has_setup_time = true;
920         info->setup_time = s->setup_time;
921
922         populate_ram_info(info, s);
923         populate_disk_info(info);
924         break;
925     case MIGRATION_STATUS_COLO:
926         info->has_status = true;
927         /* TODO: display COLO specific information (checkpoint info etc.) */
928         break;
929     case MIGRATION_STATUS_COMPLETED:
930         info->has_status = true;
931         info->has_total_time = true;
932         info->total_time = s->total_time;
933         info->has_downtime = true;
934         info->downtime = s->downtime;
935         info->has_setup_time = true;
936         info->setup_time = s->setup_time;
937
938         populate_ram_info(info, s);
939         break;
940     case MIGRATION_STATUS_FAILED:
941         info->has_status = true;
942         if (s->error) {
943             info->has_error_desc = true;
944             info->error_desc = g_strdup(error_get_pretty(s->error));
945         }
946         break;
947     case MIGRATION_STATUS_CANCELLED:
948         info->has_status = true;
949         break;
950     }
951     info->status = s->state;
952 }
953
954 /**
955  * @migration_caps_check - check capability validity
956  *
957  * @cap_list: old capability list, array of bool
958  * @params: new capabilities to be applied soon
959  * @errp: set *errp if the check failed, with reason
960  *
961  * Returns true if check passed, otherwise false.
962  */
963 static bool migrate_caps_check(bool *cap_list,
964                                MigrationCapabilityStatusList *params,
965                                Error **errp)
966 {
967     MigrationCapabilityStatusList *cap;
968     bool old_postcopy_cap;
969     MigrationIncomingState *mis = migration_incoming_get_current();
970
971     old_postcopy_cap = cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM];
972
973     for (cap = params; cap; cap = cap->next) {
974         cap_list[cap->value->capability] = cap->value->state;
975     }
976
977 #ifndef CONFIG_LIVE_BLOCK_MIGRATION
978     if (cap_list[MIGRATION_CAPABILITY_BLOCK]) {
979         error_setg(errp, "QEMU compiled without old-style (blk/-b, inc/-i) "
980                    "block migration");
981         error_append_hint(errp, "Use drive_mirror+NBD instead.\n");
982         return false;
983     }
984 #endif
985
986 #ifndef CONFIG_REPLICATION
987     if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
988         error_setg(errp, "QEMU compiled without replication module"
989                    " can't enable COLO");
990         error_append_hint(errp, "Please enable replication before COLO.\n");
991         return false;
992     }
993 #endif
994
995     if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
996         if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
997             /* The decompression threads asynchronously write into RAM
998              * rather than use the atomic copies needed to avoid
999              * userfaulting.  It should be possible to fix the decompression
1000              * threads for compatibility in future.
1001              */
1002             error_setg(errp, "Postcopy is not currently compatible "
1003                        "with compression");
1004             return false;
1005         }
1006
1007         /* This check is reasonably expensive, so only when it's being
1008          * set the first time, also it's only the destination that needs
1009          * special support.
1010          */
1011         if (!old_postcopy_cap && runstate_check(RUN_STATE_INMIGRATE) &&
1012             !postcopy_ram_supported_by_host(mis)) {
1013             /* postcopy_ram_supported_by_host will have emitted a more
1014              * detailed message
1015              */
1016             error_setg(errp, "Postcopy is not supported");
1017             return false;
1018         }
1019
1020         if (cap_list[MIGRATION_CAPABILITY_X_IGNORE_SHARED]) {
1021             error_setg(errp, "Postcopy is not compatible with ignore-shared");
1022             return false;
1023         }
1024     }
1025
1026     return true;
1027 }
1028
1029 static void fill_destination_migration_info(MigrationInfo *info)
1030 {
1031     MigrationIncomingState *mis = migration_incoming_get_current();
1032
1033     if (mis->socket_address_list) {
1034         info->has_socket_address = true;
1035         info->socket_address =
1036             QAPI_CLONE(SocketAddressList, mis->socket_address_list);
1037     }
1038
1039     switch (mis->state) {
1040     case MIGRATION_STATUS_NONE:
1041         return;
1042         break;
1043     case MIGRATION_STATUS_SETUP:
1044     case MIGRATION_STATUS_CANCELLING:
1045     case MIGRATION_STATUS_CANCELLED:
1046     case MIGRATION_STATUS_ACTIVE:
1047     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1048     case MIGRATION_STATUS_POSTCOPY_PAUSED:
1049     case MIGRATION_STATUS_POSTCOPY_RECOVER:
1050     case MIGRATION_STATUS_FAILED:
1051     case MIGRATION_STATUS_COLO:
1052         info->has_status = true;
1053         break;
1054     case MIGRATION_STATUS_COMPLETED:
1055         info->has_status = true;
1056         fill_destination_postcopy_migration_info(info);
1057         break;
1058     }
1059     info->status = mis->state;
1060 }
1061
1062 MigrationInfo *qmp_query_migrate(Error **errp)
1063 {
1064     MigrationInfo *info = g_malloc0(sizeof(*info));
1065
1066     fill_destination_migration_info(info);
1067     fill_source_migration_info(info);
1068
1069     return info;
1070 }
1071
1072 void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params,
1073                                   Error **errp)
1074 {
1075     MigrationState *s = migrate_get_current();
1076     MigrationCapabilityStatusList *cap;
1077     bool cap_list[MIGRATION_CAPABILITY__MAX];
1078
1079     if (migration_is_setup_or_active(s->state)) {
1080         error_setg(errp, QERR_MIGRATION_ACTIVE);
1081         return;
1082     }
1083
1084     memcpy(cap_list, s->enabled_capabilities, sizeof(cap_list));
1085     if (!migrate_caps_check(cap_list, params, errp)) {
1086         return;
1087     }
1088
1089     for (cap = params; cap; cap = cap->next) {
1090         s->enabled_capabilities[cap->value->capability] = cap->value->state;
1091     }
1092 }
1093
1094 /*
1095  * Check whether the parameters are valid. Error will be put into errp
1096  * (if provided). Return true if valid, otherwise false.
1097  */
1098 static bool migrate_params_check(MigrationParameters *params, Error **errp)
1099 {
1100     if (params->has_compress_level &&
1101         (params->compress_level > 9)) {
1102         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "compress_level",
1103                    "is invalid, it should be in the range of 0 to 9");
1104         return false;
1105     }
1106
1107     if (params->has_compress_threads && (params->compress_threads < 1)) {
1108         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1109                    "compress_threads",
1110                    "is invalid, it should be in the range of 1 to 255");
1111         return false;
1112     }
1113
1114     if (params->has_decompress_threads && (params->decompress_threads < 1)) {
1115         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1116                    "decompress_threads",
1117                    "is invalid, it should be in the range of 1 to 255");
1118         return false;
1119     }
1120
1121     if (params->has_cpu_throttle_initial &&
1122         (params->cpu_throttle_initial < 1 ||
1123          params->cpu_throttle_initial > 99)) {
1124         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1125                    "cpu_throttle_initial",
1126                    "an integer in the range of 1 to 99");
1127         return false;
1128     }
1129
1130     if (params->has_cpu_throttle_increment &&
1131         (params->cpu_throttle_increment < 1 ||
1132          params->cpu_throttle_increment > 99)) {
1133         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1134                    "cpu_throttle_increment",
1135                    "an integer in the range of 1 to 99");
1136         return false;
1137     }
1138
1139     if (params->has_max_bandwidth && (params->max_bandwidth > SIZE_MAX)) {
1140         error_setg(errp, "Parameter 'max_bandwidth' expects an integer in the"
1141                          " range of 0 to %zu bytes/second", SIZE_MAX);
1142         return false;
1143     }
1144
1145     if (params->has_downtime_limit &&
1146         (params->downtime_limit > MAX_MIGRATE_DOWNTIME)) {
1147         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
1148                          "the range of 0 to %d milliseconds",
1149                          MAX_MIGRATE_DOWNTIME);
1150         return false;
1151     }
1152
1153     /* x_checkpoint_delay is now always positive */
1154
1155     if (params->has_multifd_channels && (params->multifd_channels < 1)) {
1156         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1157                    "multifd_channels",
1158                    "is invalid, it should be in the range of 1 to 255");
1159         return false;
1160     }
1161
1162     if (params->has_xbzrle_cache_size &&
1163         (params->xbzrle_cache_size < qemu_target_page_size() ||
1164          !is_power_of_2(params->xbzrle_cache_size))) {
1165         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1166                    "xbzrle_cache_size",
1167                    "is invalid, it should be bigger than target page size"
1168                    " and a power of two");
1169         return false;
1170     }
1171
1172     if (params->has_max_cpu_throttle &&
1173         (params->max_cpu_throttle < params->cpu_throttle_initial ||
1174          params->max_cpu_throttle > 99)) {
1175         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1176                    "max_cpu_throttle",
1177                    "an integer in the range of cpu_throttle_initial to 99");
1178         return false;
1179     }
1180
1181     if (params->has_announce_initial &&
1182         params->announce_initial > 100000) {
1183         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1184                    "announce_initial",
1185                    "is invalid, it must be less than 100000 ms");
1186         return false;
1187     }
1188     if (params->has_announce_max &&
1189         params->announce_max > 100000) {
1190         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1191                    "announce_max",
1192                    "is invalid, it must be less than 100000 ms");
1193        return false;
1194     }
1195     if (params->has_announce_rounds &&
1196         params->announce_rounds > 1000) {
1197         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1198                    "announce_rounds",
1199                    "is invalid, it must be in the range of 0 to 1000");
1200        return false;
1201     }
1202     if (params->has_announce_step &&
1203         (params->announce_step < 1 ||
1204         params->announce_step > 10000)) {
1205         error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
1206                    "announce_step",
1207                    "is invalid, it must be in the range of 1 to 10000 ms");
1208        return false;
1209     }
1210     return true;
1211 }
1212
1213 static void migrate_params_test_apply(MigrateSetParameters *params,
1214                                       MigrationParameters *dest)
1215 {
1216     *dest = migrate_get_current()->parameters;
1217
1218     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1219
1220     if (params->has_compress_level) {
1221         dest->compress_level = params->compress_level;
1222     }
1223
1224     if (params->has_compress_threads) {
1225         dest->compress_threads = params->compress_threads;
1226     }
1227
1228     if (params->has_compress_wait_thread) {
1229         dest->compress_wait_thread = params->compress_wait_thread;
1230     }
1231
1232     if (params->has_decompress_threads) {
1233         dest->decompress_threads = params->decompress_threads;
1234     }
1235
1236     if (params->has_cpu_throttle_initial) {
1237         dest->cpu_throttle_initial = params->cpu_throttle_initial;
1238     }
1239
1240     if (params->has_cpu_throttle_increment) {
1241         dest->cpu_throttle_increment = params->cpu_throttle_increment;
1242     }
1243
1244     if (params->has_tls_creds) {
1245         assert(params->tls_creds->type == QTYPE_QSTRING);
1246         dest->tls_creds = g_strdup(params->tls_creds->u.s);
1247     }
1248
1249     if (params->has_tls_hostname) {
1250         assert(params->tls_hostname->type == QTYPE_QSTRING);
1251         dest->tls_hostname = g_strdup(params->tls_hostname->u.s);
1252     }
1253
1254     if (params->has_max_bandwidth) {
1255         dest->max_bandwidth = params->max_bandwidth;
1256     }
1257
1258     if (params->has_downtime_limit) {
1259         dest->downtime_limit = params->downtime_limit;
1260     }
1261
1262     if (params->has_x_checkpoint_delay) {
1263         dest->x_checkpoint_delay = params->x_checkpoint_delay;
1264     }
1265
1266     if (params->has_block_incremental) {
1267         dest->block_incremental = params->block_incremental;
1268     }
1269     if (params->has_multifd_channels) {
1270         dest->multifd_channels = params->multifd_channels;
1271     }
1272     if (params->has_xbzrle_cache_size) {
1273         dest->xbzrle_cache_size = params->xbzrle_cache_size;
1274     }
1275     if (params->has_max_postcopy_bandwidth) {
1276         dest->max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1277     }
1278     if (params->has_max_cpu_throttle) {
1279         dest->max_cpu_throttle = params->max_cpu_throttle;
1280     }
1281     if (params->has_announce_initial) {
1282         dest->announce_initial = params->announce_initial;
1283     }
1284     if (params->has_announce_max) {
1285         dest->announce_max = params->announce_max;
1286     }
1287     if (params->has_announce_rounds) {
1288         dest->announce_rounds = params->announce_rounds;
1289     }
1290     if (params->has_announce_step) {
1291         dest->announce_step = params->announce_step;
1292     }
1293 }
1294
1295 static void migrate_params_apply(MigrateSetParameters *params, Error **errp)
1296 {
1297     MigrationState *s = migrate_get_current();
1298
1299     /* TODO use QAPI_CLONE() instead of duplicating it inline */
1300
1301     if (params->has_compress_level) {
1302         s->parameters.compress_level = params->compress_level;
1303     }
1304
1305     if (params->has_compress_threads) {
1306         s->parameters.compress_threads = params->compress_threads;
1307     }
1308
1309     if (params->has_compress_wait_thread) {
1310         s->parameters.compress_wait_thread = params->compress_wait_thread;
1311     }
1312
1313     if (params->has_decompress_threads) {
1314         s->parameters.decompress_threads = params->decompress_threads;
1315     }
1316
1317     if (params->has_cpu_throttle_initial) {
1318         s->parameters.cpu_throttle_initial = params->cpu_throttle_initial;
1319     }
1320
1321     if (params->has_cpu_throttle_increment) {
1322         s->parameters.cpu_throttle_increment = params->cpu_throttle_increment;
1323     }
1324
1325     if (params->has_tls_creds) {
1326         g_free(s->parameters.tls_creds);
1327         assert(params->tls_creds->type == QTYPE_QSTRING);
1328         s->parameters.tls_creds = g_strdup(params->tls_creds->u.s);
1329     }
1330
1331     if (params->has_tls_hostname) {
1332         g_free(s->parameters.tls_hostname);
1333         assert(params->tls_hostname->type == QTYPE_QSTRING);
1334         s->parameters.tls_hostname = g_strdup(params->tls_hostname->u.s);
1335     }
1336
1337     if (params->has_tls_authz) {
1338         g_free(s->parameters.tls_authz);
1339         assert(params->tls_authz->type == QTYPE_QSTRING);
1340         s->parameters.tls_authz = g_strdup(params->tls_authz->u.s);
1341     }
1342
1343     if (params->has_max_bandwidth) {
1344         s->parameters.max_bandwidth = params->max_bandwidth;
1345         if (s->to_dst_file && !migration_in_postcopy()) {
1346             qemu_file_set_rate_limit(s->to_dst_file,
1347                                 s->parameters.max_bandwidth / XFER_LIMIT_RATIO);
1348         }
1349     }
1350
1351     if (params->has_downtime_limit) {
1352         s->parameters.downtime_limit = params->downtime_limit;
1353     }
1354
1355     if (params->has_x_checkpoint_delay) {
1356         s->parameters.x_checkpoint_delay = params->x_checkpoint_delay;
1357         if (migration_in_colo_state()) {
1358             colo_checkpoint_notify(s);
1359         }
1360     }
1361
1362     if (params->has_block_incremental) {
1363         s->parameters.block_incremental = params->block_incremental;
1364     }
1365     if (params->has_multifd_channels) {
1366         s->parameters.multifd_channels = params->multifd_channels;
1367     }
1368     if (params->has_xbzrle_cache_size) {
1369         s->parameters.xbzrle_cache_size = params->xbzrle_cache_size;
1370         xbzrle_cache_resize(params->xbzrle_cache_size, errp);
1371     }
1372     if (params->has_max_postcopy_bandwidth) {
1373         s->parameters.max_postcopy_bandwidth = params->max_postcopy_bandwidth;
1374         if (s->to_dst_file && migration_in_postcopy()) {
1375             qemu_file_set_rate_limit(s->to_dst_file,
1376                     s->parameters.max_postcopy_bandwidth / XFER_LIMIT_RATIO);
1377         }
1378     }
1379     if (params->has_max_cpu_throttle) {
1380         s->parameters.max_cpu_throttle = params->max_cpu_throttle;
1381     }
1382     if (params->has_announce_initial) {
1383         s->parameters.announce_initial = params->announce_initial;
1384     }
1385     if (params->has_announce_max) {
1386         s->parameters.announce_max = params->announce_max;
1387     }
1388     if (params->has_announce_rounds) {
1389         s->parameters.announce_rounds = params->announce_rounds;
1390     }
1391     if (params->has_announce_step) {
1392         s->parameters.announce_step = params->announce_step;
1393     }
1394 }
1395
1396 void qmp_migrate_set_parameters(MigrateSetParameters *params, Error **errp)
1397 {
1398     MigrationParameters tmp;
1399
1400     /* TODO Rewrite "" to null instead */
1401     if (params->has_tls_creds
1402         && params->tls_creds->type == QTYPE_QNULL) {
1403         qobject_unref(params->tls_creds->u.n);
1404         params->tls_creds->type = QTYPE_QSTRING;
1405         params->tls_creds->u.s = strdup("");
1406     }
1407     /* TODO Rewrite "" to null instead */
1408     if (params->has_tls_hostname
1409         && params->tls_hostname->type == QTYPE_QNULL) {
1410         qobject_unref(params->tls_hostname->u.n);
1411         params->tls_hostname->type = QTYPE_QSTRING;
1412         params->tls_hostname->u.s = strdup("");
1413     }
1414
1415     migrate_params_test_apply(params, &tmp);
1416
1417     if (!migrate_params_check(&tmp, errp)) {
1418         /* Invalid parameter */
1419         return;
1420     }
1421
1422     migrate_params_apply(params, errp);
1423 }
1424
1425
1426 void qmp_migrate_start_postcopy(Error **errp)
1427 {
1428     MigrationState *s = migrate_get_current();
1429
1430     if (!migrate_postcopy()) {
1431         error_setg(errp, "Enable postcopy with migrate_set_capability before"
1432                          " the start of migration");
1433         return;
1434     }
1435
1436     if (s->state == MIGRATION_STATUS_NONE) {
1437         error_setg(errp, "Postcopy must be started after migration has been"
1438                          " started");
1439         return;
1440     }
1441     /*
1442      * we don't error if migration has finished since that would be racy
1443      * with issuing this command.
1444      */
1445     atomic_set(&s->start_postcopy, true);
1446 }
1447
1448 /* shared migration helpers */
1449
1450 void migrate_set_state(int *state, int old_state, int new_state)
1451 {
1452     assert(new_state < MIGRATION_STATUS__MAX);
1453     if (atomic_cmpxchg(state, old_state, new_state) == old_state) {
1454         trace_migrate_set_state(MigrationStatus_str(new_state));
1455         migrate_generate_event(new_state);
1456     }
1457 }
1458
1459 static MigrationCapabilityStatusList *migrate_cap_add(
1460     MigrationCapabilityStatusList *list,
1461     MigrationCapability index,
1462     bool state)
1463 {
1464     MigrationCapabilityStatusList *cap;
1465
1466     cap = g_new0(MigrationCapabilityStatusList, 1);
1467     cap->value = g_new0(MigrationCapabilityStatus, 1);
1468     cap->value->capability = index;
1469     cap->value->state = state;
1470     cap->next = list;
1471
1472     return cap;
1473 }
1474
1475 void migrate_set_block_enabled(bool value, Error **errp)
1476 {
1477     MigrationCapabilityStatusList *cap;
1478
1479     cap = migrate_cap_add(NULL, MIGRATION_CAPABILITY_BLOCK, value);
1480     qmp_migrate_set_capabilities(cap, errp);
1481     qapi_free_MigrationCapabilityStatusList(cap);
1482 }
1483
1484 static void migrate_set_block_incremental(MigrationState *s, bool value)
1485 {
1486     s->parameters.block_incremental = value;
1487 }
1488
1489 static void block_cleanup_parameters(MigrationState *s)
1490 {
1491     if (s->must_remove_block_options) {
1492         /* setting to false can never fail */
1493         migrate_set_block_enabled(false, &error_abort);
1494         migrate_set_block_incremental(s, false);
1495         s->must_remove_block_options = false;
1496     }
1497 }
1498
/*
 * Tear down the outgoing migration @s after it has stopped.
 *
 * Deletes the cleanup bottom half, runs the savevm cleanup and, if an
 * outgoing file exists, joins the migration thread, cleans up multifd
 * and closes the file.  Afterwards a CANCELLING state is moved to
 * CANCELLED, a stored error is reported, the migration state notifiers
 * are called and the block migration options are reset if required.
 *
 * The state must not be ACTIVE or POSTCOPY_ACTIVE by the time the file
 * has been closed (asserted below).
 */
static void migrate_fd_cleanup(MigrationState *s)
{
    qemu_bh_delete(s->cleanup_bh);
    s->cleanup_bh = NULL;

    qemu_savevm_state_cleanup();

    if (s->to_dst_file) {
        QEMUFile *tmp;

        trace_migrate_fd_cleanup();
        /* The iothread lock is released for the duration of the join */
        qemu_mutex_unlock_iothread();
        if (s->migration_thread_running) {
            qemu_thread_join(&s->thread);
            s->migration_thread_running = false;
        }
        qemu_mutex_lock_iothread();

        multifd_save_cleanup();
        /* Detach the file from the state under qemu_file_lock ... */
        qemu_mutex_lock(&s->qemu_file_lock);
        tmp = s->to_dst_file;
        s->to_dst_file = NULL;
        qemu_mutex_unlock(&s->qemu_file_lock);
        /*
         * Close the file handle without the lock to make sure the
         * critical section won't block for long.
         */
        qemu_fclose(tmp);
    }

    assert((s->state != MIGRATION_STATUS_ACTIVE) &&
           (s->state != MIGRATION_STATUS_POSTCOPY_ACTIVE));

    /* A cancel request is considered complete once cleanup has run */
    if (s->state == MIGRATION_STATUS_CANCELLING) {
        migrate_set_state(&s->state, MIGRATION_STATUS_CANCELLING,
                          MIGRATION_STATUS_CANCELLED);
    }

    if (s->error) {
        /* It is used on info migrate.  We can't free it */
        error_report_err(error_copy(s->error));
    }
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}
1544
1545 static void migrate_fd_cleanup_schedule(MigrationState *s)
1546 {
1547     /*
1548      * Ref the state for bh, because it may be called when
1549      * there're already no other refs
1550      */
1551     object_ref(OBJECT(s));
1552     qemu_bh_schedule(s->cleanup_bh);
1553 }
1554
1555 static void migrate_fd_cleanup_bh(void *opaque)
1556 {
1557     MigrationState *s = opaque;
1558     migrate_fd_cleanup(s);
1559     object_unref(OBJECT(s));
1560 }
1561
1562 void migrate_set_error(MigrationState *s, const Error *error)
1563 {
1564     qemu_mutex_lock(&s->error_mutex);
1565     if (!s->error) {
1566         s->error = error_copy(error);
1567     }
1568     qemu_mutex_unlock(&s->error_mutex);
1569 }
1570
1571 void migrate_fd_error(MigrationState *s, const Error *error)
1572 {
1573     trace_migrate_fd_error(error_get_pretty(error));
1574     assert(s->to_dst_file == NULL);
1575     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1576                       MIGRATION_STATUS_FAILED);
1577     migrate_set_error(s, error);
1578 }
1579
/*
 * Request cancellation of the outgoing migration @s.
 *
 * Shuts down the return path file (if any), moves a migration that is
 * being set up or is active into the CANCELLING state, shuts down the
 * outgoing file so that a blocked writer can return, and re-activates
 * the block devices if they had been inactivated.
 */
static void migrate_fd_cancel(MigrationState *s)
{
    int old_state ;
    /* Sampled once up front; used for the shutdown further below */
    QEMUFile *f = migrate_get_current()->to_dst_file;
    trace_migrate_fd_cancel();

    if (s->rp_state.from_dst_file) {
        /* shutdown the rp socket, so causing the rp thread to shutdown */
        qemu_file_shutdown(s->rp_state.from_dst_file);
    }

    /*
     * The state may change concurrently, so retry the transition until
     * it sticks or the migration turns out not to be in progress.
     */
    do {
        old_state = s->state;
        if (!migration_is_setup_or_active(old_state)) {
            break;
        }
        /* If the migration is paused, kick it out of the pause */
        if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
            qemu_sem_post(&s->pause_sem);
        }
        migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
    } while (s->state != MIGRATION_STATUS_CANCELLING);

    /*
     * If we're unlucky the migration code might be stuck somewhere in a
     * send/write while the network has failed and is waiting to timeout;
     * if we've got shutdown(2) available then we can force it to quit.
     * The outgoing qemu file gets closed in migrate_fd_cleanup that is
     * called in a bh, so there is no race against this cancel.
     */
    if (s->state == MIGRATION_STATUS_CANCELLING && f) {
        qemu_file_shutdown(f);
    }
    /* Block devices were inactivated: bring them back into use */
    if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        } else {
            s->block_inactive = false;
        }
    }
}
1624
1625 void add_migration_state_change_notifier(Notifier *notify)
1626 {
1627     notifier_list_add(&migration_state_notifiers, notify);
1628 }
1629
1630 void remove_migration_state_change_notifier(Notifier *notify)
1631 {
1632     notifier_remove(notify);
1633 }
1634
1635 bool migration_in_setup(MigrationState *s)
1636 {
1637     return s->state == MIGRATION_STATUS_SETUP;
1638 }
1639
1640 bool migration_has_finished(MigrationState *s)
1641 {
1642     return s->state == MIGRATION_STATUS_COMPLETED;
1643 }
1644
1645 bool migration_has_failed(MigrationState *s)
1646 {
1647     return (s->state == MIGRATION_STATUS_CANCELLED ||
1648             s->state == MIGRATION_STATUS_FAILED);
1649 }
1650
1651 bool migration_in_postcopy(void)
1652 {
1653     MigrationState *s = migrate_get_current();
1654
1655     return (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
1656 }
1657
1658 bool migration_in_postcopy_after_devices(MigrationState *s)
1659 {
1660     return migration_in_postcopy() && s->postcopy_after_devices;
1661 }
1662
1663 bool migration_is_idle(void)
1664 {
1665     MigrationState *s = current_migration;
1666
1667     if (!s) {
1668         return true;
1669     }
1670
1671     switch (s->state) {
1672     case MIGRATION_STATUS_NONE:
1673     case MIGRATION_STATUS_CANCELLED:
1674     case MIGRATION_STATUS_COMPLETED:
1675     case MIGRATION_STATUS_FAILED:
1676         return true;
1677     case MIGRATION_STATUS_SETUP:
1678     case MIGRATION_STATUS_CANCELLING:
1679     case MIGRATION_STATUS_ACTIVE:
1680     case MIGRATION_STATUS_POSTCOPY_ACTIVE:
1681     case MIGRATION_STATUS_COLO:
1682     case MIGRATION_STATUS_PRE_SWITCHOVER:
1683     case MIGRATION_STATUS_DEVICE:
1684         return false;
1685     case MIGRATION_STATUS__MAX:
1686         g_assert_not_reached();
1687     }
1688
1689     return false;
1690 }
1691
1692 void migrate_init(MigrationState *s)
1693 {
1694     /*
1695      * Reinitialise all migration state, except
1696      * parameters/capabilities that the user set, and
1697      * locks.
1698      */
1699     s->bytes_xfer = 0;
1700     s->cleanup_bh = 0;
1701     s->to_dst_file = NULL;
1702     s->state = MIGRATION_STATUS_NONE;
1703     s->rp_state.from_dst_file = NULL;
1704     s->rp_state.error = false;
1705     s->mbps = 0.0;
1706     s->pages_per_second = 0.0;
1707     s->downtime = 0;
1708     s->expected_downtime = 0;
1709     s->setup_time = 0;
1710     s->start_postcopy = false;
1711     s->postcopy_after_devices = false;
1712     s->migration_thread_running = false;
1713     error_free(s->error);
1714     s->error = NULL;
1715
1716     migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
1717
1718     s->start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1719     s->total_time = 0;
1720     s->vm_was_running = false;
1721     s->iteration_initial_bytes = 0;
1722     s->threshold_size = 0;
1723 }
1724
1725 static GSList *migration_blockers;
1726
1727 int migrate_add_blocker(Error *reason, Error **errp)
1728 {
1729     if (only_migratable) {
1730         error_propagate_prepend(errp, error_copy(reason),
1731                                 "disallowing migration blocker "
1732                                 "(--only-migratable) for: ");
1733         return -EACCES;
1734     }
1735
1736     if (migration_is_idle()) {
1737         migration_blockers = g_slist_prepend(migration_blockers, reason);
1738         return 0;
1739     }
1740
1741     error_propagate_prepend(errp, error_copy(reason),
1742                             "disallowing migration blocker "
1743                             "(migration in progress) for: ");
1744     return -EBUSY;
1745 }
1746
1747 void migrate_del_blocker(Error *reason)
1748 {
1749     migration_blockers = g_slist_remove(migration_blockers, reason);
1750 }
1751
1752 void qmp_migrate_incoming(const char *uri, Error **errp)
1753 {
1754     Error *local_err = NULL;
1755     static bool once = true;
1756
1757     if (!deferred_incoming) {
1758         error_setg(errp, "For use with '-incoming defer'");
1759         return;
1760     }
1761     if (!once) {
1762         error_setg(errp, "The incoming migration has already been started");
1763     }
1764
1765     qemu_start_incoming_migration(uri, &local_err);
1766
1767     if (local_err) {
1768         error_propagate(errp, local_err);
1769         return;
1770     }
1771
1772     once = false;
1773 }
1774
1775 void qmp_migrate_recover(const char *uri, Error **errp)
1776 {
1777     MigrationIncomingState *mis = migration_incoming_get_current();
1778
1779     if (mis->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1780         error_setg(errp, "Migrate recover can only be run "
1781                    "when postcopy is paused.");
1782         return;
1783     }
1784
1785     if (atomic_cmpxchg(&mis->postcopy_recover_triggered,
1786                        false, true) == true) {
1787         error_setg(errp, "Migrate recovery is triggered already");
1788         return;
1789     }
1790
1791     /*
1792      * Note that this call will never start a real migration; it will
1793      * only re-setup the migration stream and poke existing migration
1794      * to continue using that newly established channel.
1795      */
1796     qemu_start_incoming_migration(uri, errp);
1797 }
1798
1799 void qmp_migrate_pause(Error **errp)
1800 {
1801     MigrationState *ms = migrate_get_current();
1802     MigrationIncomingState *mis = migration_incoming_get_current();
1803     int ret;
1804
1805     if (ms->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1806         /* Source side, during postcopy */
1807         qemu_mutex_lock(&ms->qemu_file_lock);
1808         ret = qemu_file_shutdown(ms->to_dst_file);
1809         qemu_mutex_unlock(&ms->qemu_file_lock);
1810         if (ret) {
1811             error_setg(errp, "Failed to pause source migration");
1812         }
1813         return;
1814     }
1815
1816     if (mis->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
1817         ret = qemu_file_shutdown(mis->from_src_file);
1818         if (ret) {
1819             error_setg(errp, "Failed to pause destination migration");
1820         }
1821         return;
1822     }
1823
1824     error_setg(errp, "migrate-pause is currently only supported "
1825                "during postcopy-active state");
1826 }
1827
1828 bool migration_is_blocked(Error **errp)
1829 {
1830     if (qemu_savevm_state_blocked(errp)) {
1831         return true;
1832     }
1833
1834     if (migration_blockers) {
1835         error_propagate(errp, error_copy(migration_blockers->data));
1836         return true;
1837     }
1838
1839     return false;
1840 }
1841
1842 /* Returns true if continue to migrate, or false if error detected */
1843 static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
1844                             bool resume, Error **errp)
1845 {
1846     Error *local_err = NULL;
1847
1848     if (resume) {
1849         if (s->state != MIGRATION_STATUS_POSTCOPY_PAUSED) {
1850             error_setg(errp, "Cannot resume if there is no "
1851                        "paused migration");
1852             return false;
1853         }
1854
1855         /*
1856          * Postcopy recovery won't work well with release-ram
1857          * capability since release-ram will drop the page buffer as
1858          * long as the page is put into the send buffer.  So if there
1859          * is a network failure happened, any page buffers that have
1860          * not yet reached the destination VM but have already been
1861          * sent from the source VM will be lost forever.  Let's refuse
1862          * the client from resuming such a postcopy migration.
1863          * Luckily release-ram was designed to only be used when src
1864          * and destination VMs are on the same host, so it should be
1865          * fine.
1866          */
1867         if (migrate_release_ram()) {
1868             error_setg(errp, "Postcopy recovery cannot work "
1869                        "when release-ram capability is set");
1870             return false;
1871         }
1872
1873         /* This is a resume, skip init status */
1874         return true;
1875     }
1876
1877     if (migration_is_setup_or_active(s->state) ||
1878         s->state == MIGRATION_STATUS_CANCELLING ||
1879         s->state == MIGRATION_STATUS_COLO) {
1880         error_setg(errp, QERR_MIGRATION_ACTIVE);
1881         return false;
1882     }
1883
1884     if (runstate_check(RUN_STATE_INMIGRATE)) {
1885         error_setg(errp, "Guest is waiting for an incoming migration");
1886         return false;
1887     }
1888
1889     if (migration_is_blocked(errp)) {
1890         return false;
1891     }
1892
1893     if (blk || blk_inc) {
1894         if (migrate_use_block() || migrate_use_block_incremental()) {
1895             error_setg(errp, "Command options are incompatible with "
1896                        "current migration capabilities");
1897             return false;
1898         }
1899         migrate_set_block_enabled(true, &local_err);
1900         if (local_err) {
1901             error_propagate(errp, local_err);
1902             return false;
1903         }
1904         s->must_remove_block_options = true;
1905     }
1906
1907     if (blk_inc) {
1908         migrate_set_block_incremental(s, true);
1909     }
1910
1911     migrate_init(s);
1912
1913     return true;
1914 }
1915
1916 void qmp_migrate(const char *uri, bool has_blk, bool blk,
1917                  bool has_inc, bool inc, bool has_detach, bool detach,
1918                  bool has_resume, bool resume, Error **errp)
1919 {
1920     Error *local_err = NULL;
1921     MigrationState *s = migrate_get_current();
1922     const char *p;
1923
1924     if (!migrate_prepare(s, has_blk && blk, has_inc && inc,
1925                          has_resume && resume, errp)) {
1926         /* Error detected, put into errp */
1927         return;
1928     }
1929
1930     if (strstart(uri, "tcp:", &p)) {
1931         tcp_start_outgoing_migration(s, p, &local_err);
1932 #ifdef CONFIG_RDMA
1933     } else if (strstart(uri, "rdma:", &p)) {
1934         rdma_start_outgoing_migration(s, p, &local_err);
1935 #endif
1936     } else if (strstart(uri, "exec:", &p)) {
1937         exec_start_outgoing_migration(s, p, &local_err);
1938     } else if (strstart(uri, "unix:", &p)) {
1939         unix_start_outgoing_migration(s, p, &local_err);
1940     } else if (strstart(uri, "fd:", &p)) {
1941         fd_start_outgoing_migration(s, p, &local_err);
1942     } else {
1943         error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
1944                    "a valid migration protocol");
1945         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
1946                           MIGRATION_STATUS_FAILED);
1947         block_cleanup_parameters(s);
1948         return;
1949     }
1950
1951     if (local_err) {
1952         migrate_fd_error(s, local_err);
1953         error_propagate(errp, local_err);
1954         return;
1955     }
1956 }
1957
1958 void qmp_migrate_cancel(Error **errp)
1959 {
1960     migrate_fd_cancel(migrate_get_current());
1961 }
1962
1963 void qmp_migrate_continue(MigrationStatus state, Error **errp)
1964 {
1965     MigrationState *s = migrate_get_current();
1966     if (s->state != state) {
1967         error_setg(errp,  "Migration not in expected state: %s",
1968                    MigrationStatus_str(s->state));
1969         return;
1970     }
1971     qemu_sem_post(&s->pause_sem);
1972 }
1973
1974 void qmp_migrate_set_cache_size(int64_t value, Error **errp)
1975 {
1976     MigrateSetParameters p = {
1977         .has_xbzrle_cache_size = true,
1978         .xbzrle_cache_size = value,
1979     };
1980
1981     qmp_migrate_set_parameters(&p, errp);
1982 }
1983
1984 int64_t qmp_query_migrate_cache_size(Error **errp)
1985 {
1986     return migrate_xbzrle_cache_size();
1987 }
1988
1989 void qmp_migrate_set_speed(int64_t value, Error **errp)
1990 {
1991     MigrateSetParameters p = {
1992         .has_max_bandwidth = true,
1993         .max_bandwidth = value,
1994     };
1995
1996     qmp_migrate_set_parameters(&p, errp);
1997 }
1998
1999 void qmp_migrate_set_downtime(double value, Error **errp)
2000 {
2001     if (value < 0 || value > MAX_MIGRATE_DOWNTIME_SECONDS) {
2002         error_setg(errp, "Parameter 'downtime_limit' expects an integer in "
2003                          "the range of 0 to %d seconds",
2004                          MAX_MIGRATE_DOWNTIME_SECONDS);
2005         return;
2006     }
2007
2008     value *= 1000; /* Convert to milliseconds */
2009     value = MAX(0, MIN(INT64_MAX, value));
2010
2011     MigrateSetParameters p = {
2012         .has_downtime_limit = true,
2013         .downtime_limit = value,
2014     };
2015
2016     qmp_migrate_set_parameters(&p, errp);
2017 }
2018
2019 bool migrate_release_ram(void)
2020 {
2021     MigrationState *s;
2022
2023     s = migrate_get_current();
2024
2025     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
2026 }
2027
2028 bool migrate_postcopy_ram(void)
2029 {
2030     MigrationState *s;
2031
2032     s = migrate_get_current();
2033
2034     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
2035 }
2036
2037 bool migrate_postcopy(void)
2038 {
2039     return migrate_postcopy_ram() || migrate_dirty_bitmaps();
2040 }
2041
2042 bool migrate_auto_converge(void)
2043 {
2044     MigrationState *s;
2045
2046     s = migrate_get_current();
2047
2048     return s->enabled_capabilities[MIGRATION_CAPABILITY_AUTO_CONVERGE];
2049 }
2050
2051 bool migrate_zero_blocks(void)
2052 {
2053     MigrationState *s;
2054
2055     s = migrate_get_current();
2056
2057     return s->enabled_capabilities[MIGRATION_CAPABILITY_ZERO_BLOCKS];
2058 }
2059
2060 bool migrate_postcopy_blocktime(void)
2061 {
2062     MigrationState *s;
2063
2064     s = migrate_get_current();
2065
2066     return s->enabled_capabilities[MIGRATION_CAPABILITY_POSTCOPY_BLOCKTIME];
2067 }
2068
2069 bool migrate_use_compression(void)
2070 {
2071     MigrationState *s;
2072
2073     s = migrate_get_current();
2074
2075     return s->enabled_capabilities[MIGRATION_CAPABILITY_COMPRESS];
2076 }
2077
2078 int migrate_compress_level(void)
2079 {
2080     MigrationState *s;
2081
2082     s = migrate_get_current();
2083
2084     return s->parameters.compress_level;
2085 }
2086
2087 int migrate_compress_threads(void)
2088 {
2089     MigrationState *s;
2090
2091     s = migrate_get_current();
2092
2093     return s->parameters.compress_threads;
2094 }
2095
2096 int migrate_compress_wait_thread(void)
2097 {
2098     MigrationState *s;
2099
2100     s = migrate_get_current();
2101
2102     return s->parameters.compress_wait_thread;
2103 }
2104
2105 int migrate_decompress_threads(void)
2106 {
2107     MigrationState *s;
2108
2109     s = migrate_get_current();
2110
2111     return s->parameters.decompress_threads;
2112 }
2113
2114 bool migrate_dirty_bitmaps(void)
2115 {
2116     MigrationState *s;
2117
2118     s = migrate_get_current();
2119
2120     return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
2121 }
2122
2123 bool migrate_ignore_shared(void)
2124 {
2125     MigrationState *s;
2126
2127     s = migrate_get_current();
2128
2129     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_IGNORE_SHARED];
2130 }
2131
2132 bool migrate_use_events(void)
2133 {
2134     MigrationState *s;
2135
2136     s = migrate_get_current();
2137
2138     return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
2139 }
2140
2141 bool migrate_use_multifd(void)
2142 {
2143     MigrationState *s;
2144
2145     s = migrate_get_current();
2146
2147     return s->enabled_capabilities[MIGRATION_CAPABILITY_MULTIFD];
2148 }
2149
2150 bool migrate_pause_before_switchover(void)
2151 {
2152     MigrationState *s;
2153
2154     s = migrate_get_current();
2155
2156     return s->enabled_capabilities[
2157         MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
2158 }
2159
2160 int migrate_multifd_channels(void)
2161 {
2162     MigrationState *s;
2163
2164     s = migrate_get_current();
2165
2166     return s->parameters.multifd_channels;
2167 }
2168
2169 int migrate_use_xbzrle(void)
2170 {
2171     MigrationState *s;
2172
2173     s = migrate_get_current();
2174
2175     return s->enabled_capabilities[MIGRATION_CAPABILITY_XBZRLE];
2176 }
2177
2178 int64_t migrate_xbzrle_cache_size(void)
2179 {
2180     MigrationState *s;
2181
2182     s = migrate_get_current();
2183
2184     return s->parameters.xbzrle_cache_size;
2185 }
2186
2187 static int64_t migrate_max_postcopy_bandwidth(void)
2188 {
2189     MigrationState *s;
2190
2191     s = migrate_get_current();
2192
2193     return s->parameters.max_postcopy_bandwidth;
2194 }
2195
2196 bool migrate_use_block(void)
2197 {
2198     MigrationState *s;
2199
2200     s = migrate_get_current();
2201
2202     return s->enabled_capabilities[MIGRATION_CAPABILITY_BLOCK];
2203 }
2204
2205 bool migrate_use_return_path(void)
2206 {
2207     MigrationState *s;
2208
2209     s = migrate_get_current();
2210
2211     return s->enabled_capabilities[MIGRATION_CAPABILITY_RETURN_PATH];
2212 }
2213
2214 bool migrate_use_block_incremental(void)
2215 {
2216     MigrationState *s;
2217
2218     s = migrate_get_current();
2219
2220     return s->parameters.block_incremental;
2221 }
2222
2223 /* migration thread support */
/*
 * Something bad happened to the RP (return path) stream: record it by
 * setting the rp_state.error flag of @s.
 * The caller shall print or trace something to indicate why.
 */
static void mark_source_rp_bad(MigrationState *s)
{
    s->rp_state.error = true;
}
2232
/*
 * Descriptor table for return-path messages, indexed by MIG_RP_MSG_* type.
 *   len:  exact payload length required for that message type, or -1 when
 *         the length is variable (no fixed-length check is applied)
 *   name: human readable message name used in error reports
 */
static struct rp_cmd_args {
    ssize_t     len; /* -1 = variable */
    const char *name;
} rp_cmd_args[] = {
    [MIG_RP_MSG_INVALID]        = { .len = -1, .name = "INVALID" },
    [MIG_RP_MSG_SHUT]           = { .len =  4, .name = "SHUT" },
    [MIG_RP_MSG_PONG]           = { .len =  4, .name = "PONG" },
    [MIG_RP_MSG_REQ_PAGES]      = { .len = 12, .name = "REQ_PAGES" },
    [MIG_RP_MSG_REQ_PAGES_ID]   = { .len = -1, .name = "REQ_PAGES_ID" },
    [MIG_RP_MSG_RECV_BITMAP]    = { .len = -1, .name = "RECV_BITMAP" },
    [MIG_RP_MSG_RESUME_ACK]     = { .len =  4, .name = "RESUME_ACK" },
    [MIG_RP_MSG_MAX]            = { .len = -1, .name = "MAX" },
};
2246
2247 /*
2248  * Process a request for pages received on the return path,
2249  * We're allowed to send more than requested (e.g. to round to our page size)
2250  * and we don't need to send pages that have already been sent.
2251  */
2252 static void migrate_handle_rp_req_pages(MigrationState *ms, const char* rbname,
2253                                        ram_addr_t start, size_t len)
2254 {
2255     long our_host_ps = getpagesize();
2256
2257     trace_migrate_handle_rp_req_pages(rbname, start, len);
2258
2259     /*
2260      * Since we currently insist on matching page sizes, just sanity check
2261      * we're being asked for whole host pages.
2262      */
2263     if (start & (our_host_ps-1) ||
2264        (len & (our_host_ps-1))) {
2265         error_report("%s: Misaligned page request, start: " RAM_ADDR_FMT
2266                      " len: %zd", __func__, start, len);
2267         mark_source_rp_bad(ms);
2268         return;
2269     }
2270
2271     if (ram_save_queue_pages(rbname, start, len)) {
2272         mark_source_rp_bad(ms);
2273     }
2274 }
2275
2276 /* Return true to retry, false to quit */
2277 static bool postcopy_pause_return_path_thread(MigrationState *s)
2278 {
2279     trace_postcopy_pause_return_path();
2280
2281     qemu_sem_wait(&s->postcopy_pause_rp_sem);
2282
2283     trace_postcopy_pause_return_path_continued();
2284
2285     return true;
2286 }
2287
2288 static int migrate_handle_rp_recv_bitmap(MigrationState *s, char *block_name)
2289 {
2290     RAMBlock *block = qemu_ram_block_by_name(block_name);
2291
2292     if (!block) {
2293         error_report("%s: invalid block name '%s'", __func__, block_name);
2294         return -EINVAL;
2295     }
2296
2297     /* Fetch the received bitmap and refresh the dirty bitmap */
2298     return ram_dirty_bitmap_reload(s, block);
2299 }
2300
2301 static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value)
2302 {
2303     trace_source_return_path_thread_resume_ack(value);
2304
2305     if (value != MIGRATION_RESUME_ACK_VALUE) {
2306         error_report("%s: illegal resume_ack value %"PRIu32,
2307                      __func__, value);
2308         return -1;
2309     }
2310
2311     /* Now both sides are active. */
2312     migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_RECOVER,
2313                       MIGRATION_STATUS_POSTCOPY_ACTIVE);
2314
2315     /* Notify send thread that time to continue send pages */
2316     qemu_sem_post(&s->rp_state.rp_sem);
2317
2318     return 0;
2319 }
2320
2321 /*
2322  * Handles messages sent on the return path towards the source VM
2323  *
2324  */
2325 static void *source_return_path_thread(void *opaque)
2326 {
2327     MigrationState *ms = opaque;
2328     QEMUFile *rp = ms->rp_state.from_dst_file;
2329     uint16_t header_len, header_type;
2330     uint8_t buf[512];
2331     uint32_t tmp32, sibling_error;
2332     ram_addr_t start = 0; /* =0 to silence warning */
2333     size_t  len = 0, expected_len;
2334     int res;
2335
2336     trace_source_return_path_thread_entry();
2337     rcu_register_thread();
2338
2339 retry:
2340     while (!ms->rp_state.error && !qemu_file_get_error(rp) &&
2341            migration_is_setup_or_active(ms->state)) {
2342         trace_source_return_path_thread_loop_top();
2343         header_type = qemu_get_be16(rp);
2344         header_len = qemu_get_be16(rp);
2345
2346         if (qemu_file_get_error(rp)) {
2347             mark_source_rp_bad(ms);
2348             goto out;
2349         }
2350
2351         if (header_type >= MIG_RP_MSG_MAX ||
2352             header_type == MIG_RP_MSG_INVALID) {
2353             error_report("RP: Received invalid message 0x%04x length 0x%04x",
2354                     header_type, header_len);
2355             mark_source_rp_bad(ms);
2356             goto out;
2357         }
2358
2359         if ((rp_cmd_args[header_type].len != -1 &&
2360             header_len != rp_cmd_args[header_type].len) ||
2361             header_len > sizeof(buf)) {
2362             error_report("RP: Received '%s' message (0x%04x) with"
2363                     "incorrect length %d expecting %zu",
2364                     rp_cmd_args[header_type].name, header_type, header_len,
2365                     (size_t)rp_cmd_args[header_type].len);
2366             mark_source_rp_bad(ms);
2367             goto out;
2368         }
2369
2370         /* We know we've got a valid header by this point */
2371         res = qemu_get_buffer(rp, buf, header_len);
2372         if (res != header_len) {
2373             error_report("RP: Failed reading data for message 0x%04x"
2374                          " read %d expected %d",
2375                          header_type, res, header_len);
2376             mark_source_rp_bad(ms);
2377             goto out;
2378         }
2379
2380         /* OK, we have the message and the data */
2381         switch (header_type) {
2382         case MIG_RP_MSG_SHUT:
2383             sibling_error = ldl_be_p(buf);
2384             trace_source_return_path_thread_shut(sibling_error);
2385             if (sibling_error) {
2386                 error_report("RP: Sibling indicated error %d", sibling_error);
2387                 mark_source_rp_bad(ms);
2388             }
2389             /*
2390              * We'll let the main thread deal with closing the RP
2391              * we could do a shutdown(2) on it, but we're the only user
2392              * anyway, so there's nothing gained.
2393              */
2394             goto out;
2395
2396         case MIG_RP_MSG_PONG:
2397             tmp32 = ldl_be_p(buf);
2398             trace_source_return_path_thread_pong(tmp32);
2399             break;
2400
2401         case MIG_RP_MSG_REQ_PAGES:
2402             start = ldq_be_p(buf);
2403             len = ldl_be_p(buf + 8);
2404             migrate_handle_rp_req_pages(ms, NULL, start, len);
2405             break;
2406
2407         case MIG_RP_MSG_REQ_PAGES_ID:
2408             expected_len = 12 + 1; /* header + termination */
2409
2410             if (header_len >= expected_len) {
2411                 start = ldq_be_p(buf);
2412                 len = ldl_be_p(buf + 8);
2413                 /* Now we expect an idstr */
2414                 tmp32 = buf[12]; /* Length of the following idstr */
2415                 buf[13 + tmp32] = '\0';
2416                 expected_len += tmp32;
2417             }
2418             if (header_len != expected_len) {
2419                 error_report("RP: Req_Page_id with length %d expecting %zd",
2420                         header_len, expected_len);
2421                 mark_source_rp_bad(ms);
2422                 goto out;
2423             }
2424             migrate_handle_rp_req_pages(ms, (char *)&buf[13], start, len);
2425             break;
2426
2427         case MIG_RP_MSG_RECV_BITMAP:
2428             if (header_len < 1) {
2429                 error_report("%s: missing block name", __func__);
2430                 mark_source_rp_bad(ms);
2431                 goto out;
2432             }
2433             /* Format: len (1B) + idstr (<255B). This ends the idstr. */
2434             buf[buf[0] + 1] = '\0';
2435             if (migrate_handle_rp_recv_bitmap(ms, (char *)(buf + 1))) {
2436                 mark_source_rp_bad(ms);
2437                 goto out;
2438             }
2439             break;
2440
2441         case MIG_RP_MSG_RESUME_ACK:
2442             tmp32 = ldl_be_p(buf);
2443             if (migrate_handle_rp_resume_ack(ms, tmp32)) {
2444                 mark_source_rp_bad(ms);
2445                 goto out;
2446             }
2447             break;
2448
2449         default:
2450             break;
2451         }
2452     }
2453
2454 out:
2455     res = qemu_file_get_error(rp);
2456     if (res) {
2457         if (res == -EIO) {
2458             /*
2459              * Maybe there is something we can do: it looks like a
2460              * network down issue, and we pause for a recovery.
2461              */
2462             if (postcopy_pause_return_path_thread(ms)) {
2463                 /* Reload rp, reset the rest */
2464                 if (rp != ms->rp_state.from_dst_file) {
2465                     qemu_fclose(rp);
2466                     rp = ms->rp_state.from_dst_file;
2467                 }
2468                 ms->rp_state.error = false;
2469                 goto retry;
2470             }
2471         }
2472
2473         trace_source_return_path_thread_bad_end();
2474         mark_source_rp_bad(ms);
2475     }
2476
2477     trace_source_return_path_thread_end();
2478     ms->rp_state.from_dst_file = NULL;
2479     qemu_fclose(rp);
2480     rcu_unregister_thread();
2481     return NULL;
2482 }
2483
2484 static int open_return_path_on_source(MigrationState *ms,
2485                                       bool create_thread)
2486 {
2487
2488     ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file);
2489     if (!ms->rp_state.from_dst_file) {
2490         return -1;
2491     }
2492
2493     trace_open_return_path_on_source();
2494
2495     if (!create_thread) {
2496         /* We're done */
2497         return 0;
2498     }
2499
2500     qemu_thread_create(&ms->rp_state.rp_thread, "return path",
2501                        source_return_path_thread, ms, QEMU_THREAD_JOINABLE);
2502
2503     trace_open_return_path_on_source_continue();
2504
2505     return 0;
2506 }
2507
2508 /* Returns 0 if the RP was ok, otherwise there was an error on the RP */
2509 static int await_return_path_close_on_source(MigrationState *ms)
2510 {
2511     /*
2512      * If this is a normal exit then the destination will send a SHUT and the
2513      * rp_thread will exit, however if there's an error we need to cause
2514      * it to exit.
2515      */
2516     if (qemu_file_get_error(ms->to_dst_file) && ms->rp_state.from_dst_file) {
2517         /*
2518          * shutdown(2), if we have it, will cause it to unblock if it's stuck
2519          * waiting for the destination.
2520          */
2521         qemu_file_shutdown(ms->rp_state.from_dst_file);
2522         mark_source_rp_bad(ms);
2523     }
2524     trace_await_return_path_close_on_source_joining();
2525     qemu_thread_join(&ms->rp_state.rp_thread);
2526     trace_await_return_path_close_on_source_close();
2527     return ms->rp_state.error;
2528 }
2529
/*
 * Switch from normal iteration to postcopy
 *
 * Stops the VM, inactivates the block devices, sends the remaining
 * non-postcopiable state, then wraps the device state into a single
 * length-prefixed package and sends it to the destination.
 *
 * Takes the iothread lock itself and releases it before returning.
 *
 * Returns non-0 on error: -1 from the 'fail' paths, otherwise the error
 * code of the outgoing stream (0 when everything went fine).
 */
static int postcopy_start(MigrationState *ms)
{
    int ret;
    QIOChannelBuffer *bioc;
    QEMUFile *fb;
    /* Sampled before stopping the VM; used to compute ms->downtime below */
    int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
    int64_t bandwidth = migrate_max_postcopy_bandwidth();
    /*
     * True between inactivating the block devices and the point where the
     * package is about to be sent; while set, the failure path calls
     * bdrv_invalidate_cache_all() to recover.
     */
    bool restart_block = false;
    int cur_state = MIGRATION_STATUS_ACTIVE;
    /*
     * With pause-before-switchover the state transition is left to
     * migration_maybe_pause() below; otherwise switch right away.
     */
    if (!migrate_pause_before_switchover()) {
        migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
                          MIGRATION_STATUS_POSTCOPY_ACTIVE);
    }

    trace_postcopy_start();
    qemu_mutex_lock_iothread();
    trace_postcopy_start_set_run();

    qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
    global_state_store();
    ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
    if (ret < 0) {
        goto fail;
    }

    /* No-op (returns 0) unless pause-before-switchover is enabled */
    ret = migration_maybe_pause(ms, &cur_state,
                                MIGRATION_STATUS_POSTCOPY_ACTIVE);
    if (ret < 0) {
        goto fail;
    }

    ret = bdrv_inactivate_all();
    if (ret < 0) {
        goto fail;
    }
    restart_block = true;

    /*
     * Cause any non-postcopiable, but iterative devices to
     * send out their final data.
     */
    qemu_savevm_state_complete_precopy(ms->to_dst_file, true, false);

    /*
     * in Finish migrate and with the io-lock held everything should
     * be quiet, but we've potentially still got dirty pages and we
     * need to tell the destination to throw any pages it's already received
     * that are dirty
     */
    if (migrate_postcopy_ram()) {
        if (ram_postcopy_send_discard_bitmap(ms)) {
            error_report("postcopy send discard bitmap failed");
            goto fail;
        }
    }

    /*
     * send rest of state - note things that are doing postcopy
     * will notice we're in POSTCOPY_ACTIVE and not actually
     * wrap their state up here
     */
    /* 0 max-postcopy-bandwidth means unlimited */
    if (!bandwidth) {
        qemu_file_set_rate_limit(ms->to_dst_file, INT64_MAX);
    } else {
        /* The limit is applied per BUFFER_DELAY chunk, hence the division */
        qemu_file_set_rate_limit(ms->to_dst_file, bandwidth / XFER_LIMIT_RATIO);
    }
    if (migrate_postcopy_ram()) {
        /* Ping just for debugging, helps line traces up */
        qemu_savevm_send_ping(ms->to_dst_file, 2);
    }

    /*
     * While loading the device state we may trigger page transfer
     * requests and the fd must be free to process those, and thus
     * the destination must read the whole device state off the fd before
     * it starts processing it.  Unfortunately the ad-hoc migration format
     * doesn't allow the destination to know the size to read without fully
     * parsing it through each devices load-state code (especially the open
     * coded devices that use get/put).
     * So we wrap the device state up in a package with a length at the start;
     * to do this we use a qemu_buf to hold the whole of the device state.
     */
    bioc = qio_channel_buffer_new(4096);
    qio_channel_set_name(QIO_CHANNEL(bioc), "migration-postcopy-buffer");
    fb = qemu_fopen_channel_output(QIO_CHANNEL(bioc));
    /*
     * Drop our own reference to the channel.
     * NOTE(review): bioc is still dereferenced below, so fb presumably
     * holds a reference of its own until qemu_fclose(fb) - confirm.
     */
    object_unref(OBJECT(bioc));

    /*
     * Make sure the receiver can get incoming pages before we send the rest
     * of the state
     */
    qemu_savevm_send_postcopy_listen(fb);

    qemu_savevm_state_complete_precopy(fb, false, false);
    if (migrate_postcopy_ram()) {
        qemu_savevm_send_ping(fb, 3);
    }

    qemu_savevm_send_postcopy_run(fb);

    /* <><> end of stuff going into the package */

    /* Last point of recovery; as soon as we send the package the destination
     * can open devices and potentially start running.
     * Lets just check again we've not got any errors.
     */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored (pre package)");
        goto fail_closefb;
    }

    /* From here on the failure path must not touch the block devices */
    restart_block = false;

    /* Now send that blob */
    if (qemu_savevm_send_packaged(ms->to_dst_file, bioc->data, bioc->usage)) {
        goto fail_closefb;
    }
    qemu_fclose(fb);

    /* Send a notify to give a chance for anything that needs to happen
     * at the transition to postcopy and after the device state; in particular
     * spice needs to trigger a transition now
     */
    ms->postcopy_after_devices = true;
    notifier_list_notify(&migration_state_notifiers, ms);

    ms->downtime =  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - time_at_stop;

    qemu_mutex_unlock_iothread();

    if (migrate_postcopy_ram()) {
        /*
         * Although this ping is just for debug, it could potentially be
         * used for getting a better measurement of downtime at the source.
         */
        qemu_savevm_send_ping(ms->to_dst_file, 4);
    }

    if (migrate_release_ram()) {
        ram_postcopy_migrated_memory_release(ms);
    }

    /* Final check of the stream; its error code is our return value */
    ret = qemu_file_get_error(ms->to_dst_file);
    if (ret) {
        error_report("postcopy_start: Migration stream errored");
        migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                              MIGRATION_STATUS_FAILED);
    }

    return ret;

    /* Failure paths; both are entered with the iothread lock still held */
fail_closefb:
    qemu_fclose(fb);
fail:
    /*
     * NOTE(review): presumably a no-op when the state is not
     * POSTCOPY_ACTIVE at this point - confirm against migrate_set_state().
     */
    migrate_set_state(&ms->state, MIGRATION_STATUS_POSTCOPY_ACTIVE,
                          MIGRATION_STATUS_FAILED);
    if (restart_block) {
        /* A failure happened early enough that we know the destination hasn't
         * accessed block devices, so we're safe to recover.
         */
        Error *local_err = NULL;

        bdrv_invalidate_cache_all(&local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }
    qemu_mutex_unlock_iothread();
    return -1;
}
2706
2707 /**
2708  * migration_maybe_pause: Pause if required to by
2709  * migrate_pause_before_switchover called with the iothread locked
2710  * Returns: 0 on success
2711  */
2712 static int migration_maybe_pause(MigrationState *s,
2713                                  int *current_active_state,
2714                                  int new_state)
2715 {
2716     if (!migrate_pause_before_switchover()) {
2717         return 0;
2718     }
2719
2720     /* Since leaving this state is not atomic with posting the semaphore
2721      * it's possible that someone could have issued multiple migrate_continue
2722      * and the semaphore is incorrectly positive at this point;
2723      * the docs say it's undefined to reinit a semaphore that's already
2724      * init'd, so use timedwait to eat up any existing posts.
2725      */
2726     while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
2727         /* This block intentionally left blank */
2728     }
2729
2730     qemu_mutex_unlock_iothread();
2731     migrate_set_state(&s->state, *current_active_state,
2732                       MIGRATION_STATUS_PRE_SWITCHOVER);
2733     qemu_sem_wait(&s->pause_sem);
2734     migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
2735                       new_state);
2736     *current_active_state = new_state;
2737     qemu_mutex_lock_iothread();
2738
2739     return s->state == new_state ? 0 : -EINVAL;
2740 }
2741
2742 /**
2743  * migration_completion: Used by migration_thread when there's not much left.
2744  *   The caller 'breaks' the loop when this returns.
2745  *
2746  * @s: Current migration state
2747  */
2748 static void migration_completion(MigrationState *s)
2749 {
2750     int ret;
2751     int current_active_state = s->state;
2752
2753     if (s->state == MIGRATION_STATUS_ACTIVE) {
2754         qemu_mutex_lock_iothread();
2755         s->downtime_start = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
2756         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
2757         s->vm_was_running = runstate_is_running();
2758         ret = global_state_store();
2759
2760         if (!ret) {
2761             bool inactivate = !migrate_colo_enabled();
2762             ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
2763             if (ret >= 0) {
2764                 ret = migration_maybe_pause(s, &current_active_state,
2765                                             MIGRATION_STATUS_DEVICE);
2766             }
2767             if (ret >= 0) {
2768                 qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
2769                 ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
2770                                                          inactivate);
2771             }
2772             if (inactivate && ret >= 0) {
2773                 s->block_inactive = true;
2774             }
2775         }
2776         qemu_mutex_unlock_iothread();
2777
2778         if (ret < 0) {
2779             goto fail;
2780         }
2781     } else if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2782         trace_migration_completion_postcopy_end();
2783
2784         qemu_savevm_state_complete_postcopy(s->to_dst_file);
2785         trace_migration_completion_postcopy_end_after_complete();
2786     }
2787
2788     /*
2789      * If rp was opened we must clean up the thread before
2790      * cleaning everything else up (since if there are no failures
2791      * it will wait for the destination to send it's status in
2792      * a SHUT command).
2793      */
2794     if (s->rp_state.from_dst_file) {
2795         int rp_error;
2796         trace_migration_return_path_end_before();
2797         rp_error = await_return_path_close_on_source(s);
2798         trace_migration_return_path_end_after(rp_error);
2799         if (rp_error) {
2800             goto fail_invalidate;
2801         }
2802     }
2803
2804     if (qemu_file_get_error(s->to_dst_file)) {
2805         trace_migration_completion_file_err();
2806         goto fail_invalidate;
2807     }
2808
2809     if (!migrate_colo_enabled()) {
2810         migrate_set_state(&s->state, current_active_state,
2811                           MIGRATION_STATUS_COMPLETED);
2812     }
2813
2814     return;
2815
2816 fail_invalidate:
2817     /* If not doing postcopy, vm_start() will be called: let's regain
2818      * control on images.
2819      */
2820     if (s->state == MIGRATION_STATUS_ACTIVE ||
2821         s->state == MIGRATION_STATUS_DEVICE) {
2822         Error *local_err = NULL;
2823
2824         qemu_mutex_lock_iothread();
2825         bdrv_invalidate_cache_all(&local_err);
2826         if (local_err) {
2827             error_report_err(local_err);
2828         } else {
2829             s->block_inactive = false;
2830         }
2831         qemu_mutex_unlock_iothread();
2832     }
2833
2834 fail:
2835     migrate_set_state(&s->state, current_active_state,
2836                       MIGRATION_STATUS_FAILED);
2837 }
2838
2839 bool migrate_colo_enabled(void)
2840 {
2841     MigrationState *s = migrate_get_current();
2842     return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
2843 }
2844
/* Outcome of the migration thread's per-iteration error check */
typedef enum MigThrError {
    MIG_THR_ERR_NONE = 0,       /* No error detected */
    MIG_THR_ERR_RECOVERED = 1,  /* Detected error, but resumed successfully */
    MIG_THR_ERR_FATAL = 2,      /* Detected fatal error, need to exit */
} MigThrError;
2853
2854 static int postcopy_resume_handshake(MigrationState *s)
2855 {
2856     qemu_savevm_send_postcopy_resume(s->to_dst_file);
2857
2858     while (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2859         qemu_sem_wait(&s->rp_state.rp_sem);
2860     }
2861
2862     if (s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
2863         return 0;
2864     }
2865
2866     return -1;
2867 }
2868
2869 /* Return zero if success, or <0 for error */
2870 static int postcopy_do_resume(MigrationState *s)
2871 {
2872     int ret;
2873
2874     /*
2875      * Call all the resume_prepare() hooks, so that modules can be
2876      * ready for the migration resume.
2877      */
2878     ret = qemu_savevm_state_resume_prepare(s);
2879     if (ret) {
2880         error_report("%s: resume_prepare() failure detected: %d",
2881                      __func__, ret);
2882         return ret;
2883     }
2884
2885     /*
2886      * Last handshake with destination on the resume (destination will
2887      * switch to postcopy-active afterwards)
2888      */
2889     ret = postcopy_resume_handshake(s);
2890     if (ret) {
2891         error_report("%s: handshake failed: %d", __func__, ret);
2892         return ret;
2893     }
2894
2895     return 0;
2896 }
2897
2898 /*
2899  * We don't return until we are in a safe state to continue current
2900  * postcopy migration.  Returns MIG_THR_ERR_RECOVERED if recovered, or
2901  * MIG_THR_ERR_FATAL if unrecovery failure happened.
2902  */
2903 static MigThrError postcopy_pause(MigrationState *s)
2904 {
2905     assert(s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
2906
2907     while (true) {
2908         QEMUFile *file;
2909
2910         migrate_set_state(&s->state, s->state,
2911                           MIGRATION_STATUS_POSTCOPY_PAUSED);
2912
2913         /* Current channel is possibly broken. Release it. */
2914         assert(s->to_dst_file);
2915         qemu_mutex_lock(&s->qemu_file_lock);
2916         file = s->to_dst_file;
2917         s->to_dst_file = NULL;
2918         qemu_mutex_unlock(&s->qemu_file_lock);
2919
2920         qemu_file_shutdown(file);
2921         qemu_fclose(file);
2922
2923         error_report("Detected IO failure for postcopy. "
2924                      "Migration paused.");
2925
2926         /*
2927          * We wait until things fixed up. Then someone will setup the
2928          * status back for us.
2929          */
2930         while (s->state == MIGRATION_STATUS_POSTCOPY_PAUSED) {
2931             qemu_sem_wait(&s->postcopy_pause_sem);
2932         }
2933
2934         if (s->state == MIGRATION_STATUS_POSTCOPY_RECOVER) {
2935             /* Woken up by a recover procedure. Give it a shot */
2936
2937             /*
2938              * Firstly, let's wake up the return path now, with a new
2939              * return path channel.
2940              */
2941             qemu_sem_post(&s->postcopy_pause_rp_sem);
2942
2943             /* Do the resume logic */
2944             if (postcopy_do_resume(s) == 0) {
2945                 /* Let's continue! */
2946                 trace_postcopy_pause_continued();
2947                 return MIG_THR_ERR_RECOVERED;
2948             } else {
2949                 /*
2950                  * Something wrong happened during the recovery, let's
2951                  * pause again. Pause is always better than throwing
2952                  * data away.
2953                  */
2954                 continue;
2955             }
2956         } else {
2957             /* This is not right... Time to quit. */
2958             return MIG_THR_ERR_FATAL;
2959         }
2960     }
2961 }
2962
2963 static MigThrError migration_detect_error(MigrationState *s)
2964 {
2965     int ret;
2966     int state = s->state;
2967
2968     if (state == MIGRATION_STATUS_CANCELLING ||
2969         state == MIGRATION_STATUS_CANCELLED) {
2970         /* End the migration, but don't set the state to failed */
2971         return MIG_THR_ERR_FATAL;
2972     }
2973
2974     /* Try to detect any file errors */
2975     ret = qemu_file_get_error(s->to_dst_file);
2976
2977     if (!ret) {
2978         /* Everything is fine */
2979         return MIG_THR_ERR_NONE;
2980     }
2981
2982     if (state == MIGRATION_STATUS_POSTCOPY_ACTIVE && ret == -EIO) {
2983         /*
2984          * For postcopy, we allow the network to be down for a
2985          * while. After that, it can be continued by a
2986          * recovery phase.
2987          */
2988         return postcopy_pause(s);
2989     } else {
2990         /*
2991          * For precopy (or postcopy with error outside IO), we fail
2992          * with no time.
2993          */
2994         migrate_set_state(&s->state, state, MIGRATION_STATUS_FAILED);
2995         trace_migration_thread_file_err();
2996
2997         /* Time to stop the migration, now. */
2998         return MIG_THR_ERR_FATAL;
2999     }
3000 }
3001
3002 /* How many bytes have we transferred since the beggining of the migration */
3003 static uint64_t migration_total_bytes(MigrationState *s)
3004 {
3005     return qemu_ftell(s->to_dst_file) + ram_counters.multifd_bytes;
3006 }
3007
3008 static void migration_calculate_complete(MigrationState *s)
3009 {
3010     uint64_t bytes = migration_total_bytes(s);
3011     int64_t end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3012     int64_t transfer_time;
3013
3014     s->total_time = end_time - s->start_time;
3015     if (!s->downtime) {
3016         /*
3017          * It's still not set, so we are precopy migration.  For
3018          * postcopy, downtime is calculated during postcopy_start().
3019          */
3020         s->downtime = end_time - s->downtime_start;
3021     }
3022
3023     transfer_time = s->total_time - s->setup_time;
3024     if (transfer_time) {
3025         s->mbps = ((double) bytes * 8.0) / transfer_time / 1000;
3026     }
3027 }
3028
3029 static void migration_update_counters(MigrationState *s,
3030                                       int64_t current_time)
3031 {
3032     uint64_t transferred, transferred_pages, time_spent;
3033     uint64_t current_bytes; /* bytes transferred since the beginning */
3034     double bandwidth;
3035
3036     if (current_time < s->iteration_start_time + BUFFER_DELAY) {
3037         return;
3038     }
3039
3040     current_bytes = migration_total_bytes(s);
3041     transferred = current_bytes - s->iteration_initial_bytes;
3042     time_spent = current_time - s->iteration_start_time;
3043     bandwidth = (double)transferred / time_spent;
3044     s->threshold_size = bandwidth * s->parameters.downtime_limit;
3045
3046     s->mbps = (((double) transferred * 8.0) /
3047                ((double) time_spent / 1000.0)) / 1000.0 / 1000.0;
3048
3049     transferred_pages = ram_get_total_transferred_pages() -
3050                             s->iteration_initial_pages;
3051     s->pages_per_second = (double) transferred_pages /
3052                              (((double) time_spent / 1000.0));
3053
3054     /*
3055      * if we haven't sent anything, we don't want to
3056      * recalculate. 10000 is a small enough number for our purposes
3057      */
3058     if (ram_counters.dirty_pages_rate && transferred > 10000) {
3059         s->expected_downtime = ram_counters.remaining / bandwidth;
3060     }
3061
3062     qemu_file_reset_rate_limit(s->to_dst_file);
3063
3064     s->iteration_start_time = current_time;
3065     s->iteration_initial_bytes = current_bytes;
3066     s->iteration_initial_pages = ram_get_total_transferred_pages();
3067
3068     trace_migrate_transferred(transferred, time_spent,
3069                               bandwidth, s->threshold_size);
3070 }
3071
/* What the migration thread should do after one iteration step */
typedef enum {
    /* Resume current iteration */
    MIG_ITERATE_RESUME,
    /* Skip current iteration */
    MIG_ITERATE_SKIP,
    /* Break the loop */
    MIG_ITERATE_BREAK,
} MigIterateState;
3078
3079 /*
3080  * Return true if continue to the next iteration directly, false
3081  * otherwise.
3082  */
3083 static MigIterateState migration_iteration_run(MigrationState *s)
3084 {
3085     uint64_t pending_size, pend_pre, pend_compat, pend_post;
3086     bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
3087
3088     qemu_savevm_state_pending(s->to_dst_file, s->threshold_size, &pend_pre,
3089                               &pend_compat, &pend_post);
3090     pending_size = pend_pre + pend_compat + pend_post;
3091
3092     trace_migrate_pending(pending_size, s->threshold_size,
3093                           pend_pre, pend_compat, pend_post);
3094
3095     if (pending_size && pending_size >= s->threshold_size) {
3096         /* Still a significant amount to transfer */
3097         if (migrate_postcopy() && !in_postcopy &&
3098             pend_pre <= s->threshold_size &&
3099             atomic_read(&s->start_postcopy)) {
3100             if (postcopy_start(s)) {
3101                 error_report("%s: postcopy failed to start", __func__);
3102             }
3103             return MIG_ITERATE_SKIP;
3104         }
3105         /* Just another iteration step */
3106         qemu_savevm_state_iterate(s->to_dst_file,
3107             s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE);
3108     } else {
3109         trace_migration_thread_low_pending(pending_size);
3110         migration_completion(s);
3111         return MIG_ITERATE_BREAK;
3112     }
3113
3114     return MIG_ITERATE_RESUME;
3115 }
3116
3117 static void migration_iteration_finish(MigrationState *s)
3118 {
3119     /* If we enabled cpu throttling for auto-converge, turn it off. */
3120     cpu_throttle_stop();
3121
3122     qemu_mutex_lock_iothread();
3123     switch (s->state) {
3124     case MIGRATION_STATUS_COMPLETED:
3125         migration_calculate_complete(s);
3126         runstate_set(RUN_STATE_POSTMIGRATE);
3127         break;
3128
3129     case MIGRATION_STATUS_ACTIVE:
3130         /*
3131          * We should really assert here, but since it's during
3132          * migration, let's try to reduce the usage of assertions.
3133          */
3134         if (!migrate_colo_enabled()) {
3135             error_report("%s: critical error: calling COLO code without "
3136                          "COLO enabled", __func__);
3137         }
3138         migrate_start_colo_process(s);
3139         /*
3140          * Fixme: we will run VM in COLO no matter its old running state.
3141          * After exited COLO, we will keep running.
3142          */
3143         s->vm_was_running = true;
3144         /* Fallthrough */
3145     case MIGRATION_STATUS_FAILED:
3146     case MIGRATION_STATUS_CANCELLED:
3147     case MIGRATION_STATUS_CANCELLING:
3148         if (s->vm_was_running) {
3149             vm_start();
3150         } else {
3151             if (runstate_check(RUN_STATE_FINISH_MIGRATE)) {
3152                 runstate_set(RUN_STATE_POSTMIGRATE);
3153             }
3154         }
3155         break;
3156
3157     default:
3158         /* Should not reach here, but if so, forgive the VM. */
3159         error_report("%s: Unknown ending state %d", __func__, s->state);
3160         break;
3161     }
3162     migrate_fd_cleanup_schedule(s);
3163     qemu_mutex_unlock_iothread();
3164 }
3165
3166 void migration_make_urgent_request(void)
3167 {
3168     qemu_sem_post(&migrate_get_current()->rate_limit_sem);
3169 }
3170
3171 void migration_consume_urgent_request(void)
3172 {
3173     qemu_sem_wait(&migrate_get_current()->rate_limit_sem);
3174 }
3175
3176 /*
3177  * Master migration thread on the source VM.
3178  * It drives the migration and pumps the data down the outgoing channel.
3179  */
3180 static void *migration_thread(void *opaque)
3181 {
3182     MigrationState *s = opaque;
3183     int64_t setup_start = qemu_clock_get_ms(QEMU_CLOCK_HOST);
3184     MigThrError thr_error;
3185     bool urgent = false;
3186
3187     rcu_register_thread();
3188
3189     object_ref(OBJECT(s));
3190     s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3191
3192     qemu_savevm_state_header(s->to_dst_file);
3193
3194     /*
3195      * If we opened the return path, we need to make sure dst has it
3196      * opened as well.
3197      */
3198     if (s->rp_state.from_dst_file) {
3199         /* Now tell the dest that it should open its end so it can reply */
3200         qemu_savevm_send_open_return_path(s->to_dst_file);
3201
3202         /* And do a ping that will make stuff easier to debug */
3203         qemu_savevm_send_ping(s->to_dst_file, 1);
3204     }
3205
3206     if (migrate_postcopy()) {
3207         /*
3208          * Tell the destination that we *might* want to do postcopy later;
3209          * if the other end can't do postcopy it should fail now, nice and
3210          * early.
3211          */
3212         qemu_savevm_send_postcopy_advise(s->to_dst_file);
3213     }
3214
3215     if (migrate_colo_enabled()) {
3216         /* Notify migration destination that we enable COLO */
3217         qemu_savevm_send_colo_enable(s->to_dst_file);
3218     }
3219
3220     qemu_savevm_state_setup(s->to_dst_file);
3221
3222     s->setup_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) - setup_start;
3223     migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
3224                       MIGRATION_STATUS_ACTIVE);
3225
3226     trace_migration_thread_setup_complete();
3227
3228     while (s->state == MIGRATION_STATUS_ACTIVE ||
3229            s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE) {
3230         int64_t current_time;
3231
3232         if (urgent || !qemu_file_rate_limit(s->to_dst_file)) {
3233             MigIterateState iter_state = migration_iteration_run(s);
3234             if (iter_state == MIG_ITERATE_SKIP) {
3235                 continue;
3236             } else if (iter_state == MIG_ITERATE_BREAK) {
3237                 break;
3238             }
3239         }
3240
3241         /*
3242          * Try to detect any kind of failures, and see whether we
3243          * should stop the migration now.
3244          */
3245         thr_error = migration_detect_error(s);
3246         if (thr_error == MIG_THR_ERR_FATAL) {
3247             /* Stop migration */
3248             break;
3249         } else if (thr_error == MIG_THR_ERR_RECOVERED) {
3250             /*
3251              * Just recovered from a e.g. network failure, reset all
3252              * the local variables. This is important to avoid
3253              * breaking transferred_bytes and bandwidth calculation
3254              */
3255             s->iteration_start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3256             s->iteration_initial_bytes = 0;
3257         }
3258
3259         current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
3260
3261         migration_update_counters(s, current_time);
3262
3263         urgent = false;
3264         if (qemu_file_rate_limit(s->to_dst_file)) {
3265             /* Wait for a delay to do rate limiting OR
3266              * something urgent to post the semaphore.
3267              */
3268             int ms = s->iteration_start_time + BUFFER_DELAY - current_time;
3269             trace_migration_thread_ratelimit_pre(ms);
3270             if (qemu_sem_timedwait(&s->rate_limit_sem, ms) == 0) {
3271                 /* We were worken by one or more urgent things but
3272                  * the timedwait will have consumed one of them.
3273                  * The service routine for the urgent wake will dec
3274                  * the semaphore itself for each item it consumes,
3275                  * so add this one we just eat back.
3276                  */
3277                 qemu_sem_post(&s->rate_limit_sem);
3278                 urgent = true;
3279             }
3280             trace_migration_thread_ratelimit_post(urgent);
3281         }
3282     }
3283
3284     trace_migration_thread_after_loop();
3285     migration_iteration_finish(s);
3286     object_unref(OBJECT(s));
3287     rcu_unregister_thread();
3288     return NULL;
3289 }
3290
3291 void migrate_fd_connect(MigrationState *s, Error *error_in)
3292 {
3293     int64_t rate_limit;
3294     bool resume = s->state == MIGRATION_STATUS_POSTCOPY_PAUSED;
3295
3296     s->expected_downtime = s->parameters.downtime_limit;
3297     s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup_bh, s);
3298     if (error_in) {
3299         migrate_fd_error(s, error_in);
3300         migrate_fd_cleanup(s);
3301         return;
3302     }
3303
3304     if (resume) {
3305         /* This is a resumed migration */
3306         rate_limit = INT64_MAX;
3307     } else {
3308         /* This is a fresh new migration */
3309         rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
3310
3311         /* Notify before starting migration thread */
3312         notifier_list_notify(&migration_state_notifiers, s);
3313     }
3314
3315     qemu_file_set_rate_limit(s->to_dst_file, rate_limit);
3316     qemu_file_set_blocking(s->to_dst_file, true);
3317
3318     /*
3319      * Open the return path. For postcopy, it is used exclusively. For
3320      * precopy, only if user specified "return-path" capability would
3321      * QEMU uses the return path.
3322      */
3323     if (migrate_postcopy_ram() || migrate_use_return_path()) {
3324         if (open_return_path_on_source(s, !resume)) {
3325             error_report("Unable to open return-path for postcopy");
3326             migrate_set_state(&s->state, s->state, MIGRATION_STATUS_FAILED);
3327             migrate_fd_cleanup(s);
3328             return;
3329         }
3330     }
3331
3332     if (resume) {
3333         /* Wakeup the main migration thread to do the recovery */
3334         migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED,
3335                           MIGRATION_STATUS_POSTCOPY_RECOVER);
3336         qemu_sem_post(&s->postcopy_pause_sem);
3337         return;
3338     }
3339
3340     if (multifd_save_setup() != 0) {
3341         migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
3342                           MIGRATION_STATUS_FAILED);
3343         migrate_fd_cleanup(s);
3344         return;
3345     }
3346     qemu_thread_create(&s->thread, "live_migration", migration_thread, s,
3347                        QEMU_THREAD_JOINABLE);
3348     s->migration_thread_running = true;
3349 }
3350
3351 void migration_global_dump(Monitor *mon)
3352 {
3353     MigrationState *ms = migrate_get_current();
3354
3355     monitor_printf(mon, "globals:\n");
3356     monitor_printf(mon, "store-global-state: %s\n",
3357                    ms->store_global_state ? "on" : "off");
3358     monitor_printf(mon, "only-migratable: %s\n",
3359                    only_migratable ? "on" : "off");
3360     monitor_printf(mon, "send-configuration: %s\n",
3361                    ms->send_configuration ? "on" : "off");
3362     monitor_printf(mon, "send-section-footer: %s\n",
3363                    ms->send_section_footer ? "on" : "off");
3364     monitor_printf(mon, "decompress-error-check: %s\n",
3365                    ms->decompress_error_check ? "on" : "off");
3366     monitor_printf(mon, "clear-bitmap-shift: %u\n",
3367                    ms->clear_bitmap_shift);
3368 }
3369
/*
 * Boolean property @prop_name stored in MigrationState's
 * enabled_capabilities[@cap], defaulting to false.
 */
#define DEFINE_PROP_MIG_CAP(prop_name, cap)                                 \
    DEFINE_PROP_BOOL(prop_name, MigrationState, enabled_capabilities[cap],  \
                     false)
3372
3373 static Property migration_properties[] = {
3374     DEFINE_PROP_BOOL("store-global-state", MigrationState,
3375                      store_global_state, true),
3376     DEFINE_PROP_BOOL("send-configuration", MigrationState,
3377                      send_configuration, true),
3378     DEFINE_PROP_BOOL("send-section-footer", MigrationState,
3379                      send_section_footer, true),
3380     DEFINE_PROP_BOOL("decompress-error-check", MigrationState,
3381                       decompress_error_check, true),
3382     DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState,
3383                       clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT),
3384
3385     /* Migration parameters */
3386     DEFINE_PROP_UINT8("x-compress-level", MigrationState,
3387                       parameters.compress_level,
3388                       DEFAULT_MIGRATE_COMPRESS_LEVEL),
3389     DEFINE_PROP_UINT8("x-compress-threads", MigrationState,
3390                       parameters.compress_threads,
3391                       DEFAULT_MIGRATE_COMPRESS_THREAD_COUNT),
3392     DEFINE_PROP_BOOL("x-compress-wait-thread", MigrationState,
3393                       parameters.compress_wait_thread, true),
3394     DEFINE_PROP_UINT8("x-decompress-threads", MigrationState,
3395                       parameters.decompress_threads,
3396                       DEFAULT_MIGRATE_DECOMPRESS_THREAD_COUNT),
3397     DEFINE_PROP_UINT8("x-cpu-throttle-initial", MigrationState,
3398                       parameters.cpu_throttle_initial,
3399                       DEFAULT_MIGRATE_CPU_THROTTLE_INITIAL),
3400     DEFINE_PROP_UINT8("x-cpu-throttle-increment", MigrationState,
3401                       parameters.cpu_throttle_increment,
3402                       DEFAULT_MIGRATE_CPU_THROTTLE_INCREMENT),
3403     DEFINE_PROP_SIZE("x-max-bandwidth", MigrationState,
3404                       parameters.max_bandwidth, MAX_THROTTLE),
3405     DEFINE_PROP_UINT64("x-downtime-limit", MigrationState,
3406                       parameters.downtime_limit,
3407                       DEFAULT_MIGRATE_SET_DOWNTIME),
3408     DEFINE_PROP_UINT32("x-checkpoint-delay", MigrationState,
3409                       parameters.x_checkpoint_delay,
3410                       DEFAULT_MIGRATE_X_CHECKPOINT_DELAY),
3411     DEFINE_PROP_UINT8("multifd-channels", MigrationState,
3412                       parameters.multifd_channels,
3413                       DEFAULT_MIGRATE_MULTIFD_CHANNELS),
3414     DEFINE_PROP_SIZE("xbzrle-cache-size", MigrationState,
3415                       parameters.xbzrle_cache_size,
3416                       DEFAULT_MIGRATE_XBZRLE_CACHE_SIZE),
3417     DEFINE_PROP_SIZE("max-postcopy-bandwidth", MigrationState,
3418                       parameters.max_postcopy_bandwidth,
3419                       DEFAULT_MIGRATE_MAX_POSTCOPY_BANDWIDTH),
3420     DEFINE_PROP_UINT8("max-cpu-throttle", MigrationState,
3421                       parameters.max_cpu_throttle,
3422                       DEFAULT_MIGRATE_MAX_CPU_THROTTLE),
3423     DEFINE_PROP_SIZE("announce-initial", MigrationState,
3424                       parameters.announce_initial,
3425                       DEFAULT_MIGRATE_ANNOUNCE_INITIAL),
3426     DEFINE_PROP_SIZE("announce-max", MigrationState,
3427                       parameters.announce_max,
3428                       DEFAULT_MIGRATE_ANNOUNCE_MAX),
3429     DEFINE_PROP_SIZE("announce-rounds", MigrationState,
3430                       parameters.announce_rounds,
3431                       DEFAULT_MIGRATE_ANNOUNCE_ROUNDS),
3432     DEFINE_PROP_SIZE("announce-step", MigrationState,
3433                       parameters.announce_step,
3434                       DEFAULT_MIGRATE_ANNOUNCE_STEP),
3435
3436     /* Migration capabilities */
3437     DEFINE_PROP_MIG_CAP("x-xbzrle", MIGRATION_CAPABILITY_XBZRLE),
3438     DEFINE_PROP_MIG_CAP("x-rdma-pin-all", MIGRATION_CAPABILITY_RDMA_PIN_ALL),
3439     DEFINE_PROP_MIG_CAP("x-auto-converge", MIGRATION_CAPABILITY_AUTO_CONVERGE),
3440     DEFINE_PROP_MIG_CAP("x-zero-blocks", MIGRATION_CAPABILITY_ZERO_BLOCKS),
3441     DEFINE_PROP_MIG_CAP("x-compress", MIGRATION_CAPABILITY_COMPRESS),
3442     DEFINE_PROP_MIG_CAP("x-events", MIGRATION_CAPABILITY_EVENTS),
3443     DEFINE_PROP_MIG_CAP("x-postcopy-ram", MIGRATION_CAPABILITY_POSTCOPY_RAM),
3444     DEFINE_PROP_MIG_CAP("x-colo", MIGRATION_CAPABILITY_X_COLO),
3445     DEFINE_PROP_MIG_CAP("x-release-ram", MIGRATION_CAPABILITY_RELEASE_RAM),
3446     DEFINE_PROP_MIG_CAP("x-block", MIGRATION_CAPABILITY_BLOCK),
3447     DEFINE_PROP_MIG_CAP("x-return-path", MIGRATION_CAPABILITY_RETURN_PATH),
3448     DEFINE_PROP_MIG_CAP("x-multifd", MIGRATION_CAPABILITY_MULTIFD),
3449
3450     DEFINE_PROP_END_OF_LIST(),
3451 };
3452
3453 static void migration_class_init(ObjectClass *klass, void *data)
3454 {
3455     DeviceClass *dc = DEVICE_CLASS(klass);
3456
3457     dc->user_creatable = false;
3458     dc->props = migration_properties;
3459 }
3460
3461 static void migration_instance_finalize(Object *obj)
3462 {
3463     MigrationState *ms = MIGRATION_OBJ(obj);
3464     MigrationParameters *params = &ms->parameters;
3465
3466     qemu_mutex_destroy(&ms->error_mutex);
3467     qemu_mutex_destroy(&ms->qemu_file_lock);
3468     g_free(params->tls_hostname);
3469     g_free(params->tls_creds);
3470     qemu_sem_destroy(&ms->rate_limit_sem);
3471     qemu_sem_destroy(&ms->pause_sem);
3472     qemu_sem_destroy(&ms->postcopy_pause_sem);
3473     qemu_sem_destroy(&ms->postcopy_pause_rp_sem);
3474     qemu_sem_destroy(&ms->rp_state.rp_sem);
3475     error_free(ms->error);
3476 }
3477
3478 static void migration_instance_init(Object *obj)
3479 {
3480     MigrationState *ms = MIGRATION_OBJ(obj);
3481     MigrationParameters *params = &ms->parameters;
3482
3483     ms->state = MIGRATION_STATUS_NONE;
3484     ms->mbps = -1;
3485     ms->pages_per_second = -1;
3486     qemu_sem_init(&ms->pause_sem, 0);
3487     qemu_mutex_init(&ms->error_mutex);
3488
3489     params->tls_hostname = g_strdup("");
3490     params->tls_creds = g_strdup("");
3491
3492     /* Set has_* up only for parameter checks */
3493     params->has_compress_level = true;
3494     params->has_compress_threads = true;
3495     params->has_decompress_threads = true;
3496     params->has_cpu_throttle_initial = true;
3497     params->has_cpu_throttle_increment = true;
3498     params->has_max_bandwidth = true;
3499     params->has_downtime_limit = true;
3500     params->has_x_checkpoint_delay = true;
3501     params->has_block_incremental = true;
3502     params->has_multifd_channels = true;
3503     params->has_xbzrle_cache_size = true;
3504     params->has_max_postcopy_bandwidth = true;
3505     params->has_max_cpu_throttle = true;
3506     params->has_announce_initial = true;
3507     params->has_announce_max = true;
3508     params->has_announce_rounds = true;
3509     params->has_announce_step = true;
3510
3511     qemu_sem_init(&ms->postcopy_pause_sem, 0);
3512     qemu_sem_init(&ms->postcopy_pause_rp_sem, 0);
3513     qemu_sem_init(&ms->rp_state.rp_sem, 0);
3514     qemu_sem_init(&ms->rate_limit_sem, 0);
3515     qemu_mutex_init(&ms->qemu_file_lock);
3516 }
3517
3518 /*
3519  * Return true if check pass, false otherwise. Error will be put
3520  * inside errp if provided.
3521  */
3522 static bool migration_object_check(MigrationState *ms, Error **errp)
3523 {
3524     MigrationCapabilityStatusList *head = NULL;
3525     /* Assuming all off */
3526     bool cap_list[MIGRATION_CAPABILITY__MAX] = { 0 }, ret;
3527     int i;
3528
3529     if (!migrate_params_check(&ms->parameters, errp)) {
3530         return false;
3531     }
3532
3533     for (i = 0; i < MIGRATION_CAPABILITY__MAX; i++) {
3534         if (ms->enabled_capabilities[i]) {
3535             head = migrate_cap_add(head, i, true);
3536         }
3537     }
3538
3539     ret = migrate_caps_check(cap_list, head, errp);
3540
3541     /* It works with head == NULL */
3542     qapi_free_MigrationCapabilityStatusList(head);
3543
3544     return ret;
3545 }
3546
3547 static const TypeInfo migration_type = {
3548     .name = TYPE_MIGRATION,
3549     /*
3550      * NOTE: TYPE_MIGRATION is not really a device, as the object is
3551      * not created using qdev_create(), it is not attached to the qdev
3552      * device tree, and it is never realized.
3553      *
3554      * TODO: Make this TYPE_OBJECT once QOM provides something like
3555      * TYPE_DEVICE's "-global" properties.
3556      */
3557     .parent = TYPE_DEVICE,
3558     .class_init = migration_class_init,
3559     .class_size = sizeof(MigrationClass),
3560     .instance_size = sizeof(MigrationState),
3561     .instance_init = migration_instance_init,
3562     .instance_finalize = migration_instance_finalize,
3563 };
3564
3565 static void register_migration_types(void)
3566 {
3567     type_register_static(&migration_type);
3568 }
3569
3570 type_init(register_migration_types);
This page took 0.241225 seconds and 4 git commands to generate.