4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "qemu/osdep.h"
32 #include "qemu/cutils.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "qemu/main-loop.h"
38 #include "migration.h"
40 #include "migration/register.h"
41 #include "migration/misc.h"
42 #include "qemu-file.h"
43 #include "postcopy-ram.h"
44 #include "page_cache.h"
45 #include "qemu/error-report.h"
46 #include "qapi/error.h"
47 #include "qapi/qapi-events-migration.h"
48 #include "qapi/qmp/qerror.h"
50 #include "exec/ram_addr.h"
51 #include "exec/target_page.h"
52 #include "qemu/rcu_queue.h"
53 #include "migration/colo.h"
55 #include "sysemu/sysemu.h"
56 #include "qemu/uuid.h"
59 /***********************************************************/
60 /* ram save/restore */
62 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
63 * worked for pages that were filled with the same char. We switched
64 * it to only search for the zero value, and renamed it to avoid
65 * confusion with RAM_SAVE_FLAG_COMPRESS_PAGE.
68 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
69 #define RAM_SAVE_FLAG_ZERO 0x02
70 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
71 #define RAM_SAVE_FLAG_PAGE 0x08
72 #define RAM_SAVE_FLAG_EOS 0x10
73 #define RAM_SAVE_FLAG_CONTINUE 0x20
74 #define RAM_SAVE_FLAG_XBZRLE 0x40
75 /* 0x80 is reserved in migration.h; start with 0x100 for the next flag */
76 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
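/*
 * A hedged sketch of how these flags travel on the wire: they sit in the
 * low bits of the 64-bit page offset written by save_page_header() further
 * down, so announcing a zero page at offset 0x2000 of the block already
 * being sent ends up as roughly
 *
 *     qemu_put_be64(f, 0x2000 | RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_CONTINUE);
 *
 * and the load side is expected to split the value back into
 * "offset & TARGET_PAGE_MASK" plus the flag bits.
 */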
78 static inline bool is_zero_range(uint8_t *p, uint64_t size)
80 return buffer_is_zero(p, size);
83 XBZRLECacheStats xbzrle_counters;
85 /* This struct contains the XBZRLE cache and a static page
86    used by the compression */
88 /* buffer used for XBZRLE encoding */
90 /* buffer for storing page content */
92 /* Cache for XBZRLE, Protected by lock. */
95 /* it will store a page full of zeros */
96 uint8_t *zero_target_page;
97 /* buffer used for XBZRLE decoding */
101 static void XBZRLE_cache_lock(void)
103 if (migrate_use_xbzrle())
104 qemu_mutex_lock(&XBZRLE.lock);
107 static void XBZRLE_cache_unlock(void)
109 if (migrate_use_xbzrle())
110 qemu_mutex_unlock(&XBZRLE.lock);
114 * xbzrle_cache_resize: resize the xbzrle cache
116 * This function is called from qmp_migrate_set_cache_size in the main
117 * thread, possibly while a migration is in progress. A running
118 * migration may be using the cache and might finish during this call,
119 * hence changes to the cache are protected by XBZRLE.lock.
121 * Returns 0 for success or -1 for error
123 * @new_size: new cache size
124 * @errp: set *errp with the reason if the check fails
126 int xbzrle_cache_resize(int64_t new_size, Error **errp)
128 PageCache *new_cache;
131 /* Check for truncation */
132 if (new_size != (size_t)new_size) {
133 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
134 "exceeding address space");
138 if (new_size == migrate_xbzrle_cache_size()) {
145 if (XBZRLE.cache != NULL) {
146 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
152 cache_fini(XBZRLE.cache);
153 XBZRLE.cache = new_cache;
156 XBZRLE_cache_unlock();
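/*
 * Hedged usage sketch: a caller such as the QMP cache-size handler is
 * expected to invoke this with an Error pointer, roughly
 *
 *     Error *err = NULL;
 *     if (xbzrle_cache_resize(64 * 1024 * 1024, &err) < 0) {
 *         error_report_err(err);
 *     }
 *
 * where the 64 MiB value is only illustrative, not a recommendation.
 */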
160 /* Should be holding either ram_list.mutex, or the RCU lock. */
161 #define RAMBLOCK_FOREACH_MIGRATABLE(block) \
162 INTERNAL_RAMBLOCK_FOREACH(block) \
163 if (!qemu_ram_is_migratable(block)) {} else
165 #undef RAMBLOCK_FOREACH
167 static void ramblock_recv_map_init(void)
171 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
172 assert(!rb->receivedmap);
173 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
177 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
179 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
183 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
185 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
188 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
190 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
193 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
196 bitmap_set_atomic(rb->receivedmap,
197 ramblock_recv_bitmap_offset(host_addr, rb),
201 #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
204 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
206 * Returns the number of bytes sent (>0) on success, or <0 on error.
208 int64_t ramblock_recv_bitmap_send(QEMUFile *file,
209 const char *block_name)
211 RAMBlock *block = qemu_ram_block_by_name(block_name);
212 unsigned long *le_bitmap, nbits;
216 error_report("%s: invalid block name: %s", __func__, block_name);
220 nbits = block->used_length >> TARGET_PAGE_BITS;
223 * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
224 * machines we may need 4 more bytes for padding (see below
225 * comment). So extend it a bit beforehand.
227 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
230 * Always use little endian when sending the bitmap. This is
231 * required when source and destination VMs are not using the
232 * same endianness. (Note: big endian won't work.)
234 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
236 /* Size of the bitmap, in bytes */
240 * size is always aligned to 8 bytes for 64bit machines, but it
241 * may not be true for 32bit machines. We need this padding to
242 * make sure the migration can survive even between 32bit and 64bit machines.
245 size = ROUND_UP(size, 8);
247 qemu_put_be64(file, size);
248 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
250 * Mark as an end, in case the middle part is screwed up due to
251 * some "mysterious" reason.
253 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
258 if (qemu_file_get_error(file)) {
259 return qemu_file_get_error(file);
262 return size + sizeof(size);
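/*
 * A sketch of what ramblock_recv_bitmap_send() puts on the wire for one
 * block, assuming the reader consumes it with the matching be64/buffer
 * helpers:
 *
 *     be64  size                         (bitmap length in bytes, padded to 8)
 *     u8    le_bitmap[size]              (little-endian received-page bitmap)
 *     be64  RAMBLOCK_RECV_BITMAP_ENDING  (sanity marker, 0x0123456789abcdef)
 *
 * The returned value, size + sizeof(size), counts the size field and the
 * bitmap itself but not the trailing marker.
 */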
266 * An outstanding page request, on the source, having been received
269 struct RAMSrcPageRequest {
274 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
277 /* State of RAM for migration */
279 /* QEMUFile used for this migration */
281 /* Last block that we have visited searching for dirty pages */
282 RAMBlock *last_seen_block;
283 /* Last block from where we have sent data */
284 RAMBlock *last_sent_block;
285 /* Last dirty target page we have sent */
286 ram_addr_t last_page;
287 /* last ram version we have seen */
288 uint32_t last_version;
289 /* We are in the first round */
291 /* How many times we have dirtied too many pages */
292 int dirty_rate_high_cnt;
293 /* these variables are used for bitmap sync */
294 /* last time we did a full bitmap_sync */
295 int64_t time_last_bitmap_sync;
296 /* bytes transferred at start_time */
297 uint64_t bytes_xfer_prev;
298 /* number of dirty pages since start_time */
299 uint64_t num_dirty_pages_period;
300 /* xbzrle misses since the beginning of the period */
301 uint64_t xbzrle_cache_miss_prev;
302 /* number of iterations at the beginning of period */
303 uint64_t iterations_prev;
304 /* Iterations since start */
306 /* number of dirty bits in the bitmap */
307 uint64_t migration_dirty_pages;
308 /* protects modification of the bitmap */
309 QemuMutex bitmap_mutex;
310 /* The RAMBlock used in the last src_page_requests */
311 RAMBlock *last_req_rb;
312 /* Queue of outstanding page requests from the destination */
313 QemuMutex src_page_req_mutex;
314 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
316 typedef struct RAMState RAMState;
318 static RAMState *ram_state;
320 uint64_t ram_bytes_remaining(void)
322 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
326 MigrationStats ram_counters;
328 /* used by the search for pages to send */
329 struct PageSearchStatus {
330 /* Current block being searched */
332 /* Current page to search from */
334 /* Set once we wrap around */
337 typedef struct PageSearchStatus PageSearchStatus;
339 struct CompressParam {
348 /* internally used fields */
352 typedef struct CompressParam CompressParam;
354 struct DecompressParam {
364 typedef struct DecompressParam DecompressParam;
366 static CompressParam *comp_param;
367 static QemuThread *compress_threads;
368 /* comp_done_cond is used to wake up the migration thread when
369 * one of the compression threads has finished the compression.
370 * comp_done_lock is used to co-work with comp_done_cond.
372 static QemuMutex comp_done_lock;
373 static QemuCond comp_done_cond;
374 /* The empty QEMUFileOps will be used by file in CompressParam */
375 static const QEMUFileOps empty_ops = { };
377 static QEMUFile *decomp_file;
378 static DecompressParam *decomp_param;
379 static QemuThread *decompress_threads;
380 static QemuMutex decomp_done_lock;
381 static QemuCond decomp_done_cond;
383 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
384 ram_addr_t offset, uint8_t *source_buf);
386 static void *do_data_compress(void *opaque)
388 CompressParam *param = opaque;
392 qemu_mutex_lock(¶m->mutex);
393 while (!param->quit) {
395 block = param->block;
396 offset = param->offset;
398 qemu_mutex_unlock(¶m->mutex);
400 do_compress_ram_page(param->file, ¶m->stream, block, offset,
403 qemu_mutex_lock(&comp_done_lock);
405 qemu_cond_signal(&comp_done_cond);
406 qemu_mutex_unlock(&comp_done_lock);
408 qemu_mutex_lock(¶m->mutex);
410 qemu_cond_wait(¶m->cond, ¶m->mutex);
413 qemu_mutex_unlock(¶m->mutex);
418 static inline void terminate_compression_threads(void)
420 int idx, thread_count;
422 thread_count = migrate_compress_threads();
424 for (idx = 0; idx < thread_count; idx++) {
425 qemu_mutex_lock(&comp_param[idx].mutex);
426 comp_param[idx].quit = true;
427 qemu_cond_signal(&comp_param[idx].cond);
428 qemu_mutex_unlock(&comp_param[idx].mutex);
432 static void compress_threads_save_cleanup(void)
436 if (!migrate_use_compression()) {
439 terminate_compression_threads();
440 thread_count = migrate_compress_threads();
441 for (i = 0; i < thread_count; i++) {
443 * we use it as an indicator of whether the thread is
444 * properly initialized or not
446 if (!comp_param[i].file) {
449 qemu_thread_join(compress_threads + i);
450 qemu_mutex_destroy(&comp_param[i].mutex);
451 qemu_cond_destroy(&comp_param[i].cond);
452 deflateEnd(&comp_param[i].stream);
453 g_free(comp_param[i].originbuf);
454 qemu_fclose(comp_param[i].file);
455 comp_param[i].file = NULL;
457 qemu_mutex_destroy(&comp_done_lock);
458 qemu_cond_destroy(&comp_done_cond);
459 g_free(compress_threads);
461 compress_threads = NULL;
465 static int compress_threads_save_setup(void)
469 if (!migrate_use_compression()) {
472 thread_count = migrate_compress_threads();
473 compress_threads = g_new0(QemuThread, thread_count);
474 comp_param = g_new0(CompressParam, thread_count);
475 qemu_cond_init(&comp_done_cond);
476 qemu_mutex_init(&comp_done_lock);
477 for (i = 0; i < thread_count; i++) {
478 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
479 if (!comp_param[i].originbuf) {
483 if (deflateInit(&comp_param[i].stream,
484 migrate_compress_level()) != Z_OK) {
485 g_free(comp_param[i].originbuf);
489 /* comp_param[i].file is just used as a dummy buffer to save data,
490 * set its ops to empty.
492 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
493 comp_param[i].done = true;
494 comp_param[i].quit = false;
495 qemu_mutex_init(&comp_param[i].mutex);
496 qemu_cond_init(&comp_param[i].cond);
497 qemu_thread_create(compress_threads + i, "compress",
498 do_data_compress, comp_param + i,
499 QEMU_THREAD_JOINABLE);
504 compress_threads_save_cleanup();
510 #define MULTIFD_MAGIC 0x11223344U
511 #define MULTIFD_VERSION 1
516 unsigned char uuid[16]; /* QemuUUID */
518 } __attribute__((packed)) MultiFDInit_t;
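/*
 * A hedged sketch of the handshake implied by multifd_send_initial_packet()
 * and multifd_recv_initial_packet() below: each send channel starts by
 * writing one packed MultiFDInit_t carrying a 32-bit magic (MULTIFD_MAGIC),
 * a 32-bit version (MULTIFD_VERSION), the source's 16-byte QemuUUID and a
 * one-byte channel id, with the multi-byte fields in big-endian order. The
 * receiver checks magic, version and uuid before wiring the channel into
 * multifd_recv_state->params[id].
 */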
529 } __attribute__((packed)) MultiFDPacket_t;
532 /* number of used pages */
534 /* number of allocated pages */
536 /* global number of generated multifd packets */
538 /* offset of each page */
540 /* pointer to each page */
546 /* these fields are not changed once the thread is created */
549 /* channel thread name */
551 /* channel thread id */
553 /* communication channel */
555 /* sem where to wait for more work */
557 /* this mutex protects the following parameters */
559 /* is this channel thread running */
561 /* should this thread finish */
563 /* thread has work to do */
565 /* array of pages to send */
566 MultiFDPages_t *pages;
567 /* packet allocated len */
569 /* pointer to the packet */
570 MultiFDPacket_t *packet;
571 /* multifd flags for each packet */
573 /* global number of generated multifd packets */
575 /* thread local variables */
576 /* packets sent through this channel */
577 uint64_t num_packets;
578 /* pages sent through this channel */
583 /* these fields are not changed once the thread is created */
586 /* channel thread name */
588 /* channel thread id */
590 /* communication channel */
592 /* sem where to wait for more work */
594 /* this mutex protects the following parameters */
596 /* is this channel thread running */
598 /* should this thread finish */
600 /* thread has work to do */
602 /* array of pages to receive */
603 MultiFDPages_t *pages;
604 /* packet allocated len */
606 /* pointer to the packet */
607 MultiFDPacket_t *packet;
608 /* multifd flags for each packet */
610 /* global number of generated multifd packets */
612 /* thread local variables */
613 /* packets received through this channel */
614 uint64_t num_packets;
615 /* pages received through this channel */
619 static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
624 msg.magic = cpu_to_be32(MULTIFD_MAGIC);
625 msg.version = cpu_to_be32(MULTIFD_VERSION);
627 memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
629 ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
636 static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
641 ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
646 be32_to_cpus(&msg.magic);
647 be32_to_cpus(&msg.version);
649 if (msg.magic != MULTIFD_MAGIC) {
650 error_setg(errp, "multifd: received packet magic %x "
651 "expected %x", msg.magic, MULTIFD_MAGIC);
655 if (msg.version != MULTIFD_VERSION) {
656 error_setg(errp, "multifd: received packet version %d "
657 "expected %d", msg.version, MULTIFD_VERSION);
661 if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
662 char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
663 char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
665 error_setg(errp, "multifd: received uuid '%s' and expected "
666 "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
672 if (msg.id >= migrate_multifd_channels()) {
673 error_setg(errp, "multifd: received channel id %d, but only %d "
674 "channels are configured", msg.id, migrate_multifd_channels());
681 static MultiFDPages_t *multifd_pages_init(size_t size)
683 MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
685 pages->allocated = size;
686 pages->iov = g_new0(struct iovec, size);
687 pages->offset = g_new0(ram_addr_t, size);
692 static void multifd_pages_clear(MultiFDPages_t *pages)
695 pages->allocated = 0;
696 pages->packet_num = 0;
700 g_free(pages->offset);
701 pages->offset = NULL;
705 static void multifd_send_fill_packet(MultiFDSendParams *p)
707 MultiFDPacket_t *packet = p->packet;
710 packet->magic = cpu_to_be32(MULTIFD_MAGIC);
711 packet->version = cpu_to_be32(MULTIFD_VERSION);
712 packet->flags = cpu_to_be32(p->flags);
713 packet->size = cpu_to_be32(migrate_multifd_page_count());
714 packet->used = cpu_to_be32(p->pages->used);
715 packet->packet_num = cpu_to_be64(p->packet_num);
717 if (p->pages->block) {
718 strncpy(packet->ramblock, p->pages->block->idstr, 256);
721 for (i = 0; i < p->pages->used; i++) {
722 packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
726 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
728 MultiFDPacket_t *packet = p->packet;
732 /* ToDo: We can't use it until we have received a message */
735 be32_to_cpus(&packet->magic);
736 if (packet->magic != MULTIFD_MAGIC) {
737 error_setg(errp, "multifd: received packet "
738 "magic %x and expected magic %x",
739 packet->magic, MULTIFD_MAGIC);
743 be32_to_cpus(&packet->version);
744 if (packet->version != MULTIFD_VERSION) {
745 error_setg(errp, "multifd: received packet "
746 "version %d and expected version %d",
747 packet->version, MULTIFD_VERSION);
751 p->flags = be32_to_cpu(packet->flags);
753 be32_to_cpus(&packet->size);
754 if (packet->size > migrate_multifd_page_count()) {
755 error_setg(errp, "multifd: received packet "
756 "with size %d and expected maximum size %d",
757 packet->size, migrate_multifd_page_count());
761 p->pages->used = be32_to_cpu(packet->used);
762 if (p->pages->used > packet->size) {
763 error_setg(errp, "multifd: received packet "
764 "with %d pages and expected maximum of %d pages",
765 p->pages->used, packet->size);
769 p->packet_num = be64_to_cpu(packet->packet_num);
771 if (p->pages->used) {
772 /* make sure that ramblock is 0 terminated */
773 packet->ramblock[255] = 0;
774 block = qemu_ram_block_by_name(packet->ramblock);
776 error_setg(errp, "multifd: unknown ram block %s",
782 for (i = 0; i < p->pages->used; i++) {
783 ram_addr_t offset = be64_to_cpu(packet->offset[i]);
785 if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
786 error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
787 " (max " RAM_ADDR_FMT ")",
788 offset, block->used_length);
791 p->pages->iov[i].iov_base = block->host + offset;
792 p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
799 MultiFDSendParams *params;
800 /* number of created threads */
802 /* array of pages to sent */
803 MultiFDPages_t *pages;
804 } *multifd_send_state;
806 static void multifd_send_terminate_threads(Error *err)
811 MigrationState *s = migrate_get_current();
812 migrate_set_error(s, err);
813 if (s->state == MIGRATION_STATUS_SETUP ||
814 s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
815 s->state == MIGRATION_STATUS_DEVICE ||
816 s->state == MIGRATION_STATUS_ACTIVE) {
817 migrate_set_state(&s->state, s->state,
818 MIGRATION_STATUS_FAILED);
822 for (i = 0; i < migrate_multifd_channels(); i++) {
823 MultiFDSendParams *p = &multifd_send_state->params[i];
825 qemu_mutex_lock(&p->mutex);
827 qemu_sem_post(&p->sem);
828 qemu_mutex_unlock(&p->mutex);
832 int multifd_save_cleanup(Error **errp)
837 if (!migrate_use_multifd()) {
840 multifd_send_terminate_threads(NULL);
841 for (i = 0; i < migrate_multifd_channels(); i++) {
842 MultiFDSendParams *p = &multifd_send_state->params[i];
845 qemu_thread_join(&p->thread);
847 socket_send_channel_destroy(p->c);
849 qemu_mutex_destroy(&p->mutex);
850 qemu_sem_destroy(&p->sem);
853 multifd_pages_clear(p->pages);
859 g_free(multifd_send_state->params);
860 multifd_send_state->params = NULL;
861 multifd_pages_clear(multifd_send_state->pages);
862 multifd_send_state->pages = NULL;
863 g_free(multifd_send_state);
864 multifd_send_state = NULL;
868 static void *multifd_send_thread(void *opaque)
870 MultiFDSendParams *p = opaque;
871 Error *local_err = NULL;
873 trace_multifd_send_thread_start(p->id);
875 if (multifd_send_initial_packet(p, &local_err) < 0) {
882 qemu_sem_wait(&p->sem);
883 qemu_mutex_lock(&p->mutex);
885 if (p->pending_job) {
886 uint32_t used = p->pages->used;
887 uint64_t packet_num = p->packet_num;
888 uint32_t flags = p->flags;
890 multifd_send_fill_packet(p);
893 p->num_pages += used;
895 qemu_mutex_unlock(&p->mutex);
897 trace_multifd_send(p->id, packet_num, used, flags);
899 /* ToDo: send packet here */
901 qemu_mutex_lock(&p->mutex);
903 qemu_mutex_unlock(&p->mutex);
905 } else if (p->quit) {
906 qemu_mutex_unlock(&p->mutex);
909 qemu_mutex_unlock(&p->mutex);
910 /* this is impossible */
911 error_setg(&local_err, "multifd_send_thread: Unknown command");
917 multifd_send_terminate_threads(local_err);
920 qemu_mutex_lock(&p->mutex);
922 qemu_mutex_unlock(&p->mutex);
924 trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);
929 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
931 MultiFDSendParams *p = opaque;
932 QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
933 Error *local_err = NULL;
935 if (qio_task_propagate_error(task, &local_err)) {
936 if (multifd_save_cleanup(&local_err) != 0) {
937 migrate_set_error(migrate_get_current(), local_err);
940 p->c = QIO_CHANNEL(sioc);
941 qio_channel_set_delay(p->c, false);
943 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
944 QEMU_THREAD_JOINABLE);
946 atomic_inc(&multifd_send_state->count);
950 int multifd_save_setup(void)
953 uint32_t page_count = migrate_multifd_page_count();
956 if (!migrate_use_multifd()) {
959 thread_count = migrate_multifd_channels();
960 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
961 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
962 atomic_set(&multifd_send_state->count, 0);
963 multifd_send_state->pages = multifd_pages_init(page_count);
965 for (i = 0; i < thread_count; i++) {
966 MultiFDSendParams *p = &multifd_send_state->params[i];
968 qemu_mutex_init(&p->mutex);
969 qemu_sem_init(&p->sem, 0);
973 p->pages = multifd_pages_init(page_count);
974 p->packet_len = sizeof(MultiFDPacket_t)
975 + sizeof(ram_addr_t) * page_count;
976 p->packet = g_malloc0(p->packet_len);
977 p->name = g_strdup_printf("multifdsend_%d", i);
978 socket_send_channel_create(multifd_new_send_channel_async, p);
984 MultiFDRecvParams *params;
985 /* number of created threads */
987 } *multifd_recv_state;
989 static void multifd_recv_terminate_threads(Error *err)
994 MigrationState *s = migrate_get_current();
995 migrate_set_error(s, err);
996 if (s->state == MIGRATION_STATUS_SETUP ||
997 s->state == MIGRATION_STATUS_ACTIVE) {
998 migrate_set_state(&s->state, s->state,
999 MIGRATION_STATUS_FAILED);
1003 for (i = 0; i < migrate_multifd_channels(); i++) {
1004 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1006 qemu_mutex_lock(&p->mutex);
1008 qemu_sem_post(&p->sem);
1009 qemu_mutex_unlock(&p->mutex);
1013 int multifd_load_cleanup(Error **errp)
1018 if (!migrate_use_multifd()) {
1021 multifd_recv_terminate_threads(NULL);
1022 for (i = 0; i < migrate_multifd_channels(); i++) {
1023 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1026 qemu_thread_join(&p->thread);
1028 object_unref(OBJECT(p->c));
1030 qemu_mutex_destroy(&p->mutex);
1031 qemu_sem_destroy(&p->sem);
1034 multifd_pages_clear(p->pages);
1040 g_free(multifd_recv_state->params);
1041 multifd_recv_state->params = NULL;
1042 g_free(multifd_recv_state);
1043 multifd_recv_state = NULL;
1048 static void *multifd_recv_thread(void *opaque)
1050 MultiFDRecvParams *p = opaque;
1051 Error *local_err = NULL;
1054 trace_multifd_recv_thread_start(p->id);
1057 qemu_sem_wait(&p->sem);
1058 qemu_mutex_lock(&p->mutex);
1059 if (p->pending_job) {
1062 qemu_mutex_unlock(&p->mutex);
1064 /* ToDo: recv packet here */
1066 qemu_mutex_lock(&p->mutex);
1067 ret = multifd_recv_unfill_packet(p, &local_err);
1069 qemu_mutex_unlock(&p->mutex);
1073 used = p->pages->used;
1075 trace_multifd_recv(p->id, p->packet_num, used, flags);
1076 p->pending_job = false;
1078 p->num_pages += used;
1079 qemu_mutex_unlock(&p->mutex);
1080 } else if (p->quit) {
1081 qemu_mutex_unlock(&p->mutex);
1084 qemu_mutex_unlock(&p->mutex);
1085 /* this is impossible */
1086 error_setg(&local_err, "multifd_recv_thread: Unknown command");
1091 multifd_recv_terminate_threads(local_err);
1093 qemu_mutex_lock(&p->mutex);
1095 qemu_mutex_unlock(&p->mutex);
1097 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1102 int multifd_load_setup(void)
1105 uint32_t page_count = migrate_multifd_page_count();
1108 if (!migrate_use_multifd()) {
1111 thread_count = migrate_multifd_channels();
1112 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1113 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
1114 atomic_set(&multifd_recv_state->count, 0);
1116 for (i = 0; i < thread_count; i++) {
1117 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1119 qemu_mutex_init(&p->mutex);
1120 qemu_sem_init(&p->sem, 0);
1122 p->pending_job = false;
1124 p->pages = multifd_pages_init(page_count);
1125 p->packet_len = sizeof(MultiFDPacket_t)
1126 + sizeof(ram_addr_t) * page_count;
1127 p->packet = g_malloc0(p->packet_len);
1128 p->name = g_strdup_printf("multifdrecv_%d", i);
1133 bool multifd_recv_all_channels_created(void)
1135 int thread_count = migrate_multifd_channels();
1137 if (!migrate_use_multifd()) {
1141 return thread_count == atomic_read(&multifd_recv_state->count);
1144 void multifd_recv_new_channel(QIOChannel *ioc)
1146 MultiFDRecvParams *p;
1147 Error *local_err = NULL;
1150 id = multifd_recv_initial_packet(ioc, &local_err);
1152 multifd_recv_terminate_threads(local_err);
1156 p = &multifd_recv_state->params[id];
1158 error_setg(&local_err, "multifd: received id '%d' already setup",
1160 multifd_recv_terminate_threads(local_err);
1164 object_ref(OBJECT(ioc));
1165 /* initial packet */
1169 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1170 QEMU_THREAD_JOINABLE);
1171 atomic_inc(&multifd_recv_state->count);
1172 if (multifd_recv_state->count == migrate_multifd_channels()) {
1173 migration_incoming_process();
1178 * save_page_header: write page header to wire
1180 * If this is the 1st block, it also writes the block identification
1182 * Returns the number of bytes written
1184 * @f: QEMUFile where to send the data
1185 * @block: block that contains the page we want to send
1186 * @offset: offset inside the block for the page
1187 * (the lower bits of @offset contain the RAM_SAVE_FLAG_* flags)
1189 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1194 if (block == rs->last_sent_block) {
1195 offset |= RAM_SAVE_FLAG_CONTINUE;
1197 qemu_put_be64(f, offset);
1200 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
1201 len = strlen(block->idstr);
1202 qemu_put_byte(f, len);
1203 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
1205 rs->last_sent_block = block;
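/*
 * A sketch of the two header shapes this produces, assuming the usual
 * flag space below TARGET_PAGE_BITS:
 *
 *   same block as the previous page (RAM_SAVE_FLAG_CONTINUE set):
 *       be64 offset|flags                                    -> 8 bytes
 *
 *   first page from a new block:
 *       be64 offset|flags, u8 len, len bytes of block idstr  -> 9 + len bytes
 *
 * which is why callers such as save_zero_page() and save_xbzrle_page()
 * simply add the returned length to ram_counters.transferred.
 */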
1211 * mig_throttle_guest_down: throttle down the guest
1213 * Reduce amount of guest cpu execution to hopefully slow down memory
1214 * writes. If guest dirty memory rate is reduced below the rate at
1215 * which we can transfer pages to the destination then we should be
1216 * able to complete migration. Some workloads dirty memory way too
1217 * fast and will not effectively converge, even with auto-converge.
1219 static void mig_throttle_guest_down(void)
1221 MigrationState *s = migrate_get_current();
1222 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1223 uint64_t pct_increment = s->parameters.cpu_throttle_increment;
1225 /* We have not started throttling yet. Let's start it. */
1226 if (!cpu_throttle_active()) {
1227 cpu_throttle_set(pct_initial);
1229 /* Throttling already on, just increase the rate */
1230 cpu_throttle_set(cpu_throttle_get_percentage() + pct_increment);
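/*
 * Worked example with illustrative values: with cpu_throttle_initial = 20
 * and cpu_throttle_increment = 10, the first call throttles the guest to
 * 20%, and each further call while throttling is active raises it to 30%,
 * 40%, and so on (cpu_throttle_set() is expected to clamp overly large
 * percentages).
 */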
1235 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1237 * @rs: current RAM state
1238 * @current_addr: address for the zero page
1240 * Update the xbzrle cache to reflect a page that's been sent as all 0.
1241 * The important thing is that a stale (not-yet-0'd) page be replaced by the new data.
1243 * As a bonus, if the page wasn't in the cache it gets added so that
1244 * when a small write is made into the 0'd page it gets XBZRLE sent.
1246 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
1248 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1252 /* We don't care if this fails to allocate a new cache page
1253 * as long as it updated an old one */
1254 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
1255 ram_counters.dirty_sync_count);
1258 #define ENCODING_FLAG_XBZRLE 0x1
1261 * save_xbzrle_page: compress and send current page
1263 * Returns: 1 means that we wrote the page
1264 * 0 means that the page is identical to the one already sent
1265 * -1 means that xbzrle would be longer than normal
1267 * @rs: current RAM state
1268 * @current_data: pointer to the address of the page contents
1269 * @current_addr: addr of the page
1270 * @block: block that contains the page we want to send
1271 * @offset: offset inside the block for the page
1272 * @last_stage: if we are at the completion stage
1274 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
1275 ram_addr_t current_addr, RAMBlock *block,
1276 ram_addr_t offset, bool last_stage)
1278 int encoded_len = 0, bytes_xbzrle;
1279 uint8_t *prev_cached_page;
1281 if (!cache_is_cached(XBZRLE.cache, current_addr,
1282 ram_counters.dirty_sync_count)) {
1283 xbzrle_counters.cache_miss++;
1285 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
1286 ram_counters.dirty_sync_count) == -1) {
1289 /* update *current_data when the page has been
1290 inserted into cache */
1291 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1297 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1299 /* save current buffer into memory */
1300 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1302 /* XBZRLE encoding (if there is no overflow) */
1303 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1304 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1306 if (encoded_len == 0) {
1307 trace_save_xbzrle_page_skipping();
1309 } else if (encoded_len == -1) {
1310 trace_save_xbzrle_page_overflow();
1311 xbzrle_counters.overflow++;
1312 /* update data in the cache */
1314 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1315 *current_data = prev_cached_page;
1320 /* we need to update the data in the cache, in order to get the same data */
1322 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1325 /* Send XBZRLE based compressed page */
1326 bytes_xbzrle = save_page_header(rs, rs->f, block,
1327 offset | RAM_SAVE_FLAG_XBZRLE);
1328 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1329 qemu_put_be16(rs->f, encoded_len);
1330 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
1331 bytes_xbzrle += encoded_len + 1 + 2;
1332 xbzrle_counters.pages++;
1333 xbzrle_counters.bytes += bytes_xbzrle;
1334 ram_counters.transferred += bytes_xbzrle;
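/*
 * Cost sketch for the accounting above: for a page in the same block as
 * the previous one, the header is just the 8-byte offset/flags word, so
 * an encoded length of, say, 100 bytes is charged as
 *
 *     bytes_xbzrle = 8 + 1 (ENCODING_FLAG_XBZRLE) + 2 (be16 length) + 100
 *                  = 111 bytes
 *
 * matching the "+ encoded_len + 1 + 2" above.
 */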
1340 * migration_bitmap_find_dirty: find the next dirty page from start
1342 * Called with rcu_read_lock() to protect migration_bitmap
1344 * Returns the page offset within the memory region of the start of a dirty page
1346 * @rs: current RAM state
1347 * @rb: RAMBlock where to search for dirty pages
1348 * @start: page where we start the search
1351 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
1352 unsigned long start)
1354 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1355 unsigned long *bitmap = rb->bmap;
1358 if (!qemu_ram_is_migratable(rb)) {
1362 if (rs->ram_bulk_stage && start > 0) {
1365 next = find_next_bit(bitmap, size, start);
1371 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
1377 ret = test_and_clear_bit(page, rb->bmap);
1380 rs->migration_dirty_pages--;
1385 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
1386 ram_addr_t start, ram_addr_t length)
1388 rs->migration_dirty_pages +=
1389 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
1390 &rs->num_dirty_pages_period);
1394 * ram_pagesize_summary: calculate all the pagesizes of a VM
1396 * Returns a summary bitmap of the page sizes of all RAMBlocks
1398 * For VMs with just normal pages this is equivalent to the host page
1399 * size. If it's got some huge pages then it's the OR of all the
1400 * different page sizes.
1402 uint64_t ram_pagesize_summary(void)
1405 uint64_t summary = 0;
1407 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1408 summary |= block->page_size;
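/*
 * Example: a guest backed by ordinary 4 KiB pages plus one 2 MiB hugepage
 * RAMBlock would report 0x1000 | 0x200000 = 0x201000 here, so the consumer
 * can see that more than one page size is in use.
 */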
1414 static void migration_update_rates(RAMState *rs, int64_t end_time)
1416 uint64_t iter_count = rs->iterations - rs->iterations_prev;
1418 /* calculate period counters */
1419 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1420 / (end_time - rs->time_last_bitmap_sync);
1426 if (migrate_use_xbzrle()) {
1427 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
1428 rs->xbzrle_cache_miss_prev) / iter_count;
1429 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1433 static void migration_bitmap_sync(RAMState *rs)
1437 uint64_t bytes_xfer_now;
1439 ram_counters.dirty_sync_count++;
1441 if (!rs->time_last_bitmap_sync) {
1442 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1445 trace_migration_bitmap_sync_start();
1446 memory_global_dirty_log_sync();
1448 qemu_mutex_lock(&rs->bitmap_mutex);
1450 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1451 migration_bitmap_sync_range(rs, block, 0, block->used_length);
1453 ram_counters.remaining = ram_bytes_remaining();
1455 qemu_mutex_unlock(&rs->bitmap_mutex);
1457 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1459 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1461 /* more than 1 second = 1000 milliseconds */
1462 if (end_time > rs->time_last_bitmap_sync + 1000) {
1463 bytes_xfer_now = ram_counters.transferred;
1465 /* During block migration the auto-converge logic incorrectly detects
1466 * that ram migration makes no progress. Avoid this by disabling the
1467 * throttling logic during the bulk phase of block migration. */
1468 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
1469 /* The following detection logic can be refined later. For now:
1470 Check to see if the dirtied bytes are 50% more than the approx.
1471 amount of bytes that just got transferred since the last time we
1472 were in this routine. If that happens twice, start or increase
1475 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
1476 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
1477 (++rs->dirty_rate_high_cnt >= 2)) {
1478 trace_migration_throttle();
1479 rs->dirty_rate_high_cnt = 0;
1480 mig_throttle_guest_down();
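/*
 * Numeric sketch of the trigger above (illustrative numbers only): with
 * 4 KiB target pages, dirtying 300000 pages in the last period is roughly
 * 1.2 GB of newly dirtied memory; if only 800 MB were transferred in the
 * same period, the threshold is 400 MB and the condition holds, and after
 * the second such period the guest gets throttled down.
 */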
1484 migration_update_rates(rs, end_time);
1486 rs->iterations_prev = rs->iterations;
1488 /* reset period counters */
1489 rs->time_last_bitmap_sync = end_time;
1490 rs->num_dirty_pages_period = 0;
1491 rs->bytes_xfer_prev = bytes_xfer_now;
1493 if (migrate_use_events()) {
1494 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
1499 * save_zero_page: send the zero page to the stream
1501 * Returns the number of pages written.
1503 * @rs: current RAM state
1504 * @block: block that contains the page we want to send
1505 * @offset: offset inside the block for the page
1507 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1509 uint8_t *p = block->host + offset;
1512 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1513 ram_counters.duplicate++;
1514 ram_counters.transferred +=
1515 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
1516 qemu_put_byte(rs->f, 0);
1517 ram_counters.transferred += 1;
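/*
 * Wire-cost sketch: a zero page in the middle of a block therefore costs
 * 9 bytes on the stream, the 8-byte offset|RAM_SAVE_FLAG_ZERO header plus
 * the single 0 byte written above, instead of a full TARGET_PAGE_SIZE
 * payload.
 */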
1524 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
1526 if (!migrate_release_ram() || !migration_in_postcopy()) {
1530 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
1534 * @pages: the number of pages written by the control path,
1536 * > 0 - number of pages written
1538 * Return true if the page has been saved, otherwise false is returned.
1540 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1543 uint64_t bytes_xmit = 0;
1547 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1549 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1554 ram_counters.transferred += bytes_xmit;
1558 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1562 if (bytes_xmit > 0) {
1563 ram_counters.normal++;
1564 } else if (bytes_xmit == 0) {
1565 ram_counters.duplicate++;
1572 * directly send the page to the stream
1574 * Returns the number of pages written.
1576 * @rs: current RAM state
1577 * @block: block that contains the page we want to send
1578 * @offset: offset inside the block for the page
1579 * @buf: the page to be sent
1580 * @async: send the page asynchronously
1582 static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1583 uint8_t *buf, bool async)
1585 ram_counters.transferred += save_page_header(rs, rs->f, block,
1586 offset | RAM_SAVE_FLAG_PAGE);
1588 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1589 migrate_release_ram() &&
1590 migration_in_postcopy());
1592 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1594 ram_counters.transferred += TARGET_PAGE_SIZE;
1595 ram_counters.normal++;
1600 * ram_save_page: send the given page to the stream
1602 * Returns the number of pages written.
1604 * >=0 - Number of pages written - this might legally be 0
1605 * if xbzrle noticed the page was the same.
1607 * @rs: current RAM state
1608 * @block: block that contains the page we want to send
1609 * @offset: offset inside the block for the page
1610 * @last_stage: if we are at the completion stage
1612 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1616 bool send_async = true;
1617 RAMBlock *block = pss->block;
1618 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1619 ram_addr_t current_addr = block->offset + offset;
1621 p = block->host + offset;
1622 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
1624 XBZRLE_cache_lock();
1625 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1626 migrate_use_xbzrle()) {
1627 pages = save_xbzrle_page(rs, &p, current_addr, block,
1628 offset, last_stage);
1630 /* Can't send this cached data async, since the cache page
1631 * might get updated before it gets to the wire
1637 /* XBZRLE overflow or normal page */
1639 pages = save_normal_page(rs, block, offset, p, send_async);
1642 XBZRLE_cache_unlock();
1647 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1648 ram_addr_t offset, uint8_t *source_buf)
1650 RAMState *rs = ram_state;
1651 int bytes_sent, blen;
1652 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
1654 bytes_sent = save_page_header(rs, f, block, offset |
1655 RAM_SAVE_FLAG_COMPRESS_PAGE);
1658 * copy it to an internal buffer to avoid it being modified by the VM
1659 * so that we can catch the error during compression and
1662 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1663 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1666 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1667 error_report("compressed data failed!");
1670 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1676 static void flush_compressed_data(RAMState *rs)
1678 int idx, len, thread_count;
1680 if (!migrate_use_compression()) {
1683 thread_count = migrate_compress_threads();
1685 qemu_mutex_lock(&comp_done_lock);
1686 for (idx = 0; idx < thread_count; idx++) {
1687 while (!comp_param[idx].done) {
1688 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1691 qemu_mutex_unlock(&comp_done_lock);
1693 for (idx = 0; idx < thread_count; idx++) {
1694 qemu_mutex_lock(&comp_param[idx].mutex);
1695 if (!comp_param[idx].quit) {
1696 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1697 ram_counters.transferred += len;
1699 qemu_mutex_unlock(&comp_param[idx].mutex);
1703 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1706 param->block = block;
1707 param->offset = offset;
1710 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1713 int idx, thread_count, bytes_xmit = -1, pages = -1;
1715 thread_count = migrate_compress_threads();
1716 qemu_mutex_lock(&comp_done_lock);
1718 for (idx = 0; idx < thread_count; idx++) {
1719 if (comp_param[idx].done) {
1720 comp_param[idx].done = false;
1721 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1722 qemu_mutex_lock(&comp_param[idx].mutex);
1723 set_compress_params(&comp_param[idx], block, offset);
1724 qemu_cond_signal(&comp_param[idx].cond);
1725 qemu_mutex_unlock(&comp_param[idx].mutex);
1727 ram_counters.normal++;
1728 ram_counters.transferred += bytes_xmit;
1735 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1738 qemu_mutex_unlock(&comp_done_lock);
1744 * find_dirty_block: find the next dirty page and update any state
1745 * associated with the search process.
1747 * Returns true if a dirty page is found
1749 * @rs: current RAM state
1750 * @pss: data about the state of the current dirty page scan
1751 * @again: set to false if the search has scanned the whole of RAM
1753 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1755 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1756 if (pss->complete_round && pss->block == rs->last_seen_block &&
1757 pss->page >= rs->last_page) {
1759 * We've been once around the RAM and haven't found anything.
1765 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1766 /* Didn't find anything in this RAM Block */
1768 pss->block = QLIST_NEXT_RCU(pss->block, next);
1770 /* Hit the end of the list */
1771 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1772 /* Flag that we've looped */
1773 pss->complete_round = true;
1774 rs->ram_bulk_stage = false;
1775 if (migrate_use_xbzrle()) {
1776 /* If xbzrle is on, stop using the data compression at this
1777 * point. In theory, xbzrle can do better than compression.
1779 flush_compressed_data(rs);
1782 /* Didn't find anything this time, but try again on the new block */
1786 /* Can go around again, but... */
1788 /* We've found something so probably don't need to */
1794 * unqueue_page: gets a page off the queue
1796 * Helper for 'get_queued_page' - gets a page off the queue
1798 * Returns the block of the page (or NULL if none available)
1800 * @rs: current RAM state
1801 * @offset: used to return the offset within the RAMBlock
1803 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1805 RAMBlock *block = NULL;
1807 qemu_mutex_lock(&rs->src_page_req_mutex);
1808 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1809 struct RAMSrcPageRequest *entry =
1810 QSIMPLEQ_FIRST(&rs->src_page_requests);
1812 *offset = entry->offset;
1814 if (entry->len > TARGET_PAGE_SIZE) {
1815 entry->len -= TARGET_PAGE_SIZE;
1816 entry->offset += TARGET_PAGE_SIZE;
1818 memory_region_unref(block->mr);
1819 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1821 migration_consume_urgent_request();
1824 qemu_mutex_unlock(&rs->src_page_req_mutex);
1830 * get_queued_page: unqueue a page from the postcopy requests
1832 * Skips pages that are already sent (!dirty)
1834 * Returns true if a queued page was found
1836 * @rs: current RAM state
1837 * @pss: data about the state of the current dirty page scan
1839 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1846 block = unqueue_page(rs, &offset);
1848 * We're sending this page, and since it's postcopy nothing else
1849 * will dirty it, and we must make sure it doesn't get sent again
1850 * even if this queue request was received after the background
1851 * search already sent it.
1856 page = offset >> TARGET_PAGE_BITS;
1857 dirty = test_bit(page, block->bmap);
1859 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1860 page, test_bit(page, block->unsentmap));
1862 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1866 } while (block && !dirty);
1870 * As soon as we start servicing pages out of order, then we have
1871 * to kill the bulk stage, since the bulk stage assumes
1872 * in (migration_bitmap_find_dirty) that every page is
1873 * dirty, that's no longer true.
1875 rs->ram_bulk_stage = false;
1878 * We want the background search to continue from the queued page
1879 * since the guest is likely to want other pages near to the page
1880 * it just requested.
1883 pss->page = offset >> TARGET_PAGE_BITS;
1890 * migration_page_queue_free: drop any remaining pages in the ram
1893 * It should be empty at the end anyway, but in error cases there may
1894 * be some left. In case there are any pages left, we drop them.
1897 static void migration_page_queue_free(RAMState *rs)
1899 struct RAMSrcPageRequest *mspr, *next_mspr;
1900 /* This queue generally should be empty - but in the case of a failed
1901 * migration it might have some entries left in it.
1904 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1905 memory_region_unref(mspr->rb->mr);
1906 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1913 * ram_save_queue_pages: queue the page for transmission
1915 * A request from postcopy destination for example.
1917 * Returns zero on success or negative on error
1919 * @rbname: Name of the RAMBlock of the request. NULL means the
1920 * same as the last one.
1921 * @start: starting address from the start of the RAMBlock
1922 * @len: length (in bytes) to send
1924 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1927 RAMState *rs = ram_state;
1929 ram_counters.postcopy_requests++;
1932 /* Reuse last RAMBlock */
1933 ramblock = rs->last_req_rb;
1937 * Shouldn't happen, we can't reuse the last RAMBlock if
1938 * it's the 1st request.
1940 error_report("ram_save_queue_pages no previous block");
1944 ramblock = qemu_ram_block_by_name(rbname);
1947 /* We shouldn't be asked for a non-existent RAMBlock */
1948 error_report("ram_save_queue_pages no block '%s'", rbname);
1951 rs->last_req_rb = ramblock;
1953 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1954 if (start + len > ramblock->used_length) {
1955 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1956 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1957 __func__, start, len, ramblock->used_length);
1961 struct RAMSrcPageRequest *new_entry =
1962 g_malloc0(sizeof(struct RAMSrcPageRequest));
1963 new_entry->rb = ramblock;
1964 new_entry->offset = start;
1965 new_entry->len = len;
1967 memory_region_ref(ramblock->mr);
1968 qemu_mutex_lock(&rs->src_page_req_mutex);
1969 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1970 migration_make_urgent_request();
1971 qemu_mutex_unlock(&rs->src_page_req_mutex);
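/*
 * Hedged usage sketch: the postcopy fault handling path is expected to
 * queue a single faulting target page roughly like
 *
 *     ram_save_queue_pages("pc.ram", faulting_offset, TARGET_PAGE_SIZE);
 *
 * where "pc.ram" and faulting_offset are placeholders; passing NULL as
 * rbname reuses the block of the previous request, as handled above.
 */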
1981 static bool save_page_use_compression(RAMState *rs)
1983 if (!migrate_use_compression()) {
1988 * If xbzrle is on, stop using the data compression after first
1989 * round of migration even if compression is enabled. In theory,
1990 * xbzrle can do better than compression.
1992 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2000 * ram_save_target_page: save one target page
2002 * Returns the number of pages written
2004 * @rs: current RAM state
2005 * @pss: data about the page we want to send
2006 * @last_stage: if we are at the completion stage
2008 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
2011 RAMBlock *block = pss->block;
2012 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2015 if (control_save_page(rs, block, offset, &res)) {
2020 * When starting the process of a new block, the first page of
2021 * the block should be sent out before other pages in the same
2022 * block, and all the pages in the last block should have been sent
2023 * out. Keeping this order is important, because the 'cont' flag
2024 * is used to avoid resending the block name.
2026 if (block != rs->last_sent_block && save_page_use_compression(rs)) {
2027 flush_compressed_data(rs);
2030 res = save_zero_page(rs, block, offset);
2032 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2033 * page would be stale
2035 if (!save_page_use_compression(rs)) {
2036 XBZRLE_cache_lock();
2037 xbzrle_cache_zero_page(rs, block->offset + offset);
2038 XBZRLE_cache_unlock();
2040 ram_release_pages(block->idstr, offset, res);
2045 * Make sure the first page is sent out before other pages.
2047 * we post it as a normal page as compression will take much
2050 if (block == rs->last_sent_block && save_page_use_compression(rs)) {
2051 return compress_page_with_multi_thread(rs, block, offset);
2054 return ram_save_page(rs, pss, last_stage);
2058 * ram_save_host_page: save a whole host page
2060 * Starting at *offset send pages up to the end of the current host
2061 * page. It's valid for the initial offset to point into the middle of
2062 * a host page in which case the remainder of the hostpage is sent.
2063 * Only dirty target pages are sent. Note that the host page size may
2064 * be a huge page for this block.
2065 * The saving stops at the boundary of the used_length of the block
2066 * if the RAMBlock isn't a multiple of the host page size.
2068 * Returns the number of pages written or negative on error
2070 * @rs: current RAM state
2071 * @ms: current migration state
2072 * @pss: data about the page we want to send
2073 * @last_stage: if we are at the completion stage
2075 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
2078 int tmppages, pages = 0;
2079 size_t pagesize_bits =
2080 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2082 if (!qemu_ram_is_migratable(pss->block)) {
2083 error_report("block %s should not be migrated !", pss->block->idstr);
2088 /* Check if the page is dirty and, if it is, send it */
2089 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2094 tmppages = ram_save_target_page(rs, pss, last_stage);
2100 if (pss->block->unsentmap) {
2101 clear_bit(pss->page, pss->block->unsentmap);
2105 } while ((pss->page & (pagesize_bits - 1)) &&
2106 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
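/*
 * Loop-bound sketch: for a 2 MiB hugepage block with 4 KiB target pages,
 * pagesize_bits is 512, so the do/while above keeps sending target pages
 * until pss->page reaches a multiple of 512 (the start of the next host
 * page) or the end of the block's used_length.
 */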
2108 /* The offset we leave with is the last one we looked at */
2114 * ram_find_and_save_block: finds a dirty page and sends it to f
2116 * Called within an RCU critical section.
2118 * Returns the number of pages written where zero means no dirty pages
2120 * @rs: current RAM state
2121 * @last_stage: if we are at the completion stage
2123 * On systems where host-page-size > target-page-size it will send all the
2124 * pages in a host page that are dirty.
2127 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
2129 PageSearchStatus pss;
2133 /* No dirty page as there is zero RAM */
2134 if (!ram_bytes_total()) {
2138 pss.block = rs->last_seen_block;
2139 pss.page = rs->last_page;
2140 pss.complete_round = false;
2143 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2148 found = get_queued_page(rs, &pss);
2151 /* priority queue empty, so just search for something dirty */
2152 found = find_dirty_block(rs, &pss, &again);
2156 pages = ram_save_host_page(rs, &pss, last_stage);
2158 } while (!pages && again);
2160 rs->last_seen_block = pss.block;
2161 rs->last_page = pss.page;
2166 void acct_update_position(QEMUFile *f, size_t size, bool zero)
2168 uint64_t pages = size / TARGET_PAGE_SIZE;
2171 ram_counters.duplicate += pages;
2173 ram_counters.normal += pages;
2174 ram_counters.transferred += size;
2175 qemu_update_position(f, size);
2179 uint64_t ram_bytes_total(void)
2185 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2186 total += block->used_length;
2192 static void xbzrle_load_setup(void)
2194 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2197 static void xbzrle_load_cleanup(void)
2199 g_free(XBZRLE.decoded_buf);
2200 XBZRLE.decoded_buf = NULL;
2203 static void ram_state_cleanup(RAMState **rsp)
2206 migration_page_queue_free(*rsp);
2207 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2208 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2214 static void xbzrle_cleanup(void)
2216 XBZRLE_cache_lock();
2218 cache_fini(XBZRLE.cache);
2219 g_free(XBZRLE.encoded_buf);
2220 g_free(XBZRLE.current_buf);
2221 g_free(XBZRLE.zero_target_page);
2222 XBZRLE.cache = NULL;
2223 XBZRLE.encoded_buf = NULL;
2224 XBZRLE.current_buf = NULL;
2225 XBZRLE.zero_target_page = NULL;
2227 XBZRLE_cache_unlock();
2230 static void ram_save_cleanup(void *opaque)
2232 RAMState **rsp = opaque;
2235 /* caller must hold the iothread lock or be in a bottom half, so there is
2236 * no write race against this migration_bitmap
2238 memory_global_dirty_log_stop();
2240 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2241 g_free(block->bmap);
2243 g_free(block->unsentmap);
2244 block->unsentmap = NULL;
2248 compress_threads_save_cleanup();
2249 ram_state_cleanup(rsp);
2252 static void ram_state_reset(RAMState *rs)
2254 rs->last_seen_block = NULL;
2255 rs->last_sent_block = NULL;
2257 rs->last_version = ram_list.version;
2258 rs->ram_bulk_stage = true;
2261 #define MAX_WAIT 50 /* ms, half buffered_file limit */
2264 * 'expected' is the value you expect the bitmap mostly to be full
2265 * of; it won't bother printing lines that are all this value.
2266 * If 'todump' is null the migration bitmap is dumped.
2268 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2269 unsigned long pages)
2272 int64_t linelen = 128;
2275 for (cur = 0; cur < pages; cur += linelen) {
2279 * Last line; catch the case where the line length
2280 * is longer than remaining ram
2282 if (cur + linelen > pages) {
2283 linelen = pages - cur;
2285 for (curb = 0; curb < linelen; curb++) {
2286 bool thisbit = test_bit(cur + curb, todump);
2287 linebuf[curb] = thisbit ? '1' : '.';
2288 found = found || (thisbit != expected);
2291 linebuf[curb] = '\0';
2292 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2297 /* **** functions for postcopy ***** */
2299 void ram_postcopy_migrated_memory_release(MigrationState *ms)
2301 struct RAMBlock *block;
2303 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2304 unsigned long *bitmap = block->bmap;
2305 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2306 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2308 while (run_start < range) {
2309 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
2310 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
2311 (run_end - run_start) << TARGET_PAGE_BITS);
2312 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2318 * postcopy_send_discard_bm_ram: discard a RAMBlock
2320 * Returns zero on success
2322 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2323 * Note: At this point the 'unsentmap' is the processed bitmap combined
2324 * with the dirtymap; so a '1' means it's either dirty or unsent.
2326 * @ms: current migration state
2327 * @pds: state for postcopy
2328 * @start: RAMBlock starting page
2329 * @length: RAMBlock size
2331 static int postcopy_send_discard_bm_ram(MigrationState *ms,
2332 PostcopyDiscardState *pds,
2335 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2336 unsigned long current;
2337 unsigned long *unsentmap = block->unsentmap;
2339 for (current = 0; current < end; ) {
2340 unsigned long one = find_next_bit(unsentmap, end, current);
2343 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2344 unsigned long discard_length;
2347 discard_length = end - one;
2349 discard_length = zero - one;
2351 if (discard_length) {
2352 postcopy_discard_send_range(ms, pds, one, discard_length);
2354 current = one + discard_length;
2364 * postcopy_each_ram_send_discard: discard all RAMBlocks
2366 * Returns 0 for success or negative for error
2368 * Utility for the outgoing postcopy code.
2369 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2370 * passing it bitmap indexes and name.
2371 * (qemu_ram_foreach_block ends up passing unscaled lengths
2372 * which would mean postcopy code would have to deal with target page)
2374 * @ms: current migration state
2376 static int postcopy_each_ram_send_discard(MigrationState *ms)
2378 struct RAMBlock *block;
2381 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2382 PostcopyDiscardState *pds =
2383 postcopy_discard_send_init(ms, block->idstr);
2386 * Postcopy sends chunks of bitmap over the wire, but it
2387 * just needs indexes at this point, avoids it having
2388 * target page specific code.
2390 ret = postcopy_send_discard_bm_ram(ms, pds, block);
2391 postcopy_discard_send_finish(ms, pds);
2401 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
2403 * Helper for postcopy_chunk_hostpages; it's called twice to
2404 * canonicalize the two bitmaps, that are similar, but one is
2407 * Postcopy requires that all target pages in a hostpage are dirty or
2408 * clean, not a mix. This function canonicalizes the bitmaps.
2410 * @ms: current migration state
2411 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2412 * otherwise we need to canonicalize partially dirty host pages
2413 * @block: block that contains the page we want to canonicalize
2414 * @pds: state for postcopy
2416 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2418 PostcopyDiscardState *pds)
2420 RAMState *rs = ram_state;
2421 unsigned long *bitmap = block->bmap;
2422 unsigned long *unsentmap = block->unsentmap;
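/* number of target pages that make up one host page of this block */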
2423 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
2424 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2425 unsigned long run_start;
2427 if (block->page_size == TARGET_PAGE_SIZE) {
2428 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2433 /* Find a sent page */
2434 run_start = find_next_zero_bit(unsentmap, pages, 0);
2436 /* Find a dirty page */
2437 run_start = find_next_bit(bitmap, pages, 0);
2440 while (run_start < pages) {
2441 bool do_fixup = false;
2442 unsigned long fixup_start_addr;
2443 unsigned long host_offset;
2446 * If the start of this run of pages is in the middle of a host
2447 * page, then we need to fixup this host page.
2449 host_offset = run_start % host_ratio;
2452 run_start -= host_offset;
2453 fixup_start_addr = run_start;
2454 /* For the next pass */
2455 run_start = run_start + host_ratio;
2457 /* Find the end of this run */
2458 unsigned long run_end;
2460 run_end = find_next_bit(unsentmap, pages, run_start + 1);
2462 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
2465 * If the end isn't at the start of a host page, then the
2466 * run doesn't finish at the end of a host page
2467 * and we need to discard.
2469 host_offset = run_end % host_ratio;
2472 fixup_start_addr = run_end - host_offset;
2474 * This host page has gone, the next loop iteration starts
2475 * from after the fixup
2477 run_start = fixup_start_addr + host_ratio;
2480 * No discards on this iteration, next loop starts from
2481 * next sent/dirty page
2483 run_start = run_end + 1;
2490 /* Tell the destination to discard this page */
2491 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2492 /* For the unsent_pass we:
2493 * discard partially sent pages
2494 * For the !unsent_pass (dirty) we:
2495 * discard partially dirty pages that were sent
2496 * (any partially sent pages were already discarded
2497 * by the previous unsent_pass)
2499 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2503 /* Clean up the bitmap */
2504 for (page = fixup_start_addr;
2505 page < fixup_start_addr + host_ratio; page++) {
2506 /* All pages in this host page are now not sent */
2507 set_bit(page, unsentmap);
2510 * Remark them as dirty, updating the count for any pages
2511 * that weren't previously dirty.
2513 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2518 /* Find the next sent page for the next iteration */
2519 run_start = find_next_zero_bit(unsentmap, pages, run_start);
2521 /* Find the next dirty page for the next iteration */
2522 run_start = find_next_bit(bitmap, pages, run_start);
2528 * postcopy_chunk_hostpages: discard any partially sent host page
2530 * Utility for the outgoing postcopy code.
2532 * Discard any partially sent host-page size chunks, mark any partially
2533 * dirty host-page size chunks as all dirty. In this case the host-page
2534 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
2536 * Returns zero on success
2538 * @ms: current migration state
2539 * @block: block we want to work with
2541 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2543 PostcopyDiscardState *pds =
2544 postcopy_discard_send_init(ms, block->idstr);
2546 /* First pass: Discard all partially sent host pages */
2547 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2549 * Second pass: Ensure that all partially dirty host pages are made
2552 postcopy_chunk_hostpages_pass(ms, false, block, pds);
2554 postcopy_discard_send_finish(ms, pds);
2559 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2561 * Returns zero on success
2563 * Transmit the set of pages to be discarded after precopy to the target;
2564 * these are pages that:
2565 * a) Have been previously transmitted but are now dirty again
2566 * b) Pages that have never been transmitted, this ensures that
2567 * any pages on the destination that have been mapped by background
2568 * tasks get discarded (transparent huge pages are the specific concern).
2569 * Hopefully this is pretty sparse
2571 * @ms: current migration state
2573 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2575 RAMState *rs = ram_state;
2581 /* This should be our last sync, the src is now paused */
2582 migration_bitmap_sync(rs);
2584 /* Easiest way to make sure we don't resume in the middle of a host-page */
2585 rs->last_seen_block = NULL;
2586 rs->last_sent_block = NULL;
2589 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2590 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2591 unsigned long *bitmap = block->bmap;
2592 unsigned long *unsentmap = block->unsentmap;
2595 /* We don't have a safe way to resize the sentmap, so
2596 * if the bitmap was resized it will be NULL at this point. */
2599 error_report("migration ram resized during precopy phase");
2603 /* Deal with TPS != HPS and huge pages */
2604 ret = postcopy_chunk_hostpages(ms, block);
2611 * Update the unsentmap to be unsentmap = unsentmap | dirty
2613 bitmap_or(unsentmap, unsentmap, bitmap, pages);
2614 #ifdef DEBUG_POSTCOPY
2615 ram_debug_dump_bitmap(unsentmap, true, pages);
2618 trace_ram_postcopy_send_discard_bitmap();
2620 ret = postcopy_each_ram_send_discard(ms);
2627 * ram_discard_range: discard dirtied pages at the beginning of postcopy
2629 * Returns zero on success
2631 * @rbname: name of the RAMBlock of the request. NULL means the
2632 * same as the last one.
2633 * @start: byte offset within the RAMBlock
2634 * @length: number of bytes to discard
2636 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2640 trace_ram_discard_range(rbname, start, length);
2643 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2646 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2650 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2651 length >> qemu_target_page_bits());
2652 ret = ram_block_discard_range(rb, start, length);
2661 * For every allocation, we will try not to crash the VM if the
2662 * allocation fails.
2664 static int xbzrle_init(void)
2666 Error *local_err = NULL;
2668 if (!migrate_use_xbzrle()) {
2672 XBZRLE_cache_lock();
2674 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2675 if (!XBZRLE.zero_target_page) {
2676 error_report("%s: Error allocating zero page", __func__);
2680 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2681 TARGET_PAGE_SIZE, &local_err);
2682 if (!XBZRLE.cache) {
2683 error_report_err(local_err);
2684 goto free_zero_page;
2687 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2688 if (!XBZRLE.encoded_buf) {
2689 error_report("%s: Error allocating encoded_buf", __func__);
2693 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2694 if (!XBZRLE.current_buf) {
2695 error_report("%s: Error allocating current_buf", __func__);
2696 goto free_encoded_buf;
2699 /* We are all good */
2700 XBZRLE_cache_unlock();
2704 g_free(XBZRLE.encoded_buf);
2705 XBZRLE.encoded_buf = NULL;
2707 cache_fini(XBZRLE.cache);
2708 XBZRLE.cache = NULL;
2710 g_free(XBZRLE.zero_target_page);
2711 XBZRLE.zero_target_page = NULL;
2713 XBZRLE_cache_unlock();
2717 static int ram_state_init(RAMState **rsp)
2719 *rsp = g_try_new0(RAMState, 1);
2722 error_report("%s: Init ramstate fail", __func__);
2726 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2727 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2728 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2731 * Count the total number of pages used by ram blocks not including any
2732 * gaps due to alignment or unplugs.
2734 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2736 ram_state_reset(*rsp);
2741 static void ram_list_init_bitmaps(void)
2744 unsigned long pages;
2746 /* Skip setting bitmap if there is no RAM */
2747 if (ram_bytes_total()) {
2748 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2749 pages = block->max_length >> TARGET_PAGE_BITS;
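/* Start with every page dirty (and, for postcopy, unsent) so the
 * whole block is migrated at least once. */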
2750 block->bmap = bitmap_new(pages);
2751 bitmap_set(block->bmap, 0, pages);
2752 if (migrate_postcopy_ram()) {
2753 block->unsentmap = bitmap_new(pages);
2754 bitmap_set(block->unsentmap, 0, pages);
2760 static void ram_init_bitmaps(RAMState *rs)
2762 /* For memory_global_dirty_log_start below. */
2763 qemu_mutex_lock_iothread();
2764 qemu_mutex_lock_ramlist();
2767 ram_list_init_bitmaps();
2768 memory_global_dirty_log_start();
2769 migration_bitmap_sync(rs);
2772 qemu_mutex_unlock_ramlist();
2773 qemu_mutex_unlock_iothread();
2776 static int ram_init_all(RAMState **rsp)
2778 if (ram_state_init(rsp)) {
2782 if (xbzrle_init()) {
2783 ram_state_cleanup(rsp);
2787 ram_init_bitmaps(*rsp);
2792 static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2798 * Postcopy is not using xbzrle/compression, so no need for that.
2799 * Also, since the source is already halted, we don't need to care
2800 * about dirty page logging either.
2803 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2804 pages += bitmap_count_one(block->bmap,
2805 block->used_length >> TARGET_PAGE_BITS);
2808 /* This may not be aligned with current bitmaps. Recalculate. */
2809 rs->migration_dirty_pages = pages;
2811 rs->last_seen_block = NULL;
2812 rs->last_sent_block = NULL;
2814 rs->last_version = ram_list.version;
2816 * Disable the bulk stage, otherwise we'll resend the whole RAM no
2817 * matter what we have sent.
2819 rs->ram_bulk_stage = false;
2821 /* Update RAMState cache of output QEMUFile */
2824 trace_ram_state_resume_prepare(pages);
2828 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2829 * a long-running RCU critical section. When RCU reclaims in the code
2830 * start to become numerous it will be necessary to reduce the
2831 * granularity of these critical sections.
2835 * ram_save_setup: Setup RAM for migration
2837 * Returns zero to indicate success and negative for error
2839 * @f: QEMUFile where to send the data
2840 * @opaque: RAMState pointer
2842 static int ram_save_setup(QEMUFile *f, void *opaque)
2844 RAMState **rsp = opaque;
2847 if (compress_threads_save_setup()) {
2851 /* migration has already set up the bitmap; reuse it. */
2852 if (!migration_in_colo_state()) {
2853 if (ram_init_all(rsp) != 0) {
2854 compress_threads_save_cleanup();
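/* ram_bytes_total() is a multiple of the target page size, so the low
 * bits are free to carry the RAM_SAVE_FLAG_MEM_SIZE marker. */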
2862 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2864 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2865 qemu_put_byte(f, strlen(block->idstr));
2866 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2867 qemu_put_be64(f, block->used_length);
2868 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2869 qemu_put_be64(f, block->page_size);
2875 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2876 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2878 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2884 * ram_save_iterate: iterative stage for migration
2886 * Returns zero to indicate success and negative for error
2888 * @f: QEMUFile where to send the data
2889 * @opaque: RAMState pointer
2891 static int ram_save_iterate(QEMUFile *f, void *opaque)
2893 RAMState **temp = opaque;
2894 RAMState *rs = *temp;
2900 if (blk_mig_bulk_active()) {
2901 /* Avoid transferring ram during bulk phase of block migration as
2902 * the bulk phase will usually take a long time and transferring
2903 * ram updates during that time is pointless. */
2908 if (ram_list.version != rs->last_version) {
2909 ram_state_reset(rs);
2912 /* Read version before ram_list.blocks */
2915 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2917 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2919 while ((ret = qemu_file_rate_limit(f)) == 0 ||
2920 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
2923 if (qemu_file_get_error(f)) {
2927 pages = ram_find_and_save_block(rs, false);
2928 /* no more pages to send */
2935 /* we want to check in the 1st loop, just in case it was the 1st time
2936 and we had to sync the dirty bitmap.
2937 qemu_clock_get_ns() is a bit expensive, so we only check every few iterations. */
2940 if ((i & 63) == 0) {
2941 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2942 if (t1 > MAX_WAIT) {
2943 trace_ram_save_iterate_big_wait(t1, i);
2949 flush_compressed_data(rs);
2953 * Must occur before EOS (or any QEMUFile operation)
2954 * because of RDMA protocol.
2956 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2959 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
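/* account for the 8 bytes of the EOS marker just written */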
2960 ram_counters.transferred += 8;
2962 ret = qemu_file_get_error(f);
2971 * ram_save_complete: function called to send the remaining amount of ram
2973 * Returns zero to indicate success
2975 * Called with iothread lock
2977 * @f: QEMUFile where to send the data
2978 * @opaque: RAMState pointer
2980 static int ram_save_complete(QEMUFile *f, void *opaque)
2982 RAMState **temp = opaque;
2983 RAMState *rs = *temp;
2987 if (!migration_in_postcopy()) {
2988 migration_bitmap_sync(rs);
2991 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2993 /* try transferring iterative blocks of memory */
2995 /* flush all remaining blocks regardless of rate limiting */
2999 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
3000 /* no more blocks to send */
3006 flush_compressed_data(rs);
3007 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
3011 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3016 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
3017 uint64_t *res_precopy_only,
3018 uint64_t *res_compatible,
3019 uint64_t *res_postcopy_only)
3021 RAMState **temp = opaque;
3022 RAMState *rs = *temp;
3023 uint64_t remaining_size;
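/* Estimate the pending work as one full target page per remaining dirty page. */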
3025 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3027 if (!migration_in_postcopy() &&
3028 remaining_size < max_size) {
3029 qemu_mutex_lock_iothread();
3031 migration_bitmap_sync(rs);
3033 qemu_mutex_unlock_iothread();
3034 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3037 if (migrate_postcopy_ram()) {
3038 /* We can do postcopy, and all the data is postcopiable */
3039 *res_compatible += remaining_size;
3041 *res_precopy_only += remaining_size;
3045 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3047 unsigned int xh_len;
3049 uint8_t *loaded_data;
3051 /* extract RLE header */
3052 xh_flags = qemu_get_byte(f);
3053 xh_len = qemu_get_be16(f);
3055 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3056 error_report("Failed to load XBZRLE page - wrong compression!");
3060 if (xh_len > TARGET_PAGE_SIZE) {
3061 error_report("Failed to load XBZRLE page - len overflow!");
3064 loaded_data = XBZRLE.decoded_buf;
3065 /* load data and decode */
3066 /* it can change loaded_data to point to an internal buffer */
3067 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
3070 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
3071 TARGET_PAGE_SIZE) == -1) {
3072 error_report("Failed to load XBZRLE page - decode error!");
3080 * ram_block_from_stream: read a RAMBlock id from the migration stream
3082 * Must be called from within an RCU critical section.
3084 * Returns a pointer from within the RCU-protected ram_list.
3086 * @f: QEMUFile where to read the data from
3087 * @flags: Page flags (mostly to see if it's a continuation of previous block)
3089 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
3091 static RAMBlock *block = NULL;
3095 if (flags & RAM_SAVE_FLAG_CONTINUE) {
3097 error_report("Ack, bad migration stream!");
3103 len = qemu_get_byte(f);
3104 qemu_get_buffer(f, (uint8_t *)id, len);
3107 block = qemu_ram_block_by_name(id);
3109 error_report("Can't find block %s", id);
3113 if (!qemu_ram_is_migratable(block)) {
3114 error_report("block %s should not be migrated !", id);
3121 static inline void *host_from_ram_block_offset(RAMBlock *block,
3124 if (!offset_in_ramblock(block, offset)) {
3128 return block->host + offset;
3132 * ram_handle_compressed: handle the zero page case
3134 * If a page (or a whole RDMA chunk) has been
3135 * determined to be zero, then zap it.
3137 * @host: host address for the zero page
3138 * @ch: the byte the page is filled with; we only support zero
3139 * @size: size of the zero page
3141 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
3143 if (ch != 0 || !is_zero_range(host, size)) {
3144 memset(host, ch, size);
3148 /* return the size after decompression, or a negative value on error */
3150 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3151 const uint8_t *source, size_t source_len)
3155 err = inflateReset(stream);
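/* resetting the stream lets each thread reuse a single zlib context
 * instead of re-initializing it for every page */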
3160 stream->avail_in = source_len;
3161 stream->next_in = (uint8_t *)source;
3162 stream->avail_out = dest_len;
3163 stream->next_out = dest;
3165 err = inflate(stream, Z_NO_FLUSH);
3166 if (err != Z_STREAM_END) {
3170 return stream->total_out;
3173 static void *do_data_decompress(void *opaque)
3175 DecompressParam *param = opaque;
3176 unsigned long pagesize;
3180 qemu_mutex_lock(¶m->mutex);
3181 while (!param->quit) {
3186 qemu_mutex_unlock(¶m->mutex);
3188 pagesize = TARGET_PAGE_SIZE;
3190 ret = qemu_uncompress_data(¶m->stream, des, pagesize,
3191 param->compbuf, len);
3192 if (ret < 0 && migrate_get_current()->decompress_error_check) {
3193 error_report("decompress data failed");
3194 qemu_file_set_error(decomp_file, ret);
3197 qemu_mutex_lock(&decomp_done_lock);
3199 qemu_cond_signal(&decomp_done_cond);
3200 qemu_mutex_unlock(&decomp_done_lock);
3202 qemu_mutex_lock(¶m->mutex);
3204 qemu_cond_wait(¶m->cond, ¶m->mutex);
3207 qemu_mutex_unlock(¶m->mutex);
3212 static int wait_for_decompress_done(void)
3214 int idx, thread_count;
3216 if (!migrate_use_compression()) {
3220 thread_count = migrate_decompress_threads();
3221 qemu_mutex_lock(&decomp_done_lock);
3222 for (idx = 0; idx < thread_count; idx++) {
3223 while (!decomp_param[idx].done) {
3224 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3227 qemu_mutex_unlock(&decomp_done_lock);
3228 return qemu_file_get_error(decomp_file);
3231 static void compress_threads_load_cleanup(void)
3233 int i, thread_count;
3235 if (!migrate_use_compression()) {
3238 thread_count = migrate_decompress_threads();
3239 for (i = 0; i < thread_count; i++) {
3241 * we use it as an indicator of whether the thread has been
3242 * properly initialized or not
3244 if (!decomp_param[i].compbuf) {
3248 qemu_mutex_lock(&decomp_param[i].mutex);
3249 decomp_param[i].quit = true;
3250 qemu_cond_signal(&decomp_param[i].cond);
3251 qemu_mutex_unlock(&decomp_param[i].mutex);
3253 for (i = 0; i < thread_count; i++) {
3254 if (!decomp_param[i].compbuf) {
3258 qemu_thread_join(decompress_threads + i);
3259 qemu_mutex_destroy(&decomp_param[i].mutex);
3260 qemu_cond_destroy(&decomp_param[i].cond);
3261 inflateEnd(&decomp_param[i].stream);
3262 g_free(decomp_param[i].compbuf);
3263 decomp_param[i].compbuf = NULL;
3265 g_free(decompress_threads);
3266 g_free(decomp_param);
3267 decompress_threads = NULL;
3268 decomp_param = NULL;
3272 static int compress_threads_load_setup(QEMUFile *f)
3274 int i, thread_count;
3276 if (!migrate_use_compression()) {
3280 thread_count = migrate_decompress_threads();
3281 decompress_threads = g_new0(QemuThread, thread_count);
3282 decomp_param = g_new0(DecompressParam, thread_count);
3283 qemu_mutex_init(&decomp_done_lock);
3284 qemu_cond_init(&decomp_done_cond);
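/* set up one zlib stream, scratch buffer and synchronization state per
 * decompression thread */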
3286 for (i = 0; i < thread_count; i++) {
3287 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3291 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3292 qemu_mutex_init(&decomp_param[i].mutex);
3293 qemu_cond_init(&decomp_param[i].cond);
3294 decomp_param[i].done = true;
3295 decomp_param[i].quit = false;
3296 qemu_thread_create(decompress_threads + i, "decompress",
3297 do_data_decompress, decomp_param + i,
3298 QEMU_THREAD_JOINABLE);
3302 compress_threads_load_cleanup();
3306 static void decompress_data_with_multi_threads(QEMUFile *f,
3307 void *host, int len)
3309 int idx, thread_count;
3311 thread_count = migrate_decompress_threads();
3312 qemu_mutex_lock(&decomp_done_lock);
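/* look for an idle decompression thread; if they are all busy, wait
 * below until one signals completion */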
3314 for (idx = 0; idx < thread_count; idx++) {
3315 if (decomp_param[idx].done) {
3316 decomp_param[idx].done = false;
3317 qemu_mutex_lock(&decomp_param[idx].mutex);
3318 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
3319 decomp_param[idx].des = host;
3320 decomp_param[idx].len = len;
3321 qemu_cond_signal(&decomp_param[idx].cond);
3322 qemu_mutex_unlock(&decomp_param[idx].mutex);
3326 if (idx < thread_count) {
3329 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3332 qemu_mutex_unlock(&decomp_done_lock);
3336 * ram_load_setup: Setup RAM for migration incoming side
3338 * Returns zero to indicate success and negative for error
3340 * @f: QEMUFile where to receive the data
3341 * @opaque: RAMState pointer
3343 static int ram_load_setup(QEMUFile *f, void *opaque)
3345 if (compress_threads_load_setup(f)) {
3349 xbzrle_load_setup();
3350 ramblock_recv_map_init();
3354 static int ram_load_cleanup(void *opaque)
3357 xbzrle_load_cleanup();
3358 compress_threads_load_cleanup();
3360 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
3361 g_free(rb->receivedmap);
3362 rb->receivedmap = NULL;
3368 * ram_postcopy_incoming_init: allocate postcopy data structures
3370 * Returns 0 for success and negative if there was an error
3372 * @mis: current migration incoming state
3374 * Allocate data structures etc needed by incoming migration with
3375 * postcopy-ram. postcopy-ram's similarly named
3376 * postcopy_ram_incoming_init does the work.
3378 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3380 unsigned long ram_pages = last_ram_page();
3382 return postcopy_ram_incoming_init(mis, ram_pages);
3386 * ram_load_postcopy: load a page in postcopy case
3388 * Returns 0 for success or -errno in case of error
3390 * Called in postcopy mode by ram_load().
3391 * rcu_read_lock is taken prior to this being called.
3393 * @f: QEMUFile to receive the data from
3395 static int ram_load_postcopy(QEMUFile *f)
3397 int flags = 0, ret = 0;
3398 bool place_needed = false;
3399 bool matching_page_sizes = false;
3400 MigrationIncomingState *mis = migration_incoming_get_current();
3401 /* Temporary page that is later 'placed' */
3402 void *postcopy_host_page = postcopy_get_tmp_page(mis);
3403 void *last_host = NULL;
3404 bool all_zero = false;
3406 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3409 void *page_buffer = NULL;
3410 void *place_source = NULL;
3411 RAMBlock *block = NULL;
3414 addr = qemu_get_be64(f);
3417 * If there is a QEMU file error, we should stop here; "addr" would then be invalid. */
3420 ret = qemu_file_get_error(f);
3425 flags = addr & ~TARGET_PAGE_MASK;
3426 addr &= TARGET_PAGE_MASK;
3428 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3429 place_needed = false;
3430 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
3431 block = ram_block_from_stream(f, flags);
3433 host = host_from_ram_block_offset(block, addr);
3435 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3439 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
3441 * Postcopy requires that we place whole host pages atomically;
3442 * these may be huge pages for RAMBlocks that are backed by
3444 * To make it atomic, the data is read into a temporary page
3445 * that's moved into place later.
3446 * The migration protocol uses, possibly smaller, target-pages
3447 * however the source ensures it always sends all the components
3448 * of a host page in order.
3450 page_buffer = postcopy_host_page +
3451 ((uintptr_t)host & (block->page_size - 1));
3452 /* If all TP are zero then we can optimise the place */
3453 if (!((uintptr_t)host & (block->page_size - 1))) {
3456 /* not the 1st TP within the HP */
3457 if (host != (last_host + TARGET_PAGE_SIZE)) {
3458 error_report("Non-sequential target page %p/%p",
3467 * If it's the last part of a host page then we place the host page. */
3470 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
3471 (block->page_size - 1)) == 0;
3472 place_source = postcopy_host_page;
3476 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3477 case RAM_SAVE_FLAG_ZERO:
3478 ch = qemu_get_byte(f);
3479 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3485 case RAM_SAVE_FLAG_PAGE:
3487 if (!place_needed || !matching_page_sizes) {
3488 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3490 /* Avoids the qemu_file copy during postcopy, which is
3491 * going to do a copy later; can only do it when we
3492 * do this read in one go (matching page sizes)
3494 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3498 case RAM_SAVE_FLAG_EOS:
3502 error_report("Unknown combination of migration flags: %#x"
3503 " (postcopy mode)", flags);
3508 /* Check for any possible file errors */
3509 if (!ret && qemu_file_get_error(f)) {
3510 ret = qemu_file_get_error(f);
3513 if (!ret && place_needed) {
3514 /* This gets called at the last target page in the host page */
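/* step back from that last target page to the host-page-aligned start,
 * which is where the whole page gets placed */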
3515 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
3518 ret = postcopy_place_page_zero(mis, place_dest,
3521 ret = postcopy_place_page(mis, place_dest,
3522 place_source, block);
3530 static bool postcopy_is_advised(void)
3532 PostcopyState ps = postcopy_state_get();
3533 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3536 static bool postcopy_is_running(void)
3538 PostcopyState ps = postcopy_state_get();
3539 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
3542 static int ram_load(QEMUFile *f, void *opaque, int version_id)
3544 int flags = 0, ret = 0, invalid_flags = 0;
3545 static uint64_t seq_iter;
3548 * If the system is running in postcopy mode, page inserts into host memory must
3551 bool postcopy_running = postcopy_is_running();
3552 /* ADVISE comes earlier; it shows the source has the postcopy capability enabled */
3553 bool postcopy_advised = postcopy_is_advised();
3557 if (version_id != 4) {
3561 if (!migrate_use_compression()) {
3562 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3564 /* This RCU critical section can be very long running.
3565 * When RCU reclaims in the code start to become numerous,
3566 * it will be necessary to reduce the granularity of this critical section. */
3571 if (postcopy_running) {
3572 ret = ram_load_postcopy(f);
3575 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3576 ram_addr_t addr, total_ram_bytes;
3580 addr = qemu_get_be64(f);
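/* the address is target-page aligned, so the RAM_SAVE_FLAG_* bits travel in its low bits */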
3581 flags = addr & ~TARGET_PAGE_MASK;
3582 addr &= TARGET_PAGE_MASK;
3584 if (flags & invalid_flags) {
3585 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3586 error_report("Received an unexpected compressed page");
3593 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3594 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
3595 RAMBlock *block = ram_block_from_stream(f, flags);
3597 host = host_from_ram_block_offset(block, addr);
3599 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3603 ramblock_recv_bitmap_set(block, host);
3604 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
3607 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3608 case RAM_SAVE_FLAG_MEM_SIZE:
3609 /* Synchronize RAM block list */
3610 total_ram_bytes = addr;
3611 while (!ret && total_ram_bytes) {
3616 len = qemu_get_byte(f);
3617 qemu_get_buffer(f, (uint8_t *)id, len);
3619 length = qemu_get_be64(f);
3621 block = qemu_ram_block_by_name(id);
3622 if (block && !qemu_ram_is_migratable(block)) {
3623 error_report("block %s should not be migrated !", id);
3626 if (length != block->used_length) {
3627 Error *local_err = NULL;
3629 ret = qemu_ram_resize(block, length,
3632 error_report_err(local_err);
3635 /* For postcopy we need to check hugepage sizes match */
3636 if (postcopy_advised &&
3637 block->page_size != qemu_host_page_size) {
3638 uint64_t remote_page_size = qemu_get_be64(f);
3639 if (remote_page_size != block->page_size) {
3640 error_report("Mismatched RAM page size %s "
3641 "(local) %zd != %" PRId64,
3642 id, block->page_size,
3647 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3650 error_report("Unknown ramblock \"%s\", cannot "
3651 "accept migration", id);
3655 total_ram_bytes -= length;
3659 case RAM_SAVE_FLAG_ZERO:
3660 ch = qemu_get_byte(f);
3661 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3664 case RAM_SAVE_FLAG_PAGE:
3665 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3668 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3669 len = qemu_get_be32(f);
3670 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3671 error_report("Invalid compressed data length: %d", len);
3675 decompress_data_with_multi_threads(f, host, len);
3678 case RAM_SAVE_FLAG_XBZRLE:
3679 if (load_xbzrle(f, addr, host) < 0) {
3680 error_report("Failed to decompress XBZRLE page at "
3681 RAM_ADDR_FMT, addr);
3686 case RAM_SAVE_FLAG_EOS:
3690 if (flags & RAM_SAVE_FLAG_HOOK) {
3691 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
3693 error_report("Unknown combination of migration flags: %#x",
3699 ret = qemu_file_get_error(f);
3703 ret |= wait_for_decompress_done();
3705 trace_ram_load_complete(ret, seq_iter);
3709 static bool ram_has_postcopy(void *opaque)
3711 return migrate_postcopy_ram();
3714 /* Sync all the dirty bitmaps with the destination VM. */
3715 static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3718 QEMUFile *file = s->to_dst_file;
3719 int ramblock_count = 0;
3721 trace_ram_dirty_bitmap_sync_start();
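/* ask the destination for the received bitmap of every migratable block;
 * the replies are waited for below, one per block */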
3723 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3724 qemu_savevm_send_recv_bitmap(file, block->idstr);
3725 trace_ram_dirty_bitmap_request(block->idstr);
3729 trace_ram_dirty_bitmap_sync_wait();
3731 /* Wait until all the ramblocks' dirty bitmaps are synced */
3732 while (ramblock_count--) {
3733 qemu_sem_wait(&s->rp_state.rp_sem);
3736 trace_ram_dirty_bitmap_sync_complete();
3741 static void ram_dirty_bitmap_reload_notify(MigrationState *s)
3743 qemu_sem_post(&s->rp_state.rp_sem);
3747 * Read the received bitmap and invert it to form the initial dirty bitmap.
3748 * This is only used when the postcopy migration is paused but wants
3749 * to resume from a middle point.
3751 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
3754 QEMUFile *file = s->rp_state.from_dst_file;
3755 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
3756 uint64_t local_size = nbits / 8;
3757 uint64_t size, end_mark;
3759 trace_ram_dirty_bitmap_reload_begin(block->idstr);
3761 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
3762 error_report("%s: incorrect state %s", __func__,
3763 MigrationStatus_str(s->state));
3768 * Note: see comments in ramblock_recv_bitmap_send() on why we
3769 * need the endianness conversion, and the padding.
3771 local_size = ROUND_UP(local_size, 8);
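/* the sender pads the bitmap to an 8-byte boundary, so allocate one
 * extra long to make sure the padded data fits */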
3774 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
3776 size = qemu_get_be64(file);
3778 /* The size of the bitmap should match with our ramblock */
3779 if (size != local_size) {
3780 error_report("%s: ramblock '%s' bitmap size mismatch "
3781 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
3782 block->idstr, size, local_size);
3787 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
3788 end_mark = qemu_get_be64(file);
3790 ret = qemu_file_get_error(file);
3791 if (ret || size != local_size) {
3792 error_report("%s: read bitmap failed for ramblock '%s': %d"
3793 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
3794 __func__, block->idstr, ret, local_size, size);
3799 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
3800 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIx64,
3801 __func__, block->idstr, end_mark);
3807 * Endianness conversion. We are in postcopy (though paused),
3808 * so the dirty bitmap won't change and we can modify it directly.
3810 bitmap_from_le(block->bmap, le_bitmap, nbits);
3813 * What we received is the "received bitmap". Invert it to form the initial
3814 * dirty bitmap for this ramblock.
3816 bitmap_complement(block->bmap, block->bmap, nbits);
3818 trace_ram_dirty_bitmap_reload_complete(block->idstr);
3821 * We succeeded in syncing the bitmap for the current ramblock. If this is
3822 * the last one to sync, we need to notify the main send thread.
3824 ram_dirty_bitmap_reload_notify(s);
3832 static int ram_resume_prepare(MigrationState *s, void *opaque)
3834 RAMState *rs = *(RAMState **)opaque;
3837 ret = ram_dirty_bitmap_sync_all(s, rs);
3842 ram_state_resume_prepare(rs, s->to_dst_file);
3847 static SaveVMHandlers savevm_ram_handlers = {
3848 .save_setup = ram_save_setup,
3849 .save_live_iterate = ram_save_iterate,
3850 .save_live_complete_postcopy = ram_save_complete,
3851 .save_live_complete_precopy = ram_save_complete,
3852 .has_postcopy = ram_has_postcopy,
3853 .save_live_pending = ram_save_pending,
3854 .load_state = ram_load,
3855 .save_cleanup = ram_save_cleanup,
3856 .load_setup = ram_load_setup,
3857 .load_cleanup = ram_load_cleanup,
3858 .resume_prepare = ram_resume_prepare,
3861 void ram_mig_init(void)
3863 qemu_mutex_init(&XBZRLE.lock);
3864 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);