4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
29 #include "qemu/osdep.h"
32 #include "qemu/cutils.h"
33 #include "qemu/bitops.h"
34 #include "qemu/bitmap.h"
35 #include "qemu/main-loop.h"
38 #include "migration.h"
40 #include "migration/register.h"
41 #include "migration/misc.h"
42 #include "qemu-file.h"
43 #include "postcopy-ram.h"
44 #include "page_cache.h"
45 #include "qemu/error-report.h"
46 #include "qapi/error.h"
47 #include "qapi/qapi-events-migration.h"
48 #include "qapi/qmp/qerror.h"
50 #include "exec/ram_addr.h"
51 #include "exec/target_page.h"
52 #include "qemu/rcu_queue.h"
53 #include "migration/colo.h"
55 #include "sysemu/sysemu.h"
56 #include "qemu/uuid.h"
59 /***********************************************************/
60 /* ram save/restore */
62 /* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
63 * worked for pages that were filled with the same char. We switched
64 * it to only search for the zero value. And to avoid confusion with
65 * RAM_SAVE_FLAG_COMPRESS_PAGE just rename it.
68 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
69 #define RAM_SAVE_FLAG_ZERO 0x02
70 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
71 #define RAM_SAVE_FLAG_PAGE 0x08
72 #define RAM_SAVE_FLAG_EOS 0x10
73 #define RAM_SAVE_FLAG_CONTINUE 0x20
74 #define RAM_SAVE_FLAG_XBZRLE 0x40
75 /* 0x80 is reserved in migration.h start with 0x100 next */
76 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
/*
 * is_zero_range: thin wrapper that hands @p and @size to
 * buffer_is_zero() and returns whatever that helper reports.
 */
static inline bool is_zero_range(uint8_t *p, uint64_t size)
{
    const bool all_zero = buffer_is_zero(p, size);

    return all_zero;
}
83 XBZRLECacheStats xbzrle_counters;
85 /* struct contains XBZRLE cache and a static page
86 used by the compression */
88 /* buffer used for XBZRLE encoding */
90 /* buffer for storing page content */
92 /* Cache for XBZRLE, Protected by lock. */
95 /* it will store a page full of zeros */
96 uint8_t *zero_target_page;
97 /* buffer used for XBZRLE decoding */
101 static void XBZRLE_cache_lock(void)
103 if (migrate_use_xbzrle())
104 qemu_mutex_lock(&XBZRLE.lock);
107 static void XBZRLE_cache_unlock(void)
109 if (migrate_use_xbzrle())
110 qemu_mutex_unlock(&XBZRLE.lock);
114 * xbzrle_cache_resize: resize the xbzrle cache
116 * This function is called from qmp_migrate_set_cache_size in main
117 * thread, possibly while a migration is in progress. A running
118 * migration may be using the cache and might finish during this call,
119 * hence changes to the cache are protected by XBZRLE.lock().
121 * Returns 0 for success or -1 for error
123 * @new_size: new cache size
124 * @errp: set *errp if the check failed, with reason
126 int xbzrle_cache_resize(int64_t new_size, Error **errp)
128 PageCache *new_cache;
131 /* Check for truncation */
132 if (new_size != (size_t)new_size) {
133 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
134 "exceeding address space");
138 if (new_size == migrate_xbzrle_cache_size()) {
145 if (XBZRLE.cache != NULL) {
146 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
152 cache_fini(XBZRLE.cache);
153 XBZRLE.cache = new_cache;
156 XBZRLE_cache_unlock();
160 /* Should be holding either ram_list.mutex, or the RCU lock. */
161 #define RAMBLOCK_FOREACH_MIGRATABLE(block) \
162 INTERNAL_RAMBLOCK_FOREACH(block) \
163 if (!qemu_ram_is_migratable(block)) {} else
165 #undef RAMBLOCK_FOREACH
167 static void ramblock_recv_map_init(void)
171 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
172 assert(!rb->receivedmap);
173 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
177 int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
179 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
183 bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
185 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
188 void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
190 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
193 void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
196 bitmap_set_atomic(rb->receivedmap,
197 ramblock_recv_bitmap_offset(host_addr, rb),
201 #define RAMBLOCK_RECV_BITMAP_ENDING (0x0123456789abcdefULL)
204 * Format: bitmap_size (8 bytes) + whole_bitmap (N bytes).
206 * Returns >0 if success with sent bytes, or <0 if error.
208 int64_t ramblock_recv_bitmap_send(QEMUFile *file,
209 const char *block_name)
211 RAMBlock *block = qemu_ram_block_by_name(block_name);
212 unsigned long *le_bitmap, nbits;
216 error_report("%s: invalid block name: %s", __func__, block_name);
220 nbits = block->used_length >> TARGET_PAGE_BITS;
223 * Make sure the tmp bitmap buffer is big enough, e.g., on 32bit
224 * machines we may need 4 more bytes for padding (see below
225 * comment). So extend it a bit before hand.
227 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
230 * Always use little endian when sending the bitmap. This is
231 * required when source and destination VMs are not using the
232 * same endianness. (Note: big endian won't work.)
234 bitmap_to_le(le_bitmap, block->receivedmap, nbits);
236 /* Size of the bitmap, in bytes */
240 * size is always aligned to 8 bytes for 64bit machines, but it
241 * may not be true for 32bit machines. We need this padding to
242 * make sure the migration can survive even between 32bit and
245 size = ROUND_UP(size, 8);
247 qemu_put_be64(file, size);
248 qemu_put_buffer(file, (const uint8_t *)le_bitmap, size);
250 * Mark as an end, in case the middle part is screwed up due to
251 * some "mysterious" reason.
253 qemu_put_be64(file, RAMBLOCK_RECV_BITMAP_ENDING);
258 if (qemu_file_get_error(file)) {
259 return qemu_file_get_error(file);
262 return size + sizeof(size);
266 * An outstanding page request, on the source, having been received
269 struct RAMSrcPageRequest {
274 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
277 /* State of RAM for migration */
279 /* QEMUFile used for this migration */
281 /* Last block that we have visited searching for dirty pages */
282 RAMBlock *last_seen_block;
283 /* Last block from where we have sent data */
284 RAMBlock *last_sent_block;
285 /* Last dirty target page we have sent */
286 ram_addr_t last_page;
287 /* last ram version we have seen */
288 uint32_t last_version;
289 /* We are in the first round */
291 /* How many times we have dirtied too many pages */
292 int dirty_rate_high_cnt;
293 /* these variables are used for bitmap sync */
294 /* last time we did a full bitmap_sync */
295 int64_t time_last_bitmap_sync;
296 /* bytes transferred at start_time */
297 uint64_t bytes_xfer_prev;
298 /* number of dirty pages since start_time */
299 uint64_t num_dirty_pages_period;
300 /* xbzrle misses since the beginning of the period */
301 uint64_t xbzrle_cache_miss_prev;
302 /* number of iterations at the beginning of period */
303 uint64_t iterations_prev;
304 /* Iterations since start */
306 /* number of dirty bits in the bitmap */
307 uint64_t migration_dirty_pages;
308 /* protects modification of the bitmap */
309 QemuMutex bitmap_mutex;
310 /* The RAMBlock used in the last src_page_requests */
311 RAMBlock *last_req_rb;
312 /* Queue of outstanding page requests from the destination */
313 QemuMutex src_page_req_mutex;
314 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
316 typedef struct RAMState RAMState;
318 static RAMState *ram_state;
320 uint64_t ram_bytes_remaining(void)
322 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
326 MigrationStats ram_counters;
328 /* used by the search for pages to send */
329 struct PageSearchStatus {
330 /* Current block being searched */
332 /* Current page to search from */
334 /* Set once we wrap around */
337 typedef struct PageSearchStatus PageSearchStatus;
339 struct CompressParam {
348 /* internally used fields */
352 typedef struct CompressParam CompressParam;
354 struct DecompressParam {
364 typedef struct DecompressParam DecompressParam;
366 static CompressParam *comp_param;
367 static QemuThread *compress_threads;
368 /* comp_done_cond is used to wake up the migration thread when
369 * one of the compression threads has finished the compression.
370 * comp_done_lock is used to co-work with comp_done_cond.
372 static QemuMutex comp_done_lock;
373 static QemuCond comp_done_cond;
374 /* The empty QEMUFileOps will be used by file in CompressParam */
375 static const QEMUFileOps empty_ops = { };
377 static QEMUFile *decomp_file;
378 static DecompressParam *decomp_param;
379 static QemuThread *decompress_threads;
380 static QemuMutex decomp_done_lock;
381 static QemuCond decomp_done_cond;
383 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
384 ram_addr_t offset, uint8_t *source_buf);
386 static void *do_data_compress(void *opaque)
388 CompressParam *param = opaque;
392 qemu_mutex_lock(¶m->mutex);
393 while (!param->quit) {
395 block = param->block;
396 offset = param->offset;
398 qemu_mutex_unlock(¶m->mutex);
400 do_compress_ram_page(param->file, ¶m->stream, block, offset,
403 qemu_mutex_lock(&comp_done_lock);
405 qemu_cond_signal(&comp_done_cond);
406 qemu_mutex_unlock(&comp_done_lock);
408 qemu_mutex_lock(¶m->mutex);
410 qemu_cond_wait(¶m->cond, ¶m->mutex);
413 qemu_mutex_unlock(¶m->mutex);
418 static inline void terminate_compression_threads(void)
420 int idx, thread_count;
422 thread_count = migrate_compress_threads();
424 for (idx = 0; idx < thread_count; idx++) {
425 qemu_mutex_lock(&comp_param[idx].mutex);
426 comp_param[idx].quit = true;
427 qemu_cond_signal(&comp_param[idx].cond);
428 qemu_mutex_unlock(&comp_param[idx].mutex);
432 static void compress_threads_save_cleanup(void)
436 if (!migrate_use_compression()) {
439 terminate_compression_threads();
440 thread_count = migrate_compress_threads();
441 for (i = 0; i < thread_count; i++) {
443 * we use it as an indicator which shows if the thread is
444 * properly init'd or not
446 if (!comp_param[i].file) {
449 qemu_thread_join(compress_threads + i);
450 qemu_mutex_destroy(&comp_param[i].mutex);
451 qemu_cond_destroy(&comp_param[i].cond);
452 deflateEnd(&comp_param[i].stream);
453 g_free(comp_param[i].originbuf);
454 qemu_fclose(comp_param[i].file);
455 comp_param[i].file = NULL;
457 qemu_mutex_destroy(&comp_done_lock);
458 qemu_cond_destroy(&comp_done_cond);
459 g_free(compress_threads);
461 compress_threads = NULL;
465 static int compress_threads_save_setup(void)
469 if (!migrate_use_compression()) {
472 thread_count = migrate_compress_threads();
473 compress_threads = g_new0(QemuThread, thread_count);
474 comp_param = g_new0(CompressParam, thread_count);
475 qemu_cond_init(&comp_done_cond);
476 qemu_mutex_init(&comp_done_lock);
477 for (i = 0; i < thread_count; i++) {
478 comp_param[i].originbuf = g_try_malloc(TARGET_PAGE_SIZE);
479 if (!comp_param[i].originbuf) {
483 if (deflateInit(&comp_param[i].stream,
484 migrate_compress_level()) != Z_OK) {
485 g_free(comp_param[i].originbuf);
489 /* comp_param[i].file is just used as a dummy buffer to save data,
490 * set its ops to empty.
492 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
493 comp_param[i].done = true;
494 comp_param[i].quit = false;
495 qemu_mutex_init(&comp_param[i].mutex);
496 qemu_cond_init(&comp_param[i].cond);
497 qemu_thread_create(compress_threads + i, "compress",
498 do_data_compress, comp_param + i,
499 QEMU_THREAD_JOINABLE);
504 compress_threads_save_cleanup();
510 #define MULTIFD_MAGIC 0x11223344U
511 #define MULTIFD_VERSION 1
513 #define MULTIFD_FLAG_SYNC (1 << 0)
518 unsigned char uuid[16]; /* QemuUUID */
520 } __attribute__((packed)) MultiFDInit_t;
531 } __attribute__((packed)) MultiFDPacket_t;
534 /* number of used pages */
536 /* number of allocated pages */
538 /* global number of generated multifd packets */
540 /* offset of each page */
542 /* pointer to each page */
548 /* these fields are not changed once the thread is created */
551 /* channel thread name */
553 /* channel thread id */
555 /* communication channel */
557 /* sem where to wait for more work */
559 /* this mutex protects the following parameters */
561 /* is this channel thread running */
563 /* should this thread finish */
565 /* thread has work to do */
567 /* array of pages to send */
568 MultiFDPages_t *pages;
569 /* packet allocated len */
571 /* pointer to the packet */
572 MultiFDPacket_t *packet;
573 /* multifd flags for each packet */
575 /* global number of generated multifd packets */
577 /* thread local variables */
578 /* packets sent through this channel */
579 uint64_t num_packets;
580 /* pages sent through this channel */
582 /* syncs main thread and channels */
583 QemuSemaphore sem_sync;
587 /* these fields are not changed once the thread is created */
590 /* channel thread name */
592 /* channel thread id */
594 /* communication channel */
596 /* sem where to wait for more work */
598 /* this mutex protects the following parameters */
600 /* is this channel thread running */
602 /* should this thread finish */
604 /* thread has work to do */
606 /* array of pages to receive */
607 MultiFDPages_t *pages;
608 /* packet allocated len */
610 /* pointer to the packet */
611 MultiFDPacket_t *packet;
612 /* multifd flags for each packet */
614 /* global number of generated multifd packets */
616 /* thread local variables */
617 /* packets sent through this channel */
618 uint64_t num_packets;
619 /* pages sent through this channel */
621 /* syncs main thread and channels */
622 QemuSemaphore sem_sync;
625 static int multifd_send_initial_packet(MultiFDSendParams *p, Error **errp)
630 msg.magic = cpu_to_be32(MULTIFD_MAGIC);
631 msg.version = cpu_to_be32(MULTIFD_VERSION);
633 memcpy(msg.uuid, &qemu_uuid.data, sizeof(msg.uuid));
635 ret = qio_channel_write_all(p->c, (char *)&msg, sizeof(msg), errp);
642 static int multifd_recv_initial_packet(QIOChannel *c, Error **errp)
647 ret = qio_channel_read_all(c, (char *)&msg, sizeof(msg), errp);
652 be32_to_cpus(&msg.magic);
653 be32_to_cpus(&msg.version);
655 if (msg.magic != MULTIFD_MAGIC) {
656 error_setg(errp, "multifd: received packet magic %x "
657 "expected %x", msg.magic, MULTIFD_MAGIC);
661 if (msg.version != MULTIFD_VERSION) {
662 error_setg(errp, "multifd: received packet version %d "
663 "expected %d", msg.version, MULTIFD_VERSION);
667 if (memcmp(msg.uuid, &qemu_uuid, sizeof(qemu_uuid))) {
668 char *uuid = qemu_uuid_unparse_strdup(&qemu_uuid);
669 char *msg_uuid = qemu_uuid_unparse_strdup((const QemuUUID *)msg.uuid);
671 error_setg(errp, "multifd: received uuid '%s' and expected "
672 "uuid '%s' for channel %hhd", msg_uuid, uuid, msg.id);
678 if (msg.id > migrate_multifd_channels()) {
679 error_setg(errp, "multifd: received channel version %d "
680 "expected %d", msg.version, MULTIFD_VERSION);
687 static MultiFDPages_t *multifd_pages_init(size_t size)
689 MultiFDPages_t *pages = g_new0(MultiFDPages_t, 1);
691 pages->allocated = size;
692 pages->iov = g_new0(struct iovec, size);
693 pages->offset = g_new0(ram_addr_t, size);
698 static void multifd_pages_clear(MultiFDPages_t *pages)
701 pages->allocated = 0;
702 pages->packet_num = 0;
706 g_free(pages->offset);
707 pages->offset = NULL;
711 static void multifd_send_fill_packet(MultiFDSendParams *p)
713 MultiFDPacket_t *packet = p->packet;
716 packet->magic = cpu_to_be32(MULTIFD_MAGIC);
717 packet->version = cpu_to_be32(MULTIFD_VERSION);
718 packet->flags = cpu_to_be32(p->flags);
719 packet->size = cpu_to_be32(migrate_multifd_page_count());
720 packet->used = cpu_to_be32(p->pages->used);
721 packet->packet_num = cpu_to_be64(p->packet_num);
723 if (p->pages->block) {
724 strncpy(packet->ramblock, p->pages->block->idstr, 256);
727 for (i = 0; i < p->pages->used; i++) {
728 packet->offset[i] = cpu_to_be64(p->pages->offset[i]);
732 static int multifd_recv_unfill_packet(MultiFDRecvParams *p, Error **errp)
734 MultiFDPacket_t *packet = p->packet;
738 /* ToDo: We can't use it until we have received a message */
741 be32_to_cpus(&packet->magic);
742 if (packet->magic != MULTIFD_MAGIC) {
743 error_setg(errp, "multifd: received packet "
744 "magic %x and expected magic %x",
745 packet->magic, MULTIFD_MAGIC);
749 be32_to_cpus(&packet->version);
750 if (packet->version != MULTIFD_VERSION) {
751 error_setg(errp, "multifd: received packet "
752 "version %d and expected version %d",
753 packet->version, MULTIFD_VERSION);
757 p->flags = be32_to_cpu(packet->flags);
759 be32_to_cpus(&packet->size);
760 if (packet->size > migrate_multifd_page_count()) {
761 error_setg(errp, "multifd: received packet "
762 "with size %d and expected maximum size %d",
763 packet->size, migrate_multifd_page_count()) ;
767 p->pages->used = be32_to_cpu(packet->used);
768 if (p->pages->used > packet->size) {
769 error_setg(errp, "multifd: received packet "
770 "with size %d and expected maximum size %d",
771 p->pages->used, packet->size) ;
775 p->packet_num = be64_to_cpu(packet->packet_num);
777 if (p->pages->used) {
778 /* make sure that ramblock is 0 terminated */
779 packet->ramblock[255] = 0;
780 block = qemu_ram_block_by_name(packet->ramblock);
782 error_setg(errp, "multifd: unknown ram block %s",
788 for (i = 0; i < p->pages->used; i++) {
789 ram_addr_t offset = be64_to_cpu(packet->offset[i]);
791 if (offset > (block->used_length - TARGET_PAGE_SIZE)) {
792 error_setg(errp, "multifd: offset too long " RAM_ADDR_FMT
793 " (max " RAM_ADDR_FMT ")",
794 offset, block->max_length);
797 p->pages->iov[i].iov_base = block->host + offset;
798 p->pages->iov[i].iov_len = TARGET_PAGE_SIZE;
805 MultiFDSendParams *params;
806 /* number of created threads */
808 /* array of pages to send */
809 MultiFDPages_t *pages;
810 /* syncs main thread and channels */
811 QemuSemaphore sem_sync;
812 /* global number of generated multifd packets */
814 } *multifd_send_state;
816 static void multifd_send_terminate_threads(Error *err)
821 MigrationState *s = migrate_get_current();
822 migrate_set_error(s, err);
823 if (s->state == MIGRATION_STATUS_SETUP ||
824 s->state == MIGRATION_STATUS_PRE_SWITCHOVER ||
825 s->state == MIGRATION_STATUS_DEVICE ||
826 s->state == MIGRATION_STATUS_ACTIVE) {
827 migrate_set_state(&s->state, s->state,
828 MIGRATION_STATUS_FAILED);
832 for (i = 0; i < migrate_multifd_channels(); i++) {
833 MultiFDSendParams *p = &multifd_send_state->params[i];
835 qemu_mutex_lock(&p->mutex);
837 qemu_sem_post(&p->sem);
838 qemu_mutex_unlock(&p->mutex);
842 int multifd_save_cleanup(Error **errp)
847 if (!migrate_use_multifd()) {
850 multifd_send_terminate_threads(NULL);
851 for (i = 0; i < migrate_multifd_channels(); i++) {
852 MultiFDSendParams *p = &multifd_send_state->params[i];
855 qemu_thread_join(&p->thread);
857 socket_send_channel_destroy(p->c);
859 qemu_mutex_destroy(&p->mutex);
860 qemu_sem_destroy(&p->sem);
861 qemu_sem_destroy(&p->sem_sync);
864 multifd_pages_clear(p->pages);
870 qemu_sem_destroy(&multifd_send_state->sem_sync);
871 g_free(multifd_send_state->params);
872 multifd_send_state->params = NULL;
873 multifd_pages_clear(multifd_send_state->pages);
874 multifd_send_state->pages = NULL;
875 g_free(multifd_send_state);
876 multifd_send_state = NULL;
880 static void multifd_send_sync_main(void)
884 if (!migrate_use_multifd()) {
887 for (i = 0; i < migrate_multifd_channels(); i++) {
888 MultiFDSendParams *p = &multifd_send_state->params[i];
890 trace_multifd_send_sync_main_signal(p->id);
892 qemu_mutex_lock(&p->mutex);
893 p->flags |= MULTIFD_FLAG_SYNC;
895 qemu_mutex_unlock(&p->mutex);
896 qemu_sem_post(&p->sem);
898 for (i = 0; i < migrate_multifd_channels(); i++) {
899 MultiFDSendParams *p = &multifd_send_state->params[i];
901 trace_multifd_send_sync_main_wait(p->id);
902 qemu_sem_wait(&multifd_send_state->sem_sync);
904 trace_multifd_send_sync_main(multifd_send_state->packet_num);
907 static void *multifd_send_thread(void *opaque)
909 MultiFDSendParams *p = opaque;
910 Error *local_err = NULL;
912 trace_multifd_send_thread_start(p->id);
914 if (multifd_send_initial_packet(p, &local_err) < 0) {
921 qemu_sem_wait(&p->sem);
922 qemu_mutex_lock(&p->mutex);
924 if (p->pending_job) {
925 uint32_t used = p->pages->used;
926 uint64_t packet_num = p->packet_num;
927 uint32_t flags = p->flags;
929 multifd_send_fill_packet(p);
932 p->num_pages += used;
934 qemu_mutex_unlock(&p->mutex);
936 trace_multifd_send(p->id, packet_num, used, flags);
938 /* ToDo: send packet here */
940 qemu_mutex_lock(&p->mutex);
942 qemu_mutex_unlock(&p->mutex);
944 if (flags & MULTIFD_FLAG_SYNC) {
945 qemu_sem_post(&multifd_send_state->sem_sync);
947 } else if (p->quit) {
948 qemu_mutex_unlock(&p->mutex);
951 qemu_mutex_unlock(&p->mutex);
952 /* sometimes there are spurious wakeups */
958 multifd_send_terminate_threads(local_err);
961 qemu_mutex_lock(&p->mutex);
963 qemu_mutex_unlock(&p->mutex);
965 trace_multifd_send_thread_end(p->id, p->num_packets, p->num_pages);
970 static void multifd_new_send_channel_async(QIOTask *task, gpointer opaque)
972 MultiFDSendParams *p = opaque;
973 QIOChannel *sioc = QIO_CHANNEL(qio_task_get_source(task));
974 Error *local_err = NULL;
976 if (qio_task_propagate_error(task, &local_err)) {
977 if (multifd_save_cleanup(&local_err) != 0) {
978 migrate_set_error(migrate_get_current(), local_err);
981 p->c = QIO_CHANNEL(sioc);
982 qio_channel_set_delay(p->c, false);
984 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
985 QEMU_THREAD_JOINABLE);
987 atomic_inc(&multifd_send_state->count);
991 int multifd_save_setup(void)
994 uint32_t page_count = migrate_multifd_page_count();
997 if (!migrate_use_multifd()) {
1000 thread_count = migrate_multifd_channels();
1001 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
1002 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
1003 atomic_set(&multifd_send_state->count, 0);
1004 multifd_send_state->pages = multifd_pages_init(page_count);
1005 qemu_sem_init(&multifd_send_state->sem_sync, 0);
1007 for (i = 0; i < thread_count; i++) {
1008 MultiFDSendParams *p = &multifd_send_state->params[i];
1010 qemu_mutex_init(&p->mutex);
1011 qemu_sem_init(&p->sem, 0);
1012 qemu_sem_init(&p->sem_sync, 0);
1016 p->pages = multifd_pages_init(page_count);
1017 p->packet_len = sizeof(MultiFDPacket_t)
1018 + sizeof(ram_addr_t) * page_count;
1019 p->packet = g_malloc0(p->packet_len);
1020 p->name = g_strdup_printf("multifdsend_%d", i);
1021 socket_send_channel_create(multifd_new_send_channel_async, p);
1027 MultiFDRecvParams *params;
1028 /* number of created threads */
1030 /* syncs main thread and channels */
1031 QemuSemaphore sem_sync;
1032 /* global number of generated multifd packets */
1033 uint64_t packet_num;
1034 } *multifd_recv_state;
1036 static void multifd_recv_terminate_threads(Error *err)
1041 MigrationState *s = migrate_get_current();
1042 migrate_set_error(s, err);
1043 if (s->state == MIGRATION_STATUS_SETUP ||
1044 s->state == MIGRATION_STATUS_ACTIVE) {
1045 migrate_set_state(&s->state, s->state,
1046 MIGRATION_STATUS_FAILED);
1050 for (i = 0; i < migrate_multifd_channels(); i++) {
1051 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1053 qemu_mutex_lock(&p->mutex);
1055 qemu_sem_post(&p->sem);
1056 qemu_mutex_unlock(&p->mutex);
1060 int multifd_load_cleanup(Error **errp)
1065 if (!migrate_use_multifd()) {
1068 multifd_recv_terminate_threads(NULL);
1069 for (i = 0; i < migrate_multifd_channels(); i++) {
1070 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1073 qemu_thread_join(&p->thread);
1075 object_unref(OBJECT(p->c));
1077 qemu_mutex_destroy(&p->mutex);
1078 qemu_sem_destroy(&p->sem);
1079 qemu_sem_destroy(&p->sem_sync);
1082 multifd_pages_clear(p->pages);
1088 qemu_sem_destroy(&multifd_recv_state->sem_sync);
1089 g_free(multifd_recv_state->params);
1090 multifd_recv_state->params = NULL;
1091 g_free(multifd_recv_state);
1092 multifd_recv_state = NULL;
1097 static void multifd_recv_sync_main(void)
1101 if (!migrate_use_multifd()) {
1104 for (i = 0; i < migrate_multifd_channels(); i++) {
1105 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1107 trace_multifd_recv_sync_main_signal(p->id);
1108 qemu_mutex_lock(&p->mutex);
1109 p->pending_job = true;
1110 qemu_mutex_unlock(&p->mutex);
1112 for (i = 0; i < migrate_multifd_channels(); i++) {
1113 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1115 trace_multifd_recv_sync_main_wait(p->id);
1116 qemu_sem_wait(&multifd_recv_state->sem_sync);
1117 qemu_mutex_lock(&p->mutex);
1118 if (multifd_recv_state->packet_num < p->packet_num) {
1119 multifd_recv_state->packet_num = p->packet_num;
1121 qemu_mutex_unlock(&p->mutex);
1123 for (i = 0; i < migrate_multifd_channels(); i++) {
1124 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1126 trace_multifd_recv_sync_main_signal(p->id);
1128 qemu_sem_post(&p->sem_sync);
1130 trace_multifd_recv_sync_main(multifd_recv_state->packet_num);
1133 static void *multifd_recv_thread(void *opaque)
1135 MultiFDRecvParams *p = opaque;
1136 Error *local_err = NULL;
1139 trace_multifd_recv_thread_start(p->id);
1145 /* ToDo: recv packet here */
1147 qemu_mutex_lock(&p->mutex);
1148 ret = multifd_recv_unfill_packet(p, &local_err);
1150 qemu_mutex_unlock(&p->mutex);
1154 used = p->pages->used;
1156 trace_multifd_recv(p->id, p->packet_num, used, flags);
1157 p->pending_job = false;
1159 p->num_pages += used;
1160 qemu_mutex_unlock(&p->mutex);
1162 if (flags & MULTIFD_FLAG_SYNC) {
1163 qemu_sem_post(&multifd_recv_state->sem_sync);
1164 qemu_sem_wait(&p->sem_sync);
1169 multifd_recv_terminate_threads(local_err);
1171 qemu_mutex_lock(&p->mutex);
1173 qemu_mutex_unlock(&p->mutex);
1175 trace_multifd_recv_thread_end(p->id, p->num_packets, p->num_pages);
1180 int multifd_load_setup(void)
1183 uint32_t page_count = migrate_multifd_page_count();
1186 if (!migrate_use_multifd()) {
1189 thread_count = migrate_multifd_channels();
1190 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
1191 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
1192 atomic_set(&multifd_recv_state->count, 0);
1193 qemu_sem_init(&multifd_recv_state->sem_sync, 0);
1195 for (i = 0; i < thread_count; i++) {
1196 MultiFDRecvParams *p = &multifd_recv_state->params[i];
1198 qemu_mutex_init(&p->mutex);
1199 qemu_sem_init(&p->sem, 0);
1200 qemu_sem_init(&p->sem_sync, 0);
1202 p->pending_job = false;
1204 p->pages = multifd_pages_init(page_count);
1205 p->packet_len = sizeof(MultiFDPacket_t)
1206 + sizeof(ram_addr_t) * page_count;
1207 p->packet = g_malloc0(p->packet_len);
1208 p->name = g_strdup_printf("multifdrecv_%d", i);
1213 bool multifd_recv_all_channels_created(void)
1215 int thread_count = migrate_multifd_channels();
1217 if (!migrate_use_multifd()) {
1221 return thread_count == atomic_read(&multifd_recv_state->count);
1224 void multifd_recv_new_channel(QIOChannel *ioc)
1226 MultiFDRecvParams *p;
1227 Error *local_err = NULL;
1230 id = multifd_recv_initial_packet(ioc, &local_err);
1232 multifd_recv_terminate_threads(local_err);
1236 p = &multifd_recv_state->params[id];
1238 error_setg(&local_err, "multifd: received id '%d' already setup'",
1240 multifd_recv_terminate_threads(local_err);
1244 object_ref(OBJECT(ioc));
1245 /* initial packet */
1249 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
1250 QEMU_THREAD_JOINABLE);
1251 atomic_inc(&multifd_recv_state->count);
1252 if (multifd_recv_state->count == migrate_multifd_channels()) {
1253 migration_incoming_process();
1258 * save_page_header: write page header to wire
1260 * If this is the 1st block, it also writes the block identification
1262 * Returns the number of bytes written
1264 * @f: QEMUFile where to send the data
1265 * @block: block that contains the page we want to send
1266 * @offset: offset inside the block for the page
1267 * in the lower bits, it contains flags
1269 static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
1274 if (block == rs->last_sent_block) {
1275 offset |= RAM_SAVE_FLAG_CONTINUE;
1277 qemu_put_be64(f, offset);
1280 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
1281 len = strlen(block->idstr);
1282 qemu_put_byte(f, len);
1283 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
1285 rs->last_sent_block = block;
1291 * mig_throttle_guest_down: throotle down the guest
1293 * Reduce amount of guest cpu execution to hopefully slow down memory
1294 * writes. If guest dirty memory rate is reduced below the rate at
1295 * which we can transfer pages to the destination then we should be
1296 * able to complete migration. Some workloads dirty memory way too
1297 * fast and will not effectively converge, even with auto-converge.
1299 static void mig_throttle_guest_down(void)
1301 MigrationState *s = migrate_get_current();
1302 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
1303 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
1305 /* We have not started throttling yet. Let's start it. */
1306 if (!cpu_throttle_active()) {
1307 cpu_throttle_set(pct_initial);
1309 /* Throttling already on, just increase the rate */
1310 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
1315 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
1317 * @rs: current RAM state
1318 * @current_addr: address for the zero page
1320 * Update the xbzrle cache to reflect a page that's been sent as all 0.
1321 * The important thing is that a stale (not-yet-0'd) page be replaced
1323 * As a bonus, if the page wasn't in the cache it gets added so that
1324 * when a small write is made into the 0'd page it gets XBZRLE sent.
1326 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
1328 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
1332 /* We don't care if this fails to allocate a new cache page
1333 * as long as it updated an old one */
1334 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
1335 ram_counters.dirty_sync_count);
1338 #define ENCODING_FLAG_XBZRLE 0x1
1341 * save_xbzrle_page: compress and send current page
1343 * Returns: 1 means that we wrote the page
1344 * 0 means that page is identical to the one already sent
1345 * -1 means that xbzrle would be longer than normal
1347 * @rs: current RAM state
1348 * @current_data: pointer to the address of the page contents
1349 * @current_addr: addr of the page
1350 * @block: block that contains the page we want to send
1351 * @offset: offset inside the block for the page
1352 * @last_stage: if we are at the completion stage
1354 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
1355 ram_addr_t current_addr, RAMBlock *block,
1356 ram_addr_t offset, bool last_stage)
1358 int encoded_len = 0, bytes_xbzrle;
1359 uint8_t *prev_cached_page;
1361 if (!cache_is_cached(XBZRLE.cache, current_addr,
1362 ram_counters.dirty_sync_count)) {
1363 xbzrle_counters.cache_miss++;
1365 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
1366 ram_counters.dirty_sync_count) == -1) {
1369 /* update *current_data when the page has been
1370 inserted into cache */
1371 *current_data = get_cached_data(XBZRLE.cache, current_addr);
1377 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
1379 /* save current buffer into memory */
1380 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
1382 /* XBZRLE encoding (if there is no overflow) */
1383 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
1384 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
1386 if (encoded_len == 0) {
1387 trace_save_xbzrle_page_skipping();
1389 } else if (encoded_len == -1) {
1390 trace_save_xbzrle_page_overflow();
1391 xbzrle_counters.overflow++;
1392 /* update data in the cache */
1394 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
1395 *current_data = prev_cached_page;
1400 /* we need to update the data in the cache, in order to get the same data */
1402 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
1405 /* Send XBZRLE based compressed page */
1406 bytes_xbzrle = save_page_header(rs, rs->f, block,
1407 offset | RAM_SAVE_FLAG_XBZRLE);
1408 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
1409 qemu_put_be16(rs->f, encoded_len);
1410 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
1411 bytes_xbzrle += encoded_len + 1 + 2;
1412 xbzrle_counters.pages++;
1413 xbzrle_counters.bytes += bytes_xbzrle;
1414 ram_counters.transferred += bytes_xbzrle;
1420 * migration_bitmap_find_dirty: find the next dirty page from start
1422 * Called with rcu_read_lock() to protect migration_bitmap
1424 * Returns the byte offset within memory region of the start of a dirty page
1426 * @rs: current RAM state
1427 * @rb: RAMBlock where to search for dirty pages
1428 * @start: page where we start the search
1431 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
1432 unsigned long start)
1434 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
1435 unsigned long *bitmap = rb->bmap;
1438 if (!qemu_ram_is_migratable(rb)) {
1442 if (rs->ram_bulk_stage && start > 0) {
1445 next = find_next_bit(bitmap, size, start);
1451 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
1457 ret = test_and_clear_bit(page, rb->bmap);
1460 rs->migration_dirty_pages--;
1465 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
1466 ram_addr_t start, ram_addr_t length)
1468 rs->migration_dirty_pages +=
1469 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
1470 &rs->num_dirty_pages_period);
1474 * ram_pagesize_summary: calculate all the pagesizes of a VM
1476 * Returns a summary bitmap of the page sizes of all RAMBlocks
1478 * For VMs with just normal pages this is equivalent to the host page
1479 * size. If it's got some huge pages then it's the OR of all the
1480 * different page sizes.
1482 uint64_t ram_pagesize_summary(void)
1485 uint64_t summary = 0;
1487 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1488 summary |= block->page_size;
1494 static void migration_update_rates(RAMState *rs, int64_t end_time)
1496 uint64_t iter_count = rs->iterations - rs->iterations_prev;
1498 /* calculate period counters */
1499 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
1500 / (end_time - rs->time_last_bitmap_sync);
1506 if (migrate_use_xbzrle()) {
1507 xbzrle_counters.cache_miss_rate = (double)(xbzrle_counters.cache_miss -
1508 rs->xbzrle_cache_miss_prev) / iter_count;
1509 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
1513 static void migration_bitmap_sync(RAMState *rs)
1517 uint64_t bytes_xfer_now;
1519 ram_counters.dirty_sync_count++;
1521 if (!rs->time_last_bitmap_sync) {
1522 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1525 trace_migration_bitmap_sync_start();
1526 memory_global_dirty_log_sync();
1528 qemu_mutex_lock(&rs->bitmap_mutex);
1530 RAMBLOCK_FOREACH_MIGRATABLE(block) {
1531 migration_bitmap_sync_range(rs, block, 0, block->used_length);
1533 ram_counters.remaining = ram_bytes_remaining();
1535 qemu_mutex_unlock(&rs->bitmap_mutex);
1537 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1539 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
1541 /* more than 1 second = 1000 milliseconds */
1542 if (end_time > rs->time_last_bitmap_sync + 1000) {
1543 bytes_xfer_now = ram_counters.transferred;
1545 /* During block migration the auto-converge logic incorrectly detects
1546 * that ram migration makes no progress. Avoid this by disabling the
1547 * throttling logic during the bulk phase of block migration. */
1548 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
1549 /* The following detection logic can be refined later. For now:
1550 Check to see if the dirtied bytes is 50% more than the approx.
1551 amount of bytes that just got transferred since the last time we
1552 were in this routine. If that happens twice, start or increase
1555 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
1556 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
1557 (++rs->dirty_rate_high_cnt >= 2)) {
1558 trace_migration_throttle();
1559 rs->dirty_rate_high_cnt = 0;
1560 mig_throttle_guest_down();
1564 migration_update_rates(rs, end_time);
1566 rs->iterations_prev = rs->iterations;
1568 /* reset period counters */
1569 rs->time_last_bitmap_sync = end_time;
1570 rs->num_dirty_pages_period = 0;
1571 rs->bytes_xfer_prev = bytes_xfer_now;
1573 if (migrate_use_events()) {
1574 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
1579 * save_zero_page: send the zero page to the stream
1581 * Returns the number of pages written.
1583 * @rs: current RAM state
1584 * @block: block that contains the page we want to send
1585 * @offset: offset inside the block for the page
1587 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
1589 uint8_t *p = block->host + offset;
1592 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
1593 ram_counters.duplicate++;
1594 ram_counters.transferred +=
1595 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
1596 qemu_put_byte(rs->f, 0);
1597 ram_counters.transferred += 1;
1604 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
1606 if (!migrate_release_ram() || !migration_in_postcopy()) {
1610 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
1614 * @pages: the number of pages written by the control path,
1616 * > 0 - number of pages written
1618 * Returns true if the pages have been saved, otherwise false.
1620 static bool control_save_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1623 uint64_t bytes_xmit = 0;
1627 ret = ram_control_save_page(rs->f, block->offset, offset, TARGET_PAGE_SIZE,
1629 if (ret == RAM_SAVE_CONTROL_NOT_SUPP) {
1634 ram_counters.transferred += bytes_xmit;
1638 if (ret == RAM_SAVE_CONTROL_DELAYED) {
1642 if (bytes_xmit > 0) {
1643 ram_counters.normal++;
1644 } else if (bytes_xmit == 0) {
1645 ram_counters.duplicate++;
1652 * directly send the page to the stream
1654 * Returns the number of pages written.
1656 * @rs: current RAM state
1657 * @block: block that contains the page we want to send
1658 * @offset: offset inside the block for the page
1659 * @buf: the page to be sent
1660 * @async: send to page asyncly
1662 static int save_normal_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
1663 uint8_t *buf, bool async)
1665 ram_counters.transferred += save_page_header(rs, rs->f, block,
1666 offset | RAM_SAVE_FLAG_PAGE);
1668 qemu_put_buffer_async(rs->f, buf, TARGET_PAGE_SIZE,
1669 migrate_release_ram() &
1670 migration_in_postcopy());
1672 qemu_put_buffer(rs->f, buf, TARGET_PAGE_SIZE);
1674 ram_counters.transferred += TARGET_PAGE_SIZE;
1675 ram_counters.normal++;
1680 * ram_save_page: send the given page to the stream
1682 * Returns the number of pages written.
1684 * >=0 - Number of pages written - this might legally be 0
1685 * if xbzrle noticed the page was the same.
1687 * @rs: current RAM state
1688 * @block: block that contains the page we want to send
1689 * @offset: offset inside the block for the page
1690 * @last_stage: if we are at the completion stage
1692 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
1696 bool send_async = true;
1697 RAMBlock *block = pss->block;
1698 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
1699 ram_addr_t current_addr = block->offset + offset;
1701 p = block->host + offset;
1702 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
1704 XBZRLE_cache_lock();
1705 if (!rs->ram_bulk_stage && !migration_in_postcopy() &&
1706 migrate_use_xbzrle()) {
1707 pages = save_xbzrle_page(rs, &p, current_addr, block,
1708 offset, last_stage);
1710 /* Can't send this cached data async, since the cache page
1711 * might get updated before it gets to the wire
1717 /* XBZRLE overflow or normal page */
1719 pages = save_normal_page(rs, block, offset, p, send_async);
1722 XBZRLE_cache_unlock();
1727 static int do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block,
1728 ram_addr_t offset, uint8_t *source_buf)
1730 RAMState *rs = ram_state;
1731 int bytes_sent, blen;
1732 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
1734 bytes_sent = save_page_header(rs, f, block, offset |
1735 RAM_SAVE_FLAG_COMPRESS_PAGE);
1738 * copy it to a internal buffer to avoid it being modified by VM
1739 * so that we can catch up the error during compression and
1742 memcpy(source_buf, p, TARGET_PAGE_SIZE);
1743 blen = qemu_put_compression_data(f, stream, source_buf, TARGET_PAGE_SIZE);
1746 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1747 error_report("compressed data failed!");
1750 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
1756 static void flush_compressed_data(RAMState *rs)
1758 int idx, len, thread_count;
1760 if (!migrate_use_compression()) {
1763 thread_count = migrate_compress_threads();
1765 qemu_mutex_lock(&comp_done_lock);
1766 for (idx = 0; idx < thread_count; idx++) {
1767 while (!comp_param[idx].done) {
1768 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1771 qemu_mutex_unlock(&comp_done_lock);
1773 for (idx = 0; idx < thread_count; idx++) {
1774 qemu_mutex_lock(&comp_param[idx].mutex);
1775 if (!comp_param[idx].quit) {
1776 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1777 ram_counters.transferred += len;
1779 qemu_mutex_unlock(&comp_param[idx].mutex);
1783 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1786 param->block = block;
1787 param->offset = offset;
1790 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1793 int idx, thread_count, bytes_xmit = -1, pages = -1;
1795 thread_count = migrate_compress_threads();
1796 qemu_mutex_lock(&comp_done_lock);
1798 for (idx = 0; idx < thread_count; idx++) {
1799 if (comp_param[idx].done) {
1800 comp_param[idx].done = false;
1801 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
1802 qemu_mutex_lock(&comp_param[idx].mutex);
1803 set_compress_params(&comp_param[idx], block, offset);
1804 qemu_cond_signal(&comp_param[idx].cond);
1805 qemu_mutex_unlock(&comp_param[idx].mutex);
1807 ram_counters.normal++;
1808 ram_counters.transferred += bytes_xmit;
1815 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
1818 qemu_mutex_unlock(&comp_done_lock);
1824 * find_dirty_block: find the next dirty page and update any state
1825 * associated with the search process.
1827 * Returns if a page is found
1829 * @rs: current RAM state
1830 * @pss: data about the state of the current dirty page scan
1831 * @again: set to false if the search has scanned the whole of RAM
1833 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
1835 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
1836 if (pss->complete_round && pss->block == rs->last_seen_block &&
1837 pss->page >= rs->last_page) {
1839 * We've been once around the RAM and haven't found anything.
1845 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1846 /* Didn't find anything in this RAM Block */
1848 pss->block = QLIST_NEXT_RCU(pss->block, next);
1850 /* Hit the end of the list */
1851 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1852 /* Flag that we've looped */
1853 pss->complete_round = true;
1854 rs->ram_bulk_stage = false;
1855 if (migrate_use_xbzrle()) {
1856 /* If xbzrle is on, stop using the data compression at this
1857 * point. In theory, xbzrle can do better than compression.
1859 flush_compressed_data(rs);
1862 /* Didn't find anything this time, but try again on the new block */
1866 /* Can go around again, but... */
1868 /* We've found something so probably don't need to */
1874 * unqueue_page: gets a page off the queue
1876 * Helper for 'get_queued_page' - gets a page off the queue
1878 * Returns the block of the page (or NULL if none available)
1880 * @rs: current RAM state
1881 * @offset: used to return the offset within the RAMBlock
1883 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
1885 RAMBlock *block = NULL;
1887 qemu_mutex_lock(&rs->src_page_req_mutex);
1888 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1889 struct RAMSrcPageRequest *entry =
1890 QSIMPLEQ_FIRST(&rs->src_page_requests);
1892 *offset = entry->offset;
1894 if (entry->len > TARGET_PAGE_SIZE) {
1895 entry->len -= TARGET_PAGE_SIZE;
1896 entry->offset += TARGET_PAGE_SIZE;
1898 memory_region_unref(block->mr);
1899 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1901 migration_consume_urgent_request();
1904 qemu_mutex_unlock(&rs->src_page_req_mutex);
1910 * get_queued_page: unqueue a page from the postcopy requests
1912 * Skips pages that are already sent (!dirty)
1914 * Returns if a queued page is found
1916 * @rs: current RAM state
1917 * @pss: data about the state of the current dirty page scan
1919 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
1926 block = unqueue_page(rs, &offset);
1928 * We're sending this page, and since it's postcopy nothing else
1929 * will dirty it, and we must make sure it doesn't get sent again
1930 * even if this queue request was received after the background
1931 * search already sent it.
1936 page = offset >> TARGET_PAGE_BITS;
1937 dirty = test_bit(page, block->bmap);
1939 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1940 page, test_bit(page, block->unsentmap));
1942 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
1946 } while (block && !dirty);
1950 * As soon as we start servicing pages out of order, then we have
1951 * to kill the bulk stage, since the bulk stage assumes
1952 * in (migration_bitmap_find_dirty) that every page is
1953 * dirty, that's no longer true.
1955 rs->ram_bulk_stage = false;
1958 * We want the background search to continue from the queued page
1959 * since the guest is likely to want other pages near to the page
1960 * it just requested.
1963 pss->page = offset >> TARGET_PAGE_BITS;
1970 * migration_page_queue_free: drop any remaining pages in the ram
1973 * It should be empty at the end anyway, but in error cases there may
1974 * be some left. in case that there is any page left, we drop it.
1977 static void migration_page_queue_free(RAMState *rs)
1979 struct RAMSrcPageRequest *mspr, *next_mspr;
1980 /* This queue generally should be empty - but in the case of a failed
1981 * migration might have some droppings in.
1984 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1985 memory_region_unref(mspr->rb->mr);
1986 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1993 * ram_save_queue_pages: queue the page for transmission
1995 * A request from postcopy destination for example.
1997 * Returns zero on success or negative on error
1999 * @rbname: Name of the RAMBlock of the request. NULL means the
2000 * same that last one.
2001 * @start: starting address from the start of the RAMBlock
2002 * @len: length (in bytes) to send
2004 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
2007 RAMState *rs = ram_state;
2009 ram_counters.postcopy_requests++;
2012 /* Reuse last RAMBlock */
2013 ramblock = rs->last_req_rb;
2017 * Shouldn't happen, we can't reuse the last RAMBlock if
2018 * it's the 1st request.
2020 error_report("ram_save_queue_pages no previous block");
2024 ramblock = qemu_ram_block_by_name(rbname);
2027 /* We shouldn't be asked for a non-existent RAMBlock */
2028 error_report("ram_save_queue_pages no block '%s'", rbname);
2031 rs->last_req_rb = ramblock;
2033 trace_ram_save_queue_pages(ramblock->idstr, start, len);
2034 if (start+len > ramblock->used_length) {
2035 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
2036 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
2037 __func__, start, len, ramblock->used_length);
2041 struct RAMSrcPageRequest *new_entry =
2042 g_malloc0(sizeof(struct RAMSrcPageRequest));
2043 new_entry->rb = ramblock;
2044 new_entry->offset = start;
2045 new_entry->len = len;
2047 memory_region_ref(ramblock->mr);
2048 qemu_mutex_lock(&rs->src_page_req_mutex);
2049 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
2050 migration_make_urgent_request();
2051 qemu_mutex_unlock(&rs->src_page_req_mutex);
2061 static bool save_page_use_compression(RAMState *rs)
2063 if (!migrate_use_compression()) {
2068 * If xbzrle is on, stop using the data compression after first
2069 * round of migration even if compression is enabled. In theory,
2070 * xbzrle can do better than compression.
2072 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
2080 * ram_save_target_page: save one target page
2082 * Returns the number of pages written
2084 * @rs: current RAM state
2085 * @pss: data about the page we want to send
2086 * @last_stage: if we are at the completion stage
2088 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
2091 RAMBlock *block = pss->block;
2092 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
2095 if (control_save_page(rs, block, offset, &res)) {
2100 * When starting the process of a new block, the first page of
2101 * the block should be sent out before other pages in the same
2102 * block, and all the pages in last block should have been sent
2103 * out, keeping this order is important, because the 'cont' flag
2104 * is used to avoid resending the block name.
2106 if (block != rs->last_sent_block && save_page_use_compression(rs)) {
2107 flush_compressed_data(rs);
2110 res = save_zero_page(rs, block, offset);
2112 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
2113 * page would be stale
2115 if (!save_page_use_compression(rs)) {
2116 XBZRLE_cache_lock();
2117 xbzrle_cache_zero_page(rs, block->offset + offset);
2118 XBZRLE_cache_unlock();
2120 ram_release_pages(block->idstr, offset, res);
2125 * Make sure the first page is sent out before other pages.
2127 * we post it as normal page as compression will take much
2130 if (block == rs->last_sent_block && save_page_use_compression(rs)) {
2131 return compress_page_with_multi_thread(rs, block, offset);
2134 return ram_save_page(rs, pss, last_stage);
2138 * ram_save_host_page: save a whole host page
2140 * Starting at *offset send pages up to the end of the current host
2141 * page. It's valid for the initial offset to point into the middle of
2142 * a host page in which case the remainder of the hostpage is sent.
2143 * Only dirty target pages are sent. Note that the host page size may
2144 * be a huge page for this block.
2145 * The saving stops at the boundary of the used_length of the block
2146 * if the RAMBlock isn't a multiple of the host page size.
2148 * Returns the number of pages written or negative on error
2150 * @rs: current RAM state
2151 * @ms: current migration state
2152 * @pss: data about the page we want to send
2153 * @last_stage: if we are at the completion stage
2155 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
2158 int tmppages, pages = 0;
2159 size_t pagesize_bits =
2160 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
2162 if (!qemu_ram_is_migratable(pss->block)) {
2163 error_report("block %s should not be migrated !", pss->block->idstr);
2168 /* Check whether the page is dirty and, if it is, send it */
2169 if (!migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
2174 tmppages = ram_save_target_page(rs, pss, last_stage);
2180 if (pss->block->unsentmap) {
2181 clear_bit(pss->page, pss->block->unsentmap);
2185 } while ((pss->page & (pagesize_bits - 1)) &&
2186 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
2188 /* The offset we leave with is the last one we looked at */
2194 * ram_find_and_save_block: finds a dirty page and sends it to f
2196 * Called within an RCU critical section.
2198 * Returns the number of pages written where zero means no dirty pages
2200 * @rs: current RAM state
2201 * @last_stage: if we are at the completion stage
2203 * On systems where host-page-size > target-page-size it will send all the
2204 * pages in a host page that are dirty.
2207 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
2209 PageSearchStatus pss;
2213 /* No dirty page as there is zero RAM */
2214 if (!ram_bytes_total()) {
2218 pss.block = rs->last_seen_block;
2219 pss.page = rs->last_page;
2220 pss.complete_round = false;
2223 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
2228 found = get_queued_page(rs, &pss);
2231 /* priority queue empty, so just search for something dirty */
2232 found = find_dirty_block(rs, &pss, &again);
2236 pages = ram_save_host_page(rs, &pss, last_stage);
2238 } while (!pages && again);
2240 rs->last_seen_block = pss.block;
2241 rs->last_page = pss.page;
2246 void acct_update_position(QEMUFile *f, size_t size, bool zero)
2248 uint64_t pages = size / TARGET_PAGE_SIZE;
2251 ram_counters.duplicate += pages;
2253 ram_counters.normal += pages;
2254 ram_counters.transferred += size;
2255 qemu_update_position(f, size);
2259 uint64_t ram_bytes_total(void)
2265 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2266 total += block->used_length;
2272 static void xbzrle_load_setup(void)
2274 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2277 static void xbzrle_load_cleanup(void)
2279 g_free(XBZRLE.decoded_buf);
2280 XBZRLE.decoded_buf = NULL;
2283 static void ram_state_cleanup(RAMState **rsp)
2286 migration_page_queue_free(*rsp);
2287 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
2288 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
2294 static void xbzrle_cleanup(void)
2296 XBZRLE_cache_lock();
2298 cache_fini(XBZRLE.cache);
2299 g_free(XBZRLE.encoded_buf);
2300 g_free(XBZRLE.current_buf);
2301 g_free(XBZRLE.zero_target_page);
2302 XBZRLE.cache = NULL;
2303 XBZRLE.encoded_buf = NULL;
2304 XBZRLE.current_buf = NULL;
2305 XBZRLE.zero_target_page = NULL;
2307 XBZRLE_cache_unlock();
2310 static void ram_save_cleanup(void *opaque)
2312 RAMState **rsp = opaque;
2315 /* The caller holds the iothread lock or is in a bh, so there is
2316 * no writing race against this migration_bitmap
2318 memory_global_dirty_log_stop();
2320 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2321 g_free(block->bmap);
2323 g_free(block->unsentmap);
2324 block->unsentmap = NULL;
2328 compress_threads_save_cleanup();
2329 ram_state_cleanup(rsp);
2332 static void ram_state_reset(RAMState *rs)
2334 rs->last_seen_block = NULL;
2335 rs->last_sent_block = NULL;
2337 rs->last_version = ram_list.version;
2338 rs->ram_bulk_stage = true;
2341 #define MAX_WAIT 50 /* ms, half buffered_file limit */
2344 * 'expected' is the value you expect the bitmap mostly to be full
2345 * of; it won't bother printing lines that are all this value.
2346 * If 'todump' is null the migration bitmap is dumped.
2348 void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
2349 unsigned long pages)
2352 int64_t linelen = 128;
2355 for (cur = 0; cur < pages; cur += linelen) {
2359 * Last line; catch the case where the line length
2360 * is longer than remaining ram
2362 if (cur + linelen > pages) {
2363 linelen = pages - cur;
2365 for (curb = 0; curb < linelen; curb++) {
2366 bool thisbit = test_bit(cur + curb, todump);
2367 linebuf[curb] = thisbit ? '1' : '.';
2368 found = found || (thisbit != expected);
2371 linebuf[curb] = '\0';
2372 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
2377 /* **** functions for postcopy ***** */
2379 void ram_postcopy_migrated_memory_release(MigrationState *ms)
2381 struct RAMBlock *block;
2383 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2384 unsigned long *bitmap = block->bmap;
2385 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
2386 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
2388 while (run_start < range) {
2389 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
2390 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
2391 (run_end - run_start) << TARGET_PAGE_BITS);
2392 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
2398 * postcopy_send_discard_bm_ram: discard a RAMBlock
2400 * Returns zero on success
2402 * Callback from postcopy_each_ram_send_discard for each RAMBlock
2403 * Note: At this point the 'unsentmap' is the processed bitmap combined
2404 * with the dirtymap; so a '1' means it's either dirty or unsent.
2406 * @ms: current migration state
2407 * @pds: state for postcopy
2408 * @start: RAMBlock starting page
2409 * @length: RAMBlock size
2411 static int postcopy_send_discard_bm_ram(MigrationState *ms,
2412 PostcopyDiscardState *pds,
2415 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
2416 unsigned long current;
2417 unsigned long *unsentmap = block->unsentmap;
2419 for (current = 0; current < end; ) {
2420 unsigned long one = find_next_bit(unsentmap, end, current);
2423 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
2424 unsigned long discard_length;
2427 discard_length = end - one;
2429 discard_length = zero - one;
2431 if (discard_length) {
2432 postcopy_discard_send_range(ms, pds, one, discard_length);
2434 current = one + discard_length;
2444 * postcopy_each_ram_send_discard: discard all RAMBlocks
2446 * Returns 0 for success or negative for error
2448 * Utility for the outgoing postcopy code.
2449 * Calls postcopy_send_discard_bm_ram for each RAMBlock
2450 * passing it bitmap indexes and name.
2451 * (qemu_ram_foreach_block ends up passing unscaled lengths
2452 * which would mean postcopy code would have to deal with target page)
2454 * @ms: current migration state
2456 static int postcopy_each_ram_send_discard(MigrationState *ms)
2458 struct RAMBlock *block;
2461 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2462 PostcopyDiscardState *pds =
2463 postcopy_discard_send_init(ms, block->idstr);
2466 * Postcopy sends chunks of bitmap over the wire, but it
2467 * just needs indexes at this point, avoids it having
2468 * target page specific code.
2470 ret = postcopy_send_discard_bm_ram(ms, pds, block);
2471 postcopy_discard_send_finish(ms, pds);
2481 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
2483 * Helper for postcopy_chunk_hostpages; it's called twice to
2484 * canonicalize the two bitmaps, that are similar, but one is
2487 * Postcopy requires that all target pages in a hostpage are dirty or
2488 * clean, not a mix. This function canonicalizes the bitmaps.
2490 * @ms: current migration state
2491 * @unsent_pass: if true we need to canonicalize partially unsent host pages
2492 * otherwise we need to canonicalize partially dirty host pages
2493 * @block: block that contains the page we want to canonicalize
2494 * @pds: state for postcopy
2496 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
2498 PostcopyDiscardState *pds)
2500 RAMState *rs = ram_state;
2501 unsigned long *bitmap = block->bmap;
2502 unsigned long *unsentmap = block->unsentmap;
2503 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
2504 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2505 unsigned long run_start;
2507 if (block->page_size == TARGET_PAGE_SIZE) {
2508 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
2513 /* Find a sent page */
2514 run_start = find_next_zero_bit(unsentmap, pages, 0);
2516 /* Find a dirty page */
2517 run_start = find_next_bit(bitmap, pages, 0);
2520 while (run_start < pages) {
2521 bool do_fixup = false;
2522 unsigned long fixup_start_addr;
2523 unsigned long host_offset;
2526 * If the start of this run of pages is in the middle of a host
2527 * page, then we need to fixup this host page.
2529 host_offset = run_start % host_ratio;
2532 run_start -= host_offset;
2533 fixup_start_addr = run_start;
2534 /* For the next pass */
2535 run_start = run_start + host_ratio;
2537 /* Find the end of this run */
2538 unsigned long run_end;
2540 run_end = find_next_bit(unsentmap, pages, run_start + 1);
2542 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
2545 * If the end isn't at the start of a host page, then the
2546 * run doesn't finish at the end of a host page
2547 * and we need to discard.
2549 host_offset = run_end % host_ratio;
2552 fixup_start_addr = run_end - host_offset;
2554 * This host page has gone, the next loop iteration starts
2555 * from after the fixup
2557 run_start = fixup_start_addr + host_ratio;
2560 * No discards on this iteration, next loop starts from
2561 * next sent/dirty page
2563 run_start = run_end + 1;
2570 /* Tell the destination to discard this page */
2571 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
2572 /* For the unsent_pass we:
2573 * discard partially sent pages
2574 * For the !unsent_pass (dirty) we:
2575 * discard partially dirty pages that were sent
2576 * (any partially sent pages were already discarded
2577 * by the previous unsent_pass)
2579 postcopy_discard_send_range(ms, pds, fixup_start_addr,
2583 /* Clean up the bitmap */
2584 for (page = fixup_start_addr;
2585 page < fixup_start_addr + host_ratio; page++) {
2586 /* All pages in this host page are now not sent */
2587 set_bit(page, unsentmap);
2590 * Remark them as dirty, updating the count for any pages
2591 * that weren't previously dirty.
2593 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
2598 /* Find the next sent page for the next iteration */
2599 run_start = find_next_zero_bit(unsentmap, pages, run_start);
2601 /* Find the next dirty page for the next iteration */
2602 run_start = find_next_bit(bitmap, pages, run_start);
2608 * postcopy_chunk_hostpages: discard any partially sent host page
2610 * Utility for the outgoing postcopy code.
2612 * Discard any partially sent host-page size chunks, mark any partially
2613 * dirty host-page size chunks as all dirty. In this case the host-page
2614 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
2616 * Returns zero on success
2618 * @ms: current migration state
2619 * @block: block we want to work with
2621 static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
2623 PostcopyDiscardState *pds =
2624 postcopy_discard_send_init(ms, block->idstr);
2626 /* First pass: Discard all partially sent host pages */
2627 postcopy_chunk_hostpages_pass(ms, true, block, pds);
2629 * Second pass: Ensure that all partially dirty host pages are made
2632 postcopy_chunk_hostpages_pass(ms, false, block, pds);
2634 postcopy_discard_send_finish(ms, pds);
2639 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
2641 * Returns zero on success
2643 * Transmit the set of pages to be discarded after precopy to the target
2644 * these are pages that:
2645 * a) Have been previously transmitted but are now dirty again
2646 * b) Pages that have never been transmitted, this ensures that
2647 * any pages on the destination that have been mapped by background
2648 * tasks get discarded (transparent huge pages is the specific concern)
2649 * Hopefully this is pretty sparse
2651 * @ms: current migration state
2653 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
2655 RAMState *rs = ram_state;
2661 /* This should be our last sync, the src is now paused */
2662 migration_bitmap_sync(rs);
2664 /* Easiest way to make sure we don't resume in the middle of a host-page */
2665 rs->last_seen_block = NULL;
2666 rs->last_sent_block = NULL;
2669 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2670 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
2671 unsigned long *bitmap = block->bmap;
2672 unsigned long *unsentmap = block->unsentmap;
2675 /* We don't have a safe way to resize the sentmap, so
2676 * if the bitmap was resized it will be NULL at this
2679 error_report("migration ram resized during precopy phase");
2683 /* Deal with TPS != HPS and huge pages */
2684 ret = postcopy_chunk_hostpages(ms, block);
2691 * Update the unsentmap to be unsentmap = unsentmap | dirty
2693 bitmap_or(unsentmap, unsentmap, bitmap, pages);
2694 #ifdef DEBUG_POSTCOPY
2695 ram_debug_dump_bitmap(unsentmap, true, pages);
2698 trace_ram_postcopy_send_discard_bitmap();
2700 ret = postcopy_each_ram_send_discard(ms);
2707 * ram_discard_range: discard dirtied pages at the beginning of postcopy
2709 * Returns zero on success
2711 * @rbname: name of the RAMBlock of the request. NULL means the
2712 * same that last one.
2713 * @start: RAMBlock starting page
2714 * @length: RAMBlock size
2716 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
2720 trace_ram_discard_range(rbname, start, length);
2723 RAMBlock *rb = qemu_ram_block_by_name(rbname);
2726 error_report("ram_discard_range: Failed to find block '%s'", rbname);
2730 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2731 length >> qemu_target_page_bits());
2732 ret = ram_block_discard_range(rb, start, length);
2741 * For every allocation, we will try not to crash the VM if the
2742 * allocation failed.
2744 static int xbzrle_init(void)
2746 Error *local_err = NULL;
2748 if (!migrate_use_xbzrle()) {
2752 XBZRLE_cache_lock();
2754 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2755 if (!XBZRLE.zero_target_page) {
2756 error_report("%s: Error allocating zero page", __func__);
2760 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2761 TARGET_PAGE_SIZE, &local_err);
2762 if (!XBZRLE.cache) {
2763 error_report_err(local_err);
2764 goto free_zero_page;
2767 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2768 if (!XBZRLE.encoded_buf) {
2769 error_report("%s: Error allocating encoded_buf", __func__);
2773 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2774 if (!XBZRLE.current_buf) {
2775 error_report("%s: Error allocating current_buf", __func__);
2776 goto free_encoded_buf;
2779 /* We are all good */
2780 XBZRLE_cache_unlock();
2784 g_free(XBZRLE.encoded_buf);
2785 XBZRLE.encoded_buf = NULL;
2787 cache_fini(XBZRLE.cache);
2788 XBZRLE.cache = NULL;
2790 g_free(XBZRLE.zero_target_page);
2791 XBZRLE.zero_target_page = NULL;
2793 XBZRLE_cache_unlock();
2797 static int ram_state_init(RAMState **rsp)
2799 *rsp = g_try_new0(RAMState, 1);
2802 error_report("%s: Init ramstate fail", __func__);
2806 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2807 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2808 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
2811 * Count the total number of pages used by ram blocks not including any
2812 * gaps due to alignment or unplugs.
2814 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2816 ram_state_reset(*rsp);
2821 static void ram_list_init_bitmaps(void)
2824 unsigned long pages;
2826 /* Skip setting bitmap if there is no RAM */
2827 if (ram_bytes_total()) {
2828 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2829 pages = block->max_length >> TARGET_PAGE_BITS;
2830 block->bmap = bitmap_new(pages);
2831 bitmap_set(block->bmap, 0, pages);
2832 if (migrate_postcopy_ram()) {
2833 block->unsentmap = bitmap_new(pages);
2834 bitmap_set(block->unsentmap, 0, pages);
/*
 * Create the per-block bitmaps (ram_list_init_bitmaps()), start global
 * dirty logging and run a first bitmap sync for @rs.  The iothread lock
 * and the ramlist lock are held around these steps and dropped again in
 * reverse order.
 */
2840 static void ram_init_bitmaps(RAMState *rs)
2842 /* For memory_global_dirty_log_start below. */
2843 qemu_mutex_lock_iothread();
2844 qemu_mutex_lock_ramlist();
2847 ram_list_init_bitmaps();
2848 memory_global_dirty_log_start();
2849 migration_bitmap_sync(rs);
2852 qemu_mutex_unlock_ramlist();
2853 qemu_mutex_unlock_iothread();
2856 static int ram_init_all(RAMState **rsp)
2858 if (ram_state_init(rsp)) {
2862 if (xbzrle_init()) {
2863 ram_state_cleanup(rsp);
2867 ram_init_bitmaps(*rsp);
2872 static void ram_state_resume_prepare(RAMState *rs, QEMUFile *out)
2878 * Postcopy is not using xbzrle/compression, so no need for that.
2879 * Also, since source are already halted, we don't need to care
2880 * about dirty page logging as well.
2883 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2884 pages += bitmap_count_one(block->bmap,
2885 block->used_length >> TARGET_PAGE_BITS);
2888 /* This may not be aligned with current bitmaps. Recalculate. */
2889 rs->migration_dirty_pages = pages;
2891 rs->last_seen_block = NULL;
2892 rs->last_sent_block = NULL;
2894 rs->last_version = ram_list.version;
2896 * Disable the bulk stage, otherwise we'll resend the whole RAM no
2897 * matter what we have sent.
2899 rs->ram_bulk_stage = false;
2901 /* Update RAMState cache of output QEMUFile */
2904 trace_ram_state_resume_prepare(pages);
2908 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2909 * long-running RCU critical section. When rcu-reclaims in the code
2910 * start to become numerous it will be necessary to reduce the
2911 * granularity of these critical sections.
2915 * ram_save_setup: Setup RAM for migration
2917 * Returns zero to indicate success and negative for error
2919 * @f: QEMUFile where to send the data
2920 * @opaque: RAMState pointer
2922 static int ram_save_setup(QEMUFile *f, void *opaque)
2924 RAMState **rsp = opaque;
2927 if (compress_threads_save_setup()) {
2931 /* migration has already setup the bitmap, reuse it. */
2932 if (!migration_in_colo_state()) {
2933 if (ram_init_all(rsp) != 0) {
2934 compress_threads_save_cleanup();
2942 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2944 RAMBLOCK_FOREACH_MIGRATABLE(block) {
2945 qemu_put_byte(f, strlen(block->idstr));
2946 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2947 qemu_put_be64(f, block->used_length);
2948 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2949 qemu_put_be64(f, block->page_size);
2955 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2956 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2958 multifd_send_sync_main();
2959 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2965 * ram_save_iterate: iterative stage for migration
2967 * Returns zero to indicate success and negative for error
2969 * @f: QEMUFile where to send the data
2970 * @opaque: RAMState pointer
/*
 * One iteration round of the precopy stage.
 *
 * While blk_mig_bulk_active() reports an ongoing block-migration bulk
 * phase, RAM is not transferred.  Otherwise pages are sent with
 * ram_find_and_save_block() for as long as qemu_file_rate_limit() returns
 * 0 or rs->src_page_requests is not empty; every 64 loop passes the
 * elapsed time (in milliseconds) is compared against MAX_WAIT.  The round
 * ends with RAM_SAVE_FLAG_EOS, which is accounted as 8 transferred bytes
 * in ram_counters.
 */
2972 static int ram_save_iterate(QEMUFile *f, void *opaque)
2974 RAMState **temp = opaque;
2975 RAMState *rs = *temp;
2981 if (blk_mig_bulk_active()) {
2982 /* Avoid transferring ram during bulk phase of block migration as
2983 * the bulk phase will usually take a long time and transferring
2984 * ram updates during that time is pointless. */
2989 if (ram_list.version != rs->last_version) {
2990 ram_state_reset(rs);
2993 /* Read version before ram_list.blocks */
2996 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2998 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
3000 while ((ret = qemu_file_rate_limit(f)) == 0 ||
3001 !QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
3004 if (qemu_file_get_error(f)) {
3008 pages = ram_find_and_save_block(rs, false);
3009 /* no more pages to send */
3016 /* we want to check in the 1st loop, just in case it was the 1st time
3017 and we had to sync the dirty bitmap.
3018 qemu_get_clock_ns() is a bit expensive, so we only check each some
3021 if ((i & 63) == 0) {
3022 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
3023 if (t1 > MAX_WAIT) {
3024 trace_ram_save_iterate_big_wait(t1, i);
3030 flush_compressed_data(rs);
3034 * Must occur before EOS (or any QEMUFile operation)
3035 * because of RDMA protocol.
3037 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
3039 multifd_send_sync_main();
3041 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3042 ram_counters.transferred += 8;
3044 ret = qemu_file_get_error(f);
3053 * ram_save_complete: function called to send the remaining amount of ram
3055 * Returns zero to indicate success
3057 * Called with iothread lock
3059 * @f: QEMUFile where to send the data
3060 * @opaque: RAMState pointer
/*
 * Completion stage for RAM: unless migration is already in postcopy, the
 * dirty bitmap is synced one more time; the remaining pages are then
 * sent with ram_find_and_save_block() (without rate limiting, per the
 * comment below), compressed data is flushed and the section is closed
 * with RAM_SAVE_FLAG_EOS.
 */
3062 static int ram_save_complete(QEMUFile *f, void *opaque)
3064 RAMState **temp = opaque;
3065 RAMState *rs = *temp;
3069 if (!migration_in_postcopy()) {
3070 migration_bitmap_sync(rs);
3073 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
3075 /* try transferring iterative blocks of memory */
3077 /* flush all remaining blocks regardless of rate limiting */
3081 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
3082 /* no more blocks to send */
3088 flush_compressed_data(rs);
3089 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
3093 multifd_send_sync_main();
3094 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
3099 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
3100 uint64_t *res_precopy_only,
3101 uint64_t *res_compatible,
3102 uint64_t *res_postcopy_only)
3104 RAMState **temp = opaque;
3105 RAMState *rs = *temp;
3106 uint64_t remaining_size;
3108 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3110 if (!migration_in_postcopy() &&
3111 remaining_size < max_size) {
3112 qemu_mutex_lock_iothread();
3114 migration_bitmap_sync(rs);
3116 qemu_mutex_unlock_iothread();
3117 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
3120 if (migrate_postcopy_ram()) {
3121 /* We can do postcopy, and all the data is postcopiable */
3122 *res_compatible += remaining_size;
3124 *res_precopy_only += remaining_size;
3128 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
3130 unsigned int xh_len;
3132 uint8_t *loaded_data;
3134 /* extract RLE header */
3135 xh_flags = qemu_get_byte(f);
3136 xh_len = qemu_get_be16(f);
3138 if (xh_flags != ENCODING_FLAG_XBZRLE) {
3139 error_report("Failed to load XBZRLE page - wrong compression!");
3143 if (xh_len > TARGET_PAGE_SIZE) {
3144 error_report("Failed to load XBZRLE page - len overflow!");
3147 loaded_data = XBZRLE.decoded_buf;
3148 /* load data and decode */
3149 /* it can change loaded_data to point to an internal buffer */
3150 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
3153 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
3154 TARGET_PAGE_SIZE) == -1) {
3155 error_report("Failed to load XBZRLE page - decode error!");
3163 * ram_block_from_stream: read a RAMBlock id from the migration stream
3165 * Must be called from within a rcu critical section.
3167 * Returns a pointer from within the RCU-protected ram_list.
3169 * @f: QEMUFile where to read the data from
3170 * @flags: Page flags (mostly to see if it's a continuation of previous block)
3172 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
3174 static RAMBlock *block = NULL;
3178 if (flags & RAM_SAVE_FLAG_CONTINUE) {
3180 error_report("Ack, bad migration stream!");
3186 len = qemu_get_byte(f);
3187 qemu_get_buffer(f, (uint8_t *)id, len);
3190 block = qemu_ram_block_by_name(id);
3192 error_report("Can't find block %s", id);
3196 if (!qemu_ram_is_migratable(block)) {
3197 error_report("block %s should not be migrated !", id);
3204 static inline void *host_from_ram_block_offset(RAMBlock *block,
3207 if (!offset_in_ramblock(block, offset)) {
3211 return block->host + offset;
/**
 * ram_handle_compressed: handle the zero page case
 *
 * Fills @size bytes at @host with @ch.  The write is skipped when @ch is
 * zero and the range already reads as zero.
 *
 * @host: host address for the zero page
 * @ch: what the page is filled from.  We only support zero
 * @size: size of the zero page
 */
void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
{
    if (ch == 0 && is_zero_range(host, size)) {
        /* Already zero: leave the memory untouched */
        return;
    }

    memset(host, ch, size);
}
3231 /* return the size after decompression, or negative value on error */
3233 qemu_uncompress_data(z_stream *stream, uint8_t *dest, size_t dest_len,
3234 const uint8_t *source, size_t source_len)
3238 err = inflateReset(stream);
3243 stream->avail_in = source_len;
3244 stream->next_in = (uint8_t *)source;
3245 stream->avail_out = dest_len;
3246 stream->next_out = dest;
3248 err = inflate(stream, Z_NO_FLUSH);
3249 if (err != Z_STREAM_END) {
3253 return stream->total_out;
3256 static void *do_data_decompress(void *opaque)
3258 DecompressParam *param = opaque;
3259 unsigned long pagesize;
3263 qemu_mutex_lock(¶m->mutex);
3264 while (!param->quit) {
3269 qemu_mutex_unlock(¶m->mutex);
3271 pagesize = TARGET_PAGE_SIZE;
3273 ret = qemu_uncompress_data(¶m->stream, des, pagesize,
3274 param->compbuf, len);
3275 if (ret < 0 && migrate_get_current()->decompress_error_check) {
3276 error_report("decompress data failed");
3277 qemu_file_set_error(decomp_file, ret);
3280 qemu_mutex_lock(&decomp_done_lock);
3282 qemu_cond_signal(&decomp_done_cond);
3283 qemu_mutex_unlock(&decomp_done_lock);
3285 qemu_mutex_lock(¶m->mutex);
3287 qemu_cond_wait(¶m->cond, ¶m->mutex);
3290 qemu_mutex_unlock(¶m->mutex);
3295 static int wait_for_decompress_done(void)
3297 int idx, thread_count;
3299 if (!migrate_use_compression()) {
3303 thread_count = migrate_decompress_threads();
3304 qemu_mutex_lock(&decomp_done_lock);
3305 for (idx = 0; idx < thread_count; idx++) {
3306 while (!decomp_param[idx].done) {
3307 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3310 qemu_mutex_unlock(&decomp_done_lock);
3311 return qemu_file_get_error(decomp_file);
3314 static void compress_threads_load_cleanup(void)
3316 int i, thread_count;
3318 if (!migrate_use_compression()) {
3321 thread_count = migrate_decompress_threads();
3322 for (i = 0; i < thread_count; i++) {
3324 * we use it as a indicator which shows if the thread is
3325 * properly init'd or not
3327 if (!decomp_param[i].compbuf) {
3331 qemu_mutex_lock(&decomp_param[i].mutex);
3332 decomp_param[i].quit = true;
3333 qemu_cond_signal(&decomp_param[i].cond);
3334 qemu_mutex_unlock(&decomp_param[i].mutex);
3336 for (i = 0; i < thread_count; i++) {
3337 if (!decomp_param[i].compbuf) {
3341 qemu_thread_join(decompress_threads + i);
3342 qemu_mutex_destroy(&decomp_param[i].mutex);
3343 qemu_cond_destroy(&decomp_param[i].cond);
3344 inflateEnd(&decomp_param[i].stream);
3345 g_free(decomp_param[i].compbuf);
3346 decomp_param[i].compbuf = NULL;
3348 g_free(decompress_threads);
3349 g_free(decomp_param);
3350 decompress_threads = NULL;
3351 decomp_param = NULL;
3355 static int compress_threads_load_setup(QEMUFile *f)
3357 int i, thread_count;
3359 if (!migrate_use_compression()) {
3363 thread_count = migrate_decompress_threads();
3364 decompress_threads = g_new0(QemuThread, thread_count);
3365 decomp_param = g_new0(DecompressParam, thread_count);
3366 qemu_mutex_init(&decomp_done_lock);
3367 qemu_cond_init(&decomp_done_cond);
3369 for (i = 0; i < thread_count; i++) {
3370 if (inflateInit(&decomp_param[i].stream) != Z_OK) {
3374 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
3375 qemu_mutex_init(&decomp_param[i].mutex);
3376 qemu_cond_init(&decomp_param[i].cond);
3377 decomp_param[i].done = true;
3378 decomp_param[i].quit = false;
3379 qemu_thread_create(decompress_threads + i, "decompress",
3380 do_data_decompress, decomp_param + i,
3381 QEMU_THREAD_JOINABLE);
3385 compress_threads_load_cleanup();
/*
 * Hand one compressed page to an idle decompression thread.
 *
 * With decomp_done_lock held, decomp_param[] is scanned for a slot whose
 * 'done' flag is set.  That slot is marked busy, @len compressed bytes
 * are read from @f into its compbuf, @host is stored as the destination
 * and the slot's condition variable is signalled.  When no slot is idle
 * the caller waits on decomp_done_cond.
 */
3389 static void decompress_data_with_multi_threads(QEMUFile *f,
3390 void *host, int len)
3392 int idx, thread_count;
3394 thread_count = migrate_decompress_threads();
3395 qemu_mutex_lock(&decomp_done_lock);
3397 for (idx = 0; idx < thread_count; idx++) {
3398 if (decomp_param[idx].done) {
3399 decomp_param[idx].done = false;
3400 qemu_mutex_lock(&decomp_param[idx].mutex);
3401 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
3402 decomp_param[idx].des = host;
3403 decomp_param[idx].len = len;
3404 qemu_cond_signal(&decomp_param[idx].cond);
3405 qemu_mutex_unlock(&decomp_param[idx].mutex);
3409 if (idx < thread_count) {
3412 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
3415 qemu_mutex_unlock(&decomp_done_lock);
3419 * ram_load_setup: Setup RAM for migration incoming side
3421 * Returns zero to indicate success and negative for error
3423 * @f: QEMUFile where to receive the data
3424 * @opaque: RAMState pointer
3426 static int ram_load_setup(QEMUFile *f, void *opaque)
3428 if (compress_threads_load_setup(f)) {
3432 xbzrle_load_setup();
3433 ramblock_recv_map_init();
3437 static int ram_load_cleanup(void *opaque)
3440 xbzrle_load_cleanup();
3441 compress_threads_load_cleanup();
3443 RAMBLOCK_FOREACH_MIGRATABLE(rb) {
3444 g_free(rb->receivedmap);
3445 rb->receivedmap = NULL;
3451 * ram_postcopy_incoming_init: allocate postcopy data structures
3453 * Returns 0 for success and negative if there was one error
3455 * @mis: current migration incoming state
3457 * Allocate data structures etc needed by incoming migration with
3458 * postcopy-ram. postcopy-ram's similarly names
3459 * postcopy_ram_incoming_init does the work.
3461 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
3463 unsigned long ram_pages = last_ram_page();
3465 return postcopy_ram_incoming_init(mis, ram_pages);
3469 * ram_load_postcopy: load a page in postcopy case
3471 * Returns 0 for success or -errno in case of error
3473 * Called in postcopy mode by ram_load().
3474 * rcu_read_lock is taken prior to this being called.
3476 * @f: QEMUFile where to receive the data from
/*
 * Page loader used while postcopy is running.
 *
 * Reads 64-bit (address | flags) headers from @f until RAM_SAVE_FLAG_EOS
 * is seen or an error occurs.  Zero pages and normal pages are staged in
 * the temporary page from postcopy_get_tmp_page(), at the offset the
 * target page has inside its host page (block->page_size).  When the
 * target page that completes a host page has been read, the host page is
 * put in place with postcopy_place_page() or postcopy_place_page_zero().
 * A target page that is not the first of its host page must directly
 * follow the previous one, otherwise "Non-sequential target page" is
 * reported.
 */
3478 static int ram_load_postcopy(QEMUFile *f)
3480 int flags = 0, ret = 0;
3481 bool place_needed = false;
3482 bool matching_page_sizes = false;
3483 MigrationIncomingState *mis = migration_incoming_get_current();
3484 /* Temporary page that is later 'placed' */
3485 void *postcopy_host_page = postcopy_get_tmp_page(mis);
3486 void *last_host = NULL;
3487 bool all_zero = false;
3489 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3492 void *page_buffer = NULL;
3493 void *place_source = NULL;
3494 RAMBlock *block = NULL;
3497 addr = qemu_get_be64(f);
3500 * If qemu file error, we should stop here, and then "addr"
3503 ret = qemu_file_get_error(f);
3508 flags = addr & ~TARGET_PAGE_MASK;
3509 addr &= TARGET_PAGE_MASK;
3511 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
3512 place_needed = false;
3513 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
3514 block = ram_block_from_stream(f, flags);
3516 host = host_from_ram_block_offset(block, addr);
3518 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3522 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
3524 * Postcopy requires that we place whole host pages atomically;
3525 * these may be huge pages for RAMBlocks that are backed by
3527 * To make it atomic, the data is read into a temporary page
3528 * that's moved into place later.
3529 * The migration protocol uses, possibly smaller, target-pages
3530 * however the source ensures it always sends all the components
3531 * of a host page in order.
3533 page_buffer = postcopy_host_page +
3534 ((uintptr_t)host & (block->page_size - 1));
3535 /* If all TP are zero then we can optimise the place */
3536 if (!((uintptr_t)host & (block->page_size - 1))) {
3539 /* not the 1st TP within the HP */
3540 if (host != (last_host + TARGET_PAGE_SIZE)) {
3541 error_report("Non-sequential target page %p/%p",
3550 * If it's the last part of a host page then we place the host
3553 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
3554 (block->page_size - 1)) == 0;
3555 place_source = postcopy_host_page;
3559 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3560 case RAM_SAVE_FLAG_ZERO:
3561 ch = qemu_get_byte(f);
3562 memset(page_buffer, ch, TARGET_PAGE_SIZE);
3568 case RAM_SAVE_FLAG_PAGE:
3570 if (!place_needed || !matching_page_sizes) {
3571 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
3573 /* Avoids the qemu_file copy during postcopy, which is
3574 * going to do a copy later; can only do it when we
3575 * do this read in one go (matching page sizes)
3577 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
3581 case RAM_SAVE_FLAG_EOS:
3583 multifd_recv_sync_main();
3586 error_report("Unknown combination of migration flags: %#x"
3587 " (postcopy mode)", flags);
3592 /* Detect for any possible file errors */
3593 if (!ret && qemu_file_get_error(f)) {
3594 ret = qemu_file_get_error(f);
3597 if (!ret && place_needed) {
3598 /* This gets called at the last target page in the host page */
3599 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
3602 ret = postcopy_place_page_zero(mis, place_dest,
3605 ret = postcopy_place_page(mis, place_dest,
3606 place_source, block);
3614 static bool postcopy_is_advised(void)
3616 PostcopyState ps = postcopy_state_get();
3617 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
3620 static bool postcopy_is_running(void)
3622 PostcopyState ps = postcopy_state_get();
3623 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
/*
 * Incoming-side loader for the "ram" section.
 *
 * When postcopy is already running, the section is handed to
 * ram_load_postcopy().  Otherwise records are processed until
 * RAM_SAVE_FLAG_EOS or an error.  Each record starts with a 64-bit word:
 * the bits below the target page size carry RAM_SAVE_FLAG_* values, the
 * remaining bits carry the page address (for RAM_SAVE_FLAG_MEM_SIZE, the
 * total RAM size).  For page-carrying flags the target block and host
 * address are resolved first and the page is marked in the block's
 * received bitmap.  RAM_SAVE_FLAG_COMPRESS_PAGE is rejected when
 * compression is not enabled.  Before returning, the result of
 * wait_for_decompress_done() is OR-ed into the return value.
 *
 * NOTE(review): version_id is compared against 4 only; presumably other
 * versions are rejected - confirm against the full source.
 */
3626 static int ram_load(QEMUFile *f, void *opaque, int version_id)
3628 int flags = 0, ret = 0, invalid_flags = 0;
3629 static uint64_t seq_iter;
3632 * If system is running in postcopy mode, page inserts to host memory must
3635 bool postcopy_running = postcopy_is_running();
3636 /* ADVISE is earlier, it shows the source has the postcopy capability on */
3637 bool postcopy_advised = postcopy_is_advised();
3641 if (version_id != 4) {
3645 if (!migrate_use_compression()) {
3646 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
3648 /* This RCU critical section can be very long running.
3649 * When RCU reclaims in the code start to become numerous,
3650 * it will be necessary to reduce the granularity of this
3655 if (postcopy_running) {
3656 ret = ram_load_postcopy(f);
3659 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
3660 ram_addr_t addr, total_ram_bytes;
3664 addr = qemu_get_be64(f);
3665 flags = addr & ~TARGET_PAGE_MASK;
3666 addr &= TARGET_PAGE_MASK;
3668 if (flags & invalid_flags) {
3669 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
3670 error_report("Received an unexpected compressed page");
3677 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
3678 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
3679 RAMBlock *block = ram_block_from_stream(f, flags);
3681 host = host_from_ram_block_offset(block, addr);
3683 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
3687 ramblock_recv_bitmap_set(block, host);
3688 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
3691 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
3692 case RAM_SAVE_FLAG_MEM_SIZE:
3693 /* Synchronize RAM block list */
3694 total_ram_bytes = addr;
3695 while (!ret && total_ram_bytes) {
3700 len = qemu_get_byte(f);
3701 qemu_get_buffer(f, (uint8_t *)id, len);
3703 length = qemu_get_be64(f);
3705 block = qemu_ram_block_by_name(id);
3706 if (block && !qemu_ram_is_migratable(block)) {
3707 error_report("block %s should not be migrated !", id);
3710 if (length != block->used_length) {
3711 Error *local_err = NULL;
3713 ret = qemu_ram_resize(block, length,
3716 error_report_err(local_err);
3719 /* For postcopy we need to check hugepage sizes match */
3720 if (postcopy_advised &&
3721 block->page_size != qemu_host_page_size) {
3722 uint64_t remote_page_size = qemu_get_be64(f);
3723 if (remote_page_size != block->page_size) {
3724 error_report("Mismatched RAM page size %s "
3725 "(local) %zd != %" PRId64,
3726 id, block->page_size,
3731 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
3734 error_report("Unknown ramblock \"%s\", cannot "
3735 "accept migration", id);
3739 total_ram_bytes -= length;
3743 case RAM_SAVE_FLAG_ZERO:
3744 ch = qemu_get_byte(f);
3745 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
3748 case RAM_SAVE_FLAG_PAGE:
3749 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
3752 case RAM_SAVE_FLAG_COMPRESS_PAGE:
3753 len = qemu_get_be32(f);
3754 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
3755 error_report("Invalid compressed data length: %d", len);
3759 decompress_data_with_multi_threads(f, host, len);
3762 case RAM_SAVE_FLAG_XBZRLE:
3763 if (load_xbzrle(f, addr, host) < 0) {
3764 error_report("Failed to decompress XBZRLE page at "
3765 RAM_ADDR_FMT, addr);
3770 case RAM_SAVE_FLAG_EOS:
3772 multifd_recv_sync_main();
3775 if (flags & RAM_SAVE_FLAG_HOOK) {
3776 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
3778 error_report("Unknown combination of migration flags: %#x",
3784 ret = qemu_file_get_error(f);
3788 ret |= wait_for_decompress_done();
3790 trace_ram_load_complete(ret, seq_iter);
/*
 * has_postcopy handler for the "ram" section: reports whatever
 * migrate_postcopy_ram() returns.  @opaque is not used.
 */
3794 static bool ram_has_postcopy(void *opaque)
3796 return migrate_postcopy_ram();
3799 /* Sync all the dirty bitmap with destination VM. */
3800 static int ram_dirty_bitmap_sync_all(MigrationState *s, RAMState *rs)
3803 QEMUFile *file = s->to_dst_file;
3804 int ramblock_count = 0;
3806 trace_ram_dirty_bitmap_sync_start();
3808 RAMBLOCK_FOREACH_MIGRATABLE(block) {
3809 qemu_savevm_send_recv_bitmap(file, block->idstr);
3810 trace_ram_dirty_bitmap_request(block->idstr);
3814 trace_ram_dirty_bitmap_sync_wait();
3816 /* Wait until all the ramblocks' dirty bitmap synced */
3817 while (ramblock_count--) {
3818 qemu_sem_wait(&s->rp_state.rp_sem);
3821 trace_ram_dirty_bitmap_sync_complete();
/*
 * Post s->rp_state.rp_sem once; ram_dirty_bitmap_sync_all() waits on the
 * same semaphore, once per requested ramblock bitmap.
 */
3826 static void ram_dirty_bitmap_reload_notify(MigrationState *s)
3828 qemu_sem_post(&s->rp_state.rp_sem);
3832 * Read the received bitmap, revert it as the initial dirty bitmap.
3833 * This is only used when the postcopy migration is paused but wants
3834 * to resume from a middle point.
3836 int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block)
3839 QEMUFile *file = s->rp_state.from_dst_file;
3840 unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS;
3841 uint64_t local_size = nbits / 8;
3842 uint64_t size, end_mark;
3844 trace_ram_dirty_bitmap_reload_begin(block->idstr);
3846 if (s->state != MIGRATION_STATUS_POSTCOPY_RECOVER) {
3847 error_report("%s: incorrect state %s", __func__,
3848 MigrationStatus_str(s->state));
3853 * Note: see comments in ramblock_recv_bitmap_send() on why we
3854 * need the endianess convertion, and the paddings.
3856 local_size = ROUND_UP(local_size, 8);
3859 le_bitmap = bitmap_new(nbits + BITS_PER_LONG);
3861 size = qemu_get_be64(file);
3863 /* The size of the bitmap should match with our ramblock */
3864 if (size != local_size) {
3865 error_report("%s: ramblock '%s' bitmap size mismatch "
3866 "(0x%"PRIx64" != 0x%"PRIx64")", __func__,
3867 block->idstr, size, local_size);
3872 size = qemu_get_buffer(file, (uint8_t *)le_bitmap, local_size);
3873 end_mark = qemu_get_be64(file);
3875 ret = qemu_file_get_error(file);
3876 if (ret || size != local_size) {
3877 error_report("%s: read bitmap failed for ramblock '%s': %d"
3878 " (size 0x%"PRIx64", got: 0x%"PRIx64")",
3879 __func__, block->idstr, ret, local_size, size);
3884 if (end_mark != RAMBLOCK_RECV_BITMAP_ENDING) {
3885 error_report("%s: ramblock '%s' end mark incorrect: 0x%"PRIu64,
3886 __func__, block->idstr, end_mark);
3892 * Endianess convertion. We are during postcopy (though paused).
3893 * The dirty bitmap won't change. We can directly modify it.
3895 bitmap_from_le(block->bmap, le_bitmap, nbits);
3898 * What we received is "received bitmap". Revert it as the initial
3899 * dirty bitmap for this ramblock.
3901 bitmap_complement(block->bmap, block->bmap, nbits);
3903 trace_ram_dirty_bitmap_reload_complete(block->idstr);
3906 * We succeeded to sync bitmap for current ramblock. If this is
3907 * the last one to sync, we need to notify the main send thread.
3909 ram_dirty_bitmap_reload_notify(s);
3917 static int ram_resume_prepare(MigrationState *s, void *opaque)
3919 RAMState *rs = *(RAMState **)opaque;
3922 ret = ram_dirty_bitmap_sync_all(s, rs);
3927 ram_state_resume_prepare(rs, s->to_dst_file);
3932 static SaveVMHandlers savevm_ram_handlers = {
3933 .save_setup = ram_save_setup,
3934 .save_live_iterate = ram_save_iterate,
3935 .save_live_complete_postcopy = ram_save_complete,
3936 .save_live_complete_precopy = ram_save_complete,
3937 .has_postcopy = ram_has_postcopy,
3938 .save_live_pending = ram_save_pending,
3939 .load_state = ram_load,
3940 .save_cleanup = ram_save_cleanup,
3941 .load_setup = ram_load_setup,
3942 .load_cleanup = ram_load_cleanup,
3943 .resume_prepare = ram_resume_prepare,
/*
 * Initialise the XBZRLE lock and register the "ram" live-migration
 * section (version 4) with savevm_ram_handlers, passing the address of
 * ram_state as the opaque pointer.
 */
3946 void ram_mig_init(void)
3948 qemu_mutex_init(&XBZRLE.lock);
3949 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);