4 * Copyright (c) 2003-2008 Fabrice Bellard
5 * Copyright (c) 2011-2015 Red Hat Inc
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 #include "qemu/osdep.h"
29 #include "qemu-common.h"
32 #include "qapi-event.h"
33 #include "qemu/cutils.h"
34 #include "qemu/bitops.h"
35 #include "qemu/bitmap.h"
36 #include "qemu/timer.h"
37 #include "qemu/main-loop.h"
38 #include "migration/migration.h"
39 #include "migration/postcopy-ram.h"
40 #include "exec/address-spaces.h"
41 #include "migration/page_cache.h"
42 #include "qemu/error-report.h"
44 #include "exec/ram_addr.h"
45 #include "qemu/rcu_queue.h"
46 #include "migration/colo.h"
48 /***********************************************************/
49 /* ram save/restore */
51 #define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
52 #define RAM_SAVE_FLAG_COMPRESS 0x02
53 #define RAM_SAVE_FLAG_MEM_SIZE 0x04
54 #define RAM_SAVE_FLAG_PAGE 0x08
55 #define RAM_SAVE_FLAG_EOS 0x10
56 #define RAM_SAVE_FLAG_CONTINUE 0x20
57 #define RAM_SAVE_FLAG_XBZRLE 0x40
58 /* 0x80 is reserved in migration.h; start with 0x100 next */
59 #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
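/* These flags travel in the low bits of the page offset written by
 * save_page_header(); offsets are target-page aligned, so the low bits
 * are free.  For example (a sketch assuming 4 KiB target pages, so bits
 * 0-11 are available), a normal page at block offset 0x3000 that
 * continues the previous block would be announced as:
 *
 *     0x3000 | RAM_SAVE_FLAG_PAGE | RAM_SAVE_FLAG_CONTINUE  ==  0x3028
 */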
61 static uint8_t *ZERO_TARGET_PAGE;
63 static inline bool is_zero_range(uint8_t *p, uint64_t size)
65 return buffer_is_zero(p, size);
68 /* struct contains XBZRLE cache and a static page
69 used by the compression */
71 /* buffer used for XBZRLE encoding */
73 /* buffer for storing page content */
75 /* Cache for XBZRLE, Protected by lock. */
80 /* buffer used for XBZRLE decoding */
81 static uint8_t *xbzrle_decoded_buf;
83 static void XBZRLE_cache_lock(void)
85 if (migrate_use_xbzrle())
86 qemu_mutex_lock(&XBZRLE.lock);
89 static void XBZRLE_cache_unlock(void)
91 if (migrate_use_xbzrle())
92 qemu_mutex_unlock(&XBZRLE.lock);
96 * xbzrle_cache_resize: resize the xbzrle cache
98 * This function is called from qmp_migrate_set_cache_size in main
99 * thread, possibly while a migration is in progress. A running
100 * migration may be using the cache and might finish during this call,
101 * hence changes to the cache are protected by XBZRLE.lock().
103 * Returns the new_size or negative in case of error.
105 * @new_size: new cache size
107 int64_t xbzrle_cache_resize(int64_t new_size)
109 PageCache *new_cache;
112 if (new_size < TARGET_PAGE_SIZE) {
118 if (XBZRLE.cache != NULL) {
119 if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
122 new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
125 error_report("Error creating cache");
130 cache_fini(XBZRLE.cache);
131 XBZRLE.cache = new_cache;
135 ret = pow2floor(new_size);
137 XBZRLE_cache_unlock();
141 /* State of RAM for migration */
143 /* Last block that we have visited searching for dirty pages */
144 RAMBlock *last_seen_block;
145 /* Last block from where we have sent data */
146 RAMBlock *last_sent_block;
147 /* Last offset we have sent data from */
148 ram_addr_t last_offset;
149 /* last ram version we have seen */
150 uint32_t last_version;
151 /* We are in the first round */
153 /* How many times we have dirty too many pages */
154 int dirty_rate_high_cnt;
155 /* How many times we have synchronized the bitmap */
156 uint64_t bitmap_sync_count;
157 /* these variables are used for bitmap sync */
158 /* last time we did a full bitmap_sync */
159 int64_t time_last_bitmap_sync;
160 /* bytes transferred at start_time */
161 uint64_t bytes_xfer_prev;
162 /* number of dirty pages since start_time */
163 uint64_t num_dirty_pages_period;
164 /* xbzrle misses since the beginning of the period */
165 uint64_t xbzrle_cache_miss_prev;
166 /* number of iterations at the beginning of period */
167 uint64_t iterations_prev;
168 /* Accounting fields */
169 /* number of zero pages. It used to be pages filled by the same char. */
171 /* number of normal transferred pages */
173 /* Iterations since start */
175 /* xbzrle transmitted bytes. Notice that this is with
176 * compression, they can't be calculated from the pages */
177 uint64_t xbzrle_bytes;
178 /* xbzrle transmitted pages */
179 uint64_t xbzrle_pages;
180 /* xbzrle number of cache miss */
181 uint64_t xbzrle_cache_miss;
182 /* xbzrle miss rate */
183 double xbzrle_cache_miss_rate;
185 typedef struct RAMState RAMState;
187 static RAMState ram_state;
189 /* accounting for migration statistics */
190 typedef struct AccountingInfo {
191 uint64_t xbzrle_overflows;
194 static AccountingInfo acct_info;
196 static void acct_clear(void)
198 memset(&acct_info, 0, sizeof(acct_info));
201 uint64_t dup_mig_pages_transferred(void)
203 return ram_state.zero_pages;
206 uint64_t norm_mig_pages_transferred(void)
208 return ram_state.norm_pages;
211 uint64_t xbzrle_mig_bytes_transferred(void)
213 return ram_state.xbzrle_bytes;
216 uint64_t xbzrle_mig_pages_transferred(void)
218 return ram_state.xbzrle_pages;
221 uint64_t xbzrle_mig_pages_cache_miss(void)
223 return ram_state.xbzrle_cache_miss;
226 double xbzrle_mig_cache_miss_rate(void)
228 return ram_state.xbzrle_cache_miss_rate;
231 uint64_t xbzrle_mig_pages_overflow(void)
233 return acct_info.xbzrle_overflows;
236 static QemuMutex migration_bitmap_mutex;
237 static uint64_t migration_dirty_pages;
239 /* used by the search for pages to send */
240 struct PageSearchStatus {
241 /* Current block being searched */
243 /* Current offset to search from */
245 /* Set once we wrap around */
248 typedef struct PageSearchStatus PageSearchStatus;
250 static struct BitmapRcu {
252 /* Main migration bitmap */
254 /* bitmap of pages that haven't been sent even once
255 * only maintained and used in postcopy at the moment
256 * where it's used to send the dirtymap at the start
257 * of the postcopy phase
259 unsigned long *unsentmap;
260 } *migration_bitmap_rcu;
262 struct CompressParam {
271 typedef struct CompressParam CompressParam;
273 struct DecompressParam {
282 typedef struct DecompressParam DecompressParam;
284 static CompressParam *comp_param;
285 static QemuThread *compress_threads;
286 /* comp_done_cond is used to wake up the migration thread when
287 * one of the compression threads has finished the compression.
288 * comp_done_lock is used together with comp_done_cond.
290 static QemuMutex comp_done_lock;
291 static QemuCond comp_done_cond;
292 /* The empty QEMUFileOps will be used by file in CompressParam */
293 static const QEMUFileOps empty_ops = { };
295 static bool compression_switch;
296 static DecompressParam *decomp_param;
297 static QemuThread *decompress_threads;
298 static QemuMutex decomp_done_lock;
299 static QemuCond decomp_done_cond;
301 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
304 static void *do_data_compress(void *opaque)
306 CompressParam *param = opaque;
310 qemu_mutex_lock(&param->mutex);
311 while (!param->quit) {
313 block = param->block;
314 offset = param->offset;
316 qemu_mutex_unlock(&param->mutex);
318 do_compress_ram_page(param->file, block, offset);
320 qemu_mutex_lock(&comp_done_lock);
322 qemu_cond_signal(&comp_done_cond);
323 qemu_mutex_unlock(&comp_done_lock);
325 qemu_mutex_lock(&param->mutex);
327 qemu_cond_wait(&param->cond, &param->mutex);
330 qemu_mutex_unlock(&param->mutex);
335 static inline void terminate_compression_threads(void)
337 int idx, thread_count;
339 thread_count = migrate_compress_threads();
341 for (idx = 0; idx < thread_count; idx++) {
342 qemu_mutex_lock(&comp_param[idx].mutex);
343 comp_param[idx].quit = true;
344 qemu_cond_signal(&comp_param[idx].cond);
345 qemu_mutex_unlock(&comp_param[idx].mutex);
349 void migrate_compress_threads_join(void)
353 if (!migrate_use_compression()) {
356 terminate_compression_threads();
357 thread_count = migrate_compress_threads();
358 for (i = 0; i < thread_count; i++) {
359 qemu_thread_join(compress_threads + i);
360 qemu_fclose(comp_param[i].file);
361 qemu_mutex_destroy(&comp_param[i].mutex);
362 qemu_cond_destroy(&comp_param[i].cond);
364 qemu_mutex_destroy(&comp_done_lock);
365 qemu_cond_destroy(&comp_done_cond);
366 g_free(compress_threads);
368 compress_threads = NULL;
372 void migrate_compress_threads_create(void)
376 if (!migrate_use_compression()) {
379 compression_switch = true;
380 thread_count = migrate_compress_threads();
381 compress_threads = g_new0(QemuThread, thread_count);
382 comp_param = g_new0(CompressParam, thread_count);
383 qemu_cond_init(&comp_done_cond);
384 qemu_mutex_init(&comp_done_lock);
385 for (i = 0; i < thread_count; i++) {
386 /* comp_param[i].file is just used as a dummy buffer to save data,
387 * set its ops to empty.
389 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
390 comp_param[i].done = true;
391 comp_param[i].quit = false;
392 qemu_mutex_init(&comp_param[i].mutex);
393 qemu_cond_init(&comp_param[i].cond);
394 qemu_thread_create(compress_threads + i, "compress",
395 do_data_compress, comp_param + i,
396 QEMU_THREAD_JOINABLE);
401 * save_page_header: write page header to wire
403 * If this is the 1st block, it also writes the block identification
405 * Returns the number of bytes written
407 * @f: QEMUFile where to send the data
408 * @block: block that contains the page we want to send
409 * @offset: offset inside the block for the page
410 * in the lower bits, it contains flags
412 static size_t save_page_header(QEMUFile *f, RAMBlock *block, ram_addr_t offset)
416 qemu_put_be64(f, offset);
419 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
420 len = strlen(block->idstr);
421 qemu_put_byte(f, len);
422 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
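    /* At this point the header on the wire is: an 8-byte big-endian word
     * holding offset|flags and, only when RAM_SAVE_FLAG_CONTINUE is not
     * set, a one-byte idstr length followed by the idstr bytes.  E.g. for
     * a block named "pc.ram" (an illustrative name) that is 8 + 1 + 6
     * bytes of header. */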
429 * mig_throttle_guest_down: throttle down the guest
431 * Reduce amount of guest cpu execution to hopefully slow down memory
432 * writes. If guest dirty memory rate is reduced below the rate at
433 * which we can transfer pages to the destination then we should be
434 * able to complete migration. Some workloads dirty memory way too
435 * fast and will not effectively converge, even with auto-converge.
437 static void mig_throttle_guest_down(void)
439 MigrationState *s = migrate_get_current();
440 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
441 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
443 /* We have not started throttling yet. Let's start it. */
444 if (!cpu_throttle_active()) {
445 cpu_throttle_set(pct_initial);
447 /* Throttling already on, just increase the rate */
448 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
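    /* As a worked example (assuming the default parameters of
     * cpu_throttle_initial=20 and cpu_throttle_increment=10), successive
     * calls throttle the guest to 20%, then 30%, 40%, ... until the
     * migration converges or completes. */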
453 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
455 * @rs: current RAM state
456 * @current_addr: address for the zero page
458 * Update the xbzrle cache to reflect a page that's been sent as all 0.
459 * The important thing is that a stale (not-yet-0'd) page be replaced by the new data.
461 * As a bonus, if the page wasn't in the cache it gets added so that
462 * when a small write is made into the 0'd page it gets XBZRLE sent.
464 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
466 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
470 /* We don't care if this fails to allocate a new cache page
471 * as long as it updated an old one */
472 cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
473 rs->bitmap_sync_count);
476 #define ENCODING_FLAG_XBZRLE 0x1
479 * save_xbzrle_page: compress and send current page
481 * Returns: 1 means that we wrote the page
482 * 0 means that page is identical to the one already sent
483 * -1 means that xbzrle would be longer than normal
485 * @rs: current RAM state
486 * @f: QEMUFile where to send the data
487 * @current_data: pointer to the address of the page contents
488 * @current_addr: addr of the page
489 * @block: block that contains the page we want to send
490 * @offset: offset inside the block for the page
491 * @last_stage: if we are at the completion stage
492 * @bytes_transferred: increase it with the number of transferred bytes
494 static int save_xbzrle_page(RAMState *rs, QEMUFile *f, uint8_t **current_data,
495 ram_addr_t current_addr, RAMBlock *block,
496 ram_addr_t offset, bool last_stage,
497 uint64_t *bytes_transferred)
499 int encoded_len = 0, bytes_xbzrle;
500 uint8_t *prev_cached_page;
502 if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
503 rs->xbzrle_cache_miss++;
505 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
506 rs->bitmap_sync_count) == -1) {
509 /* update *current_data when the page has been
510 inserted into cache */
511 *current_data = get_cached_data(XBZRLE.cache, current_addr);
517 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
519 /* save current buffer into memory */
520 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
522 /* XBZRLE encoding (if there is no overflow) */
523 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
524 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
526 if (encoded_len == 0) {
527 trace_save_xbzrle_page_skipping();
529 } else if (encoded_len == -1) {
530 trace_save_xbzrle_page_overflow();
531 acct_info.xbzrle_overflows++;
532 /* update data in the cache */
534 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
535 *current_data = prev_cached_page;
540 /* we need to update the data in the cache, in order to get the same data */
542 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
545 /* Send XBZRLE based compressed page */
546 bytes_xbzrle = save_page_header(f, block, offset | RAM_SAVE_FLAG_XBZRLE);
547 qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
548 qemu_put_be16(f, encoded_len);
549 qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
550 bytes_xbzrle += encoded_len + 1 + 2;
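    /* The +1 accounts for the ENCODING_FLAG_XBZRLE byte and the +2 for the
     * big-endian 16-bit encoded_len written just above; save_page_header()
     * already contributed its own length via bytes_xbzrle's initial value. */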
552 rs->xbzrle_bytes += bytes_xbzrle;
553 *bytes_transferred += bytes_xbzrle;
559 * migration_bitmap_find_dirty: find the next dirty page from start
561 * Called with rcu_read_lock() to protect migration_bitmap
563 * Returns the byte offset within memory region of the start of a dirty page
565 * @rs: current RAM state
566 * @rb: RAMBlock where to search for dirty pages
567 * @start: starting address (typically so we can continue from previous page)
568 * @ram_addr_abs: pointer into which to store the address of the dirty page
569 * within the global ram_addr space
572 ram_addr_t migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
574 ram_addr_t *ram_addr_abs)
576 unsigned long base = rb->offset >> TARGET_PAGE_BITS;
577 unsigned long nr = base + (start >> TARGET_PAGE_BITS);
578 uint64_t rb_size = rb->used_length;
579 unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
580 unsigned long *bitmap;
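    /* base, nr and size index the global dirty bitmap in target-page
     * units: base is the block's first page, nr the first candidate bit
     * to test, and size one past the block's last page. */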
584 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
585 if (rs->ram_bulk_stage && nr > base) {
588 next = find_next_bit(bitmap, size, nr);
591 *ram_addr_abs = next << TARGET_PAGE_BITS;
592 return (next - base) << TARGET_PAGE_BITS;
595 static inline bool migration_bitmap_clear_dirty(ram_addr_t addr)
598 int nr = addr >> TARGET_PAGE_BITS;
599 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
601 ret = test_and_clear_bit(nr, bitmap);
604 migration_dirty_pages--;
609 static void migration_bitmap_sync_range(RAMState *rs, ram_addr_t start,
612 unsigned long *bitmap;
613 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
614 migration_dirty_pages += cpu_physical_memory_sync_dirty_bitmap(bitmap,
615 start, length, &rs->num_dirty_pages_period);
618 static void migration_bitmap_sync_init(RAMState *rs)
620 rs->time_last_bitmap_sync = 0;
621 rs->bytes_xfer_prev = 0;
622 rs->num_dirty_pages_period = 0;
623 rs->xbzrle_cache_miss_prev = 0;
624 rs->iterations_prev = 0;
628 * ram_pagesize_summary: calculate all the pagesizes of a VM
630 * Returns a summary bitmap of the page sizes of all RAMBlocks
632 * For VMs with just normal pages this is equivalent to the host page
633 * size. If it's got some huge pages then it's the OR of all the
634 * different page sizes.
636 uint64_t ram_pagesize_summary(void)
639 uint64_t summary = 0;
641 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
642 summary |= block->page_size;
648 static void migration_bitmap_sync(RAMState *rs)
651 MigrationState *s = migrate_get_current();
653 uint64_t bytes_xfer_now;
655 rs->bitmap_sync_count++;
657 if (!rs->bytes_xfer_prev) {
658 rs->bytes_xfer_prev = ram_bytes_transferred();
661 if (!rs->time_last_bitmap_sync) {
662 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
665 trace_migration_bitmap_sync_start();
666 memory_global_dirty_log_sync();
668 qemu_mutex_lock(&migration_bitmap_mutex);
670 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
671 migration_bitmap_sync_range(rs, block->offset, block->used_length);
674 qemu_mutex_unlock(&migration_bitmap_mutex);
676 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
678 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
680 /* more than 1 second = 1000 milliseconds */
681 if (end_time > rs->time_last_bitmap_sync + 1000) {
682 if (migrate_auto_converge()) {
683 /* The following detection logic can be refined later. For now:
684 Check to see if the dirtied bytes are 50% more than the approx.
685 amount of bytes that just got transferred since the last time we
686 were in this routine. If that happens twice, start or increase throttling. */
688 bytes_xfer_now = ram_bytes_transferred();
690 if (s->dirty_pages_rate &&
691 (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
692 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
693 (rs->dirty_rate_high_cnt++ >= 2)) {
694 trace_migration_throttle();
695 rs->dirty_rate_high_cnt = 0;
696 mig_throttle_guest_down();
698 rs->bytes_xfer_prev = bytes_xfer_now;
701 if (migrate_use_xbzrle()) {
702 if (rs->iterations_prev != rs->iterations) {
703 rs->xbzrle_cache_miss_rate =
704 (double)(rs->xbzrle_cache_miss -
705 rs->xbzrle_cache_miss_prev) /
706 (rs->iterations - rs->iterations_prev);
708 rs->iterations_prev = rs->iterations;
709 rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
711 s->dirty_pages_rate = rs->num_dirty_pages_period * 1000
712 / (end_time - rs->time_last_bitmap_sync);
713 s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
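        /* For instance, 25000 pages dirtied over a 1000 ms period with
         * 4 KiB target pages (an illustrative size; TARGET_PAGE_SIZE is
         * target dependent) gives dirty_pages_rate = 25000 pages/s and a
         * dirty_bytes_rate of roughly 100 MB/s. */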
714 rs->time_last_bitmap_sync = end_time;
715 rs->num_dirty_pages_period = 0;
717 s->dirty_sync_count = rs->bitmap_sync_count;
718 if (migrate_use_events()) {
719 qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
724 * save_zero_page: send the zero page to the stream
726 * Returns the number of pages written.
728 * @rs: current RAM state
729 * @f: QEMUFile where to send the data
730 * @block: block that contains the page we want to send
731 * @offset: offset inside the block for the page
732 * @p: pointer to the page
733 * @bytes_transferred: increase it with the number of transferred bytes
735 static int save_zero_page(RAMState *rs, QEMUFile *f, RAMBlock *block,
737 uint8_t *p, uint64_t *bytes_transferred)
741 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
743 *bytes_transferred += save_page_header(f, block,
744 offset | RAM_SAVE_FLAG_COMPRESS);
746 *bytes_transferred += 1;
753 static void ram_release_pages(MigrationState *ms, const char *rbname,
754 uint64_t offset, int pages)
756 if (!migrate_release_ram() || !migration_in_postcopy(ms)) {
760 ram_discard_range(NULL, rbname, offset, pages << TARGET_PAGE_BITS);
764 * ram_save_page: send the given page to the stream
766 * Returns the number of pages written.
768 * >=0 - Number of pages written - this might legally be 0
769 * if xbzrle noticed the page was the same.
771 * @rs: current RAM state
772 * @ms: current migration state
773 * @f: QEMUFile where to send the data
774 * @block: block that contains the page we want to send
775 * @offset: offset inside the block for the page
776 * @last_stage: if we are at the completion stage
777 * @bytes_transferred: increase it with the number of transferred bytes
779 static int ram_save_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
780 PageSearchStatus *pss, bool last_stage,
781 uint64_t *bytes_transferred)
785 ram_addr_t current_addr;
788 bool send_async = true;
789 RAMBlock *block = pss->block;
790 ram_addr_t offset = pss->offset;
792 p = block->host + offset;
794 /* When in doubt, send the page as normal */
796 ret = ram_control_save_page(f, block->offset,
797 offset, TARGET_PAGE_SIZE, &bytes_xmit);
799 *bytes_transferred += bytes_xmit;
805 current_addr = block->offset + offset;
807 if (block == rs->last_sent_block) {
808 offset |= RAM_SAVE_FLAG_CONTINUE;
810 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
811 if (ret != RAM_SAVE_CONTROL_DELAYED) {
812 if (bytes_xmit > 0) {
814 } else if (bytes_xmit == 0) {
819 pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
821 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
822 * page would be stale
824 xbzrle_cache_zero_page(rs, current_addr);
825 ram_release_pages(ms, block->idstr, pss->offset, pages);
826 } else if (!rs->ram_bulk_stage &&
827 !migration_in_postcopy(ms) && migrate_use_xbzrle()) {
828 pages = save_xbzrle_page(rs, f, &p, current_addr, block,
829 offset, last_stage, bytes_transferred);
831 /* Can't send this cached data async, since the cache page
832 * might get updated before it gets to the wire
839 /* XBZRLE overflow or normal page */
841 *bytes_transferred += save_page_header(f, block,
842 offset | RAM_SAVE_FLAG_PAGE);
844 qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE,
845 migrate_release_ram() &
846 migration_in_postcopy(ms));
848 qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
850 *bytes_transferred += TARGET_PAGE_SIZE;
855 XBZRLE_cache_unlock();
860 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
863 int bytes_sent, blen;
864 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
866 bytes_sent = save_page_header(f, block, offset |
867 RAM_SAVE_FLAG_COMPRESS_PAGE);
868 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
869 migrate_compress_level());
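    /* A negative blen indicates that compressing or buffering the page
     * failed; the error is propagated to the migration stream below. */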
872 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
873 error_report("compressed data failed!");
876 ram_release_pages(migrate_get_current(), block->idstr,
877 offset & TARGET_PAGE_MASK, 1);
883 static uint64_t bytes_transferred;
885 static void flush_compressed_data(QEMUFile *f)
887 int idx, len, thread_count;
889 if (!migrate_use_compression()) {
892 thread_count = migrate_compress_threads();
894 qemu_mutex_lock(&comp_done_lock);
895 for (idx = 0; idx < thread_count; idx++) {
896 while (!comp_param[idx].done) {
897 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
900 qemu_mutex_unlock(&comp_done_lock);
902 for (idx = 0; idx < thread_count; idx++) {
903 qemu_mutex_lock(&comp_param[idx].mutex);
904 if (!comp_param[idx].quit) {
905 len = qemu_put_qemu_file(f, comp_param[idx].file);
906 bytes_transferred += len;
908 qemu_mutex_unlock(&comp_param[idx].mutex);
912 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
915 param->block = block;
916 param->offset = offset;
919 static int compress_page_with_multi_thread(RAMState *rs, QEMUFile *f,
920 RAMBlock *block, ram_addr_t offset,
921 uint64_t *bytes_transferred)
923 int idx, thread_count, bytes_xmit = -1, pages = -1;
925 thread_count = migrate_compress_threads();
926 qemu_mutex_lock(&comp_done_lock);
928 for (idx = 0; idx < thread_count; idx++) {
929 if (comp_param[idx].done) {
930 comp_param[idx].done = false;
931 bytes_xmit = qemu_put_qemu_file(f, comp_param[idx].file);
932 qemu_mutex_lock(&comp_param[idx].mutex);
933 set_compress_params(&comp_param[idx], block, offset);
934 qemu_cond_signal(&comp_param[idx].cond);
935 qemu_mutex_unlock(&comp_param[idx].mutex);
938 *bytes_transferred += bytes_xmit;
945 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
948 qemu_mutex_unlock(&comp_done_lock);
954 * ram_save_compressed_page: compress the given page and send it to the stream
956 * Returns the number of pages written.
958 * @rs: current RAM state
959 * @ms: current migration state
960 * @f: QEMUFile where to send the data
961 * @block: block that contains the page we want to send
962 * @offset: offset inside the block for the page
963 * @last_stage: if we are at the completion stage
964 * @bytes_transferred: increase it with the number of transferred bytes
966 static int ram_save_compressed_page(RAMState *rs, MigrationState *ms,
968 PageSearchStatus *pss, bool last_stage,
969 uint64_t *bytes_transferred)
972 uint64_t bytes_xmit = 0;
975 RAMBlock *block = pss->block;
976 ram_addr_t offset = pss->offset;
978 p = block->host + offset;
980 ret = ram_control_save_page(f, block->offset,
981 offset, TARGET_PAGE_SIZE, &bytes_xmit);
983 *bytes_transferred += bytes_xmit;
986 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
987 if (ret != RAM_SAVE_CONTROL_DELAYED) {
988 if (bytes_xmit > 0) {
990 } else if (bytes_xmit == 0) {
995 /* When starting the process of a new block, the first page of
996 * the block should be sent out before other pages in the same
997 * block, and all the pages in the last block should have been sent
998 * out. Keeping this order is important because the 'cont' flag
999 * is used to avoid resending the block name.
1001 if (block != rs->last_sent_block) {
1002 flush_compressed_data(f);
1003 pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
1005 /* Make sure the first page is sent out before other pages */
1006 bytes_xmit = save_page_header(f, block, offset |
1007 RAM_SAVE_FLAG_COMPRESS_PAGE);
1008 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
1009 migrate_compress_level());
1011 *bytes_transferred += bytes_xmit + blen;
1015 qemu_file_set_error(f, blen);
1016 error_report("compressed data failed!");
1020 ram_release_pages(ms, block->idstr, pss->offset, pages);
1023 offset |= RAM_SAVE_FLAG_CONTINUE;
1024 pages = save_zero_page(rs, f, block, offset, p, bytes_transferred);
1026 pages = compress_page_with_multi_thread(rs, f, block, offset,
1029 ram_release_pages(ms, block->idstr, pss->offset, pages);
1038 * find_dirty_block: find the next dirty page and update any state
1039 * associated with the search process.
1041 * Returns true if a page is found
1043 * @rs: current RAM state
1044 * @f: QEMUFile where to send the data
1045 * @pss: data about the state of the current dirty page scan
1046 * @again: set to false if the search has scanned the whole of RAM
1047 * @ram_addr_abs: pointer into which to store the address of the dirty page
1048 * within the global ram_addr space
1050 static bool find_dirty_block(RAMState *rs, QEMUFile *f, PageSearchStatus *pss,
1051 bool *again, ram_addr_t *ram_addr_abs)
1053 pss->offset = migration_bitmap_find_dirty(rs, pss->block, pss->offset,
1055 if (pss->complete_round && pss->block == rs->last_seen_block &&
1056 pss->offset >= rs->last_offset) {
1058 * We've been once around the RAM and haven't found anything.
1064 if (pss->offset >= pss->block->used_length) {
1065 /* Didn't find anything in this RAM Block */
1067 pss->block = QLIST_NEXT_RCU(pss->block, next);
1069 /* Hit the end of the list */
1070 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1071 /* Flag that we've looped */
1072 pss->complete_round = true;
1073 rs->ram_bulk_stage = false;
1074 if (migrate_use_xbzrle()) {
1075 /* If xbzrle is on, stop using the data compression at this
1076 * point. In theory, xbzrle can do better than compression.
1078 flush_compressed_data(f);
1079 compression_switch = false;
1082 /* Didn't find anything this time, but try again on the new block */
1086 /* Can go around again, but... */
1088 /* We've found something so probably don't need to */
1094 * unqueue_page: gets a page off the queue
1096 * Helper for 'get_queued_page' - gets a page off the queue
1098 * Returns the block of the page (or NULL if none available)
1100 * @ms: current migration state
1101 * @offset: used to return the offset within the RAMBlock
1102 * @ram_addr_abs: pointer into which to store the address of the dirty page
1103 * within the global ram_addr space
1105 static RAMBlock *unqueue_page(MigrationState *ms, ram_addr_t *offset,
1106 ram_addr_t *ram_addr_abs)
1108 RAMBlock *block = NULL;
1110 qemu_mutex_lock(&ms->src_page_req_mutex);
1111 if (!QSIMPLEQ_EMPTY(&ms->src_page_requests)) {
1112 struct MigrationSrcPageRequest *entry =
1113 QSIMPLEQ_FIRST(&ms->src_page_requests);
1115 *offset = entry->offset;
1116 *ram_addr_abs = (entry->offset + entry->rb->offset) &
1119 if (entry->len > TARGET_PAGE_SIZE) {
1120 entry->len -= TARGET_PAGE_SIZE;
1121 entry->offset += TARGET_PAGE_SIZE;
1123 memory_region_unref(block->mr);
1124 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1128 qemu_mutex_unlock(&ms->src_page_req_mutex);
1134 * get_queued_page: unqueue a page from the postcopy requests
1136 * Skips pages that are already sent (!dirty)
1138 * Returns true if a queued page is found
1140 * @rs: current RAM state
1141 * @ms: current migration state
1142 * @pss: data about the state of the current dirty page scan
1143 * @ram_addr_abs: pointer into which to store the address of the dirty page
1144 * within the global ram_addr space
1146 static bool get_queued_page(RAMState *rs, MigrationState *ms,
1147 PageSearchStatus *pss,
1148 ram_addr_t *ram_addr_abs)
1155 block = unqueue_page(ms, &offset, ram_addr_abs);
1157 * We're sending this page, and since it's postcopy nothing else
1158 * will dirty it, and we must make sure it doesn't get sent again
1159 * even if this queue request was received after the background
1160 * search already sent it.
1163 unsigned long *bitmap;
1164 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1165 dirty = test_bit(*ram_addr_abs >> TARGET_PAGE_BITS, bitmap);
1167 trace_get_queued_page_not_dirty(
1168 block->idstr, (uint64_t)offset,
1169 (uint64_t)*ram_addr_abs,
1170 test_bit(*ram_addr_abs >> TARGET_PAGE_BITS,
1171 atomic_rcu_read(&migration_bitmap_rcu)->unsentmap));
1173 trace_get_queued_page(block->idstr,
1175 (uint64_t)*ram_addr_abs);
1179 } while (block && !dirty);
1183 * As soon as we start servicing pages out of order, we have
1184 * to kill the bulk stage, since the bulk stage assumes
1185 * (in migration_bitmap_find_and_reset_dirty) that every page is
1186 * dirty, which is no longer true.
1188 rs->ram_bulk_stage = false;
1191 * We want the background search to continue from the queued page
1192 * since the guest is likely to want other pages near to the page
1193 * it just requested.
1196 pss->offset = offset;
1203 * migration_page_queue_free: drop any remaining pages in the ram request queue
1206 * It should be empty at the end anyway, but in error cases there may
1207 * be some left. In case any page is left, we drop it.
1209 * @ms: current migration state
1211 void migration_page_queue_free(MigrationState *ms)
1213 struct MigrationSrcPageRequest *mspr, *next_mspr;
1214 /* This queue generally should be empty - but in the case of a failed
1215 * migration it might have some droppings in it.
1218 QSIMPLEQ_FOREACH_SAFE(mspr, &ms->src_page_requests, next_req, next_mspr) {
1219 memory_region_unref(mspr->rb->mr);
1220 QSIMPLEQ_REMOVE_HEAD(&ms->src_page_requests, next_req);
1227 * ram_save_queue_pages: queue the page for transmission
1229 * A request from postcopy destination for example.
1231 * Returns zero on success or negative on error
1233 * @ms: current migration state
1234 * @rbname: Name of the RAMBlock of the request. NULL means the
1235 * same as the last one.
1236 * @start: starting address from the start of the RAMBlock
1237 * @len: length (in bytes) to send
1239 int ram_save_queue_pages(MigrationState *ms, const char *rbname,
1240 ram_addr_t start, ram_addr_t len)
1244 ms->postcopy_requests++;
1247 /* Reuse last RAMBlock */
1248 ramblock = ms->last_req_rb;
1252 * Shouldn't happen, we can't reuse the last RAMBlock if
1253 * it's the 1st request.
1255 error_report("ram_save_queue_pages no previous block");
1259 ramblock = qemu_ram_block_by_name(rbname);
1262 /* We shouldn't be asked for a non-existent RAMBlock */
1263 error_report("ram_save_queue_pages no block '%s'", rbname);
1266 ms->last_req_rb = ramblock;
1268 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1269 if (start+len > ramblock->used_length) {
1270 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1271 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1272 __func__, start, len, ramblock->used_length);
1276 struct MigrationSrcPageRequest *new_entry =
1277 g_malloc0(sizeof(struct MigrationSrcPageRequest));
1278 new_entry->rb = ramblock;
1279 new_entry->offset = start;
1280 new_entry->len = len;
1282 memory_region_ref(ramblock->mr);
1283 qemu_mutex_lock(&ms->src_page_req_mutex);
1284 QSIMPLEQ_INSERT_TAIL(&ms->src_page_requests, new_entry, next_req);
1285 qemu_mutex_unlock(&ms->src_page_req_mutex);
1296 * ram_save_target_page: save one target page
1298 * Returns the number of pages written
1300 * @rs: current RAM state
1301 * @ms: current migration state
1302 * @f: QEMUFile where to send the data
1303 * @pss: data about the page we want to send
1304 * @last_stage: if we are at the completion stage
1305 * @bytes_transferred: increase it with the number of transferred bytes
1306 * @dirty_ram_abs: address of the start of the dirty page in ram_addr_t space
1308 static int ram_save_target_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
1309 PageSearchStatus *pss,
1311 uint64_t *bytes_transferred,
1312 ram_addr_t dirty_ram_abs)
1316 /* Check if the page is dirty and, if so, send it */
1317 if (migration_bitmap_clear_dirty(dirty_ram_abs)) {
1318 unsigned long *unsentmap;
1319 if (compression_switch && migrate_use_compression()) {
1320 res = ram_save_compressed_page(rs, ms, f, pss,
1324 res = ram_save_page(rs, ms, f, pss, last_stage,
1331 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1333 clear_bit(dirty_ram_abs >> TARGET_PAGE_BITS, unsentmap);
1335 /* Only update last_sent_block if a block was actually sent; xbzrle
1336 * might have decided the page was identical so didn't bother writing to the stream. */
1340 rs->last_sent_block = pss->block;
1348 * ram_save_host_page: save a whole host page
1350 * Starting at *offset send pages up to the end of the current host
1351 * page. It's valid for the initial offset to point into the middle of
1352 * a host page in which case the remainder of the hostpage is sent.
1353 * Only dirty target pages are sent. Note that the host page size may
1354 * be a huge page for this block.
1356 * Returns the number of pages written or negative on error
1358 * @rs: current RAM state
1359 * @ms: current migration state
1360 * @f: QEMUFile where to send the data
1361 * @pss: data about the page we want to send
1362 * @last_stage: if we are at the completion stage
1363 * @bytes_transferred: increase it with the number of transferred bytes
1364 * @dirty_ram_abs: Address of the start of the dirty page in ram_addr_t space
1366 static int ram_save_host_page(RAMState *rs, MigrationState *ms, QEMUFile *f,
1367 PageSearchStatus *pss,
1369 uint64_t *bytes_transferred,
1370 ram_addr_t dirty_ram_abs)
1372 int tmppages, pages = 0;
1373 size_t pagesize = qemu_ram_pagesize(pss->block);
1376 tmppages = ram_save_target_page(rs, ms, f, pss, last_stage,
1377 bytes_transferred, dirty_ram_abs);
1383 pss->offset += TARGET_PAGE_SIZE;
1384 dirty_ram_abs += TARGET_PAGE_SIZE;
1385 } while (pss->offset & (pagesize - 1));
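    /* The loop above stops at a host-page boundary: e.g. with 2 MiB huge
     * pages and 4 KiB target pages (illustrative sizes) up to 512 target
     * pages are sent before the mask check becomes zero. */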
1387 /* The offset we leave with is the last one we looked at */
1388 pss->offset -= TARGET_PAGE_SIZE;
1393 * ram_find_and_save_block: finds a dirty page and sends it to f
1395 * Called within an RCU critical section.
1397 * Returns the number of pages written where zero means no dirty pages
1399 * @rs: current RAM state
1400 * @f: QEMUFile where to send the data
1401 * @last_stage: if we are at the completion stage
1402 * @bytes_transferred: increase it with the number of transferred bytes
1404 * On systems where host-page-size > target-page-size it will send all the
1405 * pages in a host page that are dirty.
1408 static int ram_find_and_save_block(RAMState *rs, QEMUFile *f, bool last_stage,
1409 uint64_t *bytes_transferred)
1411 PageSearchStatus pss;
1412 MigrationState *ms = migrate_get_current();
1415 ram_addr_t dirty_ram_abs; /* Address of the start of the dirty page in
1418 /* No dirty page as there is zero RAM */
1419 if (!ram_bytes_total()) {
1423 pss.block = rs->last_seen_block;
1424 pss.offset = rs->last_offset;
1425 pss.complete_round = false;
1428 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1433 found = get_queued_page(rs, ms, &pss, &dirty_ram_abs);
1436 /* priority queue empty, so just search for something dirty */
1437 found = find_dirty_block(rs, f, &pss, &again, &dirty_ram_abs);
1441 pages = ram_save_host_page(rs, ms, f, &pss,
1442 last_stage, bytes_transferred,
1445 } while (!pages && again);
1447 rs->last_seen_block = pss.block;
1448 rs->last_offset = pss.offset;
1453 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1455 uint64_t pages = size / TARGET_PAGE_SIZE;
1456 RAMState *rs = &ram_state;
1459 rs->zero_pages += pages;
1461 rs->norm_pages += pages;
1462 bytes_transferred += size;
1463 qemu_update_position(f, size);
1467 static ram_addr_t ram_save_remaining(void)
1469 return migration_dirty_pages;
1472 uint64_t ram_bytes_remaining(void)
1474 return ram_save_remaining() * TARGET_PAGE_SIZE;
1477 uint64_t ram_bytes_transferred(void)
1479 return bytes_transferred;
1482 uint64_t ram_bytes_total(void)
1488 QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1489 total += block->used_length;
1494 void free_xbzrle_decoded_buf(void)
1496 g_free(xbzrle_decoded_buf);
1497 xbzrle_decoded_buf = NULL;
1500 static void migration_bitmap_free(struct BitmapRcu *bmap)
1503 g_free(bmap->unsentmap);
1507 static void ram_migration_cleanup(void *opaque)
1509 /* the caller must hold the iothread lock or be in a bottom half, so there is
1510 * no write race against this migration_bitmap
1512 struct BitmapRcu *bitmap = migration_bitmap_rcu;
1513 atomic_rcu_set(&migration_bitmap_rcu, NULL);
1515 memory_global_dirty_log_stop();
1516 call_rcu(bitmap, migration_bitmap_free, rcu);
1519 XBZRLE_cache_lock();
1521 cache_fini(XBZRLE.cache);
1522 g_free(XBZRLE.encoded_buf);
1523 g_free(XBZRLE.current_buf);
1524 g_free(ZERO_TARGET_PAGE);
1525 XBZRLE.cache = NULL;
1526 XBZRLE.encoded_buf = NULL;
1527 XBZRLE.current_buf = NULL;
1529 XBZRLE_cache_unlock();
1532 static void ram_state_reset(RAMState *rs)
1534 rs->last_seen_block = NULL;
1535 rs->last_sent_block = NULL;
1536 rs->last_offset = 0;
1537 rs->last_version = ram_list.version;
1538 rs->ram_bulk_stage = true;
1541 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1543 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1545 /* called in the QEMU main thread, so there is
1546 * no write race against this migration_bitmap
1548 if (migration_bitmap_rcu) {
1549 struct BitmapRcu *old_bitmap = migration_bitmap_rcu, *bitmap;
1550 bitmap = g_new(struct BitmapRcu, 1);
1551 bitmap->bmap = bitmap_new(new);
1553 /* prevent bits in migration_bitmap from being set
1554 * by migration_bitmap_sync_range() at the same time;
1555 * it is safe for migration if a migration_bitmap bit is cleared concurrently. */
1558 qemu_mutex_lock(&migration_bitmap_mutex);
1559 bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1560 bitmap_set(bitmap->bmap, old, new - old);
1562 /* We don't have a way to safely extend the sentmap
1563 * with RCU; so mark it as missing, entry to postcopy will fail. */
1566 bitmap->unsentmap = NULL;
1568 atomic_rcu_set(&migration_bitmap_rcu, bitmap);
1569 qemu_mutex_unlock(&migration_bitmap_mutex);
1570 migration_dirty_pages += new - old;
1571 call_rcu(old_bitmap, migration_bitmap_free, rcu);
1576 * 'expected' is the value you expect the bitmap mostly to be full
1577 * of; it won't bother printing lines that are all this value.
1578 * If 'todump' is null the migration bitmap is dumped.
1580 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1582 int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1585 int64_t linelen = 128;
1589 todump = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1592 for (cur = 0; cur < ram_pages; cur += linelen) {
1596 * Last line; catch the case where the line length
1597 * is longer than remaining ram
1599 if (cur + linelen > ram_pages) {
1600 linelen = ram_pages - cur;
1602 for (curb = 0; curb < linelen; curb++) {
1603 bool thisbit = test_bit(cur + curb, todump);
1604 linebuf[curb] = thisbit ? '1' : '.';
1605 found = found || (thisbit != expected);
1608 linebuf[curb] = '\0';
1609 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1614 /* **** functions for postcopy ***** */
1616 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1618 struct RAMBlock *block;
1619 unsigned long *bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1621 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1622 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1623 unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1624 unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1626 while (run_start < range) {
1627 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1628 ram_discard_range(NULL, block->idstr, run_start << TARGET_PAGE_BITS,
1629 (run_end - run_start) << TARGET_PAGE_BITS);
1630 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1636 * postcopy_send_discard_bm_ram: discard a RAMBlock
1638 * Returns zero on success
1640 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1641 * Note: At this point the 'unsentmap' is the processed bitmap combined
1642 * with the dirtymap; so a '1' means it's either dirty or unsent.
1644 * @ms: current migration state
1645 * @pds: state for postcopy
1646 * @start: RAMBlock starting page
1647 * @length: RAMBlock size
1649 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1650 PostcopyDiscardState *pds,
1651 unsigned long start,
1652 unsigned long length)
1654 unsigned long end = start + length; /* one after the end */
1655 unsigned long current;
1656 unsigned long *unsentmap;
1658 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1659 for (current = start; current < end; ) {
1660 unsigned long one = find_next_bit(unsentmap, end, current);
1663 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1664 unsigned long discard_length;
1667 discard_length = end - one;
1669 discard_length = zero - one;
1671 if (discard_length) {
1672 postcopy_discard_send_range(ms, pds, one, discard_length);
1674 current = one + discard_length;
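        /* Each pass finds a run of set (unsent/dirty) bits starting at
         * 'one', measures its length up to the next clear bit or the end
         * of the block, and sends the whole run as one discard command. */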
1684 * postcopy_each_ram_send_discard: discard all RAMBlocks
1686 * Returns 0 for success or negative for error
1688 * Utility for the outgoing postcopy code.
1689 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1690 * passing it bitmap indexes and name.
1691 * (qemu_ram_foreach_block ends up passing unscaled lengths
1692 * which would mean postcopy code would have to deal with target page)
1694 * @ms: current migration state
1696 static int postcopy_each_ram_send_discard(MigrationState *ms)
1698 struct RAMBlock *block;
1701 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1702 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1703 PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1708 * Postcopy sends chunks of bitmap over the wire, but it
1709 * just needs indexes at this point, which avoids it having
1710 * target page specific code.
1712 ret = postcopy_send_discard_bm_ram(ms, pds, first,
1713 block->used_length >> TARGET_PAGE_BITS);
1714 postcopy_discard_send_finish(ms, pds);
1724 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1726 * Helper for postcopy_chunk_hostpages; it's called twice to
1727 * canonicalize the two bitmaps, which are similar but one is inverted.
1730 * Postcopy requires that all target pages in a hostpage are dirty or
1731 * clean, not a mix. This function canonicalizes the bitmaps.
1733 * @ms: current migration state
1734 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1735 * otherwise we need to canonicalize partially dirty host pages
1736 * @block: block that contains the page we want to canonicalize
1737 * @pds: state for postcopy
1739 static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1741 PostcopyDiscardState *pds)
1743 unsigned long *bitmap;
1744 unsigned long *unsentmap;
1745 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
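    /* host_ratio is the number of target pages per host page, e.g. 512
     * for a 2 MiB hugetlbfs block with 4 KiB target pages (illustrative
     * sizes; both depend on the host and target configuration). */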
1746 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1747 unsigned long len = block->used_length >> TARGET_PAGE_BITS;
1748 unsigned long last = first + (len - 1);
1749 unsigned long run_start;
1751 if (block->page_size == TARGET_PAGE_SIZE) {
1752 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1756 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1757 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1760 /* Find a sent page */
1761 run_start = find_next_zero_bit(unsentmap, last + 1, first);
1763 /* Find a dirty page */
1764 run_start = find_next_bit(bitmap, last + 1, first);
1767 while (run_start <= last) {
1768 bool do_fixup = false;
1769 unsigned long fixup_start_addr;
1770 unsigned long host_offset;
1773 * If the start of this run of pages is in the middle of a host
1774 * page, then we need to fix up this host page.
1776 host_offset = run_start % host_ratio;
1779 run_start -= host_offset;
1780 fixup_start_addr = run_start;
1781 /* For the next pass */
1782 run_start = run_start + host_ratio;
1784 /* Find the end of this run */
1785 unsigned long run_end;
1787 run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
1789 run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
1792 * If the end isn't at the start of a host page, then the
1793 * run doesn't finish at the end of a host page
1794 * and we need to discard.
1796 host_offset = run_end % host_ratio;
1799 fixup_start_addr = run_end - host_offset;
1801 * This host page has gone, the next loop iteration starts
1802 * from after the fixup
1804 run_start = fixup_start_addr + host_ratio;
1807 * No discards on this iteration, next loop starts from
1808 * next sent/dirty page
1810 run_start = run_end + 1;
1817 /* Tell the destination to discard this page */
1818 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1819 /* For the unsent_pass we:
1820 * discard partially sent pages
1821 * For the !unsent_pass (dirty) we:
1822 * discard partially dirty pages that were sent
1823 * (any partially sent pages were already discarded
1824 * by the previous unsent_pass)
1826 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1830 /* Clean up the bitmap */
1831 for (page = fixup_start_addr;
1832 page < fixup_start_addr + host_ratio; page++) {
1833 /* All pages in this host page are now not sent */
1834 set_bit(page, unsentmap);
1837 * Remark them as dirty, updating the count for any pages
1838 * that weren't previously dirty.
1840 migration_dirty_pages += !test_and_set_bit(page, bitmap);
1845 /* Find the next sent page for the next iteration */
1846 run_start = find_next_zero_bit(unsentmap, last + 1,
1849 /* Find the next dirty page for the next iteration */
1850 run_start = find_next_bit(bitmap, last + 1, run_start);
1856 * postcopy_chunk_hostpages: discard any partially sent host page
1858 * Utility for the outgoing postcopy code.
1860 * Discard any partially sent host-page size chunks, mark any partially
1861 * dirty host-page size chunks as all dirty. In this case the host-page
1862 * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1864 * Returns zero on success
1866 * @ms: current migration state
1868 static int postcopy_chunk_hostpages(MigrationState *ms)
1870 RAMState *rs = &ram_state;
1871 struct RAMBlock *block;
1873 /* Easiest way to make sure we don't resume in the middle of a host-page */
1874 rs->last_seen_block = NULL;
1875 rs->last_sent_block = NULL;
1876 rs->last_offset = 0;
1878 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1879 unsigned long first = block->offset >> TARGET_PAGE_BITS;
1881 PostcopyDiscardState *pds =
1882 postcopy_discard_send_init(ms, first, block->idstr);
1884 /* First pass: Discard all partially sent host pages */
1885 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1887 * Second pass: Ensure that all partially dirty host pages are made fully dirty. */
1890 postcopy_chunk_hostpages_pass(ms, false, block, pds);
1892 postcopy_discard_send_finish(ms, pds);
1893 } /* ram_list loop */
1899 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1901 * Returns zero on success
1903 * Transmit the set of pages to be discarded after precopy to the target.
1904 * These are pages that:
1905 * a) Have been previously transmitted but are now dirty again
1906 * b) Pages that have never been transmitted, this ensures that
1907 * any pages on the destination that have been mapped by background
1908 * tasks get discarded (transparent huge pages is the specific concern)
1909 * Hopefully this is pretty sparse
1911 * @ms: current migration state
1913 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1916 unsigned long *bitmap, *unsentmap;
1920 /* This should be our last sync, the src is now paused */
1921 migration_bitmap_sync(&ram_state);
1923 unsentmap = atomic_rcu_read(&migration_bitmap_rcu)->unsentmap;
1925 /* We don't have a safe way to resize the sentmap, so
1926 * if the bitmap was resized it will be NULL at this point. */
1929 error_report("migration ram resized during precopy phase");
1934 /* Deal with TPS != HPS and huge pages */
1935 ret = postcopy_chunk_hostpages(ms);
1942 * Update the unsentmap to be unsentmap = unsentmap | dirty
1944 bitmap = atomic_rcu_read(&migration_bitmap_rcu)->bmap;
1945 bitmap_or(unsentmap, unsentmap, bitmap,
1946 last_ram_offset() >> TARGET_PAGE_BITS);
1949 trace_ram_postcopy_send_discard_bitmap();
1950 #ifdef DEBUG_POSTCOPY
1951 ram_debug_dump_bitmap(unsentmap, true);
1954 ret = postcopy_each_ram_send_discard(ms);
1961 * ram_discard_range: discard dirtied pages at the beginning of postcopy
1963 * Returns zero on success
1965 * @mis: current migration incoming state
1966 * @rbname: name of the RAMBlock of the request. NULL means the
1967 * same as the last one.
1968 * @start: RAMBlock starting page
1969 * @length: RAMBlock size
1971 int ram_discard_range(MigrationIncomingState *mis,
1973 uint64_t start, size_t length)
1977 trace_ram_discard_range(rbname, start, length);
1980 RAMBlock *rb = qemu_ram_block_by_name(rbname);
1983 error_report("ram_discard_range: Failed to find block '%s'", rbname);
1987 ret = ram_block_discard_range(rb, start, length);
1995 static int ram_save_init_globals(RAMState *rs)
1997 int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1999 rs->dirty_rate_high_cnt = 0;
2000 rs->bitmap_sync_count = 0;
2004 rs->xbzrle_bytes = 0;
2005 rs->xbzrle_pages = 0;
2006 rs->xbzrle_cache_miss = 0;
2007 rs->xbzrle_cache_miss_rate = 0;
2008 migration_bitmap_sync_init(rs);
2009 qemu_mutex_init(&migration_bitmap_mutex);
2011 if (migrate_use_xbzrle()) {
2012 XBZRLE_cache_lock();
2013 ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
2014 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
2017 if (!XBZRLE.cache) {
2018 XBZRLE_cache_unlock();
2019 error_report("Error creating cache");
2022 XBZRLE_cache_unlock();
2024 /* We prefer not to abort if there is no memory */
2025 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2026 if (!XBZRLE.encoded_buf) {
2027 error_report("Error allocating encoded_buf");
2031 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2032 if (!XBZRLE.current_buf) {
2033 error_report("Error allocating current_buf");
2034 g_free(XBZRLE.encoded_buf);
2035 XBZRLE.encoded_buf = NULL;
2042 /* For memory_global_dirty_log_start below. */
2043 qemu_mutex_lock_iothread();
2045 qemu_mutex_lock_ramlist();
2047 bytes_transferred = 0;
2048 ram_state_reset(rs);
2050 migration_bitmap_rcu = g_new0(struct BitmapRcu, 1);
2051 /* Skip setting bitmap if there is no RAM */
2052 if (ram_bytes_total()) {
2053 ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2054 migration_bitmap_rcu->bmap = bitmap_new(ram_bitmap_pages);
2055 bitmap_set(migration_bitmap_rcu->bmap, 0, ram_bitmap_pages);
2057 if (migrate_postcopy_ram()) {
2058 migration_bitmap_rcu->unsentmap = bitmap_new(ram_bitmap_pages);
2059 bitmap_set(migration_bitmap_rcu->unsentmap, 0, ram_bitmap_pages);
2064 * Count the total number of pages used by ram blocks not including any
2065 * gaps due to alignment or unplugs.
2067 migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2069 memory_global_dirty_log_start();
2070 migration_bitmap_sync(rs);
2071 qemu_mutex_unlock_ramlist();
2072 qemu_mutex_unlock_iothread();
2079 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2080 * a long-running RCU critical section. When rcu-reclaims in the code
2081 * start to become numerous it will be necessary to reduce the
2082 * granularity of these critical sections.
2086 * ram_save_setup: Setup RAM for migration
2088 * Returns zero to indicate success and negative for error
2090 * @f: QEMUFile where to send the data
2091 * @opaque: RAMState pointer
2093 static int ram_save_setup(QEMUFile *f, void *opaque)
2095 RAMState *rs = opaque;
2098 /* migration has already set up the bitmap; reuse it. */
2099 if (!migration_in_colo_state()) {
2100 if (ram_save_init_globals(rs) < 0) {
2107 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2109 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2110 qemu_put_byte(f, strlen(block->idstr));
2111 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2112 qemu_put_be64(f, block->used_length);
2113 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2114 qemu_put_be64(f, block->page_size);
2120 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2121 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2123 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2129 * ram_save_iterate: iterative stage for migration
2131 * Returns zero to indicate success and negative for error
2133 * @f: QEMUFile where to send the data
2134 * @opaque: RAMState pointer
2136 static int ram_save_iterate(QEMUFile *f, void *opaque)
2138 RAMState *rs = opaque;
2145 if (ram_list.version != rs->last_version) {
2146 ram_state_reset(rs);
2149 /* Read version before ram_list.blocks */
2152 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2154 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2156 while ((ret = qemu_file_rate_limit(f)) == 0) {
2159 pages = ram_find_and_save_block(rs, f, false, &bytes_transferred);
2160 /* no more pages to send */
2167 /* we want to check in the 1st loop, just in case it was the 1st time
2168 and we had to sync the dirty bitmap.
2169 qemu_get_clock_ns() is a bit expensive, so we only check every few iterations. */
2172 if ((i & 63) == 0) {
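            /* only every 64th iteration reaches this point, so the cost
             * of reading the clock below is amortised over many pages */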
2173 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2174 if (t1 > MAX_WAIT) {
2175 trace_ram_save_iterate_big_wait(t1, i);
2181 flush_compressed_data(f);
2185 * Must occur before EOS (or any QEMUFile operation)
2186 * because of RDMA protocol.
2188 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2190 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2191 bytes_transferred += 8;
2193 ret = qemu_file_get_error(f);
2202 * ram_save_complete: function called to send the remaining amount of ram
2204 * Returns zero to indicate success
2206 * Called with iothread lock
2208 * @f: QEMUFile where to send the data
2209 * @opaque: RAMState pointer
2211 static int ram_save_complete(QEMUFile *f, void *opaque)
2213 RAMState *rs = opaque;
2217 if (!migration_in_postcopy(migrate_get_current())) {
2218 migration_bitmap_sync(rs);
2221 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2223 /* try transferring iterative blocks of memory */
2225 /* flush all remaining blocks regardless of rate limiting */
2229 pages = ram_find_and_save_block(rs, f, !migration_in_colo_state(),
2230 &bytes_transferred);
2231 /* no more blocks to send */
2237 flush_compressed_data(f);
2238 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2242 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2247 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2248 uint64_t *non_postcopiable_pending,
2249 uint64_t *postcopiable_pending)
2251 RAMState *rs = opaque;
2252 uint64_t remaining_size;
2254 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2256 if (!migration_in_postcopy(migrate_get_current()) &&
2257 remaining_size < max_size) {
2258 qemu_mutex_lock_iothread();
2260 migration_bitmap_sync(rs);
2262 qemu_mutex_unlock_iothread();
2263 remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
2266 /* We can do postcopy, and all the data is postcopiable */
2267 *postcopiable_pending += remaining_size;
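/*
 * Illustrative usage (the names pend_pre/pend_post are hypothetical):
 * the caller passes two accumulators and this function only bumps the
 * postcopiable one, because every remaining dirty RAM page can still be
 * sent after the switch to postcopy.
 *
 *   uint64_t pend_pre = 0, pend_post = 0;
 *   ram_save_pending(f, rs, max_size, &pend_pre, &pend_post);
 *   uint64_t pending = pend_pre + pend_post;   // total outstanding work
 */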
2270 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2272 unsigned int xh_len;
2274 uint8_t *loaded_data;
2276 if (!xbzrle_decoded_buf) {
2277 xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2279 loaded_data = xbzrle_decoded_buf;
2281 /* extract RLE header */
2282 xh_flags = qemu_get_byte(f);
2283 xh_len = qemu_get_be16(f);
2285 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2286 error_report("Failed to load XBZRLE page - wrong compression!");
2290 if (xh_len > TARGET_PAGE_SIZE) {
2291 error_report("Failed to load XBZRLE page - len overflow!");
2294 /* load data and decode */
2295 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2298 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2299 TARGET_PAGE_SIZE) == -1) {
2300 error_report("Failed to load XBZRLE page - decode error!");
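/*
 * Wire format consumed by load_xbzrle() above (sketch):
 *
 *   uint8_t  xh_flags;       // must be ENCODING_FLAG_XBZRLE
 *   uint16_t xh_len;         // big endian, at most TARGET_PAGE_SIZE
 *   uint8_t  data[xh_len];   // delta fed to xbzrle_decode_buffer()
 *
 * The decoder patches the existing contents at 'host' in place, so the
 * destination must already hold the previously received version of the
 * page for the delta to apply cleanly.
 */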
2308 * ram_block_from_stream: read a RAMBlock id from the migration stream
2310 * Must be called from within a rcu critical section.
2312 * Returns a pointer from within the RCU-protected ram_list.
2314 * @f: QEMUFile where to read the data from
2315 * @flags: Page flags (mostly to see if it's a continuation of previous block)
2317 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2319 static RAMBlock *block = NULL;
2323 if (flags & RAM_SAVE_FLAG_CONTINUE) {
2325 error_report("Ack, bad migration stream!");
2331 len = qemu_get_byte(f);
2332 qemu_get_buffer(f, (uint8_t *)id, len);
2335 block = qemu_ram_block_by_name(id);
2337 error_report("Can't find block %s", id);
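/*
 * Sketch of the block-id encoding handled above: only the first page
 * sent from a RAMBlock carries a length-prefixed id string; subsequent
 * pages from the same block set RAM_SAVE_FLAG_CONTINUE so just the
 * offset travels on the wire:
 *
 *   [ byte len ][ idstr bytes ]   // present only when CONTINUE is clear
 *
 * The static 'block' pointer caches the most recently named block
 * between calls to avoid repeating the lookup.
 */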
2344 static inline void *host_from_ram_block_offset(RAMBlock *block,
2347 if (!offset_in_ramblock(block, offset)) {
2351 return block->host + offset;
2355 * ram_handle_compressed: handle the zero page case
2357 * If a page (or a whole RDMA chunk) has been
2358 * determined to be zero, then zap it.
2360 * @host: host address for the zero page
2361 * @ch: the byte the page is filled with; only zero is supported
2362 * @size: size of the zero page
2364 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2366 if (ch != 0 || !is_zero_range(host, size)) {
2367 memset(host, ch, size);
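/*
 * Effect of the check above (illustrative): for the common zero-page
 * case the memset is skipped when the destination is already zero, so
 * an untouched page stays untouched:
 *
 *   ram_handle_compressed(host, 0, TARGET_PAGE_SIZE);    // no write if
 *                                                        // already zero
 *   ram_handle_compressed(host, 0xff, TARGET_PAGE_SIZE); // always writes
 */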
2371 static void *do_data_decompress(void *opaque)
2373 DecompressParam *param = opaque;
2374 unsigned long pagesize;
2378 qemu_mutex_lock(&param->mutex);
2379 while (!param->quit) {
2384 qemu_mutex_unlock(&param->mutex);
2386 pagesize = TARGET_PAGE_SIZE;
2387 /* uncompress() can fail in some cases, especially when the page
2388 * was dirtied while it was being compressed.  This is not a
2389 * problem, because the dirty page will be retransferred and
2390 * uncompress() won't corrupt the data in other pages.
2392 uncompress((Bytef *)des, &pagesize,
2393 (const Bytef *)param->compbuf, len);
2395 qemu_mutex_lock(&decomp_done_lock);
2397 qemu_cond_signal(&decomp_done_cond);
2398 qemu_mutex_unlock(&decomp_done_lock);
2400 qemu_mutex_lock(&param->mutex);
2402 qemu_cond_wait(&param->cond, &param->mutex);
2405 qemu_mutex_unlock(&param->mutex);
2410 static void wait_for_decompress_done(void)
2412 int idx, thread_count;
2414 if (!migrate_use_compression()) {
2418 thread_count = migrate_decompress_threads();
2419 qemu_mutex_lock(&decomp_done_lock);
2420 for (idx = 0; idx < thread_count; idx++) {
2421 while (!decomp_param[idx].done) {
2422 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2425 qemu_mutex_unlock(&decomp_done_lock);
2428 void migrate_decompress_threads_create(void)
2430 int i, thread_count;
2432 thread_count = migrate_decompress_threads();
2433 decompress_threads = g_new0(QemuThread, thread_count);
2434 decomp_param = g_new0(DecompressParam, thread_count);
2435 qemu_mutex_init(&decomp_done_lock);
2436 qemu_cond_init(&decomp_done_cond);
2437 for (i = 0; i < thread_count; i++) {
2438 qemu_mutex_init(&decomp_param[i].mutex);
2439 qemu_cond_init(&decomp_param[i].cond);
2440 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2441 decomp_param[i].done = true;
2442 decomp_param[i].quit = false;
2443 qemu_thread_create(decompress_threads + i, "decompress",
2444 do_data_decompress, decomp_param + i,
2445 QEMU_THREAD_JOINABLE);
2449 void migrate_decompress_threads_join(void)
2451 int i, thread_count;
2453 thread_count = migrate_decompress_threads();
2454 for (i = 0; i < thread_count; i++) {
2455 qemu_mutex_lock(&decomp_param[i].mutex);
2456 decomp_param[i].quit = true;
2457 qemu_cond_signal(&decomp_param[i].cond);
2458 qemu_mutex_unlock(&decomp_param[i].mutex);
2460 for (i = 0; i < thread_count; i++) {
2461 qemu_thread_join(decompress_threads + i);
2462 qemu_mutex_destroy(&decomp_param[i].mutex);
2463 qemu_cond_destroy(&decomp_param[i].cond);
2464 g_free(decomp_param[i].compbuf);
2466 g_free(decompress_threads);
2467 g_free(decomp_param);
2468 decompress_threads = NULL;
2469 decomp_param = NULL;
2472 static void decompress_data_with_multi_threads(QEMUFile *f,
2473 void *host, int len)
2475 int idx, thread_count;
2477 thread_count = migrate_decompress_threads();
2478 qemu_mutex_lock(&decomp_done_lock);
2480 for (idx = 0; idx < thread_count; idx++) {
2481 if (decomp_param[idx].done) {
2482 decomp_param[idx].done = false;
2483 qemu_mutex_lock(&decomp_param[idx].mutex);
2484 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2485 decomp_param[idx].des = host;
2486 decomp_param[idx].len = len;
2487 qemu_cond_signal(&decomp_param[idx].cond);
2488 qemu_mutex_unlock(&decomp_param[idx].mutex);
2492 if (idx < thread_count) {
2495 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2498 qemu_mutex_unlock(&decomp_done_lock);
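/*
 * Usage sketch: the worker pool above is driven by ram_load() for
 * RAM_SAVE_FLAG_COMPRESS_PAGE records, roughly:
 *
 *   int len = qemu_get_be32(f);
 *   if (len > 0 && len <= compressBound(TARGET_PAGE_SIZE)) {
 *       decompress_data_with_multi_threads(f, host, len);
 *   }
 *
 * The call returns once the compressed data has been handed to an idle
 * worker (or after waiting on decomp_done_cond for one to free up);
 * wait_for_decompress_done() is what guarantees every handed-off page
 * has actually been written before the section load finishes.
 */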
2502 * ram_postcopy_incoming_init: allocate postcopy data structures
2504 * Returns 0 for success and negative if there was an error
2506 * @mis: current migration incoming state
2508 * Allocate data structures etc needed by incoming migration with
2509 * postcopy-ram. postcopy-ram's similarly named
2510 * postcopy_ram_incoming_init does the work.
2512 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2514 size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2516 return postcopy_ram_incoming_init(mis, ram_pages);
2520 * ram_load_postcopy: load a page in postcopy case
2522 * Returns 0 for success or -errno in case of error
2524 * Called in postcopy mode by ram_load().
2525 * rcu_read_lock is taken prior to this being called.
2527 * @f: QEMUFile to read the data from
2529 static int ram_load_postcopy(QEMUFile *f)
2531 int flags = 0, ret = 0;
2532 bool place_needed = false;
2533 bool matching_page_sizes = false;
2534 MigrationIncomingState *mis = migration_incoming_get_current();
2535 /* Temporary page that is later 'placed' */
2536 void *postcopy_host_page = postcopy_get_tmp_page(mis);
2537 void *last_host = NULL;
2538 bool all_zero = false;
2540 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2543 void *page_buffer = NULL;
2544 void *place_source = NULL;
2545 RAMBlock *block = NULL;
2548 addr = qemu_get_be64(f);
2549 flags = addr & ~TARGET_PAGE_MASK;
2550 addr &= TARGET_PAGE_MASK;
2552 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2553 place_needed = false;
2554 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2555 block = ram_block_from_stream(f, flags);
2557 host = host_from_ram_block_offset(block, addr);
2559 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2563 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2565 * Postcopy requires that we place whole host pages atomically;
2566 * these may be huge pages for RAMBlocks that are backed by
2568 * To make it atomic, the data is read into a temporary page
2569 * that's moved into place later.
2570 * The migration protocol uses (possibly smaller) target pages;
2571 * however, the source ensures it always sends all the components
2572 * of a host page in order (a worked example follows this function).
2574 page_buffer = postcopy_host_page +
2575 ((uintptr_t)host & (block->page_size - 1));
2576 /* If all target pages are zero then we can optimise the placement */
2577 if (!((uintptr_t)host & (block->page_size - 1))) {
2580 /* not the first target page within the host page */
2581 if (host != (last_host + TARGET_PAGE_SIZE)) {
2582 error_report("Non-sequential target page %p/%p",
2591 * If it's the last part of a host page then we place the whole host page.
2594 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2595 (block->page_size - 1)) == 0;
2596 place_source = postcopy_host_page;
2600 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2601 case RAM_SAVE_FLAG_COMPRESS:
2602 ch = qemu_get_byte(f);
2603 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2609 case RAM_SAVE_FLAG_PAGE:
2611 if (!place_needed || !matching_page_sizes) {
2612 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2614 /* Avoid the extra copy through the QEMUFile buffer, since the
2615 * data will be copied again when the page is placed; this only
2616 * works when we can read the page in one go (matching page sizes)
2618 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2622 case RAM_SAVE_FLAG_EOS:
2626 error_report("Unknown combination of migration flags: %#x"
2627 " (postcopy mode)", flags);
2632 /* This gets called at the last target page in the host page */
2633 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2636 ret = postcopy_place_page_zero(mis, place_dest,
2639 ret = postcopy_place_page(mis, place_dest,
2640 place_source, block->page_size);
2644 ret = qemu_file_get_error(f);
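/*
 * Worked example for the host-page assembly above (illustrative,
 * assuming 4KiB target pages and a 2MiB hugepage-backed RAMBlock):
 * 512 consecutive target pages are copied into postcopy_host_page at
 * offsets 0, 4K, 8K, ...; only when the last one arrives does
 * place_needed become true and the whole 2MiB page get installed in a
 * single atomic operation:
 *
 *   void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
 *   // place_dest == start of the host page containing 'host'
 *   postcopy_place_page(mis, place_dest, postcopy_host_page,
 *                       block->page_size);
 */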
2651 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2653 int flags = 0, ret = 0;
2654 static uint64_t seq_iter;
2657 * If the system is running in postcopy mode, page inserts to host memory must be atomic.
2660 bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2661 /* ADVISE is earlier than LISTENING; it shows the source has the postcopy capability enabled */
2662 bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2666 if (version_id != 4) {
2670 /* This RCU critical section can be very long running.
2671 * If RCU reclamations in the code start to become numerous,
2672 * it will be necessary to reduce the granularity of this
2677 if (postcopy_running) {
2678 ret = ram_load_postcopy(f);
2681 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2682 ram_addr_t addr, total_ram_bytes;
2686 addr = qemu_get_be64(f);
2687 flags = addr & ~TARGET_PAGE_MASK;
2688 addr &= TARGET_PAGE_MASK;
2690 if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2691 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2692 RAMBlock *block = ram_block_from_stream(f, flags);
2694 host = host_from_ram_block_offset(block, addr);
2696 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2702 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2703 case RAM_SAVE_FLAG_MEM_SIZE:
2704 /* Synchronize RAM block list */
2705 total_ram_bytes = addr;
2706 while (!ret && total_ram_bytes) {
2711 len = qemu_get_byte(f);
2712 qemu_get_buffer(f, (uint8_t *)id, len);
2714 length = qemu_get_be64(f);
2716 block = qemu_ram_block_by_name(id);
2718 if (length != block->used_length) {
2719 Error *local_err = NULL;
2721 ret = qemu_ram_resize(block, length,
2724 error_report_err(local_err);
2727 /* For postcopy we need to check that hugepage sizes match */
2728 if (postcopy_advised &&
2729 block->page_size != qemu_host_page_size) {
2730 uint64_t remote_page_size = qemu_get_be64(f);
2731 if (remote_page_size != block->page_size) {
2732 error_report("Mismatched RAM page size %s "
2733 "(local) %zd != %" PRId64,
2734 id, block->page_size,
2739 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2742 error_report("Unknown ramblock \"%s\", cannot "
2743 "accept migration", id);
2747 total_ram_bytes -= length;
2751 case RAM_SAVE_FLAG_COMPRESS:
2752 ch = qemu_get_byte(f);
2753 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2756 case RAM_SAVE_FLAG_PAGE:
2757 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2760 case RAM_SAVE_FLAG_COMPRESS_PAGE:
2761 len = qemu_get_be32(f);
2762 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2763 error_report("Invalid compressed data length: %d", len);
2767 decompress_data_with_multi_threads(f, host, len);
2770 case RAM_SAVE_FLAG_XBZRLE:
2771 if (load_xbzrle(f, addr, host) < 0) {
2772 error_report("Failed to decompress XBZRLE page at "
2773 RAM_ADDR_FMT, addr);
2778 case RAM_SAVE_FLAG_EOS:
2782 if (flags & RAM_SAVE_FLAG_HOOK) {
2783 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2785 error_report("Unknown combination of migration flags: %#x",
2791 ret = qemu_file_get_error(f);
2795 wait_for_decompress_done();
2797 trace_ram_load_complete(ret, seq_iter);
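/*
 * Summary of the record format parsed by ram_load() above (sketch):
 *
 *   be64 header = page offset | flags   // flags sit in the low bits
 *   RAM_SAVE_FLAG_MEM_SIZE      -> block list (see ram_save_setup)
 *   RAM_SAVE_FLAG_COMPRESS      -> one fill byte (zero-page case)
 *   RAM_SAVE_FLAG_PAGE          -> TARGET_PAGE_SIZE raw bytes
 *   RAM_SAVE_FLAG_COMPRESS_PAGE -> be32 len + len compressed bytes
 *   RAM_SAVE_FLAG_XBZRLE        -> XBZRLE header + delta (see load_xbzrle)
 *   RAM_SAVE_FLAG_EOS           -> end of section
 *   RAM_SAVE_FLAG_CONTINUE      -> modifier: reuse the previous RAMBlock
 *   RAM_SAVE_FLAG_HOOK          -> handled by ram_control_load_hook()
 */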
2801 static SaveVMHandlers savevm_ram_handlers = {
2802 .save_live_setup = ram_save_setup,
2803 .save_live_iterate = ram_save_iterate,
2804 .save_live_complete_postcopy = ram_save_complete,
2805 .save_live_complete_precopy = ram_save_complete,
2806 .save_live_pending = ram_save_pending,
2807 .load_state = ram_load,
2808 .cleanup = ram_migration_cleanup,
2811 void ram_mig_init(void)
2813 qemu_mutex_init(&XBZRLE.lock);
2814 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
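/*
 * Rough call order driven by the handlers registered above (precopy
 * case, sketch): ram_save_setup() runs once and writes the block list,
 * ram_save_pending()/ram_save_iterate() repeat until the remaining
 * dirty RAM fits in the downtime budget, ram_save_complete() does the
 * final pass, and ram_load() consumes every section on the destination.
 * The version number 4 passed to register_savevm_live() must match the
 * version_id check in ram_load().
 */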