qemu.git / blame - migration/ram.c
migration: fix pfd leak
56e93d26
JQ
1/*
2 * QEMU System Emulator
3 *
4 * Copyright (c) 2003-2008 Fabrice Bellard
76cc7b58
JQ
5 * Copyright (c) 2011-2015 Red Hat Inc
6 *
7 * Authors:
8 * Juan Quintela <[email protected]>
56e93d26
JQ
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice shall be included in
18 * all copies or substantial portions of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 * THE SOFTWARE.
27 */
e688df6b 28
1393a485 29#include "qemu/osdep.h"
33c11879 30#include "cpu.h"
56e93d26 31#include <zlib.h>
f348b6d1 32#include "qemu/cutils.h"
56e93d26
JQ
33#include "qemu/bitops.h"
34#include "qemu/bitmap.h"
7205c9ec 35#include "qemu/main-loop.h"
709e3fe8 36#include "xbzrle.h"
7b1e1a22 37#include "ram.h"
6666c96a 38#include "migration.h"
f2a8f0a6 39#include "migration/register.h"
7b1e1a22 40#include "migration/misc.h"
08a0aee1 41#include "qemu-file.h"
be07b0ac 42#include "postcopy-ram.h"
56e93d26 43#include "migration/page_cache.h"
56e93d26 44#include "qemu/error-report.h"
e688df6b 45#include "qapi/error.h"
9af23989 46#include "qapi/qapi-events-migration.h"
8acabf69 47#include "qapi/qmp/qerror.h"
56e93d26 48#include "trace.h"
56e93d26 49#include "exec/ram_addr.h"
f9494614 50#include "exec/target_page.h"
56e93d26 51#include "qemu/rcu_queue.h"
a91246c9 52#include "migration/colo.h"
9ac78b61 53#include "migration/block.h"
56e93d26 54
56e93d26
JQ
55/***********************************************************/
56/* ram save/restore */
57
bb890ed5
JQ
58/* RAM_SAVE_FLAG_ZERO used to be named RAM_SAVE_FLAG_COMPRESS, it
59 * worked for pages that were filled with the same char. We switched
60 * it to only search for the zero value. And to avoid confusion with
61 * RAM_SAVE_FLAG_COMPRESS_PAGE it was renamed.
62 */
63
56e93d26 64#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
bb890ed5 65#define RAM_SAVE_FLAG_ZERO 0x02
56e93d26
JQ
66#define RAM_SAVE_FLAG_MEM_SIZE 0x04
67#define RAM_SAVE_FLAG_PAGE 0x08
68#define RAM_SAVE_FLAG_EOS 0x10
69#define RAM_SAVE_FLAG_CONTINUE 0x20
70#define RAM_SAVE_FLAG_XBZRLE 0x40
71/* 0x80 is reserved in migration.h start with 0x100 next */
72#define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
73
56e93d26
JQ
74static inline bool is_zero_range(uint8_t *p, uint64_t size)
75{
a1febc49 76 return buffer_is_zero(p, size);
56e93d26
JQ
77}
78
9360447d
JQ
79XBZRLECacheStats xbzrle_counters;
80
56e93d26
JQ
81/* This struct contains the XBZRLE cache and a static page
82 used by the compression */
83static struct {
84 /* buffer used for XBZRLE encoding */
85 uint8_t *encoded_buf;
86 /* buffer for storing page content */
87 uint8_t *current_buf;
88 /* Cache for XBZRLE, Protected by lock. */
89 PageCache *cache;
90 QemuMutex lock;
c00e0928
JQ
91 /* it will store a page full of zeros */
92 uint8_t *zero_target_page;
f265e0e4
JQ
93 /* buffer used for XBZRLE decoding */
94 uint8_t *decoded_buf;
56e93d26
JQ
95} XBZRLE;
96
56e93d26
JQ
97static void XBZRLE_cache_lock(void)
98{
99 if (migrate_use_xbzrle())
100 qemu_mutex_lock(&XBZRLE.lock);
101}
102
103static void XBZRLE_cache_unlock(void)
104{
105 if (migrate_use_xbzrle())
106 qemu_mutex_unlock(&XBZRLE.lock);
107}
108
3d0684b2
JQ
109/**
110 * xbzrle_cache_resize: resize the xbzrle cache
111 *
112 * This function is called from qmp_migrate_set_cache_size in the main
113 * thread, possibly while a migration is in progress. A running
114 * migration may be using the cache and might finish during this call,
115 * hence changes to the cache are protected by the XBZRLE.lock mutex.
116 *
c9dede2d 117 * Returns 0 for success or -1 for error
3d0684b2
JQ
118 *
119 * @new_size: new cache size
8acabf69 120 * @errp: set to the failure reason if the resize fails
56e93d26 121 */
c9dede2d 122int xbzrle_cache_resize(int64_t new_size, Error **errp)
56e93d26
JQ
123{
124 PageCache *new_cache;
c9dede2d 125 int64_t ret = 0;
56e93d26 126
8acabf69
JQ
127 /* Check for truncation */
128 if (new_size != (size_t)new_size) {
129 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
130 "exceeding address space");
131 return -1;
132 }
133
2a313e5c
JQ
134 if (new_size == migrate_xbzrle_cache_size()) {
135 /* nothing to do */
c9dede2d 136 return 0;
2a313e5c
JQ
137 }
138
56e93d26
JQ
139 XBZRLE_cache_lock();
140
141 if (XBZRLE.cache != NULL) {
80f8dfde 142 new_cache = cache_init(new_size, TARGET_PAGE_SIZE, errp);
56e93d26 143 if (!new_cache) {
56e93d26
JQ
144 ret = -1;
145 goto out;
146 }
147
148 cache_fini(XBZRLE.cache);
149 XBZRLE.cache = new_cache;
150 }
56e93d26
JQ
151out:
152 XBZRLE_cache_unlock();
153 return ret;
154}
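/*
 * Illustrative caller sketch (an assumption, not code from this file): a
 * QMP-level caller that owns an Error pointer would typically do
 *
 *     Error *local_err = NULL;
 *     if (xbzrle_cache_resize(64 * 1024 * 1024, &local_err) < 0) {
 *         error_report_err(local_err);
 *     }
 *
 * relying on errp being filled in whenever -1 is returned.
 */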
155
f9494614
AP
156static void ramblock_recv_map_init(void)
157{
158 RAMBlock *rb;
159
160 RAMBLOCK_FOREACH(rb) {
161 assert(!rb->receivedmap);
162 rb->receivedmap = bitmap_new(rb->max_length >> qemu_target_page_bits());
163 }
164}
165
166int ramblock_recv_bitmap_test(RAMBlock *rb, void *host_addr)
167{
168 return test_bit(ramblock_recv_bitmap_offset(host_addr, rb),
169 rb->receivedmap);
170}
171
1cba9f6e
DDAG
172bool ramblock_recv_bitmap_test_byte_offset(RAMBlock *rb, uint64_t byte_offset)
173{
174 return test_bit(byte_offset >> TARGET_PAGE_BITS, rb->receivedmap);
175}
176
f9494614
AP
177void ramblock_recv_bitmap_set(RAMBlock *rb, void *host_addr)
178{
179 set_bit_atomic(ramblock_recv_bitmap_offset(host_addr, rb), rb->receivedmap);
180}
181
182void ramblock_recv_bitmap_set_range(RAMBlock *rb, void *host_addr,
183 size_t nr)
184{
185 bitmap_set_atomic(rb->receivedmap,
186 ramblock_recv_bitmap_offset(host_addr, rb),
187 nr);
188}
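/*
 * Worked example (assuming 4 KiB target pages): marking 4 pages received
 * starting at host_addr == rb->host + 0x3000 sets receivedmap bits 3..6,
 * i.e. one bit per target page of the block, indexed from the block start.
 */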
189
ec481c6c
JQ
190/*
191 * An outstanding page request, on the source, having been received
192 * and queued
193 */
194struct RAMSrcPageRequest {
195 RAMBlock *rb;
196 hwaddr offset;
197 hwaddr len;
198
199 QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
200};
201
6f37bb8b
JQ
202/* State of RAM for migration */
203struct RAMState {
204b88b8
JQ
204 /* QEMUFile used for this migration */
205 QEMUFile *f;
6f37bb8b
JQ
206 /* Last block that we have visited searching for dirty pages */
207 RAMBlock *last_seen_block;
208 /* Last block from where we have sent data */
209 RAMBlock *last_sent_block;
269ace29
JQ
210 /* Last dirty target page we have sent */
211 ram_addr_t last_page;
6f37bb8b
JQ
212 /* last ram version we have seen */
213 uint32_t last_version;
214 /* We are in the first round */
215 bool ram_bulk_stage;
8d820d6f
JQ
216 /* How many times we have dirty too many pages */
217 int dirty_rate_high_cnt;
f664da80
JQ
218 /* these variables are used for bitmap sync */
219 /* last time we did a full bitmap_sync */
220 int64_t time_last_bitmap_sync;
eac74159 221 /* bytes transferred at start_time */
c4bdf0cf 222 uint64_t bytes_xfer_prev;
a66cd90c 223 /* number of dirty pages since start_time */
68908ed6 224 uint64_t num_dirty_pages_period;
b5833fde
JQ
225 /* xbzrle misses since the beginning of the period */
226 uint64_t xbzrle_cache_miss_prev;
36040d9c
JQ
227 /* number of iterations at the beginning of period */
228 uint64_t iterations_prev;
23b28c3c
JQ
229 /* Iterations since start */
230 uint64_t iterations;
9360447d 231 /* number of dirty bits in the bitmap */
2dfaf12e
PX
232 uint64_t migration_dirty_pages;
233 /* protects modification of the bitmap */
108cfae0 234 QemuMutex bitmap_mutex;
68a098f3
JQ
235 /* The RAMBlock used in the last src_page_requests */
236 RAMBlock *last_req_rb;
ec481c6c
JQ
237 /* Queue of outstanding page requests from the destination */
238 QemuMutex src_page_req_mutex;
239 QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
6f37bb8b
JQ
240};
241typedef struct RAMState RAMState;
242
53518d94 243static RAMState *ram_state;
6f37bb8b 244
9edabd4d 245uint64_t ram_bytes_remaining(void)
2f4fde93 246{
bae416e5
DDAG
247 return ram_state ? (ram_state->migration_dirty_pages * TARGET_PAGE_SIZE) :
248 0;
2f4fde93
JQ
249}
250
9360447d 251MigrationStats ram_counters;
96506894 252
b8fb8cb7
DDAG
253/* used by the search for pages to send */
254struct PageSearchStatus {
255 /* Current block being searched */
256 RAMBlock *block;
a935e30f
JQ
257 /* Current page to search from */
258 unsigned long page;
b8fb8cb7
DDAG
259 /* Set once we wrap around */
260 bool complete_round;
261};
262typedef struct PageSearchStatus PageSearchStatus;
263
56e93d26 264struct CompressParam {
56e93d26 265 bool done;
90e56fb4 266 bool quit;
56e93d26
JQ
267 QEMUFile *file;
268 QemuMutex mutex;
269 QemuCond cond;
270 RAMBlock *block;
271 ram_addr_t offset;
272};
273typedef struct CompressParam CompressParam;
274
275struct DecompressParam {
73a8912b 276 bool done;
90e56fb4 277 bool quit;
56e93d26
JQ
278 QemuMutex mutex;
279 QemuCond cond;
280 void *des;
d341d9f3 281 uint8_t *compbuf;
56e93d26
JQ
282 int len;
283};
284typedef struct DecompressParam DecompressParam;
285
286static CompressParam *comp_param;
287static QemuThread *compress_threads;
288/* comp_done_cond is used to wake up the migration thread when
289 * one of the compression threads has finished the compression.
290 * comp_done_lock is used together with comp_done_cond.
291 */
0d9f9a5c
LL
292static QemuMutex comp_done_lock;
293static QemuCond comp_done_cond;
56e93d26
JQ
294/* The empty QEMUFileOps will be used by file in CompressParam */
295static const QEMUFileOps empty_ops = { };
296
56e93d26
JQ
297static DecompressParam *decomp_param;
298static QemuThread *decompress_threads;
73a8912b
LL
299static QemuMutex decomp_done_lock;
300static QemuCond decomp_done_cond;
56e93d26 301
a7a9a88f
LL
302static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
303 ram_addr_t offset);
56e93d26
JQ
304
305static void *do_data_compress(void *opaque)
306{
307 CompressParam *param = opaque;
a7a9a88f
LL
308 RAMBlock *block;
309 ram_addr_t offset;
56e93d26 310
a7a9a88f 311 qemu_mutex_lock(&param->mutex);
90e56fb4 312 while (!param->quit) {
a7a9a88f
LL
313 if (param->block) {
314 block = param->block;
315 offset = param->offset;
316 param->block = NULL;
317 qemu_mutex_unlock(&param->mutex);
318
319 do_compress_ram_page(param->file, block, offset);
320
0d9f9a5c 321 qemu_mutex_lock(&comp_done_lock);
a7a9a88f 322 param->done = true;
0d9f9a5c
LL
323 qemu_cond_signal(&comp_done_cond);
324 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
325
326 qemu_mutex_lock(&param->mutex);
327 } else {
56e93d26
JQ
328 qemu_cond_wait(&param->cond, &param->mutex);
329 }
56e93d26 330 }
a7a9a88f 331 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
332
333 return NULL;
334}
335
336static inline void terminate_compression_threads(void)
337{
338 int idx, thread_count;
339
340 thread_count = migrate_compress_threads();
3d0684b2 341
56e93d26
JQ
342 for (idx = 0; idx < thread_count; idx++) {
343 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 344 comp_param[idx].quit = true;
56e93d26
JQ
345 qemu_cond_signal(&comp_param[idx].cond);
346 qemu_mutex_unlock(&comp_param[idx].mutex);
347 }
348}
349
f0afa331 350static void compress_threads_save_cleanup(void)
56e93d26
JQ
351{
352 int i, thread_count;
353
354 if (!migrate_use_compression()) {
355 return;
356 }
357 terminate_compression_threads();
358 thread_count = migrate_compress_threads();
359 for (i = 0; i < thread_count; i++) {
360 qemu_thread_join(compress_threads + i);
361 qemu_fclose(comp_param[i].file);
362 qemu_mutex_destroy(&comp_param[i].mutex);
363 qemu_cond_destroy(&comp_param[i].cond);
364 }
0d9f9a5c
LL
365 qemu_mutex_destroy(&comp_done_lock);
366 qemu_cond_destroy(&comp_done_cond);
56e93d26
JQ
367 g_free(compress_threads);
368 g_free(comp_param);
56e93d26
JQ
369 compress_threads = NULL;
370 comp_param = NULL;
56e93d26
JQ
371}
372
f0afa331 373static void compress_threads_save_setup(void)
56e93d26
JQ
374{
375 int i, thread_count;
376
377 if (!migrate_use_compression()) {
378 return;
379 }
56e93d26
JQ
380 thread_count = migrate_compress_threads();
381 compress_threads = g_new0(QemuThread, thread_count);
382 comp_param = g_new0(CompressParam, thread_count);
0d9f9a5c
LL
383 qemu_cond_init(&comp_done_cond);
384 qemu_mutex_init(&comp_done_lock);
56e93d26 385 for (i = 0; i < thread_count; i++) {
e110aa91
C
386 /* comp_param[i].file is just used as a dummy buffer to save data,
387 * set its ops to empty.
56e93d26
JQ
388 */
389 comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
390 comp_param[i].done = true;
90e56fb4 391 comp_param[i].quit = false;
56e93d26
JQ
392 qemu_mutex_init(&comp_param[i].mutex);
393 qemu_cond_init(&comp_param[i].cond);
394 qemu_thread_create(compress_threads + i, "compress",
395 do_data_compress, comp_param + i,
396 QEMU_THREAD_JOINABLE);
397 }
398}
399
f986c3d2
JQ
400/* Multiple fd's */
401
402struct MultiFDSendParams {
403 uint8_t id;
404 char *name;
405 QemuThread thread;
406 QemuSemaphore sem;
407 QemuMutex mutex;
408 bool quit;
409};
410typedef struct MultiFDSendParams MultiFDSendParams;
411
412struct {
413 MultiFDSendParams *params;
414 /* number of created threads */
415 int count;
416} *multifd_send_state;
417
418static void terminate_multifd_send_threads(Error *errp)
419{
420 int i;
421
422 for (i = 0; i < multifd_send_state->count; i++) {
423 MultiFDSendParams *p = &multifd_send_state->params[i];
424
425 qemu_mutex_lock(&p->mutex);
426 p->quit = true;
427 qemu_sem_post(&p->sem);
428 qemu_mutex_unlock(&p->mutex);
429 }
430}
431
432int multifd_save_cleanup(Error **errp)
433{
434 int i;
435 int ret = 0;
436
437 if (!migrate_use_multifd()) {
438 return 0;
439 }
440 terminate_multifd_send_threads(NULL);
441 for (i = 0; i < multifd_send_state->count; i++) {
442 MultiFDSendParams *p = &multifd_send_state->params[i];
443
444 qemu_thread_join(&p->thread);
445 qemu_mutex_destroy(&p->mutex);
446 qemu_sem_destroy(&p->sem);
447 g_free(p->name);
448 p->name = NULL;
449 }
450 g_free(multifd_send_state->params);
451 multifd_send_state->params = NULL;
452 g_free(multifd_send_state);
453 multifd_send_state = NULL;
454 return ret;
455}
456
457static void *multifd_send_thread(void *opaque)
458{
459 MultiFDSendParams *p = opaque;
460
461 while (true) {
462 qemu_mutex_lock(&p->mutex);
463 if (p->quit) {
464 qemu_mutex_unlock(&p->mutex);
465 break;
466 }
467 qemu_mutex_unlock(&p->mutex);
468 qemu_sem_wait(&p->sem);
469 }
470
471 return NULL;
472}
473
474int multifd_save_setup(void)
475{
476 int thread_count;
477 uint8_t i;
478
479 if (!migrate_use_multifd()) {
480 return 0;
481 }
482 thread_count = migrate_multifd_channels();
483 multifd_send_state = g_malloc0(sizeof(*multifd_send_state));
484 multifd_send_state->params = g_new0(MultiFDSendParams, thread_count);
485 multifd_send_state->count = 0;
486 for (i = 0; i < thread_count; i++) {
487 MultiFDSendParams *p = &multifd_send_state->params[i];
488
489 qemu_mutex_init(&p->mutex);
490 qemu_sem_init(&p->sem, 0);
491 p->quit = false;
492 p->id = i;
493 p->name = g_strdup_printf("multifdsend_%d", i);
494 qemu_thread_create(&p->thread, p->name, multifd_send_thread, p,
495 QEMU_THREAD_JOINABLE);
496
497 multifd_send_state->count++;
498 }
499 return 0;
500}
501
502struct MultiFDRecvParams {
503 uint8_t id;
504 char *name;
505 QemuThread thread;
506 QemuSemaphore sem;
507 QemuMutex mutex;
508 bool quit;
509};
510typedef struct MultiFDRecvParams MultiFDRecvParams;
511
512struct {
513 MultiFDRecvParams *params;
514 /* number of created threads */
515 int count;
516} *multifd_recv_state;
517
518static void terminate_multifd_recv_threads(Error *errp)
519{
520 int i;
521
522 for (i = 0; i < multifd_recv_state->count; i++) {
523 MultiFDRecvParams *p = &multifd_recv_state->params[i];
524
525 qemu_mutex_lock(&p->mutex);
526 p->quit = true;
527 qemu_sem_post(&p->sem);
528 qemu_mutex_unlock(&p->mutex);
529 }
530}
531
532int multifd_load_cleanup(Error **errp)
533{
534 int i;
535 int ret = 0;
536
537 if (!migrate_use_multifd()) {
538 return 0;
539 }
540 terminate_multifd_recv_threads(NULL);
541 for (i = 0; i < multifd_recv_state->count; i++) {
542 MultiFDRecvParams *p = &multifd_recv_state->params[i];
543
544 qemu_thread_join(&p->thread);
545 qemu_mutex_destroy(&p->mutex);
546 qemu_sem_destroy(&p->sem);
547 g_free(p->name);
548 p->name = NULL;
549 }
550 g_free(multifd_recv_state->params);
551 multifd_recv_state->params = NULL;
552 g_free(multifd_recv_state);
553 multifd_recv_state = NULL;
554
555 return ret;
556}
557
558static void *multifd_recv_thread(void *opaque)
559{
560 MultiFDRecvParams *p = opaque;
561
562 while (true) {
563 qemu_mutex_lock(&p->mutex);
564 if (p->quit) {
565 qemu_mutex_unlock(&p->mutex);
566 break;
567 }
568 qemu_mutex_unlock(&p->mutex);
569 qemu_sem_wait(&p->sem);
570 }
571
572 return NULL;
573}
574
575int multifd_load_setup(void)
576{
577 int thread_count;
578 uint8_t i;
579
580 if (!migrate_use_multifd()) {
581 return 0;
582 }
583 thread_count = migrate_multifd_channels();
584 multifd_recv_state = g_malloc0(sizeof(*multifd_recv_state));
585 multifd_recv_state->params = g_new0(MultiFDRecvParams, thread_count);
586 multifd_recv_state->count = 0;
587 for (i = 0; i < thread_count; i++) {
588 MultiFDRecvParams *p = &multifd_recv_state->params[i];
589
590 qemu_mutex_init(&p->mutex);
591 qemu_sem_init(&p->sem, 0);
592 p->quit = false;
593 p->id = i;
594 p->name = g_strdup_printf("multifdrecv_%d", i);
595 qemu_thread_create(&p->thread, p->name, multifd_recv_thread, p,
596 QEMU_THREAD_JOINABLE);
597 multifd_recv_state->count++;
598 }
599 return 0;
600}
601
56e93d26 602/**
3d0684b2 603 * save_page_header: write page header to wire
56e93d26
JQ
604 *
605 * If this is the 1st block, it also writes the block identification
606 *
3d0684b2 607 * Returns the number of bytes written
56e93d26
JQ
608 *
609 * @f: QEMUFile where to send the data
610 * @block: block that contains the page we want to send
611 * @offset: offset inside the block for the page
612 * in the lower bits, it contains flags
613 */
2bf3aa85
JQ
614static size_t save_page_header(RAMState *rs, QEMUFile *f, RAMBlock *block,
615 ram_addr_t offset)
56e93d26 616{
9f5f380b 617 size_t size, len;
56e93d26 618
24795694
JQ
619 if (block == rs->last_sent_block) {
620 offset |= RAM_SAVE_FLAG_CONTINUE;
621 }
2bf3aa85 622 qemu_put_be64(f, offset);
56e93d26
JQ
623 size = 8;
624
625 if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
9f5f380b 626 len = strlen(block->idstr);
2bf3aa85
JQ
627 qemu_put_byte(f, len);
628 qemu_put_buffer(f, (uint8_t *)block->idstr, len);
9f5f380b 629 size += 1 + len;
24795694 630 rs->last_sent_block = block;
56e93d26
JQ
631 }
632 return size;
633}
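/*
 * Wire layout produced by save_page_header(), for reference: an 8-byte
 * (offset | flags) word, followed - only when RAM_SAVE_FLAG_CONTINUE is
 * clear - by a 1-byte idstr length and the idstr bytes.  E.g. the first
 * page of a block named "pc.ram" costs 8 + 1 + 6 = 15 header bytes;
 * later pages of the same block cost 8.
 */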
634
3d0684b2
JQ
635/**
636 * mig_throttle_guest_down: throttle down the guest
637 *
638 * Reduce amount of guest cpu execution to hopefully slow down memory
639 * writes. If guest dirty memory rate is reduced below the rate at
640 * which we can transfer pages to the destination then we should be
641 * able to complete migration. Some workloads dirty memory way too
642 * fast and will not effectively converge, even with auto-converge.
070afca2
JH
643 */
644static void mig_throttle_guest_down(void)
645{
646 MigrationState *s = migrate_get_current();
2594f56d
DB
647 uint64_t pct_initial = s->parameters.cpu_throttle_initial;
648 uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
070afca2
JH
649
650 /* We have not started throttling yet. Let's start it. */
651 if (!cpu_throttle_active()) {
652 cpu_throttle_set(pct_initial);
653 } else {
654 /* Throttling already on, just increase the rate */
655 cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
656 }
657}
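/*
 * Numeric sketch (parameter values are only an example): with
 * cpu_throttle_initial=20 and cpu_throttle_increment=10, successive calls
 * request 20%, 30%, 40%, ... of guest CPU time to be taken away until
 * migration converges.
 */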
658
3d0684b2
JQ
659/**
660 * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
661 *
6f37bb8b 662 * @rs: current RAM state
3d0684b2
JQ
663 * @current_addr: address for the zero page
664 *
665 * Update the xbzrle cache to reflect a page that's been sent as all 0.
56e93d26
JQ
666 * The important thing is that a stale (not-yet-0'd) page be replaced
667 * by the new data.
668 * As a bonus, if the page wasn't in the cache it gets added so that
3d0684b2 669 * when a small write is made into the 0'd page it gets XBZRLE sent.
56e93d26 670 */
6f37bb8b 671static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
56e93d26 672{
6f37bb8b 673 if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
56e93d26
JQ
674 return;
675 }
676
677 /* We don't care if this fails to allocate a new cache page
678 * as long as it updated an old one */
c00e0928 679 cache_insert(XBZRLE.cache, current_addr, XBZRLE.zero_target_page,
9360447d 680 ram_counters.dirty_sync_count);
56e93d26
JQ
681}
682
683#define ENCODING_FLAG_XBZRLE 0x1
684
685/**
686 * save_xbzrle_page: compress and send current page
687 *
688 * Returns: 1 means that we wrote the page
689 * 0 means that page is identical to the one already sent
690 * -1 means that xbzrle would be longer than normal
691 *
5a987738 692 * @rs: current RAM state
3d0684b2
JQ
693 * @current_data: pointer to the address of the page contents
694 * @current_addr: addr of the page
56e93d26
JQ
695 * @block: block that contains the page we want to send
696 * @offset: offset inside the block for the page
697 * @last_stage: if we are at the completion stage
56e93d26 698 */
204b88b8 699static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
56e93d26 700 ram_addr_t current_addr, RAMBlock *block,
072c2511 701 ram_addr_t offset, bool last_stage)
56e93d26
JQ
702{
703 int encoded_len = 0, bytes_xbzrle;
704 uint8_t *prev_cached_page;
705
9360447d
JQ
706 if (!cache_is_cached(XBZRLE.cache, current_addr,
707 ram_counters.dirty_sync_count)) {
708 xbzrle_counters.cache_miss++;
56e93d26
JQ
709 if (!last_stage) {
710 if (cache_insert(XBZRLE.cache, current_addr, *current_data,
9360447d 711 ram_counters.dirty_sync_count) == -1) {
56e93d26
JQ
712 return -1;
713 } else {
714 /* update *current_data when the page has been
715 inserted into cache */
716 *current_data = get_cached_data(XBZRLE.cache, current_addr);
717 }
718 }
719 return -1;
720 }
721
722 prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
723
724 /* save current buffer into memory */
725 memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
726
727 /* XBZRLE encoding (if there is no overflow) */
728 encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
729 TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
730 TARGET_PAGE_SIZE);
731 if (encoded_len == 0) {
55c4446b 732 trace_save_xbzrle_page_skipping();
56e93d26
JQ
733 return 0;
734 } else if (encoded_len == -1) {
55c4446b 735 trace_save_xbzrle_page_overflow();
9360447d 736 xbzrle_counters.overflow++;
56e93d26
JQ
737 /* update data in the cache */
738 if (!last_stage) {
739 memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
740 *current_data = prev_cached_page;
741 }
742 return -1;
743 }
744
745 /* we need to update the data in the cache, in order to get the same data */
746 if (!last_stage) {
747 memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
748 }
749
750 /* Send XBZRLE based compressed page */
2bf3aa85 751 bytes_xbzrle = save_page_header(rs, rs->f, block,
204b88b8
JQ
752 offset | RAM_SAVE_FLAG_XBZRLE);
753 qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
754 qemu_put_be16(rs->f, encoded_len);
755 qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
56e93d26 756 bytes_xbzrle += encoded_len + 1 + 2;
9360447d
JQ
757 xbzrle_counters.pages++;
758 xbzrle_counters.bytes += bytes_xbzrle;
759 ram_counters.transferred += bytes_xbzrle;
56e93d26
JQ
760
761 return 1;
762}
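/*
 * Accounting note: bytes_xbzrle above is the page header plus one byte
 * for ENCODING_FLAG_XBZRLE, two bytes for the be16 encoded length and
 * encoded_len itself, e.g. a 40-byte encoding of a continued page costs
 * 8 + 1 + 2 + 40 = 51 bytes on the wire.
 */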
763
3d0684b2
JQ
764/**
765 * migration_bitmap_find_dirty: find the next dirty page from start
f3f491fc 766 *
3d0684b2
JQ
767 * Called with rcu_read_lock() to protect migration_bitmap
768 *
769 * Returns the byte offset within memory region of the start of a dirty page
770 *
6f37bb8b 771 * @rs: current RAM state
3d0684b2 772 * @rb: RAMBlock where to search for dirty pages
a935e30f 773 * @start: page where we start the search
f3f491fc 774 */
56e93d26 775static inline
a935e30f 776unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
f20e2865 777 unsigned long start)
56e93d26 778{
6b6712ef
JQ
779 unsigned long size = rb->used_length >> TARGET_PAGE_BITS;
780 unsigned long *bitmap = rb->bmap;
56e93d26
JQ
781 unsigned long next;
782
6b6712ef
JQ
783 if (rs->ram_bulk_stage && start > 0) {
784 next = start + 1;
56e93d26 785 } else {
6b6712ef 786 next = find_next_bit(bitmap, size, start);
56e93d26
JQ
787 }
788
6b6712ef 789 return next;
56e93d26
JQ
790}
791
06b10688 792static inline bool migration_bitmap_clear_dirty(RAMState *rs,
f20e2865
JQ
793 RAMBlock *rb,
794 unsigned long page)
a82d593b
DDAG
795{
796 bool ret;
a82d593b 797
6b6712ef 798 ret = test_and_clear_bit(page, rb->bmap);
a82d593b
DDAG
799
800 if (ret) {
0d8ec885 801 rs->migration_dirty_pages--;
a82d593b
DDAG
802 }
803 return ret;
804}
805
15440dd5
JQ
806static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
807 ram_addr_t start, ram_addr_t length)
56e93d26 808{
0d8ec885 809 rs->migration_dirty_pages +=
6b6712ef 810 cpu_physical_memory_sync_dirty_bitmap(rb, start, length,
0d8ec885 811 &rs->num_dirty_pages_period);
56e93d26
JQ
812}
813
3d0684b2
JQ
814/**
815 * ram_pagesize_summary: calculate all the pagesizes of a VM
816 *
817 * Returns a summary bitmap of the page sizes of all RAMBlocks
818 *
819 * For VMs with just normal pages this is equivalent to the host page
820 * size. If it's got some huge pages then it's the OR of all the
821 * different page sizes.
e8ca1db2
DDAG
822 */
823uint64_t ram_pagesize_summary(void)
824{
825 RAMBlock *block;
826 uint64_t summary = 0;
827
99e15582 828 RAMBLOCK_FOREACH(block) {
e8ca1db2
DDAG
829 summary |= block->page_size;
830 }
831
832 return summary;
833}
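/*
 * Example: a guest using normal 4 KiB pages plus a 2 MiB hugetlbfs-backed
 * region yields a summary of 0x1000 | 0x200000 == 0x201000, one bit set
 * per distinct page size in use.
 */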
834
8d820d6f 835static void migration_bitmap_sync(RAMState *rs)
56e93d26
JQ
836{
837 RAMBlock *block;
56e93d26 838 int64_t end_time;
c4bdf0cf 839 uint64_t bytes_xfer_now;
56e93d26 840
9360447d 841 ram_counters.dirty_sync_count++;
56e93d26 842
f664da80
JQ
843 if (!rs->time_last_bitmap_sync) {
844 rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56e93d26
JQ
845 }
846
847 trace_migration_bitmap_sync_start();
9c1f8f44 848 memory_global_dirty_log_sync();
56e93d26 849
108cfae0 850 qemu_mutex_lock(&rs->bitmap_mutex);
56e93d26 851 rcu_read_lock();
99e15582 852 RAMBLOCK_FOREACH(block) {
15440dd5 853 migration_bitmap_sync_range(rs, block, 0, block->used_length);
56e93d26
JQ
854 }
855 rcu_read_unlock();
108cfae0 856 qemu_mutex_unlock(&rs->bitmap_mutex);
56e93d26 857
a66cd90c 858 trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
1ffb5dfd 859
56e93d26
JQ
860 end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
861
862 /* more than 1 second = 1000 milliseconds */
f664da80 863 if (end_time > rs->time_last_bitmap_sync + 1000) {
d693c6f1 864 /* calculate period counters */
9360447d 865 ram_counters.dirty_pages_rate = rs->num_dirty_pages_period * 1000
d693c6f1 866 / (end_time - rs->time_last_bitmap_sync);
9360447d 867 bytes_xfer_now = ram_counters.transferred;
d693c6f1 868
9ac78b61
PL
869 /* During block migration the auto-converge logic incorrectly detects
870 * that ram migration makes no progress. Avoid this by disabling the
871 * throttling logic during the bulk phase of block migration. */
872 if (migrate_auto_converge() && !blk_mig_bulk_active()) {
56e93d26
JQ
873 /* The following detection logic can be refined later. For now:
874 Check to see if the dirtied bytes are more than half the approx.
875 amount of bytes that just got transferred since the last time we
070afca2
JH
876 were in this routine. If that happens twice, start or increase
877 throttling */
070afca2 878
d693c6f1 879 if ((rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
eac74159 880 (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
b4a3c64b 881 (++rs->dirty_rate_high_cnt >= 2)) {
56e93d26 882 trace_migration_throttle();
8d820d6f 883 rs->dirty_rate_high_cnt = 0;
070afca2 884 mig_throttle_guest_down();
d693c6f1 885 }
56e93d26 886 }
070afca2 887
56e93d26 888 if (migrate_use_xbzrle()) {
23b28c3c 889 if (rs->iterations_prev != rs->iterations) {
9360447d
JQ
890 xbzrle_counters.cache_miss_rate =
891 (double)(xbzrle_counters.cache_miss -
b5833fde 892 rs->xbzrle_cache_miss_prev) /
23b28c3c 893 (rs->iterations - rs->iterations_prev);
56e93d26 894 }
23b28c3c 895 rs->iterations_prev = rs->iterations;
9360447d 896 rs->xbzrle_cache_miss_prev = xbzrle_counters.cache_miss;
56e93d26 897 }
d693c6f1
FF
898
899 /* reset period counters */
f664da80 900 rs->time_last_bitmap_sync = end_time;
a66cd90c 901 rs->num_dirty_pages_period = 0;
d2a4d85a 902 rs->bytes_xfer_prev = bytes_xfer_now;
56e93d26 903 }
4addcd4f 904 if (migrate_use_events()) {
9360447d 905 qapi_event_send_migration_pass(ram_counters.dirty_sync_count, NULL);
4addcd4f 906 }
56e93d26
JQ
907}
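/*
 * Throttling condition above, spelled out with example numbers: over a
 * period of at least one second, if the bytes dirtied exceed half the
 * bytes transferred since the previous sync for two periods in a row,
 * mig_throttle_guest_down() is called.  E.g. transferring 1 GiB in the
 * period while the guest dirties 600 MiB trips the check (600 > 512).
 */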
908
909/**
3d0684b2 910 * save_zero_page: send the zero page to the stream
56e93d26 911 *
3d0684b2 912 * Returns the number of pages written.
56e93d26 913 *
f7ccd61b 914 * @rs: current RAM state
56e93d26
JQ
915 * @block: block that contains the page we want to send
916 * @offset: offset inside the block for the page
56e93d26 917 */
7faccdc3 918static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset)
56e93d26 919{
7faccdc3 920 uint8_t *p = block->host + offset;
56e93d26
JQ
921 int pages = -1;
922
923 if (is_zero_range(p, TARGET_PAGE_SIZE)) {
9360447d
JQ
924 ram_counters.duplicate++;
925 ram_counters.transferred +=
bb890ed5 926 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_ZERO);
ce25d337 927 qemu_put_byte(rs->f, 0);
9360447d 928 ram_counters.transferred += 1;
56e93d26
JQ
929 pages = 1;
930 }
931
932 return pages;
933}
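/*
 * Cost note: a zero page goes on the wire as its page header plus a
 * single zero byte, i.e. 9 bytes for a continued page instead of the
 * full TARGET_PAGE_SIZE of a normal page.
 */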
934
5727309d 935static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
53f09a10 936{
5727309d 937 if (!migrate_release_ram() || !migration_in_postcopy()) {
53f09a10
PB
938 return;
939 }
940
aaa2064c 941 ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
53f09a10
PB
942}
943
56e93d26 944/**
3d0684b2 945 * ram_save_page: send the given page to the stream
56e93d26 946 *
3d0684b2 947 * Returns the number of pages written.
3fd3c4b3
DDAG
948 * < 0 - error
949 * >=0 - Number of pages written - this might legally be 0
950 * if xbzrle noticed the page was the same.
56e93d26 951 *
6f37bb8b 952 * @rs: current RAM state
56e93d26
JQ
953 * @block: block that contains the page we want to send
954 * @offset: offset inside the block for the page
955 * @last_stage: if we are at the completion stage
56e93d26 956 */
a0a8aa14 957static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
56e93d26
JQ
958{
959 int pages = -1;
960 uint64_t bytes_xmit;
961 ram_addr_t current_addr;
56e93d26
JQ
962 uint8_t *p;
963 int ret;
964 bool send_async = true;
a08f6890 965 RAMBlock *block = pss->block;
a935e30f 966 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
56e93d26 967
2f68e399 968 p = block->host + offset;
1db9d8e5 969 trace_ram_save_page(block->idstr, (uint64_t)offset, p);
56e93d26
JQ
970
971 /* In doubt sent page as normal */
972 bytes_xmit = 0;
ce25d337 973 ret = ram_control_save_page(rs->f, block->offset,
56e93d26
JQ
974 offset, TARGET_PAGE_SIZE, &bytes_xmit);
975 if (bytes_xmit) {
9360447d 976 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
977 pages = 1;
978 }
979
980 XBZRLE_cache_lock();
981
982 current_addr = block->offset + offset;
983
56e93d26
JQ
984 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
985 if (ret != RAM_SAVE_CONTROL_DELAYED) {
986 if (bytes_xmit > 0) {
9360447d 987 ram_counters.normal++;
56e93d26 988 } else if (bytes_xmit == 0) {
9360447d 989 ram_counters.duplicate++;
56e93d26
JQ
990 }
991 }
992 } else {
7faccdc3 993 pages = save_zero_page(rs, block, offset);
56e93d26
JQ
994 if (pages > 0) {
995 /* Must let xbzrle know, otherwise a previous (now 0'd) cached
996 * page would be stale
997 */
6f37bb8b 998 xbzrle_cache_zero_page(rs, current_addr);
a935e30f 999 ram_release_pages(block->idstr, offset, pages);
6f37bb8b 1000 } else if (!rs->ram_bulk_stage &&
5727309d 1001 !migration_in_postcopy() && migrate_use_xbzrle()) {
204b88b8 1002 pages = save_xbzrle_page(rs, &p, current_addr, block,
072c2511 1003 offset, last_stage);
56e93d26
JQ
1004 if (!last_stage) {
1005 /* Can't send this cached data async, since the cache page
1006 * might get updated before it gets to the wire
1007 */
1008 send_async = false;
1009 }
1010 }
1011 }
1012
1013 /* XBZRLE overflow or normal page */
1014 if (pages == -1) {
9360447d
JQ
1015 ram_counters.transferred +=
1016 save_page_header(rs, rs->f, block, offset | RAM_SAVE_FLAG_PAGE);
56e93d26 1017 if (send_async) {
ce25d337 1018 qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
53f09a10 1019 migrate_release_ram() &
5727309d 1020 migration_in_postcopy());
56e93d26 1021 } else {
ce25d337 1022 qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
56e93d26 1023 }
9360447d 1024 ram_counters.transferred += TARGET_PAGE_SIZE;
56e93d26 1025 pages = 1;
9360447d 1026 ram_counters.normal++;
56e93d26
JQ
1027 }
1028
1029 XBZRLE_cache_unlock();
1030
1031 return pages;
1032}
1033
a7a9a88f
LL
1034static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
1035 ram_addr_t offset)
56e93d26 1036{
53518d94 1037 RAMState *rs = ram_state;
56e93d26 1038 int bytes_sent, blen;
a7a9a88f 1039 uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
56e93d26 1040
2bf3aa85 1041 bytes_sent = save_page_header(rs, f, block, offset |
56e93d26 1042 RAM_SAVE_FLAG_COMPRESS_PAGE);
a7a9a88f 1043 blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
56e93d26 1044 migrate_compress_level());
b3be2896
LL
1045 if (blen < 0) {
1046 bytes_sent = 0;
1047 qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
1048 error_report("compressed data failed!");
1049 } else {
1050 bytes_sent += blen;
5727309d 1051 ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
b3be2896 1052 }
56e93d26
JQ
1053
1054 return bytes_sent;
1055}
1056
ce25d337 1057static void flush_compressed_data(RAMState *rs)
56e93d26
JQ
1058{
1059 int idx, len, thread_count;
1060
1061 if (!migrate_use_compression()) {
1062 return;
1063 }
1064 thread_count = migrate_compress_threads();
a7a9a88f 1065
0d9f9a5c 1066 qemu_mutex_lock(&comp_done_lock);
56e93d26 1067 for (idx = 0; idx < thread_count; idx++) {
a7a9a88f 1068 while (!comp_param[idx].done) {
0d9f9a5c 1069 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26 1070 }
a7a9a88f 1071 }
0d9f9a5c 1072 qemu_mutex_unlock(&comp_done_lock);
a7a9a88f
LL
1073
1074 for (idx = 0; idx < thread_count; idx++) {
1075 qemu_mutex_lock(&comp_param[idx].mutex);
90e56fb4 1076 if (!comp_param[idx].quit) {
ce25d337 1077 len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
9360447d 1078 ram_counters.transferred += len;
56e93d26 1079 }
a7a9a88f 1080 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26
JQ
1081 }
1082}
1083
1084static inline void set_compress_params(CompressParam *param, RAMBlock *block,
1085 ram_addr_t offset)
1086{
1087 param->block = block;
1088 param->offset = offset;
1089}
1090
ce25d337
JQ
1091static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
1092 ram_addr_t offset)
56e93d26
JQ
1093{
1094 int idx, thread_count, bytes_xmit = -1, pages = -1;
1095
1096 thread_count = migrate_compress_threads();
0d9f9a5c 1097 qemu_mutex_lock(&comp_done_lock);
56e93d26
JQ
1098 while (true) {
1099 for (idx = 0; idx < thread_count; idx++) {
1100 if (comp_param[idx].done) {
a7a9a88f 1101 comp_param[idx].done = false;
ce25d337 1102 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
a7a9a88f 1103 qemu_mutex_lock(&comp_param[idx].mutex);
56e93d26 1104 set_compress_params(&comp_param[idx], block, offset);
a7a9a88f
LL
1105 qemu_cond_signal(&comp_param[idx].cond);
1106 qemu_mutex_unlock(&comp_param[idx].mutex);
56e93d26 1107 pages = 1;
9360447d
JQ
1108 ram_counters.normal++;
1109 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
1110 break;
1111 }
1112 }
1113 if (pages > 0) {
1114 break;
1115 } else {
0d9f9a5c 1116 qemu_cond_wait(&comp_done_cond, &comp_done_lock);
56e93d26
JQ
1117 }
1118 }
0d9f9a5c 1119 qemu_mutex_unlock(&comp_done_lock);
56e93d26
JQ
1120
1121 return pages;
1122}
1123
1124/**
1125 * ram_save_compressed_page: compress the given page and send it to the stream
1126 *
3d0684b2 1127 * Returns the number of pages written.
56e93d26 1128 *
6f37bb8b 1129 * @rs: current RAM state
56e93d26
JQ
1130 * @block: block that contains the page we want to send
1131 * @offset: offset inside the block for the page
1132 * @last_stage: if we are at the completion stage
56e93d26 1133 */
a0a8aa14
JQ
1134static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
1135 bool last_stage)
56e93d26
JQ
1136{
1137 int pages = -1;
fc50438e 1138 uint64_t bytes_xmit = 0;
56e93d26 1139 uint8_t *p;
fc50438e 1140 int ret, blen;
a08f6890 1141 RAMBlock *block = pss->block;
a935e30f 1142 ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
56e93d26 1143
2f68e399 1144 p = block->host + offset;
56e93d26 1145
ce25d337 1146 ret = ram_control_save_page(rs->f, block->offset,
56e93d26
JQ
1147 offset, TARGET_PAGE_SIZE, &bytes_xmit);
1148 if (bytes_xmit) {
9360447d 1149 ram_counters.transferred += bytes_xmit;
56e93d26
JQ
1150 pages = 1;
1151 }
56e93d26
JQ
1152 if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
1153 if (ret != RAM_SAVE_CONTROL_DELAYED) {
1154 if (bytes_xmit > 0) {
9360447d 1155 ram_counters.normal++;
56e93d26 1156 } else if (bytes_xmit == 0) {
9360447d 1157 ram_counters.duplicate++;
56e93d26
JQ
1158 }
1159 }
1160 } else {
1161 /* When starting the process of a new block, the first page of
1162 * the block should be sent out before other pages in the same
1163 * block, and all the pages in the last block should have been sent
1164 * out; keeping this order is important because the 'cont' flag
1165 * is used to avoid resending the block name.
1166 */
6f37bb8b 1167 if (block != rs->last_sent_block) {
ce25d337 1168 flush_compressed_data(rs);
7faccdc3 1169 pages = save_zero_page(rs, block, offset);
56e93d26 1170 if (pages == -1) {
fc50438e 1171 /* Make sure the first page is sent out before other pages */
2bf3aa85 1172 bytes_xmit = save_page_header(rs, rs->f, block, offset |
fc50438e 1173 RAM_SAVE_FLAG_COMPRESS_PAGE);
ce25d337 1174 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
fc50438e
LL
1175 migrate_compress_level());
1176 if (blen > 0) {
9360447d
JQ
1177 ram_counters.transferred += bytes_xmit + blen;
1178 ram_counters.normal++;
b3be2896 1179 pages = 1;
fc50438e 1180 } else {
ce25d337 1181 qemu_file_set_error(rs->f, blen);
fc50438e 1182 error_report("compressed data failed!");
b3be2896 1183 }
56e93d26 1184 }
53f09a10 1185 if (pages > 0) {
a935e30f 1186 ram_release_pages(block->idstr, offset, pages);
53f09a10 1187 }
56e93d26 1188 } else {
7faccdc3 1189 pages = save_zero_page(rs, block, offset);
56e93d26 1190 if (pages == -1) {
ce25d337 1191 pages = compress_page_with_multi_thread(rs, block, offset);
53f09a10 1192 } else {
a935e30f 1193 ram_release_pages(block->idstr, offset, pages);
56e93d26
JQ
1194 }
1195 }
1196 }
1197
1198 return pages;
1199}
1200
3d0684b2
JQ
1201/**
1202 * find_dirty_block: find the next dirty page and update any state
1203 * associated with the search process.
b9e60928 1204 *
3d0684b2 1205 * Returns true if a page is found
b9e60928 1206 *
6f37bb8b 1207 * @rs: current RAM state
3d0684b2
JQ
1208 * @pss: data about the state of the current dirty page scan
1209 * @again: set to false if the search has scanned the whole of RAM
b9e60928 1210 */
f20e2865 1211static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss, bool *again)
b9e60928 1212{
f20e2865 1213 pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page);
6f37bb8b 1214 if (pss->complete_round && pss->block == rs->last_seen_block &&
a935e30f 1215 pss->page >= rs->last_page) {
b9e60928
DDAG
1216 /*
1217 * We've been once around the RAM and haven't found anything.
1218 * Give up.
1219 */
1220 *again = false;
1221 return false;
1222 }
a935e30f 1223 if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
b9e60928 1224 /* Didn't find anything in this RAM Block */
a935e30f 1225 pss->page = 0;
b9e60928
DDAG
1226 pss->block = QLIST_NEXT_RCU(pss->block, next);
1227 if (!pss->block) {
1228 /* Hit the end of the list */
1229 pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1230 /* Flag that we've looped */
1231 pss->complete_round = true;
6f37bb8b 1232 rs->ram_bulk_stage = false;
b9e60928
DDAG
1233 if (migrate_use_xbzrle()) {
1234 /* If xbzrle is on, stop using the data compression at this
1235 * point. In theory, xbzrle can do better than compression.
1236 */
ce25d337 1237 flush_compressed_data(rs);
b9e60928
DDAG
1238 }
1239 }
1240 /* Didn't find anything this time, but try again on the new block */
1241 *again = true;
1242 return false;
1243 } else {
1244 /* Can go around again, but... */
1245 *again = true;
1246 /* We've found something so probably don't need to */
1247 return true;
1248 }
1249}
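/*
 * Return/again combinations above, for clarity:
 *   true,  *again = true  - a page to send was found in pss->block;
 *   false, *again = true  - end of this block, caller should retry on the
 *                           next block;
 *   false, *again = false - a whole pass found nothing, give up.
 */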
1250
3d0684b2
JQ
1251/**
1252 * unqueue_page: gets a page of the queue
1253 *
a82d593b 1254 * Helper for 'get_queued_page' - gets a page off the queue
a82d593b 1255 *
3d0684b2
JQ
1256 * Returns the block of the page (or NULL if none available)
1257 *
ec481c6c 1258 * @rs: current RAM state
3d0684b2 1259 * @offset: used to return the offset within the RAMBlock
a82d593b 1260 */
f20e2865 1261static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset)
a82d593b
DDAG
1262{
1263 RAMBlock *block = NULL;
1264
ec481c6c
JQ
1265 qemu_mutex_lock(&rs->src_page_req_mutex);
1266 if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1267 struct RAMSrcPageRequest *entry =
1268 QSIMPLEQ_FIRST(&rs->src_page_requests);
a82d593b
DDAG
1269 block = entry->rb;
1270 *offset = entry->offset;
a82d593b
DDAG
1271
1272 if (entry->len > TARGET_PAGE_SIZE) {
1273 entry->len -= TARGET_PAGE_SIZE;
1274 entry->offset += TARGET_PAGE_SIZE;
1275 } else {
1276 memory_region_unref(block->mr);
ec481c6c 1277 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
a82d593b
DDAG
1278 g_free(entry);
1279 }
1280 }
ec481c6c 1281 qemu_mutex_unlock(&rs->src_page_req_mutex);
a82d593b
DDAG
1282
1283 return block;
1284}
1285
3d0684b2
JQ
1286/**
1287 * get_queued_page: unqueue a page from the postcopy requests
1288 *
1289 * Skips pages that are already sent (!dirty)
a82d593b 1290 *
3d0684b2 1291 * Returns if a queued page is found
a82d593b 1292 *
6f37bb8b 1293 * @rs: current RAM state
3d0684b2 1294 * @pss: data about the state of the current dirty page scan
a82d593b 1295 */
f20e2865 1296static bool get_queued_page(RAMState *rs, PageSearchStatus *pss)
a82d593b
DDAG
1297{
1298 RAMBlock *block;
1299 ram_addr_t offset;
1300 bool dirty;
1301
1302 do {
f20e2865 1303 block = unqueue_page(rs, &offset);
a82d593b
DDAG
1304 /*
1305 * We're sending this page, and since it's postcopy nothing else
1306 * will dirty it, and we must make sure it doesn't get sent again
1307 * even if this queue request was received after the background
1308 * search already sent it.
1309 */
1310 if (block) {
f20e2865
JQ
1311 unsigned long page;
1312
6b6712ef
JQ
1313 page = offset >> TARGET_PAGE_BITS;
1314 dirty = test_bit(page, block->bmap);
a82d593b 1315 if (!dirty) {
06b10688 1316 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
6b6712ef 1317 page, test_bit(page, block->unsentmap));
a82d593b 1318 } else {
f20e2865 1319 trace_get_queued_page(block->idstr, (uint64_t)offset, page);
a82d593b
DDAG
1320 }
1321 }
1322
1323 } while (block && !dirty);
1324
1325 if (block) {
1326 /*
1327 * As soon as we start servicing pages out of order, then we have
1328 * to kill the bulk stage, since the bulk stage assumes
1329 * in (migration_bitmap_find_and_reset_dirty) that every page is
1330 * dirty, that's no longer true.
1331 */
6f37bb8b 1332 rs->ram_bulk_stage = false;
a82d593b
DDAG
1333
1334 /*
1335 * We want the background search to continue from the queued page
1336 * since the guest is likely to want other pages near to the page
1337 * it just requested.
1338 */
1339 pss->block = block;
a935e30f 1340 pss->page = offset >> TARGET_PAGE_BITS;
a82d593b
DDAG
1341 }
1342
1343 return !!block;
1344}
1345
6c595cde 1346/**
5e58f968
JQ
1347 * migration_page_queue_free: drop any remaining pages in the ram
1348 * request queue
6c595cde 1349 *
3d0684b2
JQ
1350 * It should be empty at the end anyway, but in error cases there may
1351 * be some left. If any pages are left, we drop them.
1352 *
6c595cde 1353 */
83c13382 1354static void migration_page_queue_free(RAMState *rs)
6c595cde 1355{
ec481c6c 1356 struct RAMSrcPageRequest *mspr, *next_mspr;
6c595cde
DDAG
1357 /* This queue generally should be empty - but in the case of a failed
1358 * migration might have some droppings in.
1359 */
1360 rcu_read_lock();
ec481c6c 1361 QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
6c595cde 1362 memory_region_unref(mspr->rb->mr);
ec481c6c 1363 QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
6c595cde
DDAG
1364 g_free(mspr);
1365 }
1366 rcu_read_unlock();
1367}
1368
1369/**
3d0684b2
JQ
1370 * ram_save_queue_pages: queue the page for transmission
1371 *
1372 * A request from postcopy destination for example.
1373 *
1374 * Returns zero on success or negative on error
1375 *
3d0684b2
JQ
1376 * @rbname: Name of the RAMBlock of the request. NULL means the
1377 * same as the last one.
1378 * @start: starting address from the start of the RAMBlock
1379 * @len: length (in bytes) to send
6c595cde 1380 */
96506894 1381int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
6c595cde
DDAG
1382{
1383 RAMBlock *ramblock;
53518d94 1384 RAMState *rs = ram_state;
6c595cde 1385
9360447d 1386 ram_counters.postcopy_requests++;
6c595cde
DDAG
1387 rcu_read_lock();
1388 if (!rbname) {
1389 /* Reuse last RAMBlock */
68a098f3 1390 ramblock = rs->last_req_rb;
6c595cde
DDAG
1391
1392 if (!ramblock) {
1393 /*
1394 * Shouldn't happen, we can't reuse the last RAMBlock if
1395 * it's the 1st request.
1396 */
1397 error_report("ram_save_queue_pages no previous block");
1398 goto err;
1399 }
1400 } else {
1401 ramblock = qemu_ram_block_by_name(rbname);
1402
1403 if (!ramblock) {
1404 /* We shouldn't be asked for a non-existent RAMBlock */
1405 error_report("ram_save_queue_pages no block '%s'", rbname);
1406 goto err;
1407 }
68a098f3 1408 rs->last_req_rb = ramblock;
6c595cde
DDAG
1409 }
1410 trace_ram_save_queue_pages(ramblock->idstr, start, len);
1411 if (start+len > ramblock->used_length) {
9458ad6b
JQ
1412 error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1413 RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
6c595cde
DDAG
1414 __func__, start, len, ramblock->used_length);
1415 goto err;
1416 }
1417
ec481c6c
JQ
1418 struct RAMSrcPageRequest *new_entry =
1419 g_malloc0(sizeof(struct RAMSrcPageRequest));
6c595cde
DDAG
1420 new_entry->rb = ramblock;
1421 new_entry->offset = start;
1422 new_entry->len = len;
1423
1424 memory_region_ref(ramblock->mr);
ec481c6c
JQ
1425 qemu_mutex_lock(&rs->src_page_req_mutex);
1426 QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1427 qemu_mutex_unlock(&rs->src_page_req_mutex);
6c595cde
DDAG
1428 rcu_read_unlock();
1429
1430 return 0;
1431
1432err:
1433 rcu_read_unlock();
1434 return -1;
1435}
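/*
 * Illustrative call (names are examples, not taken from this file): a
 * postcopy fault on one target page of the "pc.ram" block would be queued
 * roughly as
 *
 *     ram_save_queue_pages("pc.ram", faulting_offset, TARGET_PAGE_SIZE);
 *
 * and a NULL rbname may be passed for follow-up requests against the same
 * block.
 */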
1436
a82d593b 1437/**
3d0684b2 1438 * ram_save_target_page: save one target page
a82d593b 1439 *
3d0684b2 1440 * Returns the number of pages written
a82d593b 1441 *
6f37bb8b 1442 * @rs: current RAM state
3d0684b2 1443 * @ms: current migration state
3d0684b2 1444 * @pss: data about the page we want to send
a82d593b 1445 * @last_stage: if we are at the completion stage
a82d593b 1446 */
a0a8aa14 1447static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1448 bool last_stage)
a82d593b
DDAG
1449{
1450 int res = 0;
1451
1452 /* Check if the page is dirty and if it is, send it */
f20e2865 1453 if (migration_bitmap_clear_dirty(rs, pss->block, pss->page)) {
6d358d94
JQ
1454 /*
1455 * If xbzrle is on, stop using the data compression after first
1456 * round of migration even if compression is enabled. In theory,
1457 * xbzrle can do better than compression.
1458 */
6b6712ef
JQ
1459 if (migrate_use_compression() &&
1460 (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
a0a8aa14 1461 res = ram_save_compressed_page(rs, pss, last_stage);
a82d593b 1462 } else {
a0a8aa14 1463 res = ram_save_page(rs, pss, last_stage);
a82d593b
DDAG
1464 }
1465
1466 if (res < 0) {
1467 return res;
1468 }
6b6712ef
JQ
1469 if (pss->block->unsentmap) {
1470 clear_bit(pss->page, pss->block->unsentmap);
a82d593b
DDAG
1471 }
1472 }
1473
1474 return res;
1475}
1476
1477/**
3d0684b2 1478 * ram_save_host_page: save a whole host page
a82d593b 1479 *
3d0684b2
JQ
1480 * Starting at *offset send pages up to the end of the current host
1481 * page. It's valid for the initial offset to point into the middle of
1482 * a host page in which case the remainder of the hostpage is sent.
1483 * Only dirty target pages are sent. Note that the host page size may
1484 * be a huge page for this block.
1eb3fc0a
DDAG
1485 * The saving stops at the boundary of the used_length of the block
1486 * if the RAMBlock isn't a multiple of the host page size.
a82d593b 1487 *
3d0684b2
JQ
1488 * Returns the number of pages written or negative on error
1489 *
6f37bb8b 1490 * @rs: current RAM state
3d0684b2 1491 * @ms: current migration state
3d0684b2 1492 * @pss: data about the page we want to send
a82d593b 1493 * @last_stage: if we are at the completion stage
a82d593b 1494 */
a0a8aa14 1495static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
f20e2865 1496 bool last_stage)
a82d593b
DDAG
1497{
1498 int tmppages, pages = 0;
a935e30f
JQ
1499 size_t pagesize_bits =
1500 qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
4c011c37 1501
a82d593b 1502 do {
f20e2865 1503 tmppages = ram_save_target_page(rs, pss, last_stage);
a82d593b
DDAG
1504 if (tmppages < 0) {
1505 return tmppages;
1506 }
1507
1508 pages += tmppages;
a935e30f 1509 pss->page++;
1eb3fc0a
DDAG
1510 } while ((pss->page & (pagesize_bits - 1)) &&
1511 offset_in_ramblock(pss->block, pss->page << TARGET_PAGE_BITS));
a82d593b
DDAG
1512
1513 /* The offset we leave with is the last one we looked at */
a935e30f 1514 pss->page--;
a82d593b
DDAG
1515 return pages;
1516}
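/*
 * Example: for a RAMBlock backed by 2 MiB huge pages with 4 KiB target
 * pages, pagesize_bits is 512, so one call walks up to 512 target pages
 * and stops at the host-page (or used_length) boundary.
 */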
6c595cde 1517
56e93d26 1518/**
3d0684b2 1519 * ram_find_and_save_block: finds a dirty page and sends it to f
56e93d26
JQ
1520 *
1521 * Called within an RCU critical section.
1522 *
3d0684b2 1523 * Returns the number of pages written where zero means no dirty pages
56e93d26 1524 *
6f37bb8b 1525 * @rs: current RAM state
56e93d26 1526 * @last_stage: if we are at the completion stage
a82d593b
DDAG
1527 *
1528 * On systems where host-page-size > target-page-size it will send all the
1529 * pages in a host page that are dirty.
56e93d26
JQ
1530 */
1531
ce25d337 1532static int ram_find_and_save_block(RAMState *rs, bool last_stage)
56e93d26 1533{
b8fb8cb7 1534 PageSearchStatus pss;
56e93d26 1535 int pages = 0;
b9e60928 1536 bool again, found;
56e93d26 1537
0827b9e9
AA
1538 /* No dirty page as there is zero RAM */
1539 if (!ram_bytes_total()) {
1540 return pages;
1541 }
1542
6f37bb8b 1543 pss.block = rs->last_seen_block;
a935e30f 1544 pss.page = rs->last_page;
b8fb8cb7
DDAG
1545 pss.complete_round = false;
1546
1547 if (!pss.block) {
1548 pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1549 }
56e93d26 1550
b9e60928 1551 do {
a82d593b 1552 again = true;
f20e2865 1553 found = get_queued_page(rs, &pss);
b9e60928 1554
a82d593b
DDAG
1555 if (!found) {
1556 /* priority queue empty, so just search for something dirty */
f20e2865 1557 found = find_dirty_block(rs, &pss, &again);
a82d593b 1558 }
f3f491fc 1559
a82d593b 1560 if (found) {
f20e2865 1561 pages = ram_save_host_page(rs, &pss, last_stage);
56e93d26 1562 }
b9e60928 1563 } while (!pages && again);
56e93d26 1564
6f37bb8b 1565 rs->last_seen_block = pss.block;
a935e30f 1566 rs->last_page = pss.page;
56e93d26
JQ
1567
1568 return pages;
1569}
1570
1571void acct_update_position(QEMUFile *f, size_t size, bool zero)
1572{
1573 uint64_t pages = size / TARGET_PAGE_SIZE;
f7ccd61b 1574
56e93d26 1575 if (zero) {
9360447d 1576 ram_counters.duplicate += pages;
56e93d26 1577 } else {
9360447d
JQ
1578 ram_counters.normal += pages;
1579 ram_counters.transferred += size;
56e93d26
JQ
1580 qemu_update_position(f, size);
1581 }
1582}
1583
56e93d26
JQ
1584uint64_t ram_bytes_total(void)
1585{
1586 RAMBlock *block;
1587 uint64_t total = 0;
1588
1589 rcu_read_lock();
99e15582 1590 RAMBLOCK_FOREACH(block) {
56e93d26 1591 total += block->used_length;
99e15582 1592 }
56e93d26
JQ
1593 rcu_read_unlock();
1594 return total;
1595}
1596
f265e0e4 1597static void xbzrle_load_setup(void)
56e93d26 1598{
f265e0e4 1599 XBZRLE.decoded_buf = g_malloc(TARGET_PAGE_SIZE);
56e93d26
JQ
1600}
1601
f265e0e4
JQ
1602static void xbzrle_load_cleanup(void)
1603{
1604 g_free(XBZRLE.decoded_buf);
1605 XBZRLE.decoded_buf = NULL;
1606}
1607
7d7c96be
PX
1608static void ram_state_cleanup(RAMState **rsp)
1609{
b9ccaf6d
DDAG
1610 if (*rsp) {
1611 migration_page_queue_free(*rsp);
1612 qemu_mutex_destroy(&(*rsp)->bitmap_mutex);
1613 qemu_mutex_destroy(&(*rsp)->src_page_req_mutex);
1614 g_free(*rsp);
1615 *rsp = NULL;
1616 }
7d7c96be
PX
1617}
1618
84593a08
PX
1619static void xbzrle_cleanup(void)
1620{
1621 XBZRLE_cache_lock();
1622 if (XBZRLE.cache) {
1623 cache_fini(XBZRLE.cache);
1624 g_free(XBZRLE.encoded_buf);
1625 g_free(XBZRLE.current_buf);
1626 g_free(XBZRLE.zero_target_page);
1627 XBZRLE.cache = NULL;
1628 XBZRLE.encoded_buf = NULL;
1629 XBZRLE.current_buf = NULL;
1630 XBZRLE.zero_target_page = NULL;
1631 }
1632 XBZRLE_cache_unlock();
1633}
1634
f265e0e4 1635static void ram_save_cleanup(void *opaque)
56e93d26 1636{
53518d94 1637 RAMState **rsp = opaque;
6b6712ef 1638 RAMBlock *block;
eb859c53 1639
2ff64038
LZ
1640 /* The caller must hold the iothread lock or be in a bh, so there is
1641 * no write race against this migration_bitmap
1642 */
6b6712ef
JQ
1643 memory_global_dirty_log_stop();
1644
1645 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1646 g_free(block->bmap);
1647 block->bmap = NULL;
1648 g_free(block->unsentmap);
1649 block->unsentmap = NULL;
56e93d26
JQ
1650 }
1651
84593a08 1652 xbzrle_cleanup();
f0afa331 1653 compress_threads_save_cleanup();
7d7c96be 1654 ram_state_cleanup(rsp);
56e93d26
JQ
1655}
1656
6f37bb8b 1657static void ram_state_reset(RAMState *rs)
56e93d26 1658{
6f37bb8b
JQ
1659 rs->last_seen_block = NULL;
1660 rs->last_sent_block = NULL;
269ace29 1661 rs->last_page = 0;
6f37bb8b
JQ
1662 rs->last_version = ram_list.version;
1663 rs->ram_bulk_stage = true;
56e93d26
JQ
1664}
1665
1666#define MAX_WAIT 50 /* ms, half buffered_file limit */
1667
4f2e4252
DDAG
1668/*
1669 * 'expected' is the value you expect the bitmap mostly to be full
1670 * of; it won't bother printing lines that are all this value.
1671 * If 'todump' is null the migration bitmap is dumped.
1672 */
6b6712ef
JQ
1673void ram_debug_dump_bitmap(unsigned long *todump, bool expected,
1674 unsigned long pages)
4f2e4252 1675{
4f2e4252
DDAG
1676 int64_t cur;
1677 int64_t linelen = 128;
1678 char linebuf[129];
1679
6b6712ef 1680 for (cur = 0; cur < pages; cur += linelen) {
4f2e4252
DDAG
1681 int64_t curb;
1682 bool found = false;
1683 /*
1684 * Last line; catch the case where the line length
1685 * is longer than remaining ram
1686 */
6b6712ef
JQ
1687 if (cur + linelen > pages) {
1688 linelen = pages - cur;
4f2e4252
DDAG
1689 }
1690 for (curb = 0; curb < linelen; curb++) {
1691 bool thisbit = test_bit(cur + curb, todump);
1692 linebuf[curb] = thisbit ? '1' : '.';
1693 found = found || (thisbit != expected);
1694 }
1695 if (found) {
1696 linebuf[curb] = '\0';
1697 fprintf(stderr, "0x%08" PRIx64 " : %s\n", cur, linebuf);
1698 }
1699 }
1700}
1701
e0b266f0
DDAG
1702/* **** functions for postcopy ***** */
1703
ced1c616
PB
1704void ram_postcopy_migrated_memory_release(MigrationState *ms)
1705{
1706 struct RAMBlock *block;
ced1c616 1707
99e15582 1708 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1709 unsigned long *bitmap = block->bmap;
1710 unsigned long range = block->used_length >> TARGET_PAGE_BITS;
1711 unsigned long run_start = find_next_zero_bit(bitmap, range, 0);
ced1c616
PB
1712
1713 while (run_start < range) {
1714 unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
aaa2064c 1715 ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
ced1c616
PB
1716 (run_end - run_start) << TARGET_PAGE_BITS);
1717 run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1718 }
1719 }
1720}
1721
3d0684b2
JQ
1722/**
1723 * postcopy_send_discard_bm_ram: discard a RAMBlock
1724 *
1725 * Returns zero on success
1726 *
e0b266f0
DDAG
1727 * Callback from postcopy_each_ram_send_discard for each RAMBlock
1728 * Note: At this point the 'unsentmap' is the processed bitmap combined
1729 * with the dirtymap; so a '1' means it's either dirty or unsent.
3d0684b2
JQ
1730 *
1731 * @ms: current migration state
1732 * @pds: state for postcopy
1733 * @block: RAMBlock to discard
e0b266f0
DDAG
1735 */
1736static int postcopy_send_discard_bm_ram(MigrationState *ms,
1737 PostcopyDiscardState *pds,
6b6712ef 1738 RAMBlock *block)
e0b266f0 1739{
6b6712ef 1740 unsigned long end = block->used_length >> TARGET_PAGE_BITS;
e0b266f0 1741 unsigned long current;
6b6712ef 1742 unsigned long *unsentmap = block->unsentmap;
e0b266f0 1743
6b6712ef 1744 for (current = 0; current < end; ) {
e0b266f0
DDAG
1745 unsigned long one = find_next_bit(unsentmap, end, current);
1746
1747 if (one <= end) {
1748 unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1749 unsigned long discard_length;
1750
1751 if (zero >= end) {
1752 discard_length = end - one;
1753 } else {
1754 discard_length = zero - one;
1755 }
d688c62d
DDAG
1756 if (discard_length) {
1757 postcopy_discard_send_range(ms, pds, one, discard_length);
1758 }
e0b266f0
DDAG
1759 current = one + discard_length;
1760 } else {
1761 current = one;
1762 }
1763 }
1764
1765 return 0;
1766}
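/*
 * Worked example (made-up bitmap): with an unsentmap of
 *
 *     page:   0 1 2 3 4 5 6 7
 *     unsent: 0 0 1 1 1 0 1 0
 *
 * the loop above finds the run of set bits starting at page 2 (length 3)
 * and the run at page 6 (length 1), so it calls
 * postcopy_discard_send_range(ms, pds, 2, 3) and then
 * postcopy_discard_send_range(ms, pds, 6, 1).
 */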
1767
3d0684b2
JQ
1768/**
1769 * postcopy_each_ram_send_discard: discard all RAMBlocks
1770 *
1771 * Returns 0 for success or negative for error
1772 *
e0b266f0
DDAG
1773 * Utility for the outgoing postcopy code.
1774 * Calls postcopy_send_discard_bm_ram for each RAMBlock
1775 * passing it bitmap indexes and name.
e0b266f0
DDAG
1776 * (qemu_ram_foreach_block ends up passing unscaled lengths
1777 * which would mean postcopy code would have to deal with target page)
3d0684b2
JQ
1778 *
1779 * @ms: current migration state
e0b266f0
DDAG
1780 */
1781static int postcopy_each_ram_send_discard(MigrationState *ms)
1782{
1783 struct RAMBlock *block;
1784 int ret;
1785
99e15582 1786 RAMBLOCK_FOREACH(block) {
6b6712ef
JQ
1787 PostcopyDiscardState *pds =
1788 postcopy_discard_send_init(ms, block->idstr);
e0b266f0
DDAG
1789
1790 /*
1791 * Postcopy sends chunks of bitmap over the wire, but at this
1792 * point it only needs page indexes; that avoids the need for
1793 * target-page-specific code here.
1794 */
6b6712ef 1795 ret = postcopy_send_discard_bm_ram(ms, pds, block);
e0b266f0
DDAG
1796 postcopy_discard_send_finish(ms, pds);
1797 if (ret) {
1798 return ret;
1799 }
1800 }
1801
1802 return 0;
1803}
1804
3d0684b2
JQ
1805/**
1806 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
1807 *
1808 * Helper for postcopy_chunk_hostpages; it's called twice to
1809 * canonicalize the two bitmaps, which are similar but one is
1810 * inverted.
99e314eb 1811 *
3d0684b2
JQ
1812 * Postcopy requires that all target pages in a hostpage are dirty or
1813 * clean, not a mix. This function canonicalizes the bitmaps.
99e314eb 1814 *
3d0684b2
JQ
1815 * @ms: current migration state
1816 * @unsent_pass: if true we need to canonicalize partially unsent host pages
1817 * otherwise we need to canonicalize partially dirty host pages
1818 * @block: block that contains the page we want to canonicalize
1819 * @pds: state for postcopy
99e314eb
DDAG
1820 */
1821static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
1822 RAMBlock *block,
1823 PostcopyDiscardState *pds)
1824{
53518d94 1825 RAMState *rs = ram_state;
6b6712ef
JQ
1826 unsigned long *bitmap = block->bmap;
1827 unsigned long *unsentmap = block->unsentmap;
29c59172 1828 unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
6b6712ef 1829 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
99e314eb
DDAG
1830 unsigned long run_start;
1831
29c59172
DDAG
1832 if (block->page_size == TARGET_PAGE_SIZE) {
1833 /* Easy case - TPS==HPS for a non-huge page RAMBlock */
1834 return;
1835 }
1836
99e314eb
DDAG
1837 if (unsent_pass) {
1838 /* Find a sent page */
6b6712ef 1839 run_start = find_next_zero_bit(unsentmap, pages, 0);
99e314eb
DDAG
1840 } else {
1841 /* Find a dirty page */
6b6712ef 1842 run_start = find_next_bit(bitmap, pages, 0);
99e314eb
DDAG
1843 }
1844
6b6712ef 1845 while (run_start < pages) {
99e314eb
DDAG
1846 bool do_fixup = false;
1847 unsigned long fixup_start_addr;
1848 unsigned long host_offset;
1849
1850 /*
1851 * If the start of this run of pages is in the middle of a host
1852 * page, then we need to fixup this host page.
1853 */
1854 host_offset = run_start % host_ratio;
1855 if (host_offset) {
1856 do_fixup = true;
1857 run_start -= host_offset;
1858 fixup_start_addr = run_start;
1859 /* For the next pass */
1860 run_start = run_start + host_ratio;
1861 } else {
1862 /* Find the end of this run */
1863 unsigned long run_end;
1864 if (unsent_pass) {
6b6712ef 1865 run_end = find_next_bit(unsentmap, pages, run_start + 1);
99e314eb 1866 } else {
6b6712ef 1867 run_end = find_next_zero_bit(bitmap, pages, run_start + 1);
99e314eb
DDAG
1868 }
1869 /*
1870 * If the end isn't at the start of a host page, then the
1871 * run doesn't finish at the end of a host page
1872 * and we need to discard.
1873 */
1874 host_offset = run_end % host_ratio;
1875 if (host_offset) {
1876 do_fixup = true;
1877 fixup_start_addr = run_end - host_offset;
1878 /*
1879 * This host page has gone, the next loop iteration starts
1880 * from after the fixup
1881 */
1882 run_start = fixup_start_addr + host_ratio;
1883 } else {
1884 /*
1885 * No discards on this iteration, next loop starts from
1886 * next sent/dirty page
1887 */
1888 run_start = run_end + 1;
1889 }
1890 }
1891
1892 if (do_fixup) {
1893 unsigned long page;
1894
1895 /* Tell the destination to discard this page */
1896 if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
1897 /* For the unsent_pass we:
1898 * discard partially sent pages
1899 * For the !unsent_pass (dirty) we:
1900 * discard partially dirty pages that were sent
1901 * (any partially sent pages were already discarded
1902 * by the previous unsent_pass)
1903 */
1904 postcopy_discard_send_range(ms, pds, fixup_start_addr,
1905 host_ratio);
1906 }
1907
1908 /* Clean up the bitmap */
1909 for (page = fixup_start_addr;
1910 page < fixup_start_addr + host_ratio; page++) {
1911 /* All pages in this host page are now not sent */
1912 set_bit(page, unsentmap);
1913
1914 /*
1915 * Remark them as dirty, updating the count for any pages
1916 * that weren't previously dirty.
1917 */
0d8ec885 1918 rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
99e314eb
DDAG
1919 }
1920 }
1921
1922 if (unsent_pass) {
1923 /* Find the next sent page for the next iteration */
6b6712ef 1924 run_start = find_next_zero_bit(unsentmap, pages, run_start);
99e314eb
DDAG
1925 } else {
1926 /* Find the next dirty page for the next iteration */
6b6712ef 1927 run_start = find_next_bit(bitmap, pages, run_start);
99e314eb
DDAG
1928 }
1929 }
1930}
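/*
 * Worked example (hypothetical sizes): with a 2MB huge-page backed block
 * and 4KB target pages, host_ratio is 512.  If a run starts at target page
 * 1000 (i.e. in the middle of the host page spanning pages 512..1023),
 * host_offset is 488, so that whole host page gets fixed up: a discard for
 * pages 512..1023 may be sent, all 512 pages are marked unsent and dirty
 * again, and the scan resumes at page 1024.
 */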
1931
3d0684b2
JQ
1932/**
1933 * postcopy_chunk_hostpages: discard any partially sent host page
1934 *
99e314eb
DDAG
1935 * Utility for the outgoing postcopy code.
1936 *
1937 * Discard any partially sent host-page size chunks, mark any partially
29c59172
DDAG
1938 * dirty host-page size chunks as all dirty. Here the host page
1939 * is the host page of the particular RAMBlock, i.e. it might be a huge page.
99e314eb 1940 *
3d0684b2
JQ
1941 * Returns zero on success
1942 *
1943 * @ms: current migration state
6b6712ef 1944 * @block: block we want to work with
99e314eb 1945 */
6b6712ef 1946static int postcopy_chunk_hostpages(MigrationState *ms, RAMBlock *block)
99e314eb 1947{
6b6712ef
JQ
1948 PostcopyDiscardState *pds =
1949 postcopy_discard_send_init(ms, block->idstr);
99e314eb 1950
6b6712ef
JQ
1951 /* First pass: Discard all partially sent host pages */
1952 postcopy_chunk_hostpages_pass(ms, true, block, pds);
1953 /*
1954 * Second pass: Ensure that all partially dirty host pages are made
1955 * fully dirty.
1956 */
1957 postcopy_chunk_hostpages_pass(ms, false, block, pds);
99e314eb 1958
6b6712ef 1959 postcopy_discard_send_finish(ms, pds);
99e314eb
DDAG
1960 return 0;
1961}
1962
3d0684b2
JQ
1963/**
1964 * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1965 *
1966 * Returns zero on success
1967 *
e0b266f0
DDAG
1968 * Transmit the set of pages to be discarded after precopy to the target;
1969 * these are pages that:
1970 * a) have been previously transmitted but are now dirty again, or
1971 * b) have never been transmitted; this ensures that any pages on the
1972 * destination that have been mapped by background tasks get discarded
1973 * (transparent huge pages are the specific concern).
1974 * Hopefully this set is pretty sparse.
3d0684b2
JQ
1975 *
1976 * @ms: current migration state
e0b266f0
DDAG
1977 */
1978int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1979{
53518d94 1980 RAMState *rs = ram_state;
6b6712ef 1981 RAMBlock *block;
e0b266f0 1982 int ret;
e0b266f0
DDAG
1983
1984 rcu_read_lock();
1985
1986 /* This should be our last sync, the src is now paused */
eb859c53 1987 migration_bitmap_sync(rs);
e0b266f0 1988
6b6712ef
JQ
1989 /* Easiest way to make sure we don't resume in the middle of a host-page */
1990 rs->last_seen_block = NULL;
1991 rs->last_sent_block = NULL;
1992 rs->last_page = 0;
e0b266f0 1993
6b6712ef
JQ
1994 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1995 unsigned long pages = block->used_length >> TARGET_PAGE_BITS;
1996 unsigned long *bitmap = block->bmap;
1997 unsigned long *unsentmap = block->unsentmap;
1998
1999 if (!unsentmap) {
2000 /* We don't have a safe way to resize the unsentmap, so
2001 * if the bitmap was resized it will be NULL at this
2002 * point.
2003 */
2004 error_report("migration ram resized during precopy phase");
2005 rcu_read_unlock();
2006 return -EINVAL;
2007 }
2008 /* Deal with TPS != HPS and huge pages */
2009 ret = postcopy_chunk_hostpages(ms, block);
2010 if (ret) {
2011 rcu_read_unlock();
2012 return ret;
2013 }
e0b266f0 2014
6b6712ef
JQ
2015 /*
2016 * Update the unsentmap to be unsentmap = unsentmap | dirty
2017 */
2018 bitmap_or(unsentmap, unsentmap, bitmap, pages);
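        /*
         * e.g. (illustrative values) unsentmap 0b0011 | bitmap 0b0101 gives
         * 0b0111: pages 0, 1 and 2 will be discarded on the destination;
         * only page 3 (sent and still clean) is kept.
         */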
e0b266f0 2019#ifdef DEBUG_POSTCOPY
6b6712ef 2020 ram_debug_dump_bitmap(unsentmap, true, pages);
e0b266f0 2021#endif
6b6712ef
JQ
2022 }
2023 trace_ram_postcopy_send_discard_bitmap();
e0b266f0
DDAG
2024
2025 ret = postcopy_each_ram_send_discard(ms);
2026 rcu_read_unlock();
2027
2028 return ret;
2029}
2030
3d0684b2
JQ
2031/**
2032 * ram_discard_range: discard dirtied pages at the beginning of postcopy
e0b266f0 2033 *
3d0684b2 2034 * Returns zero on success
e0b266f0 2035 *
36449157
JQ
2036 * @rbname: name of the RAMBlock of the request. NULL means the
2037 * same as the last one.
3d0684b2
JQ
2038 * @start: byte offset into the RAMBlock
2039 * @length: length in bytes to discard
e0b266f0 2040 */
aaa2064c 2041int ram_discard_range(const char *rbname, uint64_t start, size_t length)
e0b266f0
DDAG
2042{
2043 int ret = -1;
2044
36449157 2045 trace_ram_discard_range(rbname, start, length);
d3a5038c 2046
e0b266f0 2047 rcu_read_lock();
36449157 2048 RAMBlock *rb = qemu_ram_block_by_name(rbname);
e0b266f0
DDAG
2049
2050 if (!rb) {
36449157 2051 error_report("ram_discard_range: Failed to find block '%s'", rbname);
e0b266f0
DDAG
2052 goto err;
2053 }
2054
f9494614
AP
2055 bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
2056 length >> qemu_target_page_bits());
d3a5038c 2057 ret = ram_block_discard_range(rb, start, length);
e0b266f0
DDAG
2058
2059err:
2060 rcu_read_unlock();
2061
2062 return ret;
2063}
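/*
 * Example caller from this file (ram_postcopy_migrated_memory_release()),
 * discarding a run of already-sent pages from a block:
 *
 *     ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
 *                       (run_end - run_start) << TARGET_PAGE_BITS);
 */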
2064
84593a08
PX
2065/*
2066 * For every allocation here, we try not to crash the VM if the
2067 * allocation fails.
2068 */
2069static int xbzrle_init(void)
2070{
2071 Error *local_err = NULL;
2072
2073 if (!migrate_use_xbzrle()) {
2074 return 0;
2075 }
2076
2077 XBZRLE_cache_lock();
2078
2079 XBZRLE.zero_target_page = g_try_malloc0(TARGET_PAGE_SIZE);
2080 if (!XBZRLE.zero_target_page) {
2081 error_report("%s: Error allocating zero page", __func__);
2082 goto err_out;
2083 }
2084
2085 XBZRLE.cache = cache_init(migrate_xbzrle_cache_size(),
2086 TARGET_PAGE_SIZE, &local_err);
2087 if (!XBZRLE.cache) {
2088 error_report_err(local_err);
2089 goto free_zero_page;
2090 }
2091
2092 XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
2093 if (!XBZRLE.encoded_buf) {
2094 error_report("%s: Error allocating encoded_buf", __func__);
2095 goto free_cache;
2096 }
2097
2098 XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
2099 if (!XBZRLE.current_buf) {
2100 error_report("%s: Error allocating current_buf", __func__);
2101 goto free_encoded_buf;
2102 }
2103
2104 /* We are all good */
2105 XBZRLE_cache_unlock();
2106 return 0;
2107
2108free_encoded_buf:
2109 g_free(XBZRLE.encoded_buf);
2110 XBZRLE.encoded_buf = NULL;
2111free_cache:
2112 cache_fini(XBZRLE.cache);
2113 XBZRLE.cache = NULL;
2114free_zero_page:
2115 g_free(XBZRLE.zero_target_page);
2116 XBZRLE.zero_target_page = NULL;
2117err_out:
2118 XBZRLE_cache_unlock();
2119 return -ENOMEM;
2120}
2121
53518d94 2122static int ram_state_init(RAMState **rsp)
56e93d26 2123{
7d00ee6a
PX
2124 *rsp = g_try_new0(RAMState, 1);
2125
2126 if (!*rsp) {
2127 error_report("%s: Init ramstate fail", __func__);
2128 return -1;
2129 }
53518d94
JQ
2130
2131 qemu_mutex_init(&(*rsp)->bitmap_mutex);
2132 qemu_mutex_init(&(*rsp)->src_page_req_mutex);
2133 QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
56e93d26 2134
7d00ee6a
PX
2135 /*
2136 * Count the total number of pages used by ram blocks not including any
2137 * gaps due to alignment or unplugs.
2138 */
2139 (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2140
2141 ram_state_reset(*rsp);
2142
2143 return 0;
2144}
2145
d6eff5d7 2146static void ram_list_init_bitmaps(void)
7d00ee6a 2147{
d6eff5d7
PX
2148 RAMBlock *block;
2149 unsigned long pages;
56e93d26 2150
0827b9e9
AA
2151 /* Skip setting bitmap if there is no RAM */
2152 if (ram_bytes_total()) {
6b6712ef 2153 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
d6eff5d7 2154 pages = block->max_length >> TARGET_PAGE_BITS;
6b6712ef
JQ
2155 block->bmap = bitmap_new(pages);
2156 bitmap_set(block->bmap, 0, pages);
2157 if (migrate_postcopy_ram()) {
2158 block->unsentmap = bitmap_new(pages);
2159 bitmap_set(block->unsentmap, 0, pages);
2160 }
0827b9e9 2161 }
f3f491fc 2162 }
d6eff5d7
PX
2163}
2164
2165static void ram_init_bitmaps(RAMState *rs)
2166{
2167 /* For memory_global_dirty_log_start below. */
2168 qemu_mutex_lock_iothread();
2169 qemu_mutex_lock_ramlist();
2170 rcu_read_lock();
f3f491fc 2171
d6eff5d7 2172 ram_list_init_bitmaps();
56e93d26 2173 memory_global_dirty_log_start();
d6eff5d7
PX
2174 migration_bitmap_sync(rs);
2175
2176 rcu_read_unlock();
56e93d26 2177 qemu_mutex_unlock_ramlist();
49877834 2178 qemu_mutex_unlock_iothread();
d6eff5d7
PX
2179}
2180
2181static int ram_init_all(RAMState **rsp)
2182{
2183 if (ram_state_init(rsp)) {
2184 return -1;
2185 }
2186
2187 if (xbzrle_init()) {
2188 ram_state_cleanup(rsp);
2189 return -1;
2190 }
2191
2192 ram_init_bitmaps(*rsp);
a91246c9
HZ
2193
2194 return 0;
2195}
2196
3d0684b2
JQ
2197/*
2198 * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
a91246c9
HZ
2199 * a long-running RCU critical section. When RCU reclaims in the code
2200 * start to become numerous it will be necessary to reduce the
2201 * granularity of these critical sections.
2202 */
2203
3d0684b2
JQ
2204/**
2205 * ram_save_setup: Setup RAM for migration
2206 *
2207 * Returns zero to indicate success and negative for error
2208 *
2209 * @f: QEMUFile where to send the data
2210 * @opaque: RAMState pointer
2211 */
a91246c9
HZ
2212static int ram_save_setup(QEMUFile *f, void *opaque)
2213{
53518d94 2214 RAMState **rsp = opaque;
a91246c9
HZ
2215 RAMBlock *block;
2216
2217 /* migration has already setup the bitmap, reuse it. */
2218 if (!migration_in_colo_state()) {
7d00ee6a 2219 if (ram_init_all(rsp) != 0) {
a91246c9 2220 return -1;
53518d94 2221 }
a91246c9 2222 }
53518d94 2223 (*rsp)->f = f;
a91246c9
HZ
2224
2225 rcu_read_lock();
56e93d26
JQ
2226
2227 qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2228
99e15582 2229 RAMBLOCK_FOREACH(block) {
56e93d26
JQ
2230 qemu_put_byte(f, strlen(block->idstr));
2231 qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2232 qemu_put_be64(f, block->used_length);
ef08fb38
DDAG
2233 if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2234 qemu_put_be64(f, block->page_size);
2235 }
56e93d26
JQ
2236 }
2237
2238 rcu_read_unlock();
f0afa331 2239 compress_threads_save_setup();
56e93d26
JQ
2240
2241 ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2242 ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2243
2244 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2245
2246 return 0;
2247}
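/*
 * Rough sketch of what ram_save_setup() puts on the wire (informal, not a
 * normative description of the stream format):
 *
 *     be64  ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE
 *     for each RAMBlock:
 *         byte   strlen(idstr)
 *         bytes  idstr (no trailing NUL)
 *         be64   used_length
 *         be64   page_size   (only if postcopy and page_size != host page size)
 *     be64  RAM_SAVE_FLAG_EOS
 */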
2248
3d0684b2
JQ
2249/**
2250 * ram_save_iterate: iterative stage for migration
2251 *
2252 * Returns zero to indicate success and negative for error
2253 *
2254 * @f: QEMUFile where to send the data
2255 * @opaque: RAMState pointer
2256 */
56e93d26
JQ
2257static int ram_save_iterate(QEMUFile *f, void *opaque)
2258{
53518d94
JQ
2259 RAMState **temp = opaque;
2260 RAMState *rs = *temp;
56e93d26
JQ
2261 int ret;
2262 int i;
2263 int64_t t0;
5c90308f 2264 int done = 0;
56e93d26 2265
b2557345
PL
2266 if (blk_mig_bulk_active()) {
2267 /* Avoid transferring ram during bulk phase of block migration as
2268 * the bulk phase will usually take a long time and transferring
2269 * ram updates during that time is pointless. */
2270 goto out;
2271 }
2272
56e93d26 2273 rcu_read_lock();
6f37bb8b
JQ
2274 if (ram_list.version != rs->last_version) {
2275 ram_state_reset(rs);
56e93d26
JQ
2276 }
2277
2278 /* Read version before ram_list.blocks */
2279 smp_rmb();
2280
2281 ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2282
2283 t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2284 i = 0;
2285 while ((ret = qemu_file_rate_limit(f)) == 0) {
2286 int pages;
2287
ce25d337 2288 pages = ram_find_and_save_block(rs, false);
56e93d26
JQ
2289 /* no more pages to send */
2290 if (pages == 0) {
5c90308f 2291 done = 1;
56e93d26
JQ
2292 break;
2293 }
23b28c3c 2294 rs->iterations++;
070afca2 2295
56e93d26
JQ
2296 /* we want to check in the 1st loop, just in case it was the 1st time
2297 and we had to sync the dirty bitmap.
2298 qemu_get_clock_ns() is a bit expensive, so we only check the
2299 elapsed time once every 64 iterations
2300 */
2301 if ((i & 63) == 0) {
2302 uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2303 if (t1 > MAX_WAIT) {
55c4446b 2304 trace_ram_save_iterate_big_wait(t1, i);
56e93d26
JQ
2305 break;
2306 }
2307 }
2308 i++;
2309 }
ce25d337 2310 flush_compressed_data(rs);
56e93d26
JQ
2311 rcu_read_unlock();
2312
2313 /*
2314 * Must occur before EOS (or any QEMUFile operation)
2315 * because of RDMA protocol.
2316 */
2317 ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2318
b2557345 2319out:
56e93d26 2320 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
9360447d 2321 ram_counters.transferred += 8;
56e93d26
JQ
2322
2323 ret = qemu_file_get_error(f);
2324 if (ret < 0) {
2325 return ret;
2326 }
2327
5c90308f 2328 return done;
56e93d26
JQ
2329}
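/*
 * Rough illustration of the pacing above: the clock is only sampled when
 * (i & 63) == 0, so between samples up to 64 pages are sent; an iteration
 * may therefore overshoot MAX_WAIT by roughly the time needed to send
 * those pages before trace_ram_save_iterate_big_wait() fires and we break.
 */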
2330
3d0684b2
JQ
2331/**
2332 * ram_save_complete: function called to send the remaining amount of ram
2333 *
2334 * Returns zero to indicate success
2335 *
2336 * Called with iothread lock
2337 *
2338 * @f: QEMUFile where to send the data
2339 * @opaque: RAMState pointer
2340 */
56e93d26
JQ
2341static int ram_save_complete(QEMUFile *f, void *opaque)
2342{
53518d94
JQ
2343 RAMState **temp = opaque;
2344 RAMState *rs = *temp;
6f37bb8b 2345
56e93d26
JQ
2346 rcu_read_lock();
2347
5727309d 2348 if (!migration_in_postcopy()) {
8d820d6f 2349 migration_bitmap_sync(rs);
663e6c1d 2350 }
56e93d26
JQ
2351
2352 ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2353
2354 /* try transferring iterative blocks of memory */
2355
2356 /* flush all remaining blocks regardless of rate limiting */
2357 while (true) {
2358 int pages;
2359
ce25d337 2360 pages = ram_find_and_save_block(rs, !migration_in_colo_state());
56e93d26
JQ
2361 /* no more blocks to send */
2362 if (pages == 0) {
2363 break;
2364 }
2365 }
2366
ce25d337 2367 flush_compressed_data(rs);
56e93d26 2368 ram_control_after_iterate(f, RAM_CONTROL_FINISH);
56e93d26
JQ
2369
2370 rcu_read_unlock();
d09a6fde 2371
56e93d26
JQ
2372 qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2373
2374 return 0;
2375}
2376
c31b098f 2377static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
47995026
VSO
2378 uint64_t *res_precopy_only,
2379 uint64_t *res_compatible,
2380 uint64_t *res_postcopy_only)
56e93d26 2381{
53518d94
JQ
2382 RAMState **temp = opaque;
2383 RAMState *rs = *temp;
56e93d26
JQ
2384 uint64_t remaining_size;
2385
9edabd4d 2386 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2387
5727309d 2388 if (!migration_in_postcopy() &&
663e6c1d 2389 remaining_size < max_size) {
56e93d26
JQ
2390 qemu_mutex_lock_iothread();
2391 rcu_read_lock();
8d820d6f 2392 migration_bitmap_sync(rs);
56e93d26
JQ
2393 rcu_read_unlock();
2394 qemu_mutex_unlock_iothread();
9edabd4d 2395 remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
56e93d26 2396 }
c31b098f 2397
86e1167e
VSO
2398 if (migrate_postcopy_ram()) {
2399 /* We can do postcopy, and all the data is postcopiable */
47995026 2400 *res_compatible += remaining_size;
86e1167e 2401 } else {
47995026 2402 *res_precopy_only += remaining_size;
86e1167e 2403 }
56e93d26
JQ
2404}
2405
2406static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2407{
2408 unsigned int xh_len;
2409 int xh_flags;
063e760a 2410 uint8_t *loaded_data;
56e93d26 2411
56e93d26
JQ
2412 /* extract RLE header */
2413 xh_flags = qemu_get_byte(f);
2414 xh_len = qemu_get_be16(f);
2415
2416 if (xh_flags != ENCODING_FLAG_XBZRLE) {
2417 error_report("Failed to load XBZRLE page - wrong compression!");
2418 return -1;
2419 }
2420
2421 if (xh_len > TARGET_PAGE_SIZE) {
2422 error_report("Failed to load XBZRLE page - len overflow!");
2423 return -1;
2424 }
f265e0e4 2425 loaded_data = XBZRLE.decoded_buf;
56e93d26 2426 /* load data and decode */
f265e0e4 2427 /* it can change loaded_data to point to an internal buffer */
063e760a 2428 qemu_get_buffer_in_place(f, &loaded_data, xh_len);
56e93d26
JQ
2429
2430 /* decode RLE */
063e760a 2431 if (xbzrle_decode_buffer(loaded_data, xh_len, host,
56e93d26
JQ
2432 TARGET_PAGE_SIZE) == -1) {
2433 error_report("Failed to load XBZRLE page - decode error!");
2434 return -1;
2435 }
2436
2437 return 0;
2438}
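/*
 * Informally, the XBZRLE record consumed above is:
 *
 *     byte   xh_flags   (must be ENCODING_FLAG_XBZRLE)
 *     be16   xh_len     (encoded length, at most TARGET_PAGE_SIZE)
 *     bytes  xh_len bytes of encoded data, decoded against the current
 *            contents of 'host'
 */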
2439
3d0684b2
JQ
2440/**
2441 * ram_block_from_stream: read a RAMBlock id from the migration stream
2442 *
2443 * Must be called from within a rcu critical section.
2444 *
56e93d26 2445 * Returns a pointer from within the RCU-protected ram_list.
a7180877 2446 *
3d0684b2
JQ
2447 * @f: QEMUFile where to read the data from
2448 * @flags: Page flags (mostly to see if it's a continuation of previous block)
a7180877 2449 */
3d0684b2 2450static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
56e93d26
JQ
2451{
2452 static RAMBlock *block = NULL;
2453 char id[256];
2454 uint8_t len;
2455
2456 if (flags & RAM_SAVE_FLAG_CONTINUE) {
4c4bad48 2457 if (!block) {
56e93d26
JQ
2458 error_report("Ack, bad migration stream!");
2459 return NULL;
2460 }
4c4bad48 2461 return block;
56e93d26
JQ
2462 }
2463
2464 len = qemu_get_byte(f);
2465 qemu_get_buffer(f, (uint8_t *)id, len);
2466 id[len] = 0;
2467
e3dd7493 2468 block = qemu_ram_block_by_name(id);
4c4bad48
HZ
2469 if (!block) {
2470 error_report("Can't find block %s", id);
2471 return NULL;
56e93d26
JQ
2472 }
2473
4c4bad48
HZ
2474 return block;
2475}
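/*
 * Note the 'static RAMBlock *block' above: RAM_SAVE_FLAG_CONTINUE means
 * "same block as the previous page", so only the first page of a run of
 * pages from a block carries the idstr.  An illustrative fragment of the
 * stream (made-up addresses):
 *
 *     addr 0x0000 flags PAGE            idstr "pc.ram"  <- looked up by name
 *     addr 0x1000 flags PAGE|CONTINUE                   <- cached block reused
 */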
2476
2477static inline void *host_from_ram_block_offset(RAMBlock *block,
2478 ram_addr_t offset)
2479{
2480 if (!offset_in_ramblock(block, offset)) {
2481 return NULL;
2482 }
2483
2484 return block->host + offset;
56e93d26
JQ
2485}
2486
3d0684b2
JQ
2487/**
2488 * ram_handle_compressed: handle the zero page case
2489 *
56e93d26
JQ
2490 * If a page (or a whole RDMA chunk) has been
2491 * determined to be zero, then zap it.
3d0684b2
JQ
2492 *
2493 * @host: host address for the zero page
2494 * @ch: what the page is filled from. We only support zero
2495 * @size: size of the zero page
56e93d26
JQ
2496 */
2497void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2498{
2499 if (ch != 0 || !is_zero_range(host, size)) {
2500 memset(host, ch, size);
2501 }
2502}
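/*
 * e.g. for the common RAM_SAVE_FLAG_ZERO case (ch == 0) on a destination
 * page that already reads as zero, the memset is skipped, which helps keep
 * untouched guest pages unallocated on the destination.
 */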
2503
2504static void *do_data_decompress(void *opaque)
2505{
2506 DecompressParam *param = opaque;
2507 unsigned long pagesize;
33d151f4
LL
2508 uint8_t *des;
2509 int len;
56e93d26 2510
33d151f4 2511 qemu_mutex_lock(&param->mutex);
90e56fb4 2512 while (!param->quit) {
33d151f4
LL
2513 if (param->des) {
2514 des = param->des;
2515 len = param->len;
2516 param->des = 0;
2517 qemu_mutex_unlock(&param->mutex);
2518
56e93d26 2519 pagesize = TARGET_PAGE_SIZE;
73a8912b
LL
2520 /* uncompress() can fail in some cases, especially when the
2521 * page was dirtied while it was being compressed. That's not
2522 * a problem because the dirty page will be retransferred and
2523 * uncompress() won't break the data in other pages.
2524 */
33d151f4
LL
2525 uncompress((Bytef *)des, &pagesize,
2526 (const Bytef *)param->compbuf, len);
73a8912b 2527
33d151f4
LL
2528 qemu_mutex_lock(&decomp_done_lock);
2529 param->done = true;
2530 qemu_cond_signal(&decomp_done_cond);
2531 qemu_mutex_unlock(&decomp_done_lock);
2532
2533 qemu_mutex_lock(&param->mutex);
2534 } else {
2535 qemu_cond_wait(&param->cond, &param->mutex);
2536 }
56e93d26 2537 }
33d151f4 2538 qemu_mutex_unlock(&param->mutex);
56e93d26
JQ
2539
2540 return NULL;
2541}
2542
5533b2e9
LL
2543static void wait_for_decompress_done(void)
2544{
2545 int idx, thread_count;
2546
2547 if (!migrate_use_compression()) {
2548 return;
2549 }
2550
2551 thread_count = migrate_decompress_threads();
2552 qemu_mutex_lock(&decomp_done_lock);
2553 for (idx = 0; idx < thread_count; idx++) {
2554 while (!decomp_param[idx].done) {
2555 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2556 }
2557 }
2558 qemu_mutex_unlock(&decomp_done_lock);
2559}
2560
f0afa331 2561static void compress_threads_load_setup(void)
56e93d26
JQ
2562{
2563 int i, thread_count;
2564
3416ab5b
JQ
2565 if (!migrate_use_compression()) {
2566 return;
2567 }
56e93d26
JQ
2568 thread_count = migrate_decompress_threads();
2569 decompress_threads = g_new0(QemuThread, thread_count);
2570 decomp_param = g_new0(DecompressParam, thread_count);
73a8912b
LL
2571 qemu_mutex_init(&decomp_done_lock);
2572 qemu_cond_init(&decomp_done_cond);
56e93d26
JQ
2573 for (i = 0; i < thread_count; i++) {
2574 qemu_mutex_init(&decomp_param[i].mutex);
2575 qemu_cond_init(&decomp_param[i].cond);
2576 decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
73a8912b 2577 decomp_param[i].done = true;
90e56fb4 2578 decomp_param[i].quit = false;
56e93d26
JQ
2579 qemu_thread_create(decompress_threads + i, "decompress",
2580 do_data_decompress, decomp_param + i,
2581 QEMU_THREAD_JOINABLE);
2582 }
2583}
2584
f0afa331 2585static void compress_threads_load_cleanup(void)
56e93d26
JQ
2586{
2587 int i, thread_count;
2588
3416ab5b
JQ
2589 if (!migrate_use_compression()) {
2590 return;
2591 }
56e93d26
JQ
2592 thread_count = migrate_decompress_threads();
2593 for (i = 0; i < thread_count; i++) {
2594 qemu_mutex_lock(&decomp_param[i].mutex);
90e56fb4 2595 decomp_param[i].quit = true;
56e93d26
JQ
2596 qemu_cond_signal(&decomp_param[i].cond);
2597 qemu_mutex_unlock(&decomp_param[i].mutex);
2598 }
2599 for (i = 0; i < thread_count; i++) {
2600 qemu_thread_join(decompress_threads + i);
2601 qemu_mutex_destroy(&decomp_param[i].mutex);
2602 qemu_cond_destroy(&decomp_param[i].cond);
2603 g_free(decomp_param[i].compbuf);
2604 }
2605 g_free(decompress_threads);
2606 g_free(decomp_param);
56e93d26
JQ
2607 decompress_threads = NULL;
2608 decomp_param = NULL;
56e93d26
JQ
2609}
2610
c1bc6626 2611static void decompress_data_with_multi_threads(QEMUFile *f,
56e93d26
JQ
2612 void *host, int len)
2613{
2614 int idx, thread_count;
2615
2616 thread_count = migrate_decompress_threads();
73a8912b 2617 qemu_mutex_lock(&decomp_done_lock);
56e93d26
JQ
2618 while (true) {
2619 for (idx = 0; idx < thread_count; idx++) {
73a8912b 2620 if (decomp_param[idx].done) {
33d151f4
LL
2621 decomp_param[idx].done = false;
2622 qemu_mutex_lock(&decomp_param[idx].mutex);
c1bc6626 2623 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
56e93d26
JQ
2624 decomp_param[idx].des = host;
2625 decomp_param[idx].len = len;
33d151f4
LL
2626 qemu_cond_signal(&decomp_param[idx].cond);
2627 qemu_mutex_unlock(&decomp_param[idx].mutex);
56e93d26
JQ
2628 break;
2629 }
2630 }
2631 if (idx < thread_count) {
2632 break;
73a8912b
LL
2633 } else {
2634 qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
56e93d26
JQ
2635 }
2636 }
73a8912b 2637 qemu_mutex_unlock(&decomp_done_lock);
56e93d26
JQ
2638}
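/*
 * Hand-off between this feeder and do_data_decompress(): an idle worker is
 * claimed by clearing its 'done' flag, the compressed data is read into
 * that worker's compbuf and its condvar is signalled; the worker
 * decompresses into 'des' and sets 'done' again under decomp_done_lock.
 * If every worker is busy, the feeder waits on decomp_done_cond.
 */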
2639
f265e0e4
JQ
2640/**
2641 * ram_load_setup: Setup RAM for migration incoming side
2642 *
2643 * Returns zero to indicate success and negative for error
2644 *
2645 * @f: QEMUFile where to receive the data
2646 * @opaque: RAMState pointer
2647 */
2648static int ram_load_setup(QEMUFile *f, void *opaque)
2649{
2650 xbzrle_load_setup();
f0afa331 2651 compress_threads_load_setup();
f9494614 2652 ramblock_recv_map_init();
f265e0e4
JQ
2653 return 0;
2654}
2655
2656static int ram_load_cleanup(void *opaque)
2657{
f9494614 2658 RAMBlock *rb;
f265e0e4 2659 xbzrle_load_cleanup();
f0afa331 2660 compress_threads_load_cleanup();
f9494614
AP
2661
2662 RAMBLOCK_FOREACH(rb) {
2663 g_free(rb->receivedmap);
2664 rb->receivedmap = NULL;
2665 }
f265e0e4
JQ
2666 return 0;
2667}
2668
3d0684b2
JQ
2669/**
2670 * ram_postcopy_incoming_init: allocate postcopy data structures
2671 *
2672 * Returns 0 for success and negative if there was one error
2673 *
2674 * @mis: current migration incoming state
2675 *
2676 * Allocate data structures etc needed by incoming migration with
2677 * postcopy-ram. postcopy-ram's similarly named
2678 * postcopy_ram_incoming_init does the work.
1caddf8a
DDAG
2679 */
2680int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2681{
b8c48993 2682 unsigned long ram_pages = last_ram_page();
1caddf8a
DDAG
2683
2684 return postcopy_ram_incoming_init(mis, ram_pages);
2685}
2686
3d0684b2
JQ
2687/**
2688 * ram_load_postcopy: load a page in postcopy case
2689 *
2690 * Returns 0 for success or -errno in case of error
2691 *
a7180877
DDAG
2692 * Called in postcopy mode by ram_load().
2693 * rcu_read_lock is taken prior to this being called.
3d0684b2
JQ
2694 *
2695 * @f: QEMUFile where to send the data
a7180877
DDAG
2696 */
2697static int ram_load_postcopy(QEMUFile *f)
2698{
2699 int flags = 0, ret = 0;
2700 bool place_needed = false;
28abd200 2701 bool matching_page_sizes = false;
a7180877
DDAG
2702 MigrationIncomingState *mis = migration_incoming_get_current();
2703 /* Temporary page that is later 'placed' */
2704 void *postcopy_host_page = postcopy_get_tmp_page(mis);
c53b7ddc 2705 void *last_host = NULL;
a3b6ff6d 2706 bool all_zero = false;
a7180877
DDAG
2707
2708 while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2709 ram_addr_t addr;
2710 void *host = NULL;
2711 void *page_buffer = NULL;
2712 void *place_source = NULL;
df9ff5e1 2713 RAMBlock *block = NULL;
a7180877 2714 uint8_t ch;
a7180877
DDAG
2715
2716 addr = qemu_get_be64(f);
7a9ddfbf
PX
2717
2718 /*
2719 * If qemu file error, we should stop here, and then "addr"
2720 * may be invalid
2721 */
2722 ret = qemu_file_get_error(f);
2723 if (ret) {
2724 break;
2725 }
2726
a7180877
DDAG
2727 flags = addr & ~TARGET_PAGE_MASK;
2728 addr &= TARGET_PAGE_MASK;
2729
2730 trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2731 place_needed = false;
bb890ed5 2732 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE)) {
df9ff5e1 2733 block = ram_block_from_stream(f, flags);
4c4bad48
HZ
2734
2735 host = host_from_ram_block_offset(block, addr);
a7180877
DDAG
2736 if (!host) {
2737 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2738 ret = -EINVAL;
2739 break;
2740 }
28abd200 2741 matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
a7180877 2742 /*
28abd200
DDAG
2743 * Postcopy requires that we place whole host pages atomically;
2744 * these may be huge pages for RAMBlocks that are backed by
2745 * hugetlbfs.
a7180877
DDAG
2746 * To make it atomic, the data is read into a temporary page
2747 * that's moved into place later.
2748 * The migration protocol uses, possibly smaller, target-pages
2749 * however the source ensures it always sends all the components
2750 * of a host page in order.
2751 */
2752 page_buffer = postcopy_host_page +
28abd200 2753 ((uintptr_t)host & (block->page_size - 1));
a7180877 2754 /* If all TP are zero then we can optimise the place */
28abd200 2755 if (!((uintptr_t)host & (block->page_size - 1))) {
a7180877 2756 all_zero = true;
c53b7ddc
DDAG
2757 } else {
2758 /* not the 1st TP within the HP */
2759 if (host != (last_host + TARGET_PAGE_SIZE)) {
9af9e0fe 2760 error_report("Non-sequential target page %p/%p",
c53b7ddc
DDAG
2761 host, last_host);
2762 ret = -EINVAL;
2763 break;
2764 }
a7180877
DDAG
2765 }
2766
c53b7ddc 2767
a7180877
DDAG
2768 /*
2769 * If it's the last part of a host page then we place the host
2770 * page
2771 */
2772 place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
28abd200 2773 (block->page_size - 1)) == 0;
a7180877
DDAG
2774 place_source = postcopy_host_page;
2775 }
c53b7ddc 2776 last_host = host;
a7180877
DDAG
2777
2778 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
bb890ed5 2779 case RAM_SAVE_FLAG_ZERO:
a7180877
DDAG
2780 ch = qemu_get_byte(f);
2781 memset(page_buffer, ch, TARGET_PAGE_SIZE);
2782 if (ch) {
2783 all_zero = false;
2784 }
2785 break;
2786
2787 case RAM_SAVE_FLAG_PAGE:
2788 all_zero = false;
2789 if (!place_needed || !matching_page_sizes) {
2790 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2791 } else {
2792 /* Avoid the extra copy out of the qemu_file buffer; postcopy
2793 * is going to copy the page into place later anyway. We can only
2794 * do this when the read is done in one go (matching page sizes)
2795 */
2796 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2797 TARGET_PAGE_SIZE);
2798 }
2799 break;
2800 case RAM_SAVE_FLAG_EOS:
2801 /* normal exit */
2802 break;
2803 default:
2804 error_report("Unknown combination of migration flags: %#x"
2805 " (postcopy mode)", flags);
2806 ret = -EINVAL;
7a9ddfbf
PX
2807 break;
2808 }
2809
2810 /* Detect for any possible file errors */
2811 if (!ret && qemu_file_get_error(f)) {
2812 ret = qemu_file_get_error(f);
a7180877
DDAG
2813 }
2814
7a9ddfbf 2815 if (!ret && place_needed) {
a7180877 2816 /* This gets called at the last target page in the host page */
df9ff5e1
DDAG
2817 void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2818
a7180877 2819 if (all_zero) {
df9ff5e1 2820 ret = postcopy_place_page_zero(mis, place_dest,
8be4620b 2821 block);
a7180877 2822 } else {
df9ff5e1 2823 ret = postcopy_place_page(mis, place_dest,
8be4620b 2824 place_source, block);
a7180877
DDAG
2825 }
2826 }
a7180877
DDAG
2827 }
2828
2829 return ret;
2830}
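/*
 * Illustration (hypothetical sizes): for a 2MB hugetlbfs-backed block with
 * 4KB target pages, all 512 target pages of one host page are accumulated
 * in postcopy_host_page; only when the last one arrives does place_needed
 * become true and the whole 2MB page is placed atomically (or zero-placed
 * if every target page in it was zero).
 */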
2831
acab30b8
DHB
2832static bool postcopy_is_advised(void)
2833{
2834 PostcopyState ps = postcopy_state_get();
2835 return ps >= POSTCOPY_INCOMING_ADVISE && ps < POSTCOPY_INCOMING_END;
2836}
2837
2838static bool postcopy_is_running(void)
2839{
2840 PostcopyState ps = postcopy_state_get();
2841 return ps >= POSTCOPY_INCOMING_LISTENING && ps < POSTCOPY_INCOMING_END;
2842}
2843
56e93d26
JQ
2844static int ram_load(QEMUFile *f, void *opaque, int version_id)
2845{
edc60127 2846 int flags = 0, ret = 0, invalid_flags = 0;
56e93d26
JQ
2847 static uint64_t seq_iter;
2848 int len = 0;
a7180877
DDAG
2849 /*
2850 * If system is running in postcopy mode, page inserts to host memory must
2851 * be atomic
2852 */
acab30b8 2853 bool postcopy_running = postcopy_is_running();
ef08fb38 2854 /* ADVISE comes earlier; it shows the source has the postcopy capability enabled */
acab30b8 2855 bool postcopy_advised = postcopy_is_advised();
56e93d26
JQ
2856
2857 seq_iter++;
2858
2859 if (version_id != 4) {
2860 ret = -EINVAL;
2861 }
2862
edc60127
JQ
2863 if (!migrate_use_compression()) {
2864 invalid_flags |= RAM_SAVE_FLAG_COMPRESS_PAGE;
2865 }
56e93d26
JQ
2866 /* This RCU critical section can be very long running.
2867 * When RCU reclaims in the code start to become numerous,
2868 * it will be necessary to reduce the granularity of this
2869 * critical section.
2870 */
2871 rcu_read_lock();
a7180877
DDAG
2872
2873 if (postcopy_running) {
2874 ret = ram_load_postcopy(f);
2875 }
2876
2877 while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
56e93d26 2878 ram_addr_t addr, total_ram_bytes;
a776aa15 2879 void *host = NULL;
56e93d26
JQ
2880 uint8_t ch;
2881
2882 addr = qemu_get_be64(f);
2883 flags = addr & ~TARGET_PAGE_MASK;
2884 addr &= TARGET_PAGE_MASK;
2885
edc60127
JQ
2886 if (flags & invalid_flags) {
2887 if (flags & invalid_flags & RAM_SAVE_FLAG_COMPRESS_PAGE) {
2888 error_report("Received an unexpected compressed page");
2889 }
2890
2891 ret = -EINVAL;
2892 break;
2893 }
2894
bb890ed5 2895 if (flags & (RAM_SAVE_FLAG_ZERO | RAM_SAVE_FLAG_PAGE |
a776aa15 2896 RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
4c4bad48
HZ
2897 RAMBlock *block = ram_block_from_stream(f, flags);
2898
2899 host = host_from_ram_block_offset(block, addr);
a776aa15
DDAG
2900 if (!host) {
2901 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2902 ret = -EINVAL;
2903 break;
2904 }
f9494614 2905 ramblock_recv_bitmap_set(block, host);
1db9d8e5 2906 trace_ram_load_loop(block->idstr, (uint64_t)addr, flags, host);
a776aa15
DDAG
2907 }
2908
56e93d26
JQ
2909 switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2910 case RAM_SAVE_FLAG_MEM_SIZE:
2911 /* Synchronize RAM block list */
2912 total_ram_bytes = addr;
2913 while (!ret && total_ram_bytes) {
2914 RAMBlock *block;
56e93d26
JQ
2915 char id[256];
2916 ram_addr_t length;
2917
2918 len = qemu_get_byte(f);
2919 qemu_get_buffer(f, (uint8_t *)id, len);
2920 id[len] = 0;
2921 length = qemu_get_be64(f);
2922
e3dd7493
DDAG
2923 block = qemu_ram_block_by_name(id);
2924 if (block) {
2925 if (length != block->used_length) {
2926 Error *local_err = NULL;
56e93d26 2927
fa53a0e5 2928 ret = qemu_ram_resize(block, length,
e3dd7493
DDAG
2929 &local_err);
2930 if (local_err) {
2931 error_report_err(local_err);
56e93d26 2932 }
56e93d26 2933 }
ef08fb38
DDAG
2934 /* For postcopy we need to check hugepage sizes match */
2935 if (postcopy_advised &&
2936 block->page_size != qemu_host_page_size) {
2937 uint64_t remote_page_size = qemu_get_be64(f);
2938 if (remote_page_size != block->page_size) {
2939 error_report("Mismatched RAM page size %s "
2940 "(local) %zd != %" PRId64,
2941 id, block->page_size,
2942 remote_page_size);
2943 ret = -EINVAL;
2944 }
2945 }
e3dd7493
DDAG
2946 ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2947 block->idstr);
2948 } else {
56e93d26
JQ
2949 error_report("Unknown ramblock \"%s\", cannot "
2950 "accept migration", id);
2951 ret = -EINVAL;
2952 }
2953
2954 total_ram_bytes -= length;
2955 }
2956 break;
a776aa15 2957
bb890ed5 2958 case RAM_SAVE_FLAG_ZERO:
56e93d26
JQ
2959 ch = qemu_get_byte(f);
2960 ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2961 break;
a776aa15 2962
56e93d26 2963 case RAM_SAVE_FLAG_PAGE:
56e93d26
JQ
2964 qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2965 break;
56e93d26 2966
a776aa15 2967 case RAM_SAVE_FLAG_COMPRESS_PAGE:
56e93d26
JQ
2968 len = qemu_get_be32(f);
2969 if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2970 error_report("Invalid compressed data length: %d", len);
2971 ret = -EINVAL;
2972 break;
2973 }
c1bc6626 2974 decompress_data_with_multi_threads(f, host, len);
56e93d26 2975 break;
a776aa15 2976
56e93d26 2977 case RAM_SAVE_FLAG_XBZRLE:
56e93d26
JQ
2978 if (load_xbzrle(f, addr, host) < 0) {
2979 error_report("Failed to decompress XBZRLE page at "
2980 RAM_ADDR_FMT, addr);
2981 ret = -EINVAL;
2982 break;
2983 }
2984 break;
2985 case RAM_SAVE_FLAG_EOS:
2986 /* normal exit */
2987 break;
2988 default:
2989 if (flags & RAM_SAVE_FLAG_HOOK) {
632e3a5c 2990 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
56e93d26
JQ
2991 } else {
2992 error_report("Unknown combination of migration flags: %#x",
2993 flags);
2994 ret = -EINVAL;
2995 }
2996 }
2997 if (!ret) {
2998 ret = qemu_file_get_error(f);
2999 }
3000 }
3001
5533b2e9 3002 wait_for_decompress_done();
56e93d26 3003 rcu_read_unlock();
55c4446b 3004 trace_ram_load_complete(ret, seq_iter);
56e93d26
JQ
3005 return ret;
3006}
3007
c6467627
VSO
3008static bool ram_has_postcopy(void *opaque)
3009{
3010 return migrate_postcopy_ram();
3011}
3012
56e93d26 3013static SaveVMHandlers savevm_ram_handlers = {
9907e842 3014 .save_setup = ram_save_setup,
56e93d26 3015 .save_live_iterate = ram_save_iterate,
763c906b 3016 .save_live_complete_postcopy = ram_save_complete,
a3e06c3d 3017 .save_live_complete_precopy = ram_save_complete,
c6467627 3018 .has_postcopy = ram_has_postcopy,
56e93d26
JQ
3019 .save_live_pending = ram_save_pending,
3020 .load_state = ram_load,
f265e0e4
JQ
3021 .save_cleanup = ram_save_cleanup,
3022 .load_setup = ram_load_setup,
3023 .load_cleanup = ram_load_cleanup,
56e93d26
JQ
3024};
3025
3026void ram_mig_init(void)
3027{
3028 qemu_mutex_init(&XBZRLE.lock);
6f37bb8b 3029 register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
56e93d26 3030}