migration/ram.c

   1 /*
   2  * QEMU System Emulator
   3  *
   4  * Copyright (c) 2003-2008 Fabrice Bellard
   5  * Copyright (c) 2011-2015 Red Hat Inc
   6  *
   7  * Authors:
   8  *  Juan Quintela <[email protected]>
   9  *
  10  * Permission is hereby granted, free of charge, to any person obtaining a copy
  11  * of this software and associated documentation files (the "Software"), to deal
  12  * in the Software without restriction, including without limitation the rights
  13  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  14  * copies of the Software, and to permit persons to whom the Software is
  15  * furnished to do so, subject to the following conditions:
  16  *
  17  * The above copyright notice and this permission notice shall be included in
  18  * all copies or substantial portions of the Software.
  19  *
  20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  25  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  26  * THE SOFTWARE.
  27  */
  28 #include "qemu/osdep.h"
  29 #include "qemu-common.h"
  30 #include "cpu.h"
  31 #include <zlib.h>
  32 #include "qapi-event.h"
  33 #include "qemu/cutils.h"
  34 #include "qemu/bitops.h"
  35 #include "qemu/bitmap.h"
  36 #include "qemu/timer.h"
  37 #include "qemu/main-loop.h"
  38 #include "migration/migration.h"
  39 #include "migration/postcopy-ram.h"
  40 #include "exec/address-spaces.h"
  41 #include "migration/page_cache.h"
  42 #include "qemu/error-report.h"
  43 #include "trace.h"
  44 #include "exec/ram_addr.h"
  45 #include "qemu/rcu_queue.h"
  46 #include "migration/colo.h"
  47
  48 /***********************************************************/
  49 /* ram save/restore */
  50
  /*
   * Flag bits that are OR-ed into the page offset before it is written
   * as the 64-bit page header (see save_page_header()).
   */
  #define RAM_SAVE_FLAG_FULL          0x01 /* obsolete, no longer used */
  #define RAM_SAVE_FLAG_COMPRESS      0x02
  #define RAM_SAVE_FLAG_MEM_SIZE      0x04
  #define RAM_SAVE_FLAG_PAGE          0x08
  #define RAM_SAVE_FLAG_EOS           0x10
  #define RAM_SAVE_FLAG_CONTINUE      0x20
  #define RAM_SAVE_FLAG_XBZRLE        0x40
  /* 0x80 is reserved in migration.h; further flags start at 0x100 */
  #define RAM_SAVE_FLAG_COMPRESS_PAGE 0x100
  60
  /*
   * Page content inserted into the XBZRLE cache for pages sent as zero.
   * NOTE(review): presumably a TARGET_PAGE_SIZE buffer of zeroes; confirm
   * against the allocation site.
   */
  static uint8_t *ZERO_TARGET_PAGE;
  62
  /*
   * is_zero_range: thin wrapper around buffer_is_zero()
   *
   * Returns whatever buffer_is_zero() reports for the @size bytes at @p.
   */
  static inline bool is_zero_range(uint8_t *p, uint64_t size)
  {
      bool zero = buffer_is_zero(p, size);

      return zero;
  }
  67
  68 /* struct contains XBZRLE cache and a static page
  69    used by the compression */
  70 static struct {
  71     /* buffer used for XBZRLE encoding */
  72     uint8_t *encoded_buf;
  73     /* buffer for storing page content */
  74     uint8_t *current_buf;
  75     /* Cache for XBZRLE, Protected by lock. */
  76     PageCache *cache;
  77     QemuMutex lock;
  78 } XBZRLE;
  79
  /* scratch buffer used when decoding XBZRLE pages */
  static uint8_t *xbzrle_decoded_buf;
  82
  83 static void XBZRLE_cache_lock(void)
  84 {
  85     if (migrate_use_xbzrle())
  86         qemu_mutex_lock(&XBZRLE.lock);
  87 }
  88
  89 static void XBZRLE_cache_unlock(void)
  90 {
  91     if (migrate_use_xbzrle())
  92         qemu_mutex_unlock(&XBZRLE.lock);
  93 }
  94
  95 /**
  96  * xbzrle_cache_resize: resize the xbzrle cache
  97  *
  98  * This function is called from qmp_migrate_set_cache_size in main
  99  * thread, possibly while a migration is in progress.  A running
 100  * migration may be using the cache and might finish during this call,
 101  * hence changes to the cache are protected by XBZRLE.lock().
 102  *
 103  * Returns the new_size or negative in case of error.
 104  *
 105  * @new_size: new cache size
 106  */
 107 int64_t xbzrle_cache_resize(int64_t new_size)
 108 {
 109     PageCache *new_cache;
 110     int64_t ret;
 111
 112     if (new_size < TARGET_PAGE_SIZE) {
 113         return -1;
 114     }
 115
 116     XBZRLE_cache_lock();
 117
 118     if (XBZRLE.cache != NULL) {
 119         if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
 120             goto out_new_size;
 121         }
 122         new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
 123                                         TARGET_PAGE_SIZE);
 124         if (!new_cache) {
 125             error_report("Error creating cache");
 126             ret = -1;
 127             goto out;
 128         }
 129
 130         cache_fini(XBZRLE.cache);
 131         XBZRLE.cache = new_cache;
 132     }
 133
 134 out_new_size:
 135     ret = pow2floor(new_size);
 136 out:
 137     XBZRLE_cache_unlock();
 138     return ret;
 139 }
 140
 141 struct RAMBitmap {
 142     struct rcu_head rcu;
 143     /* Main migration bitmap */
 144     unsigned long *bmap;
 145     /* bitmap of pages that haven't been sent even once
 146      * only maintained and used in postcopy at the moment
 147      * where it's used to send the dirtymap at the start
 148      * of the postcopy phase
 149      */
 150     unsigned long *unsentmap;
 151 };
 152 typedef struct RAMBitmap RAMBitmap;
 153
 154 /*
 155  * An outstanding page request, on the source, having been received
 156  * and queued
 157  */
 158 struct RAMSrcPageRequest {
 159     RAMBlock *rb;
 160     hwaddr    offset;
 161     hwaddr    len;
 162
 163     QSIMPLEQ_ENTRY(RAMSrcPageRequest) next_req;
 164 };
 165
 166 /* State of RAM for migration */
 167 struct RAMState {
 168     /* QEMUFile used for this migration */
 169     QEMUFile *f;
 170     /* Last block that we have visited searching for dirty pages */
 171     RAMBlock *last_seen_block;
 172     /* Last block from where we have sent data */
 173     RAMBlock *last_sent_block;
 174     /* Last dirty target page we have sent */
 175     ram_addr_t last_page;
 176     /* last ram version we have seen */
 177     uint32_t last_version;
 178     /* We are in the first round */
 179     bool ram_bulk_stage;
 180     /* How many times we have dirty too many pages */
 181     int dirty_rate_high_cnt;
 182     /* How many times we have synchronized the bitmap */
 183     uint64_t bitmap_sync_count;
 184     /* these variables are used for bitmap sync */
 185     /* last time we did a full bitmap_sync */
 186     int64_t time_last_bitmap_sync;
 187     /* bytes transferred at start_time */
 188     uint64_t bytes_xfer_prev;
 189     /* number of dirty pages since start_time */
 190     uint64_t num_dirty_pages_period;
 191     /* xbzrle misses since the beginning of the period */
 192     uint64_t xbzrle_cache_miss_prev;
 193     /* number of iterations at the beginning of period */
 194     uint64_t iterations_prev;
 195     /* Accounting fields */
 196     /* number of zero pages.  It used to be pages filled by the same char. */
 197     uint64_t zero_pages;
 198     /* number of normal transferred pages */
 199     uint64_t norm_pages;
 200     /* Iterations since start */
 201     uint64_t iterations;
 202     /* xbzrle transmitted bytes.  Notice that this is with
 203      * compression, they can't be calculated from the pages */
 204     uint64_t xbzrle_bytes;
 205     /* xbzrle transmmited pages */
 206     uint64_t xbzrle_pages;
 207     /* xbzrle number of cache miss */
 208     uint64_t xbzrle_cache_miss;
 209     /* xbzrle miss rate */
 210     double xbzrle_cache_miss_rate;
 211     /* xbzrle number of overflows */
 212     uint64_t xbzrle_overflows;
 213     /* number of dirty bits in the bitmap */
 214     uint64_t migration_dirty_pages;
 215     /* total number of bytes transferred */
 216     uint64_t bytes_transferred;
 217     /* number of dirtied pages in the last second */
 218     uint64_t dirty_pages_rate;
 219     /* Count of requests incoming from destination */
 220     uint64_t postcopy_requests;
 221     /* protects modification of the bitmap */
 222     QemuMutex bitmap_mutex;
 223     /* Ram Bitmap protected by RCU */
 224     RAMBitmap *ram_bitmap;
 225     /* The RAMBlock used in the last src_page_requests */
 226     RAMBlock *last_req_rb;
 227     /* Queue of outstanding page requests from the destination */
 228     QemuMutex src_page_req_mutex;
 229     QSIMPLEQ_HEAD(src_page_requests, RAMSrcPageRequest) src_page_requests;
 230 };
 231 typedef struct RAMState RAMState;
 232
 233 static RAMState ram_state;
 234
 235 uint64_t dup_mig_pages_transferred(void)
 236 {
 237     return ram_state.zero_pages;
 238 }
 239
 240 uint64_t norm_mig_pages_transferred(void)
 241 {
 242     return ram_state.norm_pages;
 243 }
 244
 245 uint64_t xbzrle_mig_bytes_transferred(void)
 246 {
 247     return ram_state.xbzrle_bytes;
 248 }
 249
 250 uint64_t xbzrle_mig_pages_transferred(void)
 251 {
 252     return ram_state.xbzrle_pages;
 253 }
 254
 255 uint64_t xbzrle_mig_pages_cache_miss(void)
 256 {
 257     return ram_state.xbzrle_cache_miss;
 258 }
 259
 260 double xbzrle_mig_cache_miss_rate(void)
 261 {
 262     return ram_state.xbzrle_cache_miss_rate;
 263 }
 264
 265 uint64_t xbzrle_mig_pages_overflow(void)
 266 {
 267     return ram_state.xbzrle_overflows;
 268 }
 269
 270 uint64_t ram_bytes_transferred(void)
 271 {
 272     return ram_state.bytes_transferred;
 273 }
 274
 275 uint64_t ram_bytes_remaining(void)
 276 {
 277     return ram_state.migration_dirty_pages * TARGET_PAGE_SIZE;
 278 }
 279
 280 uint64_t ram_dirty_sync_count(void)
 281 {
 282     return ram_state.bitmap_sync_count;
 283 }
 284
 285 uint64_t ram_dirty_pages_rate(void)
 286 {
 287     return ram_state.dirty_pages_rate;
 288 }
 289
 290 uint64_t ram_postcopy_requests(void)
 291 {
 292     return ram_state.postcopy_requests;
 293 }
 294
 295 /* used by the search for pages to send */
 296 struct PageSearchStatus {
 297     /* Current block being searched */
 298     RAMBlock    *block;
 299     /* Current page to search from */
 300     unsigned long page;
 301     /* Set once we wrap around */
 302     bool         complete_round;
 303 };
 304 typedef struct PageSearchStatus PageSearchStatus;
 305
 306 struct CompressParam {
 307     bool done;
 308     bool quit;
 309     QEMUFile *file;
 310     QemuMutex mutex;
 311     QemuCond cond;
 312     RAMBlock *block;
 313     ram_addr_t offset;
 314 };
 315 typedef struct CompressParam CompressParam;
 316
 317 struct DecompressParam {
 318     bool done;
 319     bool quit;
 320     QemuMutex mutex;
 321     QemuCond cond;
 322     void *des;
 323     uint8_t *compbuf;
 324     int len;
 325 };
 326 typedef struct DecompressParam DecompressParam;
 327
 328 static CompressParam *comp_param;
 329 static QemuThread *compress_threads;
 330 /* comp_done_cond is used to wake up the migration thread when
 331  * one of the compression threads has finished the compression.
 332  * comp_done_lock is used to co-work with comp_done_cond.
 333  */
 334 static QemuMutex comp_done_lock;
 335 static QemuCond comp_done_cond;
 336 /* The empty QEMUFileOps will be used by file in CompressParam */
 337 static const QEMUFileOps empty_ops = { };
 338
 339 static DecompressParam *decomp_param;
 340 static QemuThread *decompress_threads;
 341 static QemuMutex decomp_done_lock;
 342 static QemuCond decomp_done_cond;
 343
 344 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
 345                                 ram_addr_t offset);
 346
 347 static void *do_data_compress(void *opaque)
 348 {
 349     CompressParam *param = opaque;
 350     RAMBlock *block;
 351     ram_addr_t offset;
 352
 353     qemu_mutex_lock(&param->mutex);
 354     while (!param->quit) {
 355         if (param->block) {
 356             block = param->block;
 357             offset = param->offset;
 358             param->block = NULL;
 359             qemu_mutex_unlock(&param->mutex);
 360
 361             do_compress_ram_page(param->file, block, offset);
 362
 363             qemu_mutex_lock(&comp_done_lock);
 364             param->done = true;
 365             qemu_cond_signal(&comp_done_cond);
 366             qemu_mutex_unlock(&comp_done_lock);
 367
 368             qemu_mutex_lock(&param->mutex);
 369         } else {
 370             qemu_cond_wait(&param->cond, &param->mutex);
 371         }
 372     }
 373     qemu_mutex_unlock(&param->mutex);
 374
 375     return NULL;
 376 }
 377
 378 static inline void terminate_compression_threads(void)
 379 {
 380     int idx, thread_count;
 381
 382     thread_count = migrate_compress_threads();
 383
 384     for (idx = 0; idx < thread_count; idx++) {
 385         qemu_mutex_lock(&comp_param[idx].mutex);
 386         comp_param[idx].quit = true;
 387         qemu_cond_signal(&comp_param[idx].cond);
 388         qemu_mutex_unlock(&comp_param[idx].mutex);
 389     }
 390 }
 391
 392 void migrate_compress_threads_join(void)
 393 {
 394     int i, thread_count;
 395
 396     if (!migrate_use_compression()) {
 397         return;
 398     }
 399     terminate_compression_threads();
 400     thread_count = migrate_compress_threads();
 401     for (i = 0; i < thread_count; i++) {
 402         qemu_thread_join(compress_threads + i);
 403         qemu_fclose(comp_param[i].file);
 404         qemu_mutex_destroy(&comp_param[i].mutex);
 405         qemu_cond_destroy(&comp_param[i].cond);
 406     }
 407     qemu_mutex_destroy(&comp_done_lock);
 408     qemu_cond_destroy(&comp_done_cond);
 409     g_free(compress_threads);
 410     g_free(comp_param);
 411     compress_threads = NULL;
 412     comp_param = NULL;
 413 }
 414
 415 void migrate_compress_threads_create(void)
 416 {
 417     int i, thread_count;
 418
 419     if (!migrate_use_compression()) {
 420         return;
 421     }
 422     thread_count = migrate_compress_threads();
 423     compress_threads = g_new0(QemuThread, thread_count);
 424     comp_param = g_new0(CompressParam, thread_count);
 425     qemu_cond_init(&comp_done_cond);
 426     qemu_mutex_init(&comp_done_lock);
 427     for (i = 0; i < thread_count; i++) {
 428         /* comp_param[i].file is just used as a dummy buffer to save data,
 429          * set its ops to empty.
 430          */
 431         comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops);
 432         comp_param[i].done = true;
 433         comp_param[i].quit = false;
 434         qemu_mutex_init(&comp_param[i].mutex);
 435         qemu_cond_init(&comp_param[i].cond);
 436         qemu_thread_create(compress_threads + i, "compress",
 437                            do_data_compress, comp_param + i,
 438                            QEMU_THREAD_JOINABLE);
 439     }
 440 }
 441
 442 /**
 443  * save_page_header: write page header to wire
 444  *
 445  * If this is the 1st block, it also writes the block identification
 446  *
 447  * Returns the number of bytes written
 448  *
 449  * @f: QEMUFile where to send the data
 450  * @block: block that contains the page we want to send
 451  * @offset: offset inside the block for the page
 452  *          in the lower bits, it contains flags
 453  */
 454 static size_t save_page_header(RAMState *rs, RAMBlock *block, ram_addr_t offset)
 455 {
 456     size_t size, len;
 457
 458     if (block == rs->last_sent_block) {
 459         offset |= RAM_SAVE_FLAG_CONTINUE;
 460     }
 461     qemu_put_be64(rs->f, offset);
 462     size = 8;
 463
 464     if (!(offset & RAM_SAVE_FLAG_CONTINUE)) {
 465         len = strlen(block->idstr);
 466         qemu_put_byte(rs->f, len);
 467         qemu_put_buffer(rs->f, (uint8_t *)block->idstr, len);
 468         size += 1 + len;
 469         rs->last_sent_block = block;
 470     }
 471     return size;
 472 }
 473
 474 /**
 475  * mig_throttle_guest_down: throotle down the guest
 476  *
 477  * Reduce amount of guest cpu execution to hopefully slow down memory
 478  * writes. If guest dirty memory rate is reduced below the rate at
 479  * which we can transfer pages to the destination then we should be
 480  * able to complete migration. Some workloads dirty memory way too
 481  * fast and will not effectively converge, even with auto-converge.
 482  */
 483 static void mig_throttle_guest_down(void)
 484 {
 485     MigrationState *s = migrate_get_current();
 486     uint64_t pct_initial = s->parameters.cpu_throttle_initial;
 487     uint64_t pct_icrement = s->parameters.cpu_throttle_increment;
 488
 489     /* We have not started throttling yet. Let's start it. */
 490     if (!cpu_throttle_active()) {
 491         cpu_throttle_set(pct_initial);
 492     } else {
 493         /* Throttling already on, just increase the rate */
 494         cpu_throttle_set(cpu_throttle_get_percentage() + pct_icrement);
 495     }
 496 }
 497
 498 /**
 499  * xbzrle_cache_zero_page: insert a zero page in the XBZRLE cache
 500  *
 501  * @rs: current RAM state
 502  * @current_addr: address for the zero page
 503  *
 504  * Update the xbzrle cache to reflect a page that's been sent as all 0.
 505  * The important thing is that a stale (not-yet-0'd) page be replaced
 506  * by the new data.
 507  * As a bonus, if the page wasn't in the cache it gets added so that
 508  * when a small write is made into the 0'd page it gets XBZRLE sent.
 509  */
 510 static void xbzrle_cache_zero_page(RAMState *rs, ram_addr_t current_addr)
 511 {
 512     if (rs->ram_bulk_stage || !migrate_use_xbzrle()) {
 513         return;
 514     }
 515
 516     /* We don't care if this fails to allocate a new cache page
 517      * as long as it updated an old one */
 518     cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE,
 519                  rs->bitmap_sync_count);
 520 }
 521
 /* encoding byte written in front of every XBZRLE-encoded page */
 #define ENCODING_FLAG_XBZRLE 0x1
 523
 524 /**
 525  * save_xbzrle_page: compress and send current page
 526  *
 527  * Returns: 1 means that we wrote the page
 528  *          0 means that page is identical to the one already sent
 529  *          -1 means that xbzrle would be longer than normal
 530  *
 531  * @rs: current RAM state
 532  * @current_data: pointer to the address of the page contents
 533  * @current_addr: addr of the page
 534  * @block: block that contains the page we want to send
 535  * @offset: offset inside the block for the page
 536  * @last_stage: if we are at the completion stage
 537  */
 538 static int save_xbzrle_page(RAMState *rs, uint8_t **current_data,
 539                             ram_addr_t current_addr, RAMBlock *block,
 540                             ram_addr_t offset, bool last_stage)
 541 {
 542     int encoded_len = 0, bytes_xbzrle;
 543     uint8_t *prev_cached_page;
 544
 545     if (!cache_is_cached(XBZRLE.cache, current_addr, rs->bitmap_sync_count)) {
 546         rs->xbzrle_cache_miss++;
 547         if (!last_stage) {
 548             if (cache_insert(XBZRLE.cache, current_addr, *current_data,
 549                              rs->bitmap_sync_count) == -1) {
 550                 return -1;
 551             } else {
 552                 /* update *current_data when the page has been
 553                    inserted into cache */
 554                 *current_data = get_cached_data(XBZRLE.cache, current_addr);
 555             }
 556         }
 557         return -1;
 558     }
 559
 560     prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
 561
 562     /* save current buffer into memory */
 563     memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
 564
 565     /* XBZRLE encoding (if there is no overflow) */
 566     encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
 567                                        TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
 568                                        TARGET_PAGE_SIZE);
 569     if (encoded_len == 0) {
 570         trace_save_xbzrle_page_skipping();
 571         return 0;
 572     } else if (encoded_len == -1) {
 573         trace_save_xbzrle_page_overflow();
 574         rs->xbzrle_overflows++;
 575         /* update data in the cache */
 576         if (!last_stage) {
 577             memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
 578             *current_data = prev_cached_page;
 579         }
 580         return -1;
 581     }
 582
 583     /* we need to update the data in the cache, in order to get the same data */
 584     if (!last_stage) {
 585         memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
 586     }
 587
 588     /* Send XBZRLE based compressed page */
 589     bytes_xbzrle = save_page_header(rs, block,
 590                                     offset | RAM_SAVE_FLAG_XBZRLE);
 591     qemu_put_byte(rs->f, ENCODING_FLAG_XBZRLE);
 592     qemu_put_be16(rs->f, encoded_len);
 593     qemu_put_buffer(rs->f, XBZRLE.encoded_buf, encoded_len);
 594     bytes_xbzrle += encoded_len + 1 + 2;
 595     rs->xbzrle_pages++;
 596     rs->xbzrle_bytes += bytes_xbzrle;
 597     rs->bytes_transferred += bytes_xbzrle;
 598
 599     return 1;
 600 }
 601
 602 /**
 603  * migration_bitmap_find_dirty: find the next dirty page from start
 604  *
 605  * Called with rcu_read_lock() to protect migration_bitmap
 606  *
 607  * Returns the byte offset within memory region of the start of a dirty page
 608  *
 609  * @rs: current RAM state
 610  * @rb: RAMBlock where to search for dirty pages
 611  * @start: page where we start the search
 612  * @page_abs: pointer into where to store the dirty page
 613  */
 614 static inline
 615 unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
 616                                           unsigned long start,
 617                                           unsigned long *page_abs)
 618 {
 619     unsigned long base = rb->offset >> TARGET_PAGE_BITS;
 620     unsigned long nr = base + start;
 621     uint64_t rb_size = rb->used_length;
 622     unsigned long size = base + (rb_size >> TARGET_PAGE_BITS);
 623     unsigned long *bitmap;
 624
 625     unsigned long next;
 626
 627     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 628     if (rs->ram_bulk_stage && nr > base) {
 629         next = nr + 1;
 630     } else {
 631         next = find_next_bit(bitmap, size, nr);
 632     }
 633
 634     *page_abs = next;
 635     return next - base;
 636 }
 637
 638 static inline bool migration_bitmap_clear_dirty(RAMState *rs,
 639                                                 unsigned long page_abs)
 640 {
 641     bool ret;
 642     unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 643
 644     ret = test_and_clear_bit(page_abs, bitmap);
 645
 646     if (ret) {
 647         rs->migration_dirty_pages--;
 648     }
 649     return ret;
 650 }
 651
 652 static void migration_bitmap_sync_range(RAMState *rs, RAMBlock *rb,
 653                                         ram_addr_t start, ram_addr_t length)
 654 {
 655     unsigned long *bitmap;
 656     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
 657     rs->migration_dirty_pages +=
 658         cpu_physical_memory_sync_dirty_bitmap(bitmap, rb, start, length,
 659                                               &rs->num_dirty_pages_period);
 660 }
 661
 662 /**
 663  * ram_pagesize_summary: calculate all the pagesizes of a VM
 664  *
 665  * Returns a summary bitmap of the page sizes of all RAMBlocks
 666  *
 667  * For VMs with just normal pages this is equivalent to the host page
 668  * size. If it's got some huge pages then it's the OR of all the
 669  * different page sizes.
 670  */
 671 uint64_t ram_pagesize_summary(void)
 672 {
 673     RAMBlock *block;
 674     uint64_t summary = 0;
 675
 676     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 677         summary |= block->page_size;
 678     }
 679
 680     return summary;
 681 }
 682
 683 static void migration_bitmap_sync(RAMState *rs)
 684 {
 685     RAMBlock *block;
 686     int64_t end_time;
 687     uint64_t bytes_xfer_now;
 688
 689     rs->bitmap_sync_count++;
 690
 691     if (!rs->bytes_xfer_prev) {
 692         rs->bytes_xfer_prev = ram_bytes_transferred();
 693     }
 694
 695     if (!rs->time_last_bitmap_sync) {
 696         rs->time_last_bitmap_sync = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 697     }
 698
 699     trace_migration_bitmap_sync_start();
 700     memory_global_dirty_log_sync();
 701
 702     qemu_mutex_lock(&rs->bitmap_mutex);
 703     rcu_read_lock();
 704     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
 705         migration_bitmap_sync_range(rs, block, 0, block->used_length);
 706     }
 707     rcu_read_unlock();
 708     qemu_mutex_unlock(&rs->bitmap_mutex);
 709
 710     trace_migration_bitmap_sync_end(rs->num_dirty_pages_period);
 711
 712     end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
 713
 714     /* more than 1 second = 1000 millisecons */
 715     if (end_time > rs->time_last_bitmap_sync + 1000) {
 716         if (migrate_auto_converge()) {
 717             /* The following detection logic can be refined later. For now:
 718                Check to see if the dirtied bytes is 50% more than the approx.
 719                amount of bytes that just got transferred since the last time we
 720                were in this routine. If that happens twice, start or increase
 721                throttling */
 722             bytes_xfer_now = ram_bytes_transferred();
 723
 724             if (rs->dirty_pages_rate &&
 725                (rs->num_dirty_pages_period * TARGET_PAGE_SIZE >
 726                    (bytes_xfer_now - rs->bytes_xfer_prev) / 2) &&
 727                (rs->dirty_rate_high_cnt++ >= 2)) {
 728                     trace_migration_throttle();
 729                     rs->dirty_rate_high_cnt = 0;
 730                     mig_throttle_guest_down();
 731              }
 732              rs->bytes_xfer_prev = bytes_xfer_now;
 733         }
 734
 735         if (migrate_use_xbzrle()) {
 736             if (rs->iterations_prev != rs->iterations) {
 737                 rs->xbzrle_cache_miss_rate =
 738                    (double)(rs->xbzrle_cache_miss -
 739                             rs->xbzrle_cache_miss_prev) /
 740                    (rs->iterations - rs->iterations_prev);
 741             }
 742             rs->iterations_prev = rs->iterations;
 743             rs->xbzrle_cache_miss_prev = rs->xbzrle_cache_miss;
 744         }
 745         rs->dirty_pages_rate = rs->num_dirty_pages_period * 1000
 746             / (end_time - rs->time_last_bitmap_sync);
 747         rs->time_last_bitmap_sync = end_time;
 748         rs->num_dirty_pages_period = 0;
 749     }
 750     if (migrate_use_events()) {
 751         qapi_event_send_migration_pass(rs->bitmap_sync_count, NULL);
 752     }
 753 }
 754
 755 /**
 756  * save_zero_page: send the zero page to the stream
 757  *
 758  * Returns the number of pages written.
 759  *
 760  * @rs: current RAM state
 761  * @block: block that contains the page we want to send
 762  * @offset: offset inside the block for the page
 763  * @p: pointer to the page
 764  */
 765 static int save_zero_page(RAMState *rs, RAMBlock *block, ram_addr_t offset,
 766                           uint8_t *p)
 767 {
 768     int pages = -1;
 769
 770     if (is_zero_range(p, TARGET_PAGE_SIZE)) {
 771         rs->zero_pages++;
 772         rs->bytes_transferred +=
 773             save_page_header(rs, block, offset | RAM_SAVE_FLAG_COMPRESS);
 774         qemu_put_byte(rs->f, 0);
 775         rs->bytes_transferred += 1;
 776         pages = 1;
 777     }
 778
 779     return pages;
 780 }
 781
 782 static void ram_release_pages(const char *rbname, uint64_t offset, int pages)
 783 {
 784     if (!migrate_release_ram() || !migration_in_postcopy()) {
 785         return;
 786     }
 787
 788     ram_discard_range(rbname, offset, pages << TARGET_PAGE_BITS);
 789 }
 790
 791 /**
 792  * ram_save_page: send the given page to the stream
 793  *
 794  * Returns the number of pages written.
 795  *          < 0 - error
 796  *          >=0 - Number of pages written - this might legally be 0
 797  *                if xbzrle noticed the page was the same.
 798  *
 799  * @rs: current RAM state
 800  * @block: block that contains the page we want to send
 801  * @offset: offset inside the block for the page
 802  * @last_stage: if we are at the completion stage
 803  */
 804 static int ram_save_page(RAMState *rs, PageSearchStatus *pss, bool last_stage)
 805 {
 806     int pages = -1;
 807     uint64_t bytes_xmit;
 808     ram_addr_t current_addr;
 809     uint8_t *p;
 810     int ret;
 811     bool send_async = true;
 812     RAMBlock *block = pss->block;
 813     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
 814
 815     p = block->host + offset;
 816
 817     /* In doubt sent page as normal */
 818     bytes_xmit = 0;
 819     ret = ram_control_save_page(rs->f, block->offset,
 820                            offset, TARGET_PAGE_SIZE, &bytes_xmit);
 821     if (bytes_xmit) {
 822         rs->bytes_transferred += bytes_xmit;
 823         pages = 1;
 824     }
 825
 826     XBZRLE_cache_lock();
 827
 828     current_addr = block->offset + offset;
 829
 830     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 831         if (ret != RAM_SAVE_CONTROL_DELAYED) {
 832             if (bytes_xmit > 0) {
 833                 rs->norm_pages++;
 834             } else if (bytes_xmit == 0) {
 835                 rs->zero_pages++;
 836             }
 837         }
 838     } else {
 839         pages = save_zero_page(rs, block, offset, p);
 840         if (pages > 0) {
 841             /* Must let xbzrle know, otherwise a previous (now 0'd) cached
 842              * page would be stale
 843              */
 844             xbzrle_cache_zero_page(rs, current_addr);
 845             ram_release_pages(block->idstr, offset, pages);
 846         } else if (!rs->ram_bulk_stage &&
 847                    !migration_in_postcopy() && migrate_use_xbzrle()) {
 848             pages = save_xbzrle_page(rs, &p, current_addr, block,
 849                                      offset, last_stage);
 850             if (!last_stage) {
 851                 /* Can't send this cached data async, since the cache page
 852                  * might get updated before it gets to the wire
 853                  */
 854                 send_async = false;
 855             }
 856         }
 857     }
 858
 859     /* XBZRLE overflow or normal page */
 860     if (pages == -1) {
 861         rs->bytes_transferred += save_page_header(rs, block,
 862                                                   offset | RAM_SAVE_FLAG_PAGE);
 863         if (send_async) {
 864             qemu_put_buffer_async(rs->f, p, TARGET_PAGE_SIZE,
 865                                   migrate_release_ram() &
 866                                   migration_in_postcopy());
 867         } else {
 868             qemu_put_buffer(rs->f, p, TARGET_PAGE_SIZE);
 869         }
 870         rs->bytes_transferred += TARGET_PAGE_SIZE;
 871         pages = 1;
 872         rs->norm_pages++;
 873     }
 874
 875     XBZRLE_cache_unlock();
 876
 877     return pages;
 878 }
 879
 880 static int do_compress_ram_page(QEMUFile *f, RAMBlock *block,
 881                                 ram_addr_t offset)
 882 {
 883     RAMState *rs = &ram_state;
 884     int bytes_sent, blen;
 885     uint8_t *p = block->host + (offset & TARGET_PAGE_MASK);
 886
 887     bytes_sent = save_page_header(rs, block, offset |
 888                                   RAM_SAVE_FLAG_COMPRESS_PAGE);
 889     blen = qemu_put_compression_data(f, p, TARGET_PAGE_SIZE,
 890                                      migrate_compress_level());
 891     if (blen < 0) {
 892         bytes_sent = 0;
 893         qemu_file_set_error(migrate_get_current()->to_dst_file, blen);
 894         error_report("compressed data failed!");
 895     } else {
 896         bytes_sent += blen;
 897         ram_release_pages(block->idstr, offset & TARGET_PAGE_MASK, 1);
 898     }
 899
 900     return bytes_sent;
 901 }
 902
 903 static void flush_compressed_data(RAMState *rs)
 904 {
 905     int idx, len, thread_count;
 906
 907     if (!migrate_use_compression()) {
 908         return;
 909     }
 910     thread_count = migrate_compress_threads();
 911
 912     qemu_mutex_lock(&comp_done_lock);
 913     for (idx = 0; idx < thread_count; idx++) {
 914         while (!comp_param[idx].done) {
 915             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
 916         }
 917     }
 918     qemu_mutex_unlock(&comp_done_lock);
 919
 920     for (idx = 0; idx < thread_count; idx++) {
 921         qemu_mutex_lock(&comp_param[idx].mutex);
 922         if (!comp_param[idx].quit) {
 923             len = qemu_put_qemu_file(rs->f, comp_param[idx].file);
 924             rs->bytes_transferred += len;
 925         }
 926         qemu_mutex_unlock(&comp_param[idx].mutex);
 927     }
 928 }
 929
 930 static inline void set_compress_params(CompressParam *param, RAMBlock *block,
 931                                        ram_addr_t offset)
 932 {
 933     param->block = block;
 934     param->offset = offset;
 935 }
 936
 937 static int compress_page_with_multi_thread(RAMState *rs, RAMBlock *block,
 938                                            ram_addr_t offset)
 939 {
 940     int idx, thread_count, bytes_xmit = -1, pages = -1;
 941
 942     thread_count = migrate_compress_threads();
 943     qemu_mutex_lock(&comp_done_lock);
 944     while (true) {
 945         for (idx = 0; idx < thread_count; idx++) {
 946             if (comp_param[idx].done) {
 947                 comp_param[idx].done = false;
 948                 bytes_xmit = qemu_put_qemu_file(rs->f, comp_param[idx].file);
 949                 qemu_mutex_lock(&comp_param[idx].mutex);
 950                 set_compress_params(&comp_param[idx], block, offset);
 951                 qemu_cond_signal(&comp_param[idx].cond);
 952                 qemu_mutex_unlock(&comp_param[idx].mutex);
 953                 pages = 1;
 954                 rs->norm_pages++;
 955                 rs->bytes_transferred += bytes_xmit;
 956                 break;
 957             }
 958         }
 959         if (pages > 0) {
 960             break;
 961         } else {
 962             qemu_cond_wait(&comp_done_cond, &comp_done_lock);
 963         }
 964     }
 965     qemu_mutex_unlock(&comp_done_lock);
 966
 967     return pages;
 968 }
 969
 970 /**
 971  * ram_save_compressed_page: compress the given page and send it to the stream
 972  *
 973  * Returns the number of pages written.
 974  *
 975  * @rs: current RAM state
 976  * @block: block that contains the page we want to send
 977  * @offset: offset inside the block for the page
 978  * @last_stage: if we are at the completion stage
 979  */
 980 static int ram_save_compressed_page(RAMState *rs, PageSearchStatus *pss,
 981                                     bool last_stage)
 982 {
 983     int pages = -1;
 984     uint64_t bytes_xmit = 0;
 985     uint8_t *p;
 986     int ret, blen;
 987     RAMBlock *block = pss->block;
 988     ram_addr_t offset = pss->page << TARGET_PAGE_BITS;
 989
 990     p = block->host + offset;
 991
 992     ret = ram_control_save_page(rs->f, block->offset,
 993                                 offset, TARGET_PAGE_SIZE, &bytes_xmit);
 994     if (bytes_xmit) {
 995         rs->bytes_transferred += bytes_xmit;
 996         pages = 1;
 997     }
 998     if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
 999         if (ret != RAM_SAVE_CONTROL_DELAYED) {
1000             if (bytes_xmit > 0) {
1001                 rs->norm_pages++;
1002             } else if (bytes_xmit == 0) {
1003                 rs->zero_pages++;
1004             }
1005         }
1006     } else {
1007         /* When starting the process of a new block, the first page of
1008          * the block should be sent out before other pages in the same
1009          * block, and all the pages in last block should have been sent
1010          * out, keeping this order is important, because the 'cont' flag
1011          * is used to avoid resending the block name.
1012          */
1013         if (block != rs->last_sent_block) {
1014             flush_compressed_data(rs);
1015             pages = save_zero_page(rs, block, offset, p);
1016             if (pages == -1) {
1017                 /* Make sure the first page is sent out before other pages */
1018                 bytes_xmit = save_page_header(rs, block, offset |
1019                                               RAM_SAVE_FLAG_COMPRESS_PAGE);
1020                 blen = qemu_put_compression_data(rs->f, p, TARGET_PAGE_SIZE,
1021                                                  migrate_compress_level());
1022                 if (blen > 0) {
1023                     rs->bytes_transferred += bytes_xmit + blen;
1024                     rs->norm_pages++;
1025                     pages = 1;
1026                 } else {
1027                     qemu_file_set_error(rs->f, blen);
1028                     error_report("compressed data failed!");
1029                 }
1030             }
1031             if (pages > 0) {
1032                 ram_release_pages(block->idstr, offset, pages);
1033             }
1034         } else {
1035             pages = save_zero_page(rs, block, offset, p);
1036             if (pages == -1) {
1037                 pages = compress_page_with_multi_thread(rs, block, offset);
1038             } else {
1039                 ram_release_pages(block->idstr, offset, pages);
1040             }
1041         }
1042     }
1043
1044     return pages;
1045 }
1046
1047 /**
1048  * find_dirty_block: find the next dirty page and update any state
1049  * associated with the search process.
1050  *
1051  * Returns if a page is found
1052  *
1053  * @rs: current RAM state
1054  * @pss: data about the state of the current dirty page scan
1055  * @again: set to false if the search has scanned the whole of RAM
1056  * @page_abs: pointer into where to store the dirty page
1057  */
1058 static bool find_dirty_block(RAMState *rs, PageSearchStatus *pss,
1059                              bool *again, unsigned long *page_abs)
1060 {
1061     pss->page = migration_bitmap_find_dirty(rs, pss->block, pss->page,
1062                                             page_abs);
1063     if (pss->complete_round && pss->block == rs->last_seen_block &&
1064         pss->page >= rs->last_page) {
1065         /*
1066          * We've been once around the RAM and haven't found anything.
1067          * Give up.
1068          */
1069         *again = false;
1070         return false;
1071     }
1072     if ((pss->page << TARGET_PAGE_BITS) >= pss->block->used_length) {
1073         /* Didn't find anything in this RAM Block */
1074         pss->page = 0;
1075         pss->block = QLIST_NEXT_RCU(pss->block, next);
1076         if (!pss->block) {
1077             /* Hit the end of the list */
1078             pss->block = QLIST_FIRST_RCU(&ram_list.blocks);
1079             /* Flag that we've looped */
1080             pss->complete_round = true;
1081             rs->ram_bulk_stage = false;
1082             if (migrate_use_xbzrle()) {
1083                 /* If xbzrle is on, stop using the data compression at this
1084                  * point. In theory, xbzrle can do better than compression.
1085                  */
1086                 flush_compressed_data(rs);
1087             }
1088         }
1089         /* Didn't find anything this time, but try again on the new block */
1090         *again = true;
1091         return false;
1092     } else {
1093         /* Can go around again, but... */
1094         *again = true;
1095         /* We've found something so probably don't need to */
1096         return true;
1097     }
1098 }
1099
1100 /**
1101  * unqueue_page: gets a page of the queue
1102  *
1103  * Helper for 'get_queued_page' - gets a page off the queue
1104  *
1105  * Returns the block of the page (or NULL if none available)
1106  *
1107  * @rs: current RAM state
1108  * @offset: used to return the offset within the RAMBlock
1109  * @page_abs: pointer into where to store the dirty page
1110  */
1111 static RAMBlock *unqueue_page(RAMState *rs, ram_addr_t *offset,
1112                               unsigned long *page_abs)
1113 {
1114     RAMBlock *block = NULL;
1115
1116     qemu_mutex_lock(&rs->src_page_req_mutex);
1117     if (!QSIMPLEQ_EMPTY(&rs->src_page_requests)) {
1118         struct RAMSrcPageRequest *entry =
1119                                 QSIMPLEQ_FIRST(&rs->src_page_requests);
1120         block = entry->rb;
1121         *offset = entry->offset;
1122         *page_abs = (entry->offset + entry->rb->offset) >> TARGET_PAGE_BITS;
1123
1124         if (entry->len > TARGET_PAGE_SIZE) {
1125             entry->len -= TARGET_PAGE_SIZE;
1126             entry->offset += TARGET_PAGE_SIZE;
1127         } else {
1128             memory_region_unref(block->mr);
1129             QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1130             g_free(entry);
1131         }
1132     }
1133     qemu_mutex_unlock(&rs->src_page_req_mutex);
1134
1135     return block;
1136 }
1137
1138 /**
1139  * get_queued_page: unqueue a page from the postocpy requests
1140  *
1141  * Skips pages that are already sent (!dirty)
1142  *
1143  * Returns if a queued page is found
1144  *
1145  * @rs: current RAM state
1146  * @pss: data about the state of the current dirty page scan
1147  * @page_abs: pointer into where to store the dirty page
1148  */
1149 static bool get_queued_page(RAMState *rs, PageSearchStatus *pss,
1150                             unsigned long *page_abs)
1151 {
1152     RAMBlock  *block;
1153     ram_addr_t offset;
1154     bool dirty;
1155
1156     do {
1157         block = unqueue_page(rs, &offset, page_abs);
1158         /*
1159          * We're sending this page, and since it's postcopy nothing else
1160          * will dirty it, and we must make sure it doesn't get sent again
1161          * even if this queue request was received after the background
1162          * search already sent it.
1163          */
1164         if (block) {
1165             unsigned long *bitmap;
1166             bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1167             dirty = test_bit(*page_abs, bitmap);
1168             if (!dirty) {
1169                 trace_get_queued_page_not_dirty(block->idstr, (uint64_t)offset,
1170                     *page_abs,
1171                     test_bit(*page_abs,
1172                              atomic_rcu_read(&rs->ram_bitmap)->unsentmap));
1173             } else {
1174                 trace_get_queued_page(block->idstr, (uint64_t)offset,
1175                                      *page_abs);
1176             }
1177         }
1178
1179     } while (block && !dirty);
1180
1181     if (block) {
1182         /*
1183          * As soon as we start servicing pages out of order, then we have
1184          * to kill the bulk stage, since the bulk stage assumes
1185          * in (migration_bitmap_find_and_reset_dirty) that every page is
1186          * dirty, that's no longer true.
1187          */
1188         rs->ram_bulk_stage = false;
1189
1190         /*
1191          * We want the background search to continue from the queued page
1192          * since the guest is likely to want other pages near to the page
1193          * it just requested.
1194          */
1195         pss->block = block;
1196         pss->page = offset >> TARGET_PAGE_BITS;
1197     }
1198
1199     return !!block;
1200 }
1201
1202 /**
1203  * migration_page_queue_free: drop any remaining pages in the ram
1204  * request queue
1205  *
1206  * It should be empty at the end anyway, but in error cases there may
1207  * be some left.  in case that there is any page left, we drop it.
1208  *
1209  */
1210 void migration_page_queue_free(void)
1211 {
1212     struct RAMSrcPageRequest *mspr, *next_mspr;
1213     RAMState *rs = &ram_state;
1214     /* This queue generally should be empty - but in the case of a failed
1215      * migration might have some droppings in.
1216      */
1217     rcu_read_lock();
1218     QSIMPLEQ_FOREACH_SAFE(mspr, &rs->src_page_requests, next_req, next_mspr) {
1219         memory_region_unref(mspr->rb->mr);
1220         QSIMPLEQ_REMOVE_HEAD(&rs->src_page_requests, next_req);
1221         g_free(mspr);
1222     }
1223     rcu_read_unlock();
1224 }
1225
1226 /**
1227  * ram_save_queue_pages: queue the page for transmission
1228  *
1229  * A request from postcopy destination for example.
1230  *
1231  * Returns zero on success or negative on error
1232  *
1233  * @rbname: Name of the RAMBLock of the request. NULL means the
1234  *          same that last one.
1235  * @start: starting address from the start of the RAMBlock
1236  * @len: length (in bytes) to send
1237  */
1238 int ram_save_queue_pages(const char *rbname, ram_addr_t start, ram_addr_t len)
1239 {
1240     RAMBlock *ramblock;
1241     RAMState *rs = &ram_state;
1242
1243     rs->postcopy_requests++;
1244     rcu_read_lock();
1245     if (!rbname) {
1246         /* Reuse last RAMBlock */
1247         ramblock = rs->last_req_rb;
1248
1249         if (!ramblock) {
1250             /*
1251              * Shouldn't happen, we can't reuse the last RAMBlock if
1252              * it's the 1st request.
1253              */
1254             error_report("ram_save_queue_pages no previous block");
1255             goto err;
1256         }
1257     } else {
1258         ramblock = qemu_ram_block_by_name(rbname);
1259
1260         if (!ramblock) {
1261             /* We shouldn't be asked for a non-existent RAMBlock */
1262             error_report("ram_save_queue_pages no block '%s'", rbname);
1263             goto err;
1264         }
1265         rs->last_req_rb = ramblock;
1266     }
1267     trace_ram_save_queue_pages(ramblock->idstr, start, len);
1268     if (start+len > ramblock->used_length) {
1269         error_report("%s request overrun start=" RAM_ADDR_FMT " len="
1270                      RAM_ADDR_FMT " blocklen=" RAM_ADDR_FMT,
1271                      __func__, start, len, ramblock->used_length);
1272         goto err;
1273     }
1274
1275     struct RAMSrcPageRequest *new_entry =
1276         g_malloc0(sizeof(struct RAMSrcPageRequest));
1277     new_entry->rb = ramblock;
1278     new_entry->offset = start;
1279     new_entry->len = len;
1280
1281     memory_region_ref(ramblock->mr);
1282     qemu_mutex_lock(&rs->src_page_req_mutex);
1283     QSIMPLEQ_INSERT_TAIL(&rs->src_page_requests, new_entry, next_req);
1284     qemu_mutex_unlock(&rs->src_page_req_mutex);
1285     rcu_read_unlock();
1286
1287     return 0;
1288
1289 err:
1290     rcu_read_unlock();
1291     return -1;
1292 }
1293
1294 /**
1295  * ram_save_target_page: save one target page
1296  *
1297  * Returns the number of pages written
1298  *
1299  * @rs: current RAM state
1300  * @ms: current migration state
1301  * @pss: data about the page we want to send
1302  * @last_stage: if we are at the completion stage
1303  * @page_abs: page number of the dirty page
1304  */
1305 static int ram_save_target_page(RAMState *rs, PageSearchStatus *pss,
1306                                 bool last_stage, unsigned long page_abs)
1307 {
1308     int res = 0;
1309
1310     /* Check the pages is dirty and if it is send it */
1311     if (migration_bitmap_clear_dirty(rs, page_abs)) {
1312         unsigned long *unsentmap;
1313         /*
1314          * If xbzrle is on, stop using the data compression after first
1315          * round of migration even if compression is enabled. In theory,
1316          * xbzrle can do better than compression.
1317          */
1318
1319         if (migrate_use_compression()
1320             && (rs->ram_bulk_stage || !migrate_use_xbzrle())) {
1321             res = ram_save_compressed_page(rs, pss, last_stage);
1322         } else {
1323             res = ram_save_page(rs, pss, last_stage);
1324         }
1325
1326         if (res < 0) {
1327             return res;
1328         }
1329         unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1330         if (unsentmap) {
1331             clear_bit(page_abs, unsentmap);
1332         }
1333     }
1334
1335     return res;
1336 }
1337
1338 /**
1339  * ram_save_host_page: save a whole host page
1340  *
1341  * Starting at *offset send pages up to the end of the current host
1342  * page. It's valid for the initial offset to point into the middle of
1343  * a host page in which case the remainder of the hostpage is sent.
1344  * Only dirty target pages are sent. Note that the host page size may
1345  * be a huge page for this block.
1346  *
1347  * Returns the number of pages written or negative on error
1348  *
1349  * @rs: current RAM state
1350  * @ms: current migration state
1351  * @pss: data about the page we want to send
1352  * @last_stage: if we are at the completion stage
1353  * @page_abs: Page number of the dirty page
1354  */
1355 static int ram_save_host_page(RAMState *rs, PageSearchStatus *pss,
1356                               bool last_stage,
1357                               unsigned long page_abs)
1358 {
1359     int tmppages, pages = 0;
1360     size_t pagesize_bits =
1361         qemu_ram_pagesize(pss->block) >> TARGET_PAGE_BITS;
1362
1363     do {
1364         tmppages = ram_save_target_page(rs, pss, last_stage, page_abs);
1365         if (tmppages < 0) {
1366             return tmppages;
1367         }
1368
1369         pages += tmppages;
1370         pss->page++;
1371         page_abs++;
1372     } while (pss->page & (pagesize_bits - 1));
1373
1374     /* The offset we leave with is the last one we looked at */
1375     pss->page--;
1376     return pages;
1377 }
1378
1379 /**
1380  * ram_find_and_save_block: finds a dirty page and sends it to f
1381  *
1382  * Called within an RCU critical section.
1383  *
1384  * Returns the number of pages written where zero means no dirty pages
1385  *
1386  * @rs: current RAM state
1387  * @last_stage: if we are at the completion stage
1388  *
1389  * On systems where host-page-size > target-page-size it will send all the
1390  * pages in a host page that are dirty.
1391  */
1392
1393 static int ram_find_and_save_block(RAMState *rs, bool last_stage)
1394 {
1395     PageSearchStatus pss;
1396     int pages = 0;
1397     bool again, found;
1398     unsigned long page_abs; /* Page number of the dirty page */
1399
1400     /* No dirty page as there is zero RAM */
1401     if (!ram_bytes_total()) {
1402         return pages;
1403     }
1404
1405     pss.block = rs->last_seen_block;
1406     pss.page = rs->last_page;
1407     pss.complete_round = false;
1408
1409     if (!pss.block) {
1410         pss.block = QLIST_FIRST_RCU(&ram_list.blocks);
1411     }
1412
1413     do {
1414         again = true;
1415         found = get_queued_page(rs, &pss, &page_abs);
1416
1417         if (!found) {
1418             /* priority queue empty, so just search for something dirty */
1419             found = find_dirty_block(rs, &pss, &again, &page_abs);
1420         }
1421
1422         if (found) {
1423             pages = ram_save_host_page(rs, &pss, last_stage, page_abs);
1424         }
1425     } while (!pages && again);
1426
1427     rs->last_seen_block = pss.block;
1428     rs->last_page = pss.page;
1429
1430     return pages;
1431 }
1432
1433 void acct_update_position(QEMUFile *f, size_t size, bool zero)
1434 {
1435     uint64_t pages = size / TARGET_PAGE_SIZE;
1436     RAMState *rs = &ram_state;
1437
1438     if (zero) {
1439         rs->zero_pages += pages;
1440     } else {
1441         rs->norm_pages += pages;
1442         rs->bytes_transferred += size;
1443         qemu_update_position(f, size);
1444     }
1445 }
1446
1447 uint64_t ram_bytes_total(void)
1448 {
1449     RAMBlock *block;
1450     uint64_t total = 0;
1451
1452     rcu_read_lock();
1453     QLIST_FOREACH_RCU(block, &ram_list.blocks, next)
1454         total += block->used_length;
1455     rcu_read_unlock();
1456     return total;
1457 }
1458
1459 void free_xbzrle_decoded_buf(void)
1460 {
1461     g_free(xbzrle_decoded_buf);
1462     xbzrle_decoded_buf = NULL;
1463 }
1464
1465 static void migration_bitmap_free(struct RAMBitmap *bmap)
1466 {
1467     g_free(bmap->bmap);
1468     g_free(bmap->unsentmap);
1469     g_free(bmap);
1470 }
1471
1472 static void ram_migration_cleanup(void *opaque)
1473 {
1474     RAMState *rs = opaque;
1475
1476     /* caller have hold iothread lock or is in a bh, so there is
1477      * no writing race against this migration_bitmap
1478      */
1479     struct RAMBitmap *bitmap = rs->ram_bitmap;
1480     atomic_rcu_set(&rs->ram_bitmap, NULL);
1481     if (bitmap) {
1482         memory_global_dirty_log_stop();
1483         call_rcu(bitmap, migration_bitmap_free, rcu);
1484     }
1485
1486     XBZRLE_cache_lock();
1487     if (XBZRLE.cache) {
1488         cache_fini(XBZRLE.cache);
1489         g_free(XBZRLE.encoded_buf);
1490         g_free(XBZRLE.current_buf);
1491         g_free(ZERO_TARGET_PAGE);
1492         XBZRLE.cache = NULL;
1493         XBZRLE.encoded_buf = NULL;
1494         XBZRLE.current_buf = NULL;
1495     }
1496     XBZRLE_cache_unlock();
1497 }
1498
1499 static void ram_state_reset(RAMState *rs)
1500 {
1501     rs->last_seen_block = NULL;
1502     rs->last_sent_block = NULL;
1503     rs->last_page = 0;
1504     rs->last_version = ram_list.version;
1505     rs->ram_bulk_stage = true;
1506 }
1507
1508 #define MAX_WAIT 50 /* ms, half buffered_file limit */
1509
1510 void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
1511 {
1512     RAMState *rs = &ram_state;
1513
1514     /* called in qemu main thread, so there is
1515      * no writing race against this migration_bitmap
1516      */
1517     if (rs->ram_bitmap) {
1518         struct RAMBitmap *old_bitmap = rs->ram_bitmap, *bitmap;
1519         bitmap = g_new(struct RAMBitmap, 1);
1520         bitmap->bmap = bitmap_new(new);
1521
1522         /* prevent migration_bitmap content from being set bit
1523          * by migration_bitmap_sync_range() at the same time.
1524          * it is safe to migration if migration_bitmap is cleared bit
1525          * at the same time.
1526          */
1527         qemu_mutex_lock(&rs->bitmap_mutex);
1528         bitmap_copy(bitmap->bmap, old_bitmap->bmap, old);
1529         bitmap_set(bitmap->bmap, old, new - old);
1530
1531         /* We don't have a way to safely extend the sentmap
1532          * with RCU; so mark it as missing, entry to postcopy
1533          * will fail.
1534          */
1535         bitmap->unsentmap = NULL;
1536
1537         atomic_rcu_set(&rs->ram_bitmap, bitmap);
1538         qemu_mutex_unlock(&rs->bitmap_mutex);
1539         rs->migration_dirty_pages += new - old;
1540         call_rcu(old_bitmap, migration_bitmap_free, rcu);
1541     }
1542 }
1543
1544 /*
1545  * 'expected' is the value you expect the bitmap mostly to be full
1546  * of; it won't bother printing lines that are all this value.
1547  * If 'todump' is null the migration bitmap is dumped.
1548  */
1549 void ram_debug_dump_bitmap(unsigned long *todump, bool expected)
1550 {
1551     int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
1552     RAMState *rs = &ram_state;
1553     int64_t cur;
1554     int64_t linelen = 128;
1555     char linebuf[129];
1556
1557     if (!todump) {
1558         todump = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1559     }
1560
1561     for (cur = 0; cur < ram_pages; cur += linelen) {
1562         int64_t curb;
1563         bool found = false;
1564         /*
1565          * Last line; catch the case where the line length
1566          * is longer than remaining ram
1567          */
1568         if (cur + linelen > ram_pages) {
1569             linelen = ram_pages - cur;
1570         }
1571         for (curb = 0; curb < linelen; curb++) {
1572             bool thisbit = test_bit(cur + curb, todump);
1573             linebuf[curb] = thisbit ? '1' : '.';
1574             found = found || (thisbit != expected);
1575         }
1576         if (found) {
1577             linebuf[curb] = '\0';
1578             fprintf(stderr,  "0x%08" PRIx64 " : %s\n", cur, linebuf);
1579         }
1580     }
1581 }
1582
1583 /* **** functions for postcopy ***** */
1584
1585 void ram_postcopy_migrated_memory_release(MigrationState *ms)
1586 {
1587     RAMState *rs = &ram_state;
1588     struct RAMBlock *block;
1589     unsigned long *bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1590
1591     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1592         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1593         unsigned long range = first + (block->used_length >> TARGET_PAGE_BITS);
1594         unsigned long run_start = find_next_zero_bit(bitmap, range, first);
1595
1596         while (run_start < range) {
1597             unsigned long run_end = find_next_bit(bitmap, range, run_start + 1);
1598             ram_discard_range(block->idstr, run_start << TARGET_PAGE_BITS,
1599                               (run_end - run_start) << TARGET_PAGE_BITS);
1600             run_start = find_next_zero_bit(bitmap, range, run_end + 1);
1601         }
1602     }
1603 }
1604
1605 /**
1606  * postcopy_send_discard_bm_ram: discard a RAMBlock
1607  *
1608  * Returns zero on success
1609  *
1610  * Callback from postcopy_each_ram_send_discard for each RAMBlock
1611  * Note: At this point the 'unsentmap' is the processed bitmap combined
1612  *       with the dirtymap; so a '1' means it's either dirty or unsent.
1613  *
1614  * @ms: current migration state
1615  * @pds: state for postcopy
1616  * @start: RAMBlock starting page
1617  * @length: RAMBlock size
1618  */
1619 static int postcopy_send_discard_bm_ram(MigrationState *ms,
1620                                         PostcopyDiscardState *pds,
1621                                         unsigned long start,
1622                                         unsigned long length)
1623 {
1624     RAMState *rs = &ram_state;
1625     unsigned long end = start + length; /* one after the end */
1626     unsigned long current;
1627     unsigned long *unsentmap;
1628
1629     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1630     for (current = start; current < end; ) {
1631         unsigned long one = find_next_bit(unsentmap, end, current);
1632
1633         if (one <= end) {
1634             unsigned long zero = find_next_zero_bit(unsentmap, end, one + 1);
1635             unsigned long discard_length;
1636
1637             if (zero >= end) {
1638                 discard_length = end - one;
1639             } else {
1640                 discard_length = zero - one;
1641             }
1642             if (discard_length) {
1643                 postcopy_discard_send_range(ms, pds, one, discard_length);
1644             }
1645             current = one + discard_length;
1646         } else {
1647             current = one;
1648         }
1649     }
1650
1651     return 0;
1652 }
1653
1654 /**
1655  * postcopy_each_ram_send_discard: discard all RAMBlocks
1656  *
1657  * Returns 0 for success or negative for error
1658  *
1659  * Utility for the outgoing postcopy code.
1660  *   Calls postcopy_send_discard_bm_ram for each RAMBlock
1661  *   passing it bitmap indexes and name.
1662  * (qemu_ram_foreach_block ends up passing unscaled lengths
1663  *  which would mean postcopy code would have to deal with target page)
1664  *
1665  * @ms: current migration state
1666  */
1667 static int postcopy_each_ram_send_discard(MigrationState *ms)
1668 {
1669     struct RAMBlock *block;
1670     int ret;
1671
1672     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1673         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1674         PostcopyDiscardState *pds = postcopy_discard_send_init(ms,
1675                                                                first,
1676                                                                block->idstr);
1677
1678         /*
1679          * Postcopy sends chunks of bitmap over the wire, but it
1680          * just needs indexes at this point, avoids it having
1681          * target page specific code.
1682          */
1683         ret = postcopy_send_discard_bm_ram(ms, pds, first,
1684                                     block->used_length >> TARGET_PAGE_BITS);
1685         postcopy_discard_send_finish(ms, pds);
1686         if (ret) {
1687             return ret;
1688         }
1689     }
1690
1691     return 0;
1692 }
1693
/**
 * postcopy_chunk_hostpages_pass: canonicalize bitmap in hostpages
 *
 * Helper for postcopy_chunk_hostpages; it's called twice to
 * canonicalize the two bitmaps, that are similar, but one is
 * inverted.
 *
 * Postcopy requires that all target pages in a hostpage are dirty or
 * clean, not a mix.  This function canonicalizes the bitmaps.
 *
 * Whenever a run of sent pages (unsent pass) or of dirty pages (dirty
 * pass) starts or ends in the middle of a host page, every target page
 * of that host page is marked both unsent and dirty, and the
 * destination may be told to discard the host page.
 *
 * @ms: current migration state
 * @unsent_pass: if true we need to canonicalize partially unsent host pages
 *               otherwise we need to canonicalize partially dirty host pages
 * @block: block that contains the page we want to canonicalize
 * @pds: state for postcopy
 */
static void postcopy_chunk_hostpages_pass(MigrationState *ms, bool unsent_pass,
                                          RAMBlock *block,
                                          PostcopyDiscardState *pds)
{
    RAMState *rs = &ram_state;
    unsigned long *bitmap;
    unsigned long *unsentmap;
    /* Number of target pages that make up one host page of this block */
    unsigned int host_ratio = block->page_size / TARGET_PAGE_SIZE;
    /* Bitmap index of the first target page of the block */
    unsigned long first = block->offset >> TARGET_PAGE_BITS;
    /* Number of target pages in the used part of the block */
    unsigned long len = block->used_length >> TARGET_PAGE_BITS;
    /*
     * Bitmap index of the last target page of the block.
     * NOTE(review): 'len - 1' would wrap if used_length were smaller
     * than a target page - presumably that cannot happen; confirm.
     */
    unsigned long last = first + (len - 1);
    unsigned long run_start;

    if (block->page_size == TARGET_PAGE_SIZE) {
        /* Easy case - TPS==HPS for a non-huge page RAMBlock */
        return;
    }

    bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
    unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;

    if (unsent_pass) {
        /* Find a sent page */
        run_start = find_next_zero_bit(unsentmap, last + 1, first);
    } else {
        /* Find a dirty page */
        run_start = find_next_bit(bitmap, last + 1, first);
    }

    /* Each iteration handles one run that begins at run_start */
    while (run_start <= last) {
        bool do_fixup = false;
        unsigned long fixup_start_addr;
        unsigned long host_offset;

        /*
         * If the start of this run of pages is in the middle of a host
         * page, then we need to fixup this host page.
         */
        host_offset = run_start % host_ratio;
        if (host_offset) {
            do_fixup = true;
            /* Round down to the first target page of the host page */
            run_start -= host_offset;
            fixup_start_addr = run_start;
            /* For the next pass */
            run_start = run_start + host_ratio;
        } else {
            /* Find the end of this run */
            unsigned long run_end;
            if (unsent_pass) {
                run_end = find_next_bit(unsentmap, last + 1, run_start + 1);
            } else {
                run_end = find_next_zero_bit(bitmap, last + 1, run_start + 1);
            }
            /*
             * If the end isn't at the start of a host page, then the
             * run doesn't finish at the end of a host page
             * and we need to discard.
             */
            host_offset = run_end % host_ratio;
            if (host_offset) {
                do_fixup = true;
                fixup_start_addr = run_end - host_offset;
                /*
                 * This host page has gone, the next loop iteration starts
                 * from after the fixup
                 */
                run_start = fixup_start_addr + host_ratio;
            } else {
                /*
                 * No discards on this iteration, next loop starts from
                 * next sent/dirty page
                 */
                run_start = run_end + 1;
            }
        }

        if (do_fixup) {
            unsigned long page;

            /* Tell the destination to discard this page */
            if (unsent_pass || !test_bit(fixup_start_addr, unsentmap)) {
                /* For the unsent_pass we:
                 *     discard partially sent pages
                 * For the !unsent_pass (dirty) we:
                 *     discard partially dirty pages that were sent
                 *     (any partially sent pages were already discarded
                 *     by the previous unsent_pass)
                 */
                postcopy_discard_send_range(ms, pds, fixup_start_addr,
                                            host_ratio);
            }

            /* Clean up the bitmap */
            for (page = fixup_start_addr;
                 page < fixup_start_addr + host_ratio; page++) {
                /* All pages in this host page are now not sent */
                set_bit(page, unsentmap);

                /*
                 * Remark them as dirty, updating the count for any pages
                 * that weren't previously dirty.
                 */
                rs->migration_dirty_pages += !test_and_set_bit(page, bitmap);
            }
        }

        /* Skip ahead from the updated run_start to where the next run begins */
        if (unsent_pass) {
            /* Find the next sent page for the next iteration */
            run_start = find_next_zero_bit(unsentmap, last + 1,
                                           run_start);
        } else {
            /* Find the next dirty page for the next iteration */
            run_start = find_next_bit(bitmap, last + 1, run_start);
        }
    }
}
1826
1827 /**
1828  * postcopy_chuck_hostpages: discrad any partially sent host page
1829  *
1830  * Utility for the outgoing postcopy code.
1831  *
1832  * Discard any partially sent host-page size chunks, mark any partially
1833  * dirty host-page size chunks as all dirty.  In this case the host-page
1834  * is the host-page for the particular RAMBlock, i.e. it might be a huge page
1835  *
1836  * Returns zero on success
1837  *
1838  * @ms: current migration state
1839  */
1840 static int postcopy_chunk_hostpages(MigrationState *ms)
1841 {
1842     RAMState *rs = &ram_state;
1843     struct RAMBlock *block;
1844
1845     /* Easiest way to make sure we don't resume in the middle of a host-page */
1846     rs->last_seen_block = NULL;
1847     rs->last_sent_block = NULL;
1848     rs->last_page = 0;
1849
1850     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1851         unsigned long first = block->offset >> TARGET_PAGE_BITS;
1852
1853         PostcopyDiscardState *pds =
1854                          postcopy_discard_send_init(ms, first, block->idstr);
1855
1856         /* First pass: Discard all partially sent host pages */
1857         postcopy_chunk_hostpages_pass(ms, true, block, pds);
1858         /*
1859          * Second pass: Ensure that all partially dirty host pages are made
1860          * fully dirty.
1861          */
1862         postcopy_chunk_hostpages_pass(ms, false, block, pds);
1863
1864         postcopy_discard_send_finish(ms, pds);
1865     } /* ram_list loop */
1866
1867     return 0;
1868 }
1869
1870 /**
1871  * ram_postcopy_send_discard_bitmap: transmit the discard bitmap
1872  *
1873  * Returns zero on success
1874  *
1875  * Transmit the set of pages to be discarded after precopy to the target
1876  * these are pages that:
1877  *     a) Have been previously transmitted but are now dirty again
1878  *     b) Pages that have never been transmitted, this ensures that
1879  *        any pages on the destination that have been mapped by background
1880  *        tasks get discarded (transparent huge pages is the specific concern)
1881  * Hopefully this is pretty sparse
1882  *
1883  * @ms: current migration state
1884  */
1885 int ram_postcopy_send_discard_bitmap(MigrationState *ms)
1886 {
1887     RAMState *rs = &ram_state;
1888     int ret;
1889     unsigned long *bitmap, *unsentmap;
1890
1891     rcu_read_lock();
1892
1893     /* This should be our last sync, the src is now paused */
1894     migration_bitmap_sync(rs);
1895
1896     unsentmap = atomic_rcu_read(&rs->ram_bitmap)->unsentmap;
1897     if (!unsentmap) {
1898         /* We don't have a safe way to resize the sentmap, so
1899          * if the bitmap was resized it will be NULL at this
1900          * point.
1901          */
1902         error_report("migration ram resized during precopy phase");
1903         rcu_read_unlock();
1904         return -EINVAL;
1905     }
1906
1907     /* Deal with TPS != HPS and huge pages */
1908     ret = postcopy_chunk_hostpages(ms);
1909     if (ret) {
1910         rcu_read_unlock();
1911         return ret;
1912     }
1913
1914     /*
1915      * Update the unsentmap to be unsentmap = unsentmap | dirty
1916      */
1917     bitmap = atomic_rcu_read(&rs->ram_bitmap)->bmap;
1918     bitmap_or(unsentmap, unsentmap, bitmap,
1919                last_ram_offset() >> TARGET_PAGE_BITS);
1920
1921
1922     trace_ram_postcopy_send_discard_bitmap();
1923 #ifdef DEBUG_POSTCOPY
1924     ram_debug_dump_bitmap(unsentmap, true);
1925 #endif
1926
1927     ret = postcopy_each_ram_send_discard(ms);
1928     rcu_read_unlock();
1929
1930     return ret;
1931 }
1932
1933 /**
1934  * ram_discard_range: discard dirtied pages at the beginning of postcopy
1935  *
1936  * Returns zero on success
1937  *
1938  * @rbname: name of the RAMBlock of the request. NULL means the
1939  *          same that last one.
1940  * @start: RAMBlock starting page
1941  * @length: RAMBlock size
1942  */
1943 int ram_discard_range(const char *rbname, uint64_t start, size_t length)
1944 {
1945     int ret = -1;
1946
1947     trace_ram_discard_range(rbname, start, length);
1948
1949     rcu_read_lock();
1950     RAMBlock *rb = qemu_ram_block_by_name(rbname);
1951
1952     if (!rb) {
1953         error_report("ram_discard_range: Failed to find block '%s'", rbname);
1954         goto err;
1955     }
1956
1957     ret = ram_block_discard_range(rb, start, length);
1958
1959 err:
1960     rcu_read_unlock();
1961
1962     return ret;
1963 }
1964
1965 static int ram_state_init(RAMState *rs)
1966 {
1967     int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
1968
1969     memset(rs, 0, sizeof(*rs));
1970     qemu_mutex_init(&rs->bitmap_mutex);
1971     qemu_mutex_init(&rs->src_page_req_mutex);
1972     QSIMPLEQ_INIT(&rs->src_page_requests);
1973
1974     if (migrate_use_xbzrle()) {
1975         XBZRLE_cache_lock();
1976         ZERO_TARGET_PAGE = g_malloc0(TARGET_PAGE_SIZE);
1977         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
1978                                   TARGET_PAGE_SIZE,
1979                                   TARGET_PAGE_SIZE);
1980         if (!XBZRLE.cache) {
1981             XBZRLE_cache_unlock();
1982             error_report("Error creating cache");
1983             return -1;
1984         }
1985         XBZRLE_cache_unlock();
1986
1987         /* We prefer not to abort if there is no memory */
1988         XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
1989         if (!XBZRLE.encoded_buf) {
1990             error_report("Error allocating encoded_buf");
1991             return -1;
1992         }
1993
1994         XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
1995         if (!XBZRLE.current_buf) {
1996             error_report("Error allocating current_buf");
1997             g_free(XBZRLE.encoded_buf);
1998             XBZRLE.encoded_buf = NULL;
1999             return -1;
2000         }
2001     }
2002
2003     /* For memory_global_dirty_log_start below.  */
2004     qemu_mutex_lock_iothread();
2005
2006     qemu_mutex_lock_ramlist();
2007     rcu_read_lock();
2008     ram_state_reset(rs);
2009
2010     rs->ram_bitmap = g_new0(struct RAMBitmap, 1);
2011     /* Skip setting bitmap if there is no RAM */
2012     if (ram_bytes_total()) {
2013         ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2014         rs->ram_bitmap->bmap = bitmap_new(ram_bitmap_pages);
2015         bitmap_set(rs->ram_bitmap->bmap, 0, ram_bitmap_pages);
2016
2017         if (migrate_postcopy_ram()) {
2018             rs->ram_bitmap->unsentmap = bitmap_new(ram_bitmap_pages);
2019             bitmap_set(rs->ram_bitmap->unsentmap, 0, ram_bitmap_pages);
2020         }
2021     }
2022
2023     /*
2024      * Count the total number of pages used by ram blocks not including any
2025      * gaps due to alignment or unplugs.
2026      */
2027     rs->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
2028
2029     memory_global_dirty_log_start();
2030     migration_bitmap_sync(rs);
2031     qemu_mutex_unlock_ramlist();
2032     qemu_mutex_unlock_iothread();
2033     rcu_read_unlock();
2034
2035     return 0;
2036 }
2037
2038 /*
2039  * Each of ram_save_setup, ram_save_iterate and ram_save_complete has
2040  * long-running RCU critical section.  When rcu-reclaims in the code
2041  * start to become numerous it will be necessary to reduce the
2042  * granularity of these critical sections.
2043  */
2044
2045 /**
2046  * ram_save_setup: Setup RAM for migration
2047  *
2048  * Returns zero to indicate success and negative for error
2049  *
2050  * @f: QEMUFile where to send the data
2051  * @opaque: RAMState pointer
2052  */
2053 static int ram_save_setup(QEMUFile *f, void *opaque)
2054 {
2055     RAMState *rs = opaque;
2056     RAMBlock *block;
2057
2058     /* migration has already setup the bitmap, reuse it. */
2059     if (!migration_in_colo_state()) {
2060         if (ram_state_init(rs) < 0) {
2061             return -1;
2062          }
2063     }
2064     rs->f = f;
2065
2066     rcu_read_lock();
2067
2068     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
2069
2070     QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
2071         qemu_put_byte(f, strlen(block->idstr));
2072         qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
2073         qemu_put_be64(f, block->used_length);
2074         if (migrate_postcopy_ram() && block->page_size != qemu_host_page_size) {
2075             qemu_put_be64(f, block->page_size);
2076         }
2077     }
2078
2079     rcu_read_unlock();
2080
2081     ram_control_before_iterate(f, RAM_CONTROL_SETUP);
2082     ram_control_after_iterate(f, RAM_CONTROL_SETUP);
2083
2084     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2085
2086     return 0;
2087 }
2088
2089 /**
2090  * ram_save_iterate: iterative stage for migration
2091  *
2092  * Returns zero to indicate success and negative for error
2093  *
2094  * @f: QEMUFile where to send the data
2095  * @opaque: RAMState pointer
2096  */
2097 static int ram_save_iterate(QEMUFile *f, void *opaque)
2098 {
2099     RAMState *rs = opaque;
2100     int ret;
2101     int i;
2102     int64_t t0;
2103     int done = 0;
2104
2105     rcu_read_lock();
2106     if (ram_list.version != rs->last_version) {
2107         ram_state_reset(rs);
2108     }
2109
2110     /* Read version before ram_list.blocks */
2111     smp_rmb();
2112
2113     ram_control_before_iterate(f, RAM_CONTROL_ROUND);
2114
2115     t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2116     i = 0;
2117     while ((ret = qemu_file_rate_limit(f)) == 0) {
2118         int pages;
2119
2120         pages = ram_find_and_save_block(rs, false);
2121         /* no more pages to sent */
2122         if (pages == 0) {
2123             done = 1;
2124             break;
2125         }
2126         rs->iterations++;
2127
2128         /* we want to check in the 1st loop, just in case it was the 1st time
2129            and we had to sync the dirty bitmap.
2130            qemu_get_clock_ns() is a bit expensive, so we only check each some
2131            iterations
2132         */
2133         if ((i & 63) == 0) {
2134             uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
2135             if (t1 > MAX_WAIT) {
2136                 trace_ram_save_iterate_big_wait(t1, i);
2137                 break;
2138             }
2139         }
2140         i++;
2141     }
2142     flush_compressed_data(rs);
2143     rcu_read_unlock();
2144
2145     /*
2146      * Must occur before EOS (or any QEMUFile operation)
2147      * because of RDMA protocol.
2148      */
2149     ram_control_after_iterate(f, RAM_CONTROL_ROUND);
2150
2151     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2152     rs->bytes_transferred += 8;
2153
2154     ret = qemu_file_get_error(f);
2155     if (ret < 0) {
2156         return ret;
2157     }
2158
2159     return done;
2160 }
2161
2162 /**
2163  * ram_save_complete: function called to send the remaining amount of ram
2164  *
2165  * Returns zero to indicate success
2166  *
2167  * Called with iothread lock
2168  *
2169  * @f: QEMUFile where to send the data
2170  * @opaque: RAMState pointer
2171  */
2172 static int ram_save_complete(QEMUFile *f, void *opaque)
2173 {
2174     RAMState *rs = opaque;
2175
2176     rcu_read_lock();
2177
2178     if (!migration_in_postcopy()) {
2179         migration_bitmap_sync(rs);
2180     }
2181
2182     ram_control_before_iterate(f, RAM_CONTROL_FINISH);
2183
2184     /* try transferring iterative blocks of memory */
2185
2186     /* flush all remaining blocks regardless of rate limiting */
2187     while (true) {
2188         int pages;
2189
2190         pages = ram_find_and_save_block(rs, !migration_in_colo_state());
2191         /* no more blocks to sent */
2192         if (pages == 0) {
2193             break;
2194         }
2195     }
2196
2197     flush_compressed_data(rs);
2198     ram_control_after_iterate(f, RAM_CONTROL_FINISH);
2199
2200     rcu_read_unlock();
2201
2202     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
2203
2204     return 0;
2205 }
2206
2207 static void ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
2208                              uint64_t *non_postcopiable_pending,
2209                              uint64_t *postcopiable_pending)
2210 {
2211     RAMState *rs = opaque;
2212     uint64_t remaining_size;
2213
2214     remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2215
2216     if (!migration_in_postcopy() &&
2217         remaining_size < max_size) {
2218         qemu_mutex_lock_iothread();
2219         rcu_read_lock();
2220         migration_bitmap_sync(rs);
2221         rcu_read_unlock();
2222         qemu_mutex_unlock_iothread();
2223         remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
2224     }
2225
2226     /* We can do postcopy, and all the data is postcopiable */
2227     *postcopiable_pending += remaining_size;
2228 }
2229
2230 static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
2231 {
2232     unsigned int xh_len;
2233     int xh_flags;
2234     uint8_t *loaded_data;
2235
2236     if (!xbzrle_decoded_buf) {
2237         xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
2238     }
2239     loaded_data = xbzrle_decoded_buf;
2240
2241     /* extract RLE header */
2242     xh_flags = qemu_get_byte(f);
2243     xh_len = qemu_get_be16(f);
2244
2245     if (xh_flags != ENCODING_FLAG_XBZRLE) {
2246         error_report("Failed to load XBZRLE page - wrong compression!");
2247         return -1;
2248     }
2249
2250     if (xh_len > TARGET_PAGE_SIZE) {
2251         error_report("Failed to load XBZRLE page - len overflow!");
2252         return -1;
2253     }
2254     /* load data and decode */
2255     qemu_get_buffer_in_place(f, &loaded_data, xh_len);
2256
2257     /* decode RLE */
2258     if (xbzrle_decode_buffer(loaded_data, xh_len, host,
2259                              TARGET_PAGE_SIZE) == -1) {
2260         error_report("Failed to load XBZRLE page - decode error!");
2261         return -1;
2262     }
2263
2264     return 0;
2265 }
2266
2267 /**
2268  * ram_block_from_stream: read a RAMBlock id from the migration stream
2269  *
2270  * Must be called from within a rcu critical section.
2271  *
2272  * Returns a pointer from within the RCU-protected ram_list.
2273  *
2274  * @f: QEMUFile where to read the data from
2275  * @flags: Page flags (mostly to see if it's a continuation of previous block)
2276  */
2277 static inline RAMBlock *ram_block_from_stream(QEMUFile *f, int flags)
2278 {
2279     static RAMBlock *block = NULL;
2280     char id[256];
2281     uint8_t len;
2282
2283     if (flags & RAM_SAVE_FLAG_CONTINUE) {
2284         if (!block) {
2285             error_report("Ack, bad migration stream!");
2286             return NULL;
2287         }
2288         return block;
2289     }
2290
2291     len = qemu_get_byte(f);
2292     qemu_get_buffer(f, (uint8_t *)id, len);
2293     id[len] = 0;
2294
2295     block = qemu_ram_block_by_name(id);
2296     if (!block) {
2297         error_report("Can't find block %s", id);
2298         return NULL;
2299     }
2300
2301     return block;
2302 }
2303
2304 static inline void *host_from_ram_block_offset(RAMBlock *block,
2305                                                ram_addr_t offset)
2306 {
2307     if (!offset_in_ramblock(block, offset)) {
2308         return NULL;
2309     }
2310
2311     return block->host + offset;
2312 }
2313
2314 /**
2315  * ram_handle_compressed: handle the zero page case
2316  *
2317  * If a page (or a whole RDMA chunk) has been
2318  * determined to be zero, then zap it.
2319  *
2320  * @host: host address for the zero page
2321  * @ch: what the page is filled from.  We only support zero
2322  * @size: size of the zero page
2323  */
2324 void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
2325 {
2326     if (ch != 0 || !is_zero_range(host, size)) {
2327         memset(host, ch, size);
2328     }
2329 }
2330
2331 static void *do_data_decompress(void *opaque)
2332 {
2333     DecompressParam *param = opaque;
2334     unsigned long pagesize;
2335     uint8_t *des;
2336     int len;
2337
2338     qemu_mutex_lock(&param->mutex);
2339     while (!param->quit) {
2340         if (param->des) {
2341             des = param->des;
2342             len = param->len;
2343             param->des = 0;
2344             qemu_mutex_unlock(&param->mutex);
2345
2346             pagesize = TARGET_PAGE_SIZE;
2347             /* uncompress() will return failed in some case, especially
2348              * when the page is dirted when doing the compression, it's
2349              * not a problem because the dirty page will be retransferred
2350              * and uncompress() won't break the data in other pages.
2351              */
2352             uncompress((Bytef *)des, &pagesize,
2353                        (const Bytef *)param->compbuf, len);
2354
2355             qemu_mutex_lock(&decomp_done_lock);
2356             param->done = true;
2357             qemu_cond_signal(&decomp_done_cond);
2358             qemu_mutex_unlock(&decomp_done_lock);
2359
2360             qemu_mutex_lock(&param->mutex);
2361         } else {
2362             qemu_cond_wait(&param->cond, &param->mutex);
2363         }
2364     }
2365     qemu_mutex_unlock(&param->mutex);
2366
2367     return NULL;
2368 }
2369
2370 static void wait_for_decompress_done(void)
2371 {
2372     int idx, thread_count;
2373
2374     if (!migrate_use_compression()) {
2375         return;
2376     }
2377
2378     thread_count = migrate_decompress_threads();
2379     qemu_mutex_lock(&decomp_done_lock);
2380     for (idx = 0; idx < thread_count; idx++) {
2381         while (!decomp_param[idx].done) {
2382             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2383         }
2384     }
2385     qemu_mutex_unlock(&decomp_done_lock);
2386 }
2387
2388 void migrate_decompress_threads_create(void)
2389 {
2390     int i, thread_count;
2391
2392     thread_count = migrate_decompress_threads();
2393     decompress_threads = g_new0(QemuThread, thread_count);
2394     decomp_param = g_new0(DecompressParam, thread_count);
2395     qemu_mutex_init(&decomp_done_lock);
2396     qemu_cond_init(&decomp_done_cond);
2397     for (i = 0; i < thread_count; i++) {
2398         qemu_mutex_init(&decomp_param[i].mutex);
2399         qemu_cond_init(&decomp_param[i].cond);
2400         decomp_param[i].compbuf = g_malloc0(compressBound(TARGET_PAGE_SIZE));
2401         decomp_param[i].done = true;
2402         decomp_param[i].quit = false;
2403         qemu_thread_create(decompress_threads + i, "decompress",
2404                            do_data_decompress, decomp_param + i,
2405                            QEMU_THREAD_JOINABLE);
2406     }
2407 }
2408
2409 void migrate_decompress_threads_join(void)
2410 {
2411     int i, thread_count;
2412
2413     thread_count = migrate_decompress_threads();
2414     for (i = 0; i < thread_count; i++) {
2415         qemu_mutex_lock(&decomp_param[i].mutex);
2416         decomp_param[i].quit = true;
2417         qemu_cond_signal(&decomp_param[i].cond);
2418         qemu_mutex_unlock(&decomp_param[i].mutex);
2419     }
2420     for (i = 0; i < thread_count; i++) {
2421         qemu_thread_join(decompress_threads + i);
2422         qemu_mutex_destroy(&decomp_param[i].mutex);
2423         qemu_cond_destroy(&decomp_param[i].cond);
2424         g_free(decomp_param[i].compbuf);
2425     }
2426     g_free(decompress_threads);
2427     g_free(decomp_param);
2428     decompress_threads = NULL;
2429     decomp_param = NULL;
2430 }
2431
2432 static void decompress_data_with_multi_threads(QEMUFile *f,
2433                                                void *host, int len)
2434 {
2435     int idx, thread_count;
2436
2437     thread_count = migrate_decompress_threads();
2438     qemu_mutex_lock(&decomp_done_lock);
2439     while (true) {
2440         for (idx = 0; idx < thread_count; idx++) {
2441             if (decomp_param[idx].done) {
2442                 decomp_param[idx].done = false;
2443                 qemu_mutex_lock(&decomp_param[idx].mutex);
2444                 qemu_get_buffer(f, decomp_param[idx].compbuf, len);
2445                 decomp_param[idx].des = host;
2446                 decomp_param[idx].len = len;
2447                 qemu_cond_signal(&decomp_param[idx].cond);
2448                 qemu_mutex_unlock(&decomp_param[idx].mutex);
2449                 break;
2450             }
2451         }
2452         if (idx < thread_count) {
2453             break;
2454         } else {
2455             qemu_cond_wait(&decomp_done_cond, &decomp_done_lock);
2456         }
2457     }
2458     qemu_mutex_unlock(&decomp_done_lock);
2459 }
2460
2461 /**
2462  * ram_postcopy_incoming_init: allocate postcopy data structures
2463  *
2464  * Returns 0 for success and negative if there was one error
2465  *
2466  * @mis: current migration incoming state
2467  *
2468  * Allocate data structures etc needed by incoming migration with
2469  * postcopy-ram. postcopy-ram's similarly names
2470  * postcopy_ram_incoming_init does the work.
2471  */
2472 int ram_postcopy_incoming_init(MigrationIncomingState *mis)
2473 {
2474     size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
2475
2476     return postcopy_ram_incoming_init(mis, ram_pages);
2477 }
2478
2479 /**
2480  * ram_load_postcopy: load a page in postcopy case
2481  *
2482  * Returns 0 for success or -errno in case of error
2483  *
2484  * Called in postcopy mode by ram_load().
2485  * rcu_read_lock is taken prior to this being called.
2486  *
2487  * @f: QEMUFile where to send the data
2488  */
2489 static int ram_load_postcopy(QEMUFile *f)
2490 {
2491     int flags = 0, ret = 0;
2492     bool place_needed = false;
2493     bool matching_page_sizes = false;
2494     MigrationIncomingState *mis = migration_incoming_get_current();
2495     /* Temporary page that is later 'placed' */
2496     void *postcopy_host_page = postcopy_get_tmp_page(mis);
2497     void *last_host = NULL;
2498     bool all_zero = false;
2499
2500     while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2501         ram_addr_t addr;
2502         void *host = NULL;
2503         void *page_buffer = NULL;
2504         void *place_source = NULL;
2505         RAMBlock *block = NULL;
2506         uint8_t ch;
2507
2508         addr = qemu_get_be64(f);
2509         flags = addr & ~TARGET_PAGE_MASK;
2510         addr &= TARGET_PAGE_MASK;
2511
2512         trace_ram_load_postcopy_loop((uint64_t)addr, flags);
2513         place_needed = false;
2514         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE)) {
2515             block = ram_block_from_stream(f, flags);
2516
2517             host = host_from_ram_block_offset(block, addr);
2518             if (!host) {
2519                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2520                 ret = -EINVAL;
2521                 break;
2522             }
2523             matching_page_sizes = block->page_size == TARGET_PAGE_SIZE;
2524             /*
2525              * Postcopy requires that we place whole host pages atomically;
2526              * these may be huge pages for RAMBlocks that are backed by
2527              * hugetlbfs.
2528              * To make it atomic, the data is read into a temporary page
2529              * that's moved into place later.
2530              * The migration protocol uses,  possibly smaller, target-pages
2531              * however the source ensures it always sends all the components
2532              * of a host page in order.
2533              */
2534             page_buffer = postcopy_host_page +
2535                           ((uintptr_t)host & (block->page_size - 1));
2536             /* If all TP are zero then we can optimise the place */
2537             if (!((uintptr_t)host & (block->page_size - 1))) {
2538                 all_zero = true;
2539             } else {
2540                 /* not the 1st TP within the HP */
2541                 if (host != (last_host + TARGET_PAGE_SIZE)) {
2542                     error_report("Non-sequential target page %p/%p",
2543                                   host, last_host);
2544                     ret = -EINVAL;
2545                     break;
2546                 }
2547             }
2548
2549
2550             /*
2551              * If it's the last part of a host page then we place the host
2552              * page
2553              */
2554             place_needed = (((uintptr_t)host + TARGET_PAGE_SIZE) &
2555                                      (block->page_size - 1)) == 0;
2556             place_source = postcopy_host_page;
2557         }
2558         last_host = host;
2559
2560         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2561         case RAM_SAVE_FLAG_COMPRESS:
2562             ch = qemu_get_byte(f);
2563             memset(page_buffer, ch, TARGET_PAGE_SIZE);
2564             if (ch) {
2565                 all_zero = false;
2566             }
2567             break;
2568
2569         case RAM_SAVE_FLAG_PAGE:
2570             all_zero = false;
2571             if (!place_needed || !matching_page_sizes) {
2572                 qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE);
2573             } else {
2574                 /* Avoids the qemu_file copy during postcopy, which is
2575                  * going to do a copy later; can only do it when we
2576                  * do this read in one go (matching page sizes)
2577                  */
2578                 qemu_get_buffer_in_place(f, (uint8_t **)&place_source,
2579                                          TARGET_PAGE_SIZE);
2580             }
2581             break;
2582         case RAM_SAVE_FLAG_EOS:
2583             /* normal exit */
2584             break;
2585         default:
2586             error_report("Unknown combination of migration flags: %#x"
2587                          " (postcopy mode)", flags);
2588             ret = -EINVAL;
2589         }
2590
2591         if (place_needed) {
2592             /* This gets called at the last target page in the host page */
2593             void *place_dest = host + TARGET_PAGE_SIZE - block->page_size;
2594
2595             if (all_zero) {
2596                 ret = postcopy_place_page_zero(mis, place_dest,
2597                                                block->page_size);
2598             } else {
2599                 ret = postcopy_place_page(mis, place_dest,
2600                                           place_source, block->page_size);
2601             }
2602         }
2603         if (!ret) {
2604             ret = qemu_file_get_error(f);
2605         }
2606     }
2607
2608     return ret;
2609 }
2610
2611 static int ram_load(QEMUFile *f, void *opaque, int version_id)
2612 {
2613     int flags = 0, ret = 0;
2614     static uint64_t seq_iter;
2615     int len = 0;
2616     /*
2617      * If system is running in postcopy mode, page inserts to host memory must
2618      * be atomic
2619      */
2620     bool postcopy_running = postcopy_state_get() >= POSTCOPY_INCOMING_LISTENING;
2621     /* ADVISE is earlier, it shows the source has the postcopy capability on */
2622     bool postcopy_advised = postcopy_state_get() >= POSTCOPY_INCOMING_ADVISE;
2623
2624     seq_iter++;
2625
2626     if (version_id != 4) {
2627         ret = -EINVAL;
2628     }
2629
2630     /* This RCU critical section can be very long running.
2631      * When RCU reclaims in the code start to become numerous,
2632      * it will be necessary to reduce the granularity of this
2633      * critical section.
2634      */
2635     rcu_read_lock();
2636
2637     if (postcopy_running) {
2638         ret = ram_load_postcopy(f);
2639     }
2640
2641     while (!postcopy_running && !ret && !(flags & RAM_SAVE_FLAG_EOS)) {
2642         ram_addr_t addr, total_ram_bytes;
2643         void *host = NULL;
2644         uint8_t ch;
2645
2646         addr = qemu_get_be64(f);
2647         flags = addr & ~TARGET_PAGE_MASK;
2648         addr &= TARGET_PAGE_MASK;
2649
2650         if (flags & (RAM_SAVE_FLAG_COMPRESS | RAM_SAVE_FLAG_PAGE |
2651                      RAM_SAVE_FLAG_COMPRESS_PAGE | RAM_SAVE_FLAG_XBZRLE)) {
2652             RAMBlock *block = ram_block_from_stream(f, flags);
2653
2654             host = host_from_ram_block_offset(block, addr);
2655             if (!host) {
2656                 error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
2657                 ret = -EINVAL;
2658                 break;
2659             }
2660         }
2661
2662         switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
2663         case RAM_SAVE_FLAG_MEM_SIZE:
2664             /* Synchronize RAM block list */
2665             total_ram_bytes = addr;
2666             while (!ret && total_ram_bytes) {
2667                 RAMBlock *block;
2668                 char id[256];
2669                 ram_addr_t length;
2670
2671                 len = qemu_get_byte(f);
2672                 qemu_get_buffer(f, (uint8_t *)id, len);
2673                 id[len] = 0;
2674                 length = qemu_get_be64(f);
2675
2676                 block = qemu_ram_block_by_name(id);
2677                 if (block) {
2678                     if (length != block->used_length) {
2679                         Error *local_err = NULL;
2680
2681                         ret = qemu_ram_resize(block, length,
2682                                               &local_err);
2683                         if (local_err) {
2684                             error_report_err(local_err);
2685                         }
2686                     }
2687                     /* For postcopy we need to check hugepage sizes match */
2688                     if (postcopy_advised &&
2689                         block->page_size != qemu_host_page_size) {
2690                         uint64_t remote_page_size = qemu_get_be64(f);
2691                         if (remote_page_size != block->page_size) {
2692                             error_report("Mismatched RAM page size %s "
2693                                          "(local) %zd != %" PRId64,
2694                                          id, block->page_size,
2695                                          remote_page_size);
2696                             ret = -EINVAL;
2697                         }
2698                     }
2699                     ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
2700                                           block->idstr);
2701                 } else {
2702                     error_report("Unknown ramblock \"%s\", cannot "
2703                                  "accept migration", id);
2704                     ret = -EINVAL;
2705                 }
2706
2707                 total_ram_bytes -= length;
2708             }
2709             break;
2710
2711         case RAM_SAVE_FLAG_COMPRESS:
2712             ch = qemu_get_byte(f);
2713             ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
2714             break;
2715
2716         case RAM_SAVE_FLAG_PAGE:
2717             qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
2718             break;
2719
2720         case RAM_SAVE_FLAG_COMPRESS_PAGE:
2721             len = qemu_get_be32(f);
2722             if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) {
2723                 error_report("Invalid compressed data length: %d", len);
2724                 ret = -EINVAL;
2725                 break;
2726             }
2727             decompress_data_with_multi_threads(f, host, len);
2728             break;
2729
2730         case RAM_SAVE_FLAG_XBZRLE:
2731             if (load_xbzrle(f, addr, host) < 0) {
2732                 error_report("Failed to decompress XBZRLE page at "
2733                              RAM_ADDR_FMT, addr);
2734                 ret = -EINVAL;
2735                 break;
2736             }
2737             break;
2738         case RAM_SAVE_FLAG_EOS:
2739             /* normal exit */
2740             break;
2741         default:
2742             if (flags & RAM_SAVE_FLAG_HOOK) {
2743                 ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
2744             } else {
2745                 error_report("Unknown combination of migration flags: %#x",
2746                              flags);
2747                 ret = -EINVAL;
2748             }
2749         }
2750         if (!ret) {
2751             ret = qemu_file_get_error(f);
2752         }
2753     }
2754
2755     wait_for_decompress_done();
2756     rcu_read_unlock();
2757     trace_ram_load_complete(ret, seq_iter);
2758     return ret;
2759 }
2760
2761 static SaveVMHandlers savevm_ram_handlers = {
2762     .save_live_setup = ram_save_setup,
2763     .save_live_iterate = ram_save_iterate,
2764     .save_live_complete_postcopy = ram_save_complete,
2765     .save_live_complete_precopy = ram_save_complete,
2766     .save_live_pending = ram_save_pending,
2767     .load_state = ram_load,
2768     .cleanup = ram_migration_cleanup,
2769 };
2770
2771 void ram_mig_init(void)
2772 {
2773     qemu_mutex_init(&XBZRLE.lock);
2774     register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, &ram_state);
2775 }