migration/postcopy-ram.c

   1 /*
   2  * Postcopy migration for RAM
   3  *
   4  * Copyright 2013-2015 Red Hat, Inc. and/or its affiliates
   5  *
   6  * Authors:
   7  *  Dave Gilbert  <[email protected]>
   8  *
   9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  10  * See the COPYING file in the top-level directory.
  11  *
  12  */
  13
  14 /*
  15  * Postcopy is a migration technique where the execution flips from the
  16  * source to the destination before all the data has been copied.
  17  */
  18
  19 #include "qemu/osdep.h"
  20
  21 #include "qemu-common.h"
  22 #include "migration/migration.h"
  23 #include "postcopy-ram.h"
  24 #include "sysemu/sysemu.h"
  25 #include "sysemu/balloon.h"
  26 #include "qemu/error-report.h"
  27 #include "trace.h"
  28
  29 /* Arbitrary limit on size of each discard command,
  30  * keeps them around ~200 bytes
  31  */
  32 #define MAX_DISCARDS_PER_COMMAND 12
  33
  34 struct PostcopyDiscardState {
  35     const char *ramblock_name;
  36     uint16_t cur_entry;
  37     /*
  38      * Start and length of a discard range (bytes)
  39      */
  40     uint64_t start_list[MAX_DISCARDS_PER_COMMAND];
  41     uint64_t length_list[MAX_DISCARDS_PER_COMMAND];
  42     unsigned int nsentwords;
  43     unsigned int nsentcmds;
  44 };
  45
/* Postcopy needs to detect accesses to pages that haven't yet been copied
 * across, and to map new pages in efficiently; the techniques for doing
 * this are target OS specific.
 */
  50 #if defined(__linux__)
  51
  52 #include <poll.h>
  53 #include <sys/ioctl.h>
  54 #include <sys/syscall.h>
  55 #include <asm/types.h> /* for __u64 */
  56 #endif
  57
  58 #if defined(__linux__) && defined(__NR_userfaultfd) && defined(CONFIG_EVENTFD)
  59 #include <sys/eventfd.h>
  60 #include <linux/userfaultfd.h>
  61
  62 static bool ufd_version_check(int ufd)
  63 {
  64     struct uffdio_api api_struct;
  65     uint64_t ioctl_mask;
  66
  67     api_struct.api = UFFD_API;
  68     api_struct.features = 0;
  69     if (ioctl(ufd, UFFDIO_API, &api_struct)) {
  70         error_report("postcopy_ram_supported_by_host: UFFDIO_API failed: %s",
  71                      strerror(errno));
  72         return false;
  73     }
  74
  75     ioctl_mask = (__u64)1 << _UFFDIO_REGISTER |
  76                  (__u64)1 << _UFFDIO_UNREGISTER;
  77     if ((api_struct.ioctls & ioctl_mask) != ioctl_mask) {
  78         error_report("Missing userfault features: %" PRIx64,
  79                      (uint64_t)(~api_struct.ioctls & ioctl_mask));
  80         return false;
  81     }
  82
  83     if (getpagesize() != ram_pagesize_summary()) {
  84         bool have_hp = false;
  85         /* We've got a huge page */
  86 #ifdef UFFD_FEATURE_MISSING_HUGETLBFS
  87         have_hp = api_struct.features & UFFD_FEATURE_MISSING_HUGETLBFS;
  88 #endif
  89         if (!have_hp) {
  90             error_report("Userfault on this host does not support huge pages");
  91             return false;
  92         }
  93     }
  94     return true;
  95 }
  96
  97 /* Callback from postcopy_ram_supported_by_host block iterator.
  98  */
  99 static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
 100                              ram_addr_t offset, ram_addr_t length, void *opaque)
 101 {
 102     RAMBlock *rb = qemu_ram_block_by_name(block_name);
 103     size_t pagesize = qemu_ram_pagesize(rb);
 104
 105     if (qemu_ram_is_shared(rb)) {
 106         error_report("Postcopy on shared RAM (%s) is not yet supported",
 107                      block_name);
 108         return 1;
 109     }
 110
 111     if (length % pagesize) {
 112         error_report("Postcopy requires RAM blocks to be a page size multiple,"
 113                      " block %s is 0x" RAM_ADDR_FMT " bytes with a "
 114                      "page size of 0x%zx", block_name, length, pagesize);
 115         return 1;
 116     }
 117     return 0;
 118 }
 119
 120 /*
 121  * Note: This has the side effect of munlock'ing all of RAM, that's
 122  * normally fine since if the postcopy succeeds it gets turned back on at the
 123  * end.
 124  */
 125 bool postcopy_ram_supported_by_host(void)
 126 {
 127     long pagesize = getpagesize();
 128     int ufd = -1;
 129     bool ret = false; /* Error unless we change it */
 130     void *testarea = NULL;
 131     struct uffdio_register reg_struct;
 132     struct uffdio_range range_struct;
 133     uint64_t feature_mask;
 134
 135     if (qemu_target_page_size() > pagesize) {
 136         error_report("Target page size bigger than host page size");
 137         goto out;
 138     }
 139
 140     ufd = syscall(__NR_userfaultfd, O_CLOEXEC);
 141     if (ufd == -1) {
 142         error_report("%s: userfaultfd not available: %s", __func__,
 143                      strerror(errno));
 144         goto out;
 145     }
 146
 147     /* Version and features check */
 148     if (!ufd_version_check(ufd)) {
 149         goto out;
 150     }
 151
 152     /* We don't support postcopy with shared RAM yet */
 153     if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
 154         goto out;
 155     }
 156
 157     /*
 158      * userfault and mlock don't go together; we'll put it back later if
 159      * it was enabled.
 160      */
 161     if (munlockall()) {
 162         error_report("%s: munlockall: %s", __func__,  strerror(errno));
 163         return -1;
 164     }
 165
 166     /*
 167      *  We need to check that the ops we need are supported on anon memory
 168      *  To do that we need to register a chunk and see the flags that
 169      *  are returned.
 170      */
 171     testarea = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE |
 172                                     MAP_ANONYMOUS, -1, 0);
 173     if (testarea == MAP_FAILED) {
 174         error_report("%s: Failed to map test area: %s", __func__,
 175                      strerror(errno));
 176         goto out;
 177     }
 178     g_assert(((size_t)testarea & (pagesize-1)) == 0);
 179
 180     reg_struct.range.start = (uintptr_t)testarea;
 181     reg_struct.range.len = pagesize;
 182     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 183
 184     if (ioctl(ufd, UFFDIO_REGISTER, &reg_struct)) {
 185         error_report("%s userfault register: %s", __func__, strerror(errno));
 186         goto out;
 187     }
 188
 189     range_struct.start = (uintptr_t)testarea;
 190     range_struct.len = pagesize;
 191     if (ioctl(ufd, UFFDIO_UNREGISTER, &range_struct)) {
 192         error_report("%s userfault unregister: %s", __func__, strerror(errno));
 193         goto out;
 194     }
 195
 196     feature_mask = (__u64)1 << _UFFDIO_WAKE |
 197                    (__u64)1 << _UFFDIO_COPY |
 198                    (__u64)1 << _UFFDIO_ZEROPAGE;
 199     if ((reg_struct.ioctls & feature_mask) != feature_mask) {
 200         error_report("Missing userfault map features: %" PRIx64,
 201                      (uint64_t)(~reg_struct.ioctls & feature_mask));
 202         goto out;
 203     }
 204
 205     /* Success! */
 206     ret = true;
 207 out:
 208     if (testarea) {
 209         munmap(testarea, pagesize);
 210     }
 211     if (ufd != -1) {
 212         close(ufd);
 213     }
 214     return ret;
 215 }
 216
 217 /*
 218  * Setup an area of RAM so that it *can* be used for postcopy later; this
 219  * must be done right at the start prior to pre-copy.
 220  * opaque should be the MIS.
 221  */
 222 static int init_range(const char *block_name, void *host_addr,
 223                       ram_addr_t offset, ram_addr_t length, void *opaque)
 224 {
 225     trace_postcopy_init_range(block_name, host_addr, offset, length);
 226
 227     /*
 228      * We need the whole of RAM to be truly empty for postcopy, so things
 229      * like ROMs and any data tables built during init must be zero'd
 230      * - we're going to get the copy from the source anyway.
 231      * (Precopy will just overwrite this data, so doesn't need the discard)
 232      */
 233     if (ram_discard_range(block_name, 0, length)) {
 234         return -1;
 235     }
 236
 237     return 0;
 238 }
 239
 240 /*
 241  * At the end of migration, undo the effects of init_range
 242  * opaque should be the MIS.
 243  */
 244 static int cleanup_range(const char *block_name, void *host_addr,
 245                         ram_addr_t offset, ram_addr_t length, void *opaque)
 246 {
 247     MigrationIncomingState *mis = opaque;
 248     struct uffdio_range range_struct;
 249     trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
 250
 251     /*
 252      * We turned off hugepage for the precopy stage with postcopy enabled
 253      * we can turn it back on now.
 254      */
 255     qemu_madvise(host_addr, length, QEMU_MADV_HUGEPAGE);
 256
 257     /*
 258      * We can also turn off userfault now since we should have all the
 259      * pages.   It can be useful to leave it on to debug postcopy
 260      * if you're not sure it's always getting every page.
 261      */
 262     range_struct.start = (uintptr_t)host_addr;
 263     range_struct.len = length;
 264
 265     if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 266         error_report("%s: userfault unregister %s", __func__, strerror(errno));
 267
 268         return -1;
 269     }
 270
 271     return 0;
 272 }
 273
 274 /*
 275  * Initialise postcopy-ram, setting the RAM to a state where we can go into
 276  * postcopy later; must be called prior to any precopy.
 277  * called from arch_init's similarly named ram_postcopy_incoming_init
 278  */
 279 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 280 {
 281     if (qemu_ram_foreach_block(init_range, NULL)) {
 282         return -1;
 283     }
 284
 285     return 0;
 286 }
 287
 288 /*
 289  * At the end of a migration where postcopy_ram_incoming_init was called.
 290  */
 291 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 292 {
 293     trace_postcopy_ram_incoming_cleanup_entry();
 294
 295     if (mis->have_fault_thread) {
 296         uint64_t tmp64;
 297
 298         if (qemu_ram_foreach_block(cleanup_range, mis)) {
 299             return -1;
 300         }
 301         /*
 302          * Tell the fault_thread to exit, it's an eventfd that should
 303          * currently be at 0, we're going to increment it to 1
 304          */
 305         tmp64 = 1;
 306         if (write(mis->userfault_quit_fd, &tmp64, 8) == 8) {
 307             trace_postcopy_ram_incoming_cleanup_join();
 308             qemu_thread_join(&mis->fault_thread);
 309         } else {
 310             /* Not much we can do here, but may as well report it */
 311             error_report("%s: incrementing userfault_quit_fd: %s", __func__,
 312                          strerror(errno));
 313         }
 314         trace_postcopy_ram_incoming_cleanup_closeuf();
 315         close(mis->userfault_fd);
 316         close(mis->userfault_quit_fd);
 317         mis->have_fault_thread = false;
 318     }
 319
 320     qemu_balloon_inhibit(false);
 321
 322     if (enable_mlock) {
 323         if (os_mlock() < 0) {
 324             error_report("mlock: %s", strerror(errno));
 325             /*
 326              * It doesn't feel right to fail at this point, we have a valid
 327              * VM state.
 328              */
 329         }
 330     }
 331
 332     postcopy_state_set(POSTCOPY_INCOMING_END);
 333     migrate_send_rp_shut(mis, qemu_file_get_error(mis->from_src_file) != 0);
 334
 335     if (mis->postcopy_tmp_page) {
 336         munmap(mis->postcopy_tmp_page, mis->largest_page_size);
 337         mis->postcopy_tmp_page = NULL;
 338     }
 339     if (mis->postcopy_tmp_zero_page) {
 340         munmap(mis->postcopy_tmp_zero_page, mis->largest_page_size);
 341         mis->postcopy_tmp_zero_page = NULL;
 342     }
 343     trace_postcopy_ram_incoming_cleanup_exit();
 344     return 0;
 345 }
 346
 347 /*
 348  * Disable huge pages on an area
 349  */
 350 static int nhp_range(const char *block_name, void *host_addr,
 351                     ram_addr_t offset, ram_addr_t length, void *opaque)
 352 {
 353     trace_postcopy_nhp_range(block_name, host_addr, offset, length);
 354
 355     /*
 356      * Before we do discards we need to ensure those discards really
 357      * do delete areas of the page, even if THP thinks a hugepage would
 358      * be a good idea, so force hugepages off.
 359      */
 360     qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 361
 362     return 0;
 363 }
 364
 365 /*
 366  * Userfault requires us to mark RAM as NOHUGEPAGE prior to discard
 367  * however leaving it until after precopy means that most of the precopy
 368  * data is still THPd
 369  */
 370 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 371 {
 372     if (qemu_ram_foreach_block(nhp_range, mis)) {
 373         return -1;
 374     }
 375
 376     postcopy_state_set(POSTCOPY_INCOMING_DISCARD);
 377
 378     return 0;
 379 }
 380
 381 /*
 382  * Mark the given area of RAM as requiring notification to unwritten areas
 383  * Used as a  callback on qemu_ram_foreach_block.
 384  *   host_addr: Base of area to mark
 385  *   offset: Offset in the whole ram arena
 386  *   length: Length of the section
 387  *   opaque: MigrationIncomingState pointer
 388  * Returns 0 on success
 389  */
 390 static int ram_block_enable_notify(const char *block_name, void *host_addr,
 391                                    ram_addr_t offset, ram_addr_t length,
 392                                    void *opaque)
 393 {
 394     MigrationIncomingState *mis = opaque;
 395     struct uffdio_register reg_struct;
 396
 397     reg_struct.range.start = (uintptr_t)host_addr;
 398     reg_struct.range.len = length;
 399     reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
 400
 401     /* Now tell our userfault_fd that it's responsible for this area */
 402     if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
 403         error_report("%s userfault register: %s", __func__, strerror(errno));
 404         return -1;
 405     }
 406     if (!(reg_struct.ioctls & ((__u64)1 << _UFFDIO_COPY))) {
 407         error_report("%s userfault: Region doesn't support COPY", __func__);
 408         return -1;
 409     }
 410
 411     return 0;
 412 }
 413
 414 /*
 415  * Handle faults detected by the USERFAULT markings
 416  */
 417 static void *postcopy_ram_fault_thread(void *opaque)
 418 {
 419     MigrationIncomingState *mis = opaque;
 420     struct uffd_msg msg;
 421     int ret;
 422     RAMBlock *rb = NULL;
 423     RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */
 424
 425     trace_postcopy_ram_fault_thread_entry();
 426     qemu_sem_post(&mis->fault_thread_sem);
 427
 428     while (true) {
 429         ram_addr_t rb_offset;
 430         struct pollfd pfd[2];
 431
 432         /*
 433          * We're mainly waiting for the kernel to give us a faulting HVA,
 434          * however we can be told to quit via userfault_quit_fd which is
 435          * an eventfd
 436          */
 437         pfd[0].fd = mis->userfault_fd;
 438         pfd[0].events = POLLIN;
 439         pfd[0].revents = 0;
 440         pfd[1].fd = mis->userfault_quit_fd;
 441         pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
 442         pfd[1].revents = 0;
 443
 444         if (poll(pfd, 2, -1 /* Wait forever */) == -1) {
 445             error_report("%s: userfault poll: %s", __func__, strerror(errno));
 446             break;
 447         }
 448
 449         if (pfd[1].revents) {
 450             trace_postcopy_ram_fault_thread_quit();
 451             break;
 452         }
 453
 454         ret = read(mis->userfault_fd, &msg, sizeof(msg));
 455         if (ret != sizeof(msg)) {
 456             if (errno == EAGAIN) {
 457                 /*
 458                  * if a wake up happens on the other thread just after
 459                  * the poll, there is nothing to read.
 460                  */
 461                 continue;
 462             }
 463             if (ret < 0) {
 464                 error_report("%s: Failed to read full userfault message: %s",
 465                              __func__, strerror(errno));
 466                 break;
 467             } else {
 468                 error_report("%s: Read %d bytes from userfaultfd expected %zd",
 469                              __func__, ret, sizeof(msg));
 470                 break; /* Lost alignment, don't know what we'd read next */
 471             }
 472         }
 473         if (msg.event != UFFD_EVENT_PAGEFAULT) {
 474             error_report("%s: Read unexpected event %ud from userfaultfd",
 475                          __func__, msg.event);
 476             continue; /* It's not a page fault, shouldn't happen */
 477         }
 478
 479         rb = qemu_ram_block_from_host(
 480                  (void *)(uintptr_t)msg.arg.pagefault.address,
 481                  true, &rb_offset);
 482         if (!rb) {
 483             error_report("postcopy_ram_fault_thread: Fault outside guest: %"
 484                          PRIx64, (uint64_t)msg.arg.pagefault.address);
 485             break;
 486         }
 487
 488         rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
 489         trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
 490                                                 qemu_ram_get_idstr(rb),
 491                                                 rb_offset);
 492
 493         /*
 494          * Send the request to the source - we want to request one
 495          * of our host page sizes (which is >= TPS)
 496          */
 497         if (rb != last_rb) {
 498             last_rb = rb;
 499             migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
 500                                      rb_offset, qemu_ram_pagesize(rb));
 501         } else {
 502             /* Save some space */
 503             migrate_send_rp_req_pages(mis, NULL,
 504                                      rb_offset, qemu_ram_pagesize(rb));
 505         }
 506     }
 507     trace_postcopy_ram_fault_thread_exit();
 508     return NULL;
 509 }
 510
 511 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 512 {
 513     /* Open the fd for the kernel to give us userfaults */
 514     mis->userfault_fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 515     if (mis->userfault_fd == -1) {
 516         error_report("%s: Failed to open userfault fd: %s", __func__,
 517                      strerror(errno));
 518         return -1;
 519     }
 520
 521     /*
 522      * Although the host check already tested the API, we need to
 523      * do the check again as an ABI handshake on the new fd.
 524      */
 525     if (!ufd_version_check(mis->userfault_fd)) {
 526         return -1;
 527     }
 528
 529     /* Now an eventfd we use to tell the fault-thread to quit */
 530     mis->userfault_quit_fd = eventfd(0, EFD_CLOEXEC);
 531     if (mis->userfault_quit_fd == -1) {
 532         error_report("%s: Opening userfault_quit_fd: %s", __func__,
 533                      strerror(errno));
 534         close(mis->userfault_fd);
 535         return -1;
 536     }
 537
 538     qemu_sem_init(&mis->fault_thread_sem, 0);
 539     qemu_thread_create(&mis->fault_thread, "postcopy/fault",
 540                        postcopy_ram_fault_thread, mis, QEMU_THREAD_JOINABLE);
 541     qemu_sem_wait(&mis->fault_thread_sem);
 542     qemu_sem_destroy(&mis->fault_thread_sem);
 543     mis->have_fault_thread = true;
 544
 545     /* Mark so that we get notified of accesses to unwritten areas */
 546     if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
 547         return -1;
 548     }
 549
 550     /*
 551      * Ballooning can mark pages as absent while we're postcopying
 552      * that would cause false userfaults.
 553      */
 554     qemu_balloon_inhibit(true);
 555
 556     trace_postcopy_ram_enable_notify();
 557
 558     return 0;
 559 }
 560
 561 /*
 562  * Place a host page (from) at (host) atomically
 563  * returns 0 on success
 564  */
 565 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 566                         size_t pagesize)
 567 {
 568     struct uffdio_copy copy_struct;
 569
 570     copy_struct.dst = (uint64_t)(uintptr_t)host;
 571     copy_struct.src = (uint64_t)(uintptr_t)from;
 572     copy_struct.len = pagesize;
 573     copy_struct.mode = 0;
 574
 575     /* copy also acks to the kernel waking the stalled thread up
 576      * TODO: We can inhibit that ack and only do it if it was requested
 577      * which would be slightly cheaper, but we'd have to be careful
 578      * of the order of updating our page state.
 579      */
 580     if (ioctl(mis->userfault_fd, UFFDIO_COPY, &copy_struct)) {
 581         int e = errno;
 582         error_report("%s: %s copy host: %p from: %p (size: %zd)",
 583                      __func__, strerror(e), host, from, pagesize);
 584
 585         return -e;
 586     }
 587
 588     trace_postcopy_place_page(host);
 589     return 0;
 590 }
 591
 592 /*
 593  * Place a zero page at (host) atomically
 594  * returns 0 on success
 595  */
 596 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 597                              size_t pagesize)
 598 {
 599     trace_postcopy_place_page_zero(host);
 600
 601     if (pagesize == getpagesize()) {
 602         struct uffdio_zeropage zero_struct;
 603         zero_struct.range.start = (uint64_t)(uintptr_t)host;
 604         zero_struct.range.len = getpagesize();
 605         zero_struct.mode = 0;
 606
 607         if (ioctl(mis->userfault_fd, UFFDIO_ZEROPAGE, &zero_struct)) {
 608             int e = errno;
 609             error_report("%s: %s zero host: %p",
 610                          __func__, strerror(e), host);
 611
 612             return -e;
 613         }
 614     } else {
 615         /* The kernel can't use UFFDIO_ZEROPAGE for hugepages */
 616         if (!mis->postcopy_tmp_zero_page) {
 617             mis->postcopy_tmp_zero_page = mmap(NULL, mis->largest_page_size,
 618                                                PROT_READ | PROT_WRITE,
 619                                                MAP_PRIVATE | MAP_ANONYMOUS,
 620                                                -1, 0);
 621             if (mis->postcopy_tmp_zero_page == MAP_FAILED) {
 622                 int e = errno;
 623                 mis->postcopy_tmp_zero_page = NULL;
 624                 error_report("%s: %s mapping large zero page",
 625                              __func__, strerror(e));
 626                 return -e;
 627             }
 628             memset(mis->postcopy_tmp_zero_page, '\0', mis->largest_page_size);
 629         }
 630         return postcopy_place_page(mis, host, mis->postcopy_tmp_zero_page,
 631                                    pagesize);
 632     }
 633
 634     return 0;
 635 }
 636
 637 /*
 638  * Returns a target page of memory that can be mapped at a later point in time
 639  * using postcopy_place_page
 640  * The same address is used repeatedly, postcopy_place_page just takes the
 641  * backing page away.
 642  * Returns: Pointer to allocated page
 643  *
 644  */
 645 void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 646 {
 647     if (!mis->postcopy_tmp_page) {
 648         mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size,
 649                              PROT_READ | PROT_WRITE, MAP_PRIVATE |
 650                              MAP_ANONYMOUS, -1, 0);
 651         if (mis->postcopy_tmp_page == MAP_FAILED) {
 652             mis->postcopy_tmp_page = NULL;
 653             error_report("%s: %s", __func__, strerror(errno));
 654             return NULL;
 655         }
 656     }
 657
 658     return mis->postcopy_tmp_page;
 659 }
 660
 661 #else
 662 /* No target OS support, stubs just fail */
 663 bool postcopy_ram_supported_by_host(void)
 664 {
 665     error_report("%s: No OS support", __func__);
 666     return false;
 667 }
 668
 669 int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
 670 {
 671     error_report("postcopy_ram_incoming_init: No OS support");
 672     return -1;
 673 }
 674
 675 int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis)
 676 {
 677     assert(0);
 678     return -1;
 679 }
 680
 681 int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
 682 {
 683     assert(0);
 684     return -1;
 685 }
 686
 687 int postcopy_ram_enable_notify(MigrationIncomingState *mis)
 688 {
 689     assert(0);
 690     return -1;
 691 }
 692
 693 int postcopy_place_page(MigrationIncomingState *mis, void *host, void *from,
 694                         size_t pagesize)
 695 {
 696     assert(0);
 697     return -1;
 698 }
 699
 700 int postcopy_place_page_zero(MigrationIncomingState *mis, void *host,
 701                         size_t pagesize)
 702 {
 703     assert(0);
 704     return -1;
 705 }
 706
 707 void *postcopy_get_tmp_page(MigrationIncomingState *mis)
 708 {
 709     assert(0);
 710     return NULL;
 711 }
 712
 713 #endif
 714
 715 /* ------------------------------------------------------------------------- */
 716
 717 /**
 718  * postcopy_discard_send_init: Called at the start of each RAMBlock before
 719  *   asking to discard individual ranges.
 720  *
 721  * @ms: The current migration state.
 722  * @offset: the bitmap offset of the named RAMBlock in the migration
 723  *   bitmap.
 724  * @name: RAMBlock that discards will operate on.
 725  *
 726  * returns: a new PDS.
 727  */
 728 PostcopyDiscardState *postcopy_discard_send_init(MigrationState *ms,
 729                                                  const char *name)
 730 {
 731     PostcopyDiscardState *res = g_malloc0(sizeof(PostcopyDiscardState));
 732
 733     if (res) {
 734         res->ramblock_name = name;
 735     }
 736
 737     return res;
 738 }
 739
 740 /**
 741  * postcopy_discard_send_range: Called by the bitmap code for each chunk to
 742  *   discard. May send a discard message, may just leave it queued to
 743  *   be sent later.
 744  *
 745  * @ms: Current migration state.
 746  * @pds: Structure initialised by postcopy_discard_send_init().
 747  * @start,@length: a range of pages in the migration bitmap in the
 748  *   RAM block passed to postcopy_discard_send_init() (length=1 is one page)
 749  */
 750 void postcopy_discard_send_range(MigrationState *ms, PostcopyDiscardState *pds,
 751                                 unsigned long start, unsigned long length)
 752 {
 753     size_t tp_size = qemu_target_page_size();
 754     /* Convert to byte offsets within the RAM block */
 755     pds->start_list[pds->cur_entry] = start  * tp_size;
 756     pds->length_list[pds->cur_entry] = length * tp_size;
 757     trace_postcopy_discard_send_range(pds->ramblock_name, start, length);
 758     pds->cur_entry++;
 759     pds->nsentwords++;
 760
 761     if (pds->cur_entry == MAX_DISCARDS_PER_COMMAND) {
 762         /* Full set, ship it! */
 763         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 764                                               pds->ramblock_name,
 765                                               pds->cur_entry,
 766                                               pds->start_list,
 767                                               pds->length_list);
 768         pds->nsentcmds++;
 769         pds->cur_entry = 0;
 770     }
 771 }
 772
 773 /**
 774  * postcopy_discard_send_finish: Called at the end of each RAMBlock by the
 775  * bitmap code. Sends any outstanding discard messages, frees the PDS
 776  *
 777  * @ms: Current migration state.
 778  * @pds: Structure initialised by postcopy_discard_send_init().
 779  */
 780 void postcopy_discard_send_finish(MigrationState *ms, PostcopyDiscardState *pds)
 781 {
 782     /* Anything unsent? */
 783     if (pds->cur_entry) {
 784         qemu_savevm_send_postcopy_ram_discard(ms->to_dst_file,
 785                                               pds->ramblock_name,
 786                                               pds->cur_entry,
 787                                               pds->start_list,
 788                                               pds->length_list);
 789         pds->nsentcmds++;
 790     }
 791
 792     trace_postcopy_discard_send_finish(pds->ramblock_name, pds->nsentwords,
 793                                        pds->nsentcmds);
 794
 795     g_free(pds);
 796 }
 797
 798 /*
 799  * Current state of incoming postcopy; note this is not part of
 800  * MigrationIncomingState since it's state is used during cleanup
 801  * at the end as MIS is being freed.
 802  */
 803 static PostcopyState incoming_postcopy_state;
 804
 805 PostcopyState  postcopy_state_get(void)
 806 {
 807     return atomic_mb_read(&incoming_postcopy_state);
 808 }
 809
 810 /* Set the state and return the old state */
 811 PostcopyState postcopy_state_set(PostcopyState new_state)
 812 {
 813     return atomic_xchg(&incoming_postcopy_state, new_state);
 814 }