#include "sysemu/balloon.h"
#include "qemu/error-report.h"
#include "trace.h"
+#include "hw/boards.h"
/* Arbitrary limit on size of each discard command,
 * keeps them around ~200 bytes
 */
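/*
 * Editor's sketch of the arithmetic behind the ~200 byte figure (assuming
 * the upstream value of MAX_DISCARDS_PER_COMMAND, 12): each discard entry
 * is a (start, length) pair of 64-bit values, so one command carries
 * 12 * 2 * 8 = 192 bytes of payload plus the command header.
 */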
static struct PostcopyBlocktimeContext *blocktime_context_new(void)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
PostcopyBlocktimeContext *ctx = g_new0(PostcopyBlocktimeContext, 1);
ctx->page_fault_vcpu_time = g_new0(uint32_t, smp_cpus);
ctx->vcpu_addr = g_new0(uintptr_t, smp_cpus);
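/*
 * Editor's sketch (hypothetical helper, not part of the patch): the lookup
 * pattern this change uses everywhere now that the global smp_cpus is gone.
 * qdev_get_machine() returns the machine QOM object, MACHINE() downcasts it
 * to MachineState, and smp.cpus holds the vCPU count.
 */
static unsigned int postcopy_smp_cpus(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    return ms->smp.cpus;
}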
static uint32List *get_vcpu_blocktime_list(PostcopyBlocktimeContext *ctx)
{
+ MachineState *ms = MACHINE(qdev_get_machine());
uint32List *list = NULL, *entry = NULL;
int i;
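    /* Walk the vCPUs in reverse and prepend each entry, so the returned
     * list ends up in ascending vCPU order. */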
- for (i = smp_cpus - 1; i >= 0; i--) {
+ for (i = ms->smp.cpus - 1; i >= 0; i--) {
entry = g_new0(uint32List, 1);
entry->value = ctx->vcpu_blocktime[i];
entry->next = list;
/* Callback from postcopy_ram_supported_by_host block iterator.
*/
-static int test_ramblock_postcopiable(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int test_ramblock_postcopiable(RAMBlock *rb, void *opaque)
{
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
+ const char *block_name = qemu_ram_get_idstr(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
size_t pagesize = qemu_ram_pagesize(rb);
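    /* userfaultfd registration and page placement operate on whole host
     * pages, so a block whose length is not a page-size multiple cannot
     * be postcopied. */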
    if (length % pagesize) {
        error_report("Postcopy requires RAM blocks to be a page size "
                     "multiple, block %s is 0x" RAM_ADDR_FMT " bytes "
                     "with a page size of 0x%zx",
                     block_name, length, pagesize);
        return 1;
    }
/* We don't support postcopy with shared RAM yet */
- if (qemu_ram_foreach_block(test_ramblock_postcopiable, NULL)) {
+ if (foreach_not_ignored_block(test_ramblock_postcopiable, NULL)) {
goto out;
}
* must be done right at the start prior to pre-copy.
* opaque should be the MIS.
*/
-static int init_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int init_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_init_range(block_name, host_addr, offset, length);
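/*
 * Editor's sketch (assumption; the body is elided in this excerpt): upstream
 * init_range discards the whole block so that postcopy starts from truly
 * empty RAM, roughly:
 *
 *     if (ram_discard_range(block_name, 0, length)) {
 *         return -1;
 *     }
 */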
/*
* At the end of migration, undo the effects of init_range
* opaque should be the MIS.
*/
-static int cleanup_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int cleanup_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
MigrationIncomingState *mis = opaque;
struct uffdio_range range_struct;
trace_postcopy_cleanup_range(block_name, host_addr, offset, length);
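/*
 * Editor's sketch (assumption; the body is elided in this excerpt): the
 * range_struct declared above is filled with the block's host address and
 * length and handed to UFFDIO_UNREGISTER so the kernel stops raising
 * faults for the area, roughly:
 *
 *     range_struct.start = (uintptr_t)host_addr;
 *     range_struct.len = length;
 *     if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) {
 *         return -1;
 *     }
 */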
* postcopy later; must be called prior to any precopy.
 * Called from arch_init's similarly named ram_postcopy_incoming_init.
*/
-int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+int postcopy_ram_incoming_init(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_block(init_range, NULL)) {
+ if (foreach_not_ignored_block(init_range, NULL)) {
return -1;
}
return 0;
}
+/*
+ * Manage a single vote to the QEMU balloon inhibitor for all postcopy usage,
+ * last caller wins.
+ */
+static void postcopy_balloon_inhibit(bool state)
+{
+ static bool cur_state = false;
+
+ if (state != cur_state) {
+ qemu_balloon_inhibit(state);
+ cur_state = state;
+ }
+}
+
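/*
 * Editor's sketch (illustration, not part of the patch): why the latch
 * matters.  A paused-and-recovered postcopy runs the enable path again;
 * without the latch that would take two balloon-inhibit references but
 * release only one at cleanup.  With it, repeated same-state calls are
 * no-ops:
 *
 *     postcopy_balloon_inhibit(true);   // initial listen
 *     postcopy_balloon_inhibit(true);   // after recovery: no-op
 *     postcopy_balloon_inhibit(false);  // cleanup clears the single vote
 */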
/*
* At the end of a migration where postcopy_ram_incoming_init was called.
*/
if (mis->have_fault_thread) {
Error *local_err = NULL;
+ /* Let the fault thread quit */
+ atomic_set(&mis->fault_thread_quit, 1);
+ postcopy_fault_thread_notify(mis);
+ trace_postcopy_ram_incoming_cleanup_join();
+ qemu_thread_join(&mis->fault_thread);
+
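+    /*
+     * With the fault thread joined above, the INBOUND_END notifier and
+     * the range cleanup below cannot race with it over the userfault fd
+     * or shared postcopy state.
+     */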
if (postcopy_notify(POSTCOPY_NOTIFY_INBOUND_END, &local_err)) {
error_report_err(local_err);
return -1;
}
- if (qemu_ram_foreach_block(cleanup_range, mis)) {
+ if (foreach_not_ignored_block(cleanup_range, mis)) {
return -1;
}
- /* Let the fault thread quit */
- atomic_set(&mis->fault_thread_quit, 1);
- postcopy_fault_thread_notify(mis);
- trace_postcopy_ram_incoming_cleanup_join();
- qemu_thread_join(&mis->fault_thread);
trace_postcopy_ram_incoming_cleanup_closeuf();
close(mis->userfault_fd);
mis->have_fault_thread = false;
}
- qemu_balloon_inhibit(false);
+ postcopy_balloon_inhibit(false);
if (enable_mlock) {
if (os_mlock() < 0) {
/*
* Disable huge pages on an area
*/
-static int nhp_range(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length, void *opaque)
+static int nhp_range(RAMBlock *rb, void *opaque)
{
+ const char *block_name = qemu_ram_get_idstr(rb);
+ void *host_addr = qemu_ram_get_host_addr(rb);
+ ram_addr_t offset = qemu_ram_get_offset(rb);
+ ram_addr_t length = qemu_ram_get_used_length(rb);
trace_postcopy_nhp_range(block_name, host_addr, offset, length);
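/*
 * Editor's sketch (assumption; the body is elided in this excerpt): upstream
 * nhp_range forces transparent hugepages off for the range so that later
 * discards punch real holes instead of leaving THP-backed pages behind,
 * roughly:
 *
 *     qemu_madvise(host_addr, length, QEMU_MADV_NOHUGEPAGE);
 */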
/*
*/
int postcopy_ram_prepare_discard(MigrationIncomingState *mis)
{
- if (qemu_ram_foreach_block(nhp_range, mis)) {
+ if (foreach_not_ignored_block(nhp_range, mis)) {
return -1;
}
/*
* Mark the given area of RAM as requiring notification to unwritten areas
- * Used as a callback on qemu_ram_foreach_block.
+ * Used as a callback on foreach_not_ignored_block.
- * host_addr: Base of area to mark
- * offset: Offset in the whole ram arena
- * length: Length of the section
+ * rb: RAMBlock to mark
* opaque: MigrationIncomingState pointer
* Returns 0 on success
*/
-static int ram_block_enable_notify(const char *block_name, void *host_addr,
- ram_addr_t offset, ram_addr_t length,
- void *opaque)
+static int ram_block_enable_notify(RAMBlock *rb, void *opaque)
{
MigrationIncomingState *mis = opaque;
struct uffdio_register reg_struct;
- reg_struct.range.start = (uintptr_t)host_addr;
- reg_struct.range.len = length;
+ reg_struct.range.start = (uintptr_t)qemu_ram_get_host_addr(rb);
+ reg_struct.range.len = qemu_ram_get_used_length(rb);
reg_struct.mode = UFFDIO_REGISTER_MODE_MISSING;
    /* Now tell our userfault_fd that it's responsible for this area */
    if (ioctl(mis->userfault_fd, UFFDIO_REGISTER, &reg_struct)) {
        error_report("%s userfault register: %s", __func__, strerror(errno));
        return -1;
}
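    /*
     * The kernel reports which ioctls it supports on the registered range;
     * if UFFDIO_ZEROPAGE is among them, zero pages can later be placed
     * directly instead of copying in a zero-filled buffer.
     */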
if (reg_struct.ioctls & ((__u64)1 << _UFFDIO_ZEROPAGE)) {
- RAMBlock *rb = qemu_ram_block_by_name(block_name);
qemu_ram_set_uf_zeroable(rb);
}
{
MigrationIncomingState *mis = migration_incoming_get_current();
PostcopyBlocktimeContext *dc = mis->blocktime_ctx;
+ MachineState *ms = MACHINE(qdev_get_machine());
+ unsigned int smp_cpus = ms->smp.cpus;
int i, affected_cpu = 0;
bool vcpu_total_blocktime = false;
uint32_t read_vcpu_time, low_time_offset;
affected_cpu);
}
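+/*
+ * Wait until the migration is resumed.  The pause semaphore is posted by
+ * the recovery path once the incoming channel has been re-established
+ * (an assumption based on the callers in this patch).
+ */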
+static bool postcopy_pause_fault_thread(MigrationIncomingState *mis)
+{
+ trace_postcopy_pause_fault_thread();
+
+ qemu_sem_wait(&mis->postcopy_pause_sem_fault);
+
+ trace_postcopy_pause_fault_thread_continued();
+
+ return true;
+}
+
/*
* Handle faults detected by the USERFAULT markings
*/
RAMBlock *rb = NULL;
trace_postcopy_ram_fault_thread_entry();
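+    /* The fault thread looks up RAMBlocks while resolving faults, so it
+     * must be registered as an RCU reader like any other ram-list user. */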
+ rcu_register_thread();
mis->last_rb = NULL; /* last RAMBlock we sent part of */
qemu_sem_post(&mis->fault_thread_sem);
break;
}
+ if (!mis->to_src_file) {
+ /*
+             * The return path may already have been reported broken via
+             * the event.  Hold here until the channel is rebuilt.
+ */
+ if (postcopy_pause_fault_thread(mis)) {
+ mis->last_rb = NULL;
+ /* Continue to read the userfaultfd */
+ } else {
+                error_report("%s: paused but not allowed to continue",
+                             __func__);
+ break;
+ }
+ }
+
if (pfd[1].revents) {
uint64_t tmp64 = 0;
(uintptr_t)(msg.arg.pagefault.address),
msg.arg.pagefault.feat.ptid, rb);
+retry:
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != mis->last_rb) {
mis->last_rb = rb;
- migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
- rb_offset, qemu_ram_pagesize(rb));
+ ret = migrate_send_rp_req_pages(mis,
+ qemu_ram_get_idstr(rb),
+ rb_offset,
+ qemu_ram_pagesize(rb));
} else {
                /* Save some space: same block as last time, omit the name */
- migrate_send_rp_req_pages(mis, NULL,
- rb_offset, qemu_ram_pagesize(rb));
+ ret = migrate_send_rp_req_pages(mis,
+ NULL,
+ rb_offset,
+ qemu_ram_pagesize(rb));
+ }
+
+ if (ret) {
+ /* May be network failure, try to wait for recovery */
+ if (ret == -EIO && postcopy_pause_fault_thread(mis)) {
+ /* We got reconnected somehow, try to continue */
+ mis->last_rb = NULL;
+ goto retry;
+ } else {
+                    /* This is an unavoidable fault */
+                    error_report("%s: migrate_send_rp_req_pages() returned %d",
+ __func__, ret);
+ break;
+ }
}
}
}
}
}
+ rcu_unregister_thread();
trace_postcopy_ram_fault_thread_exit();
g_free(pfd);
return NULL;
mis->have_fault_thread = true;
/* Mark so that we get notified of accesses to unwritten areas */
- if (qemu_ram_foreach_block(ram_block_enable_notify, mis)) {
+ if (foreach_not_ignored_block(ram_block_enable_notify, mis)) {
+ error_report("ram_block_enable_notify failed");
return -1;
}
 * Ballooning can mark pages as absent while we're postcopying,
 * which would cause false userfaults.
*/
- qemu_balloon_inhibit(true);
+ postcopy_balloon_inhibit(true);
trace_postcopy_ram_enable_notify();
return false;
}
-int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages)
+int postcopy_ram_incoming_init(MigrationIncomingState *mis)
{
error_report("postcopy_ram_incoming_init: No OS support");
return -1;