/*
 * HAX memory mapping operations
 *
 * Copyright (c) 2015-16 Intel Corporation
 * Copyright 2016 Google, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2. See
 * the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"

#include "target/i386/hax-i386.h"
#include "qemu/queue.h"

#define DEBUG_HAX_MEM 0

#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX_MEM) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)

/**
 * HAXMapping: describes a pending guest physical memory mapping
 *
 * @start_pa: a guest physical address marking the start of the region; must be
 *            page-aligned
 * @size: size of the region in bytes; must be page-aligned
 * @host_va: the host virtual address of the start of the mapping
 * @flags: mapping parameters, e.g. HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID
 * @entry: additional fields for linking #HAXMapping instances together
 */
typedef struct HAXMapping {
    uint64_t start_pa;
    uint32_t size;
    uint64_t host_va;
    int flags;
    QTAILQ_ENTRY(HAXMapping) entry;
} HAXMapping;

/*
 * A doubly-linked list (actually a tail queue) of the pending page mappings
 * for the ongoing memory transaction.
 *
 * It is used to optimize the number of page mapping updates done through the
 * kernel module. For example, it is effective when a driver is digging an MMIO
 * hole inside an existing memory mapping: the transaction is seen as a
 * deletion of the whole region, then the addition of the two remaining RAM
 * areas around the hole, and finally the commit. During the commit, only the
 * removal of the pages from the MMIO hole is sent to the kernel, after the
 * result of the deletion and additions has been computed locally.
 */
static QTAILQ_HEAD(, HAXMapping) mappings =
    QTAILQ_HEAD_INITIALIZER(mappings);
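
/*
 * Illustrative trace (hypothetical addresses): digging a 16 KiB MMIO hole at
 * 0x8000 inside a RAM mapping [0x0, 0x20000) queues
 *   - 0x0000->0x20000    (deletion of the whole region)
 *   + 0x0000->0x8000     (re-addition below the hole)
 *   + 0xc000->0x20000    (re-addition above the hole)
 * which hax_update_mapping() folds into a single pending deletion of
 * [0x8000, 0xc000) by commit time.
 */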

/**
 * hax_mapping_dump_list: dumps @mappings to stdout (for debugging)
 */
static void hax_mapping_dump_list(void)
{
    HAXMapping *entry;

    DPRINTF("%s updates:\n", __func__);
    QTAILQ_FOREACH(entry, &mappings, entry) {
        DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64
                "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+',
                entry->start_pa, entry->start_pa + entry->size, entry->host_va,
                entry->flags & HAX_RAM_INFO_ROM ? " ROM" : "");
    }
}

static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa,
                                      uint32_t size, uint64_t host_va,
                                      uint8_t flags)
{
    HAXMapping *entry;

    entry = g_malloc0(sizeof(*entry));
    entry->start_pa = start_pa;
    entry->size = size;
    entry->host_va = host_va;
    entry->flags = flags;
    if (!next) {
        QTAILQ_INSERT_TAIL(&mappings, entry, entry);
    } else {
        QTAILQ_INSERT_BEFORE(next, entry, entry);
    }
}

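/*
 * Returns true when @host_va/@flags describe the exact inverse of @entry,
 * i.e. the two differ only in HAX_RAM_INFO_INVALID: a deletion immediately
 * re-added with identical parameters (or vice versa) cancels out, so no
 * update needs to be sent to the kernel for that range.
 */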
static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va,
                                    uint8_t flags)
{
    /* removed then added without change for the read-only flag */
    bool nop_flags = (entry->flags ^ flags) == HAX_RAM_INFO_INVALID;

    return (entry->host_va == host_va) && nop_flags;
}

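/*
 * Folds the mapping [start_pa, start_pa + size) into the pending list. The
 * list is kept sorted by start_pa and free of overlaps: parts of existing
 * entries not covered by the new range are preserved, opposite chunks cancel
 * out, and any leftover of the new range is appended at the tail.
 */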
static void hax_update_mapping(uint64_t start_pa, uint32_t size,
                               uint64_t host_va, uint8_t flags)
{
    uint64_t end_pa = start_pa + size;
    HAXMapping *entry, *next;

    QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
        uint32_t chunk_sz;
        if (start_pa >= entry->start_pa + entry->size) {
            /* the new range lies entirely past this entry; keep scanning */
            continue;
        }
        if (start_pa < entry->start_pa) {
            chunk_sz = end_pa <= entry->start_pa ? size
                       : entry->start_pa - start_pa;
            hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                      host_va, flags);
            start_pa += chunk_sz;
            host_va += chunk_sz;
            size -= chunk_sz;
        } else if (start_pa > entry->start_pa) {
            /* split the existing chunk at start_pa */
            chunk_sz = start_pa - entry->start_pa;
            hax_insert_mapping_before(entry, entry->start_pa, chunk_sz,
                                      entry->host_va, entry->flags);
            entry->start_pa += chunk_sz;
            entry->host_va += chunk_sz;
            entry->size -= chunk_sz;
        }
        /* now start_pa == entry->start_pa */
        chunk_sz = MIN(size, entry->size);
        if (chunk_sz) {
            bool nop = hax_mapping_is_opposite(entry, host_va, flags);
            bool partial = chunk_sz < entry->size;
            if (partial) {
                /* remove the beginning of the existing chunk */
                entry->start_pa += chunk_sz;
                entry->host_va += chunk_sz;
                entry->size -= chunk_sz;
                if (!nop) {
                    hax_insert_mapping_before(entry, start_pa, chunk_sz,
                                              host_va, flags);
                }
            } else { /* affects the full mapping entry */
                if (nop) { /* no change to this mapping, remove it */
                    QTAILQ_REMOVE(&mappings, entry, entry);
                    g_free(entry);
                } else { /* update mapping properties */
                    entry->host_va = host_va;
                    entry->flags = flags;
                }
            }
            start_pa += chunk_sz;
            host_va += chunk_sz;
            size -= chunk_sz;
        }
        if (!size) { /* we are done */
            break;
        }
    }
    if (size) { /* add the leftover */
        hax_insert_mapping_before(NULL, start_pa, size, host_va, flags);
    }
}

static void hax_process_section(MemoryRegionSection *section, uint8_t flags)
{
    MemoryRegion *mr = section->mr;
    hwaddr start_pa = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    unsigned int delta;
    uint64_t host_va;
    uint32_t max_mapping_size;

    /* We only care about RAM and ROM regions */
    if (!memory_region_is_ram(mr)) {
        if (memory_region_is_romd(mr)) {
            /* HAXM kernel module does not support ROMD yet */
            warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64,
                        start_pa, start_pa + size);
        }
        return;
    }

    /* Adjust start_pa and size so that they are page-aligned. (Cf
     * kvm_set_phys_mem() in kvm-all.c).
     */
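    /*
     * For example (hypothetical values, 4 KiB host pages): start_pa = 0x1234
     * gives delta = 0x1000 - 0x234 = 0xdcc, trimming the region to start at
     * 0x2000; an already page-aligned start_pa yields delta = 0x1000, which
     * the mask on the next line folds back to 0.
     */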
    delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
    delta &= ~qemu_real_host_page_mask;
    if (delta > size) {
        return;
    }
    start_pa += delta;
    size -= delta;
    size &= qemu_real_host_page_mask;
    if (!size || (start_pa & ~qemu_real_host_page_mask)) {
        return;
    }

    host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
              + section->offset_within_region + delta;
    if (memory_region_is_rom(section->mr)) {
        flags |= HAX_RAM_INFO_ROM;
    }

    /*
     * The kernel module interface uses 32-bit sizes:
     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram
     *
     * If the mapping size is longer than 32 bits, we can't process it in one
     * call into the kernel. Instead, we split the mapping into smaller ones,
     * and call hax_update_mapping() on each.
     */
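    /*
     * E.g. with 4 KiB host pages, max_mapping_size is 0xfffff000 bytes, so a
     * (hypothetical) 6 GiB section is pushed down in two calls: one of
     * 0xfffff000 bytes and one of the remaining 0x80001000 bytes.
     */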
    max_mapping_size = UINT32_MAX & qemu_real_host_page_mask;
    while (size > max_mapping_size) {
        hax_update_mapping(start_pa, max_mapping_size, host_va, flags);
        start_pa += max_mapping_size;
        size -= max_mapping_size;
        host_va += max_mapping_size;
    }
    /* Now size <= max_mapping_size */
    hax_update_mapping(start_pa, (uint32_t)size, host_va, flags);
}

static void hax_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    memory_region_ref(section->mr);
    hax_process_section(section, 0);
}

static void hax_region_del(MemoryListener *listener,
                           MemoryRegionSection *section)
{
    hax_process_section(section, HAX_RAM_INFO_INVALID);
    memory_region_unref(section->mr);
}

static void hax_transaction_begin(MemoryListener *listener)
{
    g_assert(QTAILQ_EMPTY(&mappings));
}

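/*
 * Flushes the pending mappings accumulated during the transaction: each
 * surviving list entry is pushed to the kernel module with one hax_set_ram()
 * call, then the list is emptied.
 */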
static void hax_transaction_commit(MemoryListener *listener)
{
    if (!QTAILQ_EMPTY(&mappings)) {
        HAXMapping *entry, *next;

        if (DEBUG_HAX_MEM) {
            hax_mapping_dump_list();
        }
        QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) {
            if (entry->flags & HAX_RAM_INFO_INVALID) {
                /* for unmapping, put the values expected by the kernel */
                entry->flags = HAX_RAM_INFO_INVALID;
                entry->host_va = 0;
            }
            if (hax_set_ram(entry->start_pa, entry->size,
                            entry->host_va, entry->flags)) {
                fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%"
                        PRIx32 " flags %02x\n", __func__, entry->start_pa,
                        entry->size, entry->flags);
            }
            QTAILQ_REMOVE(&mappings, entry, entry);
            g_free(entry);
        }
    }
}

/* currently we fake the dirty bitmap sync, always dirty */
static void hax_log_sync(MemoryListener *listener,
                         MemoryRegionSection *section)
{
    MemoryRegion *mr = section->mr;

    if (!memory_region_is_ram(mr)) {
        /* Skip MMIO regions */
        return;
    }

    memory_region_set_dirty(mr, 0, int128_get64(section->size));
}

static MemoryListener hax_memory_listener = {
    .begin = hax_transaction_begin,
    .commit = hax_transaction_commit,
    .region_add = hax_region_add,
    .region_del = hax_region_del,
    .log_sync = hax_log_sync,
    .priority = 10,
};

static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size)
{
    /*
     * We must register each RAM block with the HAXM kernel module, or
     * hax_set_ram() will fail for any mapping into the RAM block:
     * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram
     *
     * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all
     * host physical pages for the RAM block as part of this registration
     * process, hence the name hax_populate_ram().
     */
    if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) {
        fprintf(stderr, "HAX failed to populate RAM\n");
        abort();
    }
}

static struct RAMBlockNotifier hax_ram_notifier = {
    .ram_block_added = hax_ram_block_added,
};

void hax_memory_init(void)
{
    ram_block_notifier_add(&hax_ram_notifier);
    memory_listener_register(&hax_memory_listener, &address_space_memory);
}