#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
+#include "sysemu/numa.h"
+#include "sysemu/hw_accel.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
-#include "trace.h"
+#include "trace-root.h"
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#include <fcntl.h>
+#include <linux/falloc.h>
+#endif
+
#endif
#include "exec/cpu-all.h"
#include "qemu/rcu_queue.h"
MemoryListener tcg_as_listener;
};
+struct DirtyBitmapSnapshot {
+ ram_addr_t start;
+ ram_addr_t end;
+ unsigned long dirty[];
+};
+
#endif
#if !defined(CONFIG_USER_ONLY)
return dirty;
}
+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+ (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+ DirtyMemoryBlocks *blocks;
+ unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+ ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+ ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
+ DirtyBitmapSnapshot *snap;
+ unsigned long page, end, dest;
+
+ snap = g_malloc0(sizeof(*snap) +
+ ((last - first) >> (TARGET_PAGE_BITS + 3)));
+ snap->start = first;
+ snap->end = last;
+
+ page = first >> TARGET_PAGE_BITS;
+ end = last >> TARGET_PAGE_BITS;
+ dest = 0;
+
+ rcu_read_lock();
+
+ blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+ while (page < end) {
+ unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+ assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+ assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
+ offset >>= BITS_PER_LEVEL;
+
+ bitmap_copy_and_clear_atomic(snap->dirty + dest,
+ blocks->blocks[idx] + offset,
+ num);
+ page += num;
+ dest += num >> BITS_PER_LEVEL;
+ }
+
+ rcu_read_unlock();
+
+ if (tcg_enabled()) {
+ tlb_reset_dirty_range_all(start, length);
+ }
+
+ return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+ ram_addr_t start,
+ ram_addr_t length)
+{
+ unsigned long page, end;
+
+ assert(start >= snap->start);
+ assert(start + length <= snap->end);
+
+ end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+ page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+ while (page < end) {
+ if (test_bit(page, snap->dirty)) {
+ return true;
+ }
+ page++;
+ }
+ return false;
+}
+
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
MemoryRegionSection *section,
qemu_mutex_unlock(&ram_list.mutex);
}
+#ifdef __linux__
+/*
+ * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
+ * may or may not name the same files / on the same filesystem now as
+ * when we actually open and map them. Iterate over the file
+ * descriptors instead, and use qemu_fd_getpagesize().
+ */
+static int find_max_supported_pagesize(Object *obj, void *opaque)
+{
+ char *mem_path;
+ long *hpsize_min = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+ mem_path = object_property_get_str(obj, "mem-path", NULL);
+ if (mem_path) {
+ long hpsize = qemu_mempath_getpagesize(mem_path);
+ if (hpsize < *hpsize_min) {
+ *hpsize_min = hpsize;
+ }
+ } else {
+ *hpsize_min = getpagesize();
+ }
+ }
+
+ return 0;
+}
+
+long qemu_getrampagesize(void)
+{
+ long hpsize = LONG_MAX;
+ long mainrampagesize;
+ Object *memdev_root;
+
+ if (mem_path) {
+ mainrampagesize = qemu_mempath_getpagesize(mem_path);
+ } else {
+ mainrampagesize = getpagesize();
+ }
+
+ /* it's possible we have memory-backend objects with
+ * hugepage-backed RAM. these may get mapped into system
+ * address space via -numa parameters or memory hotplug
+ * hooks. we want to take these into account, but we
+ * also want to make sure these supported hugepage
+ * sizes are applicable across the entire range of memory
+ * we may boot from, so we take the min across all
+ * backends, and assume normal pages in cases where a
+ * backend isn't backed by hugepages.
+ */
+ memdev_root = object_resolve_path("/objects", NULL);
+ if (memdev_root) {
+ object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+ }
+ if (hpsize == LONG_MAX) {
+ /* No additional memory regions found ==> Report main RAM page size */
+ return mainrampagesize;
+ }
+
+ /* If NUMA is disabled or the NUMA nodes are not backed with a
+ * memory-backend, then there is at least one node using "normal" RAM,
+ * so if its page size is smaller we have got to report that size instead.
+ */
+ if (hpsize > mainrampagesize &&
+ (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
+ static bool warned;
+ if (!warned) {
+ error_report("Huge page support disabled (n/a for main memory).");
+ warned = true;
+ }
+ return mainrampagesize;
+ }
+
+ return hpsize;
+}
+#else
+long qemu_getrampagesize(void)
+{
+ return getpagesize();
+}
+#endif
+
#ifdef __linux__
static int64_t get_file_size(int fd)
{
}
if (mem_prealloc) {
- os_mem_prealloc(fd, area, memory, errp);
+ os_mem_prealloc(fd, area, memory, smp_cpus, errp);
if (errp && *errp) {
goto error;
}
return offset;
}
-ram_addr_t last_ram_offset(void)
+unsigned long last_ram_page(void)
{
RAMBlock *block;
ram_addr_t last = 0;
last = MAX(last, block->offset + block->max_length);
}
rcu_read_unlock();
- return last;
+ return last >> TARGET_PAGE_BITS;
}
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
return rb->idstr;
}
+bool qemu_ram_is_shared(RAMBlock *rb)
+{
+ return rb->flags & RAM_SHARED;
+}
+
/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
return rb->page_size;
}
+/* Returns the largest size of page in use */
+size_t qemu_ram_pagesize_largest(void)
+{
+ RAMBlock *block;
+ size_t largest = 0;
+
+ QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ largest = MAX(largest, qemu_ram_pagesize(block));
+ }
+
+ return largest;
+}
+
static int memory_try_enable_merging(void *addr, size_t len)
{
if (!machine_mem_merge(current_machine)) {
ram_addr_t old_ram_size, new_ram_size;
Error *err = NULL;
- old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+ old_ram_size = last_ram_page();
qemu_mutex_lock_ramlist();
new_block->offset = find_ram_offset(new_block->max_length);
new_ram_size = MAX(old_ram_size,
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
if (new_ram_size > old_ram_size) {
- migration_bitmap_extend(old_ram_size, new_ram_size);
dirty_memory_extend(old_ram_size, new_ram_size);
}
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
return;
}
vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
+ vaddr = cc->adjust_watchpoint_address(cpu, vaddr, len);
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
if (cpu_watchpoint_address_matches(wp, vaddr, len)
&& (wp->flags & flags)) {
}
cpu->watchpoint_hit = wp;
- /* The tb_lock will be reset when cpu_loop_exit or
- * cpu_loop_exit_noexc longjmp back into the cpu_exec
- * main loop.
+ /* Both tb_lock and iothread_mutex will be reset when
+ * cpu_loop_exit or cpu_loop_exit_noexc longjmp
+ * back into the cpu_exec main loop.
*/
tb_lock();
tb_check_watchpoint(cpu);
memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
NULL, UINT64_MAX);
+
+ /* io_mem_notdirty calls tb_invalidate_phys_page_fast,
+ * which can be called without the iothread mutex.
+ */
memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL,
NULL, UINT64_MAX);
+ memory_region_clear_global_locking(&io_mem_notdirty);
+
memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
NULL, UINT64_MAX);
}
break;
case 4:
/* 32 bit write access */
- val = ldl_p(buf);
+ val = (uint32_t)ldl_p(buf);
result |= memory_region_dispatch_write(mr, addr1, val, 4,
attrs);
break;
hwaddr len,
bool is_write)
{
- hwaddr l, xlat;
- MemoryRegion *mr;
- void *ptr;
-
- assert(len > 0);
-
- l = len;
- mr = address_space_translate(as, addr, &xlat, &l, is_write);
- if (!memory_access_is_direct(mr, is_write)) {
- return -EINVAL;
- }
-
- l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
- ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
-
- cache->xlat = xlat;
- cache->is_write = is_write;
- cache->mr = mr;
- cache->ptr = ptr;
- cache->len = l;
- memory_region_ref(cache->mr);
-
- return l;
+ cache->len = len;
+ cache->as = as;
+ cache->xlat = addr;
+ return len;
}
void address_space_cache_invalidate(MemoryRegionCache *cache,
hwaddr addr,
hwaddr access_len)
{
- assert(cache->is_write);
- invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
}
void address_space_cache_destroy(MemoryRegionCache *cache)
{
- if (!cache->mr) {
- return;
- }
-
- if (xen_enabled()) {
- xen_invalidate_map_cache_entry(cache->ptr);
- }
- memory_region_unref(cache->mr);
-}
-
-/* Called from RCU critical section. This function has the same
- * semantics as address_space_translate, but it only works on a
- * predefined range of a MemoryRegion that was mapped with
- * address_space_cache_init.
- */
-static inline MemoryRegion *address_space_translate_cached(
- MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
- hwaddr *plen, bool is_write)
-{
- assert(addr < cache->len && *plen <= cache->len - addr);
- *xlat = addr + cache->xlat;
- return cache->mr;
+ cache->as = NULL;
}
#define ARG1_DECL MemoryRegionCache *cache
#define ARG1 cache
#define SUFFIX _cached
-#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
+#define TRANSLATE(addr, ...) \
+ address_space_translate(cache->as, cache->xlat + (addr), __VA_ARGS__)
#define IS_DIRECT(mr, is_write) true
-#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat))
-#define INVALIDATE(mr, ofs, len) ((void)0)
-#define RCU_READ_LOCK() ((void)0)
-#define RCU_READ_UNLOCK() ((void)0)
+#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs)
+#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
+#define RCU_READ_LOCK() rcu_read_lock()
+#define RCU_READ_UNLOCK() rcu_read_unlock()
#include "memory_ldst.inc.c"
/* virtual memory access for debug (includes writing to ROM) */
hwaddr phys_addr;
target_ulong page;
+ cpu_synchronize_state(cpu);
while (len > 0) {
int asidx;
MemTxAttrs attrs;
* Allows code that needs to deal with migration bitmaps etc to still be built
* target independent.
*/
-size_t qemu_target_page_bits(void)
+size_t qemu_target_page_size(void)
{
- return TARGET_PAGE_BITS;
+ return TARGET_PAGE_SIZE;
}
#endif
rcu_read_unlock();
return ret;
}
+
+/*
+ * Unmap pages of memory from start to start+length such that
+ * they a) read as 0, b) Trigger whatever fault mechanism
+ * the OS provides for postcopy.
+ * The pages must be unmapped by the end of the function.
+ * Returns: 0 on success, none-0 on failure
+ *
+ */
+int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length)
+{
+ int ret = -1;
+
+ uint8_t *host_startaddr = rb->host + start;
+
+ if ((uintptr_t)host_startaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned start address: %p",
+ host_startaddr);
+ goto err;
+ }
+
+ if ((start + length) <= rb->used_length) {
+ uint8_t *host_endaddr = host_startaddr + length;
+ if ((uintptr_t)host_endaddr & (rb->page_size - 1)) {
+ error_report("ram_block_discard_range: Unaligned end address: %p",
+ host_endaddr);
+ goto err;
+ }
+
+ errno = ENOTSUP; /* If we are missing MADVISE etc */
+
+ if (rb->page_size == qemu_host_page_size) {
+#if defined(CONFIG_MADVISE)
+ /* Note: We need the madvise MADV_DONTNEED behaviour of definitely
+ * freeing the page.
+ */
+ ret = madvise(host_startaddr, length, MADV_DONTNEED);
+#endif
+ } else {
+ /* Huge page case - unfortunately it can't do DONTNEED, but
+ * it can do the equivalent by FALLOC_FL_PUNCH_HOLE in the
+ * huge page file.
+ */
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ start, length);
+#endif
+ }
+ if (ret) {
+ ret = -errno;
+ error_report("ram_block_discard_range: Failed to discard range "
+ "%s:%" PRIx64 " +%zx (%d)",
+ rb->idstr, start, length, ret);
+ }
+ } else {
+ error_report("ram_block_discard_range: Overrun block '%s' (%" PRIu64
+ "/%zx/" RAM_ADDR_FMT")",
+ rb->idstr, start, length, rb->used_length);
+ }
+
+err:
+ return ret;
+}
+
#endif