#include "qemu/cutils.h"
#include "cpu.h"
#include "exec/exec-all.h"
+#include "exec/target_page.h"
#include "tcg.h"
#include "hw/qdev-core.h"
#if !defined(CONFIG_USER_ONLY)
#include "exec/memory.h"
#include "exec/ioport.h"
#include "sysemu/dma.h"
+#include "sysemu/numa.h"
+#include "sysemu/hw_accel.h"
#include "exec/address-spaces.h"
#include "sysemu/xen-mapcache.h"
#include "trace-root.h"
#include "qemu/mmap-alloc.h"
#endif
+#include "monitor/monitor.h"
+
//#define DEBUG_SUBPAGE
#if !defined(CONFIG_USER_ONLY)
MemoryListener tcg_as_listener;
};
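+/*
+ * Point-in-time copy of a dirty bitmap range. dirty[] is a flexible
+ * array member, allocated with one bit per target page in [start, end);
+ * see cpu_physical_memory_snapshot_and_clear_dirty() for the sizing.
+ */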
+struct DirtyBitmapSnapshot {
+ ram_addr_t start;
+ ram_addr_t end;
+ unsigned long dirty[];
+};
+
#endif
#if !defined(CONFIG_USER_ONLY)
int128_getlo(section->size), addr);
}
-static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
- Node *nodes, MemoryRegionSection *sections)
+static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr)
{
- PhysPageEntry *p;
+ PhysPageEntry lp = d->phys_map, *p;
+ Node *nodes = d->map.nodes;
+ MemoryRegionSection *sections = d->map.sections;
hwaddr index = addr >> TARGET_PAGE_BITS;
int i;
section_covers_addr(section, addr)) {
update = false;
} else {
- section = phys_page_find(d->phys_map, addr, d->map.nodes,
- d->map.sections);
+ section = phys_page_find(d, addr);
update = true;
}
if (resolve_subpage && section->mr->subpage) {
}
/* Called from RCU critical section */
-IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
- bool is_write)
+static MemoryRegionSection address_space_do_translate(AddressSpace *as,
+ hwaddr addr,
+ hwaddr *xlat,
+ hwaddr *plen,
+ bool is_write,
+ bool is_mmio)
{
- IOMMUTLBEntry iotlb = {0};
+ IOMMUTLBEntry iotlb;
MemoryRegionSection *section;
MemoryRegion *mr;
for (;;) {
AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
- section = address_space_lookup_region(d, addr, false);
- addr = addr - section->offset_within_address_space
- + section->offset_within_region;
+ section = address_space_translate_internal(d, addr, &addr, plen, is_mmio);
mr = section->mr;
if (!mr->iommu_ops) {
break;
}
- iotlb = mr->iommu_ops->translate(mr, addr, is_write);
+ iotlb = mr->iommu_ops->translate(mr, addr, is_write ?
+ IOMMU_WO : IOMMU_RO);
+ addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+ | (addr & iotlb.addr_mask));
+ *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
if (!(iotlb.perm & (1 << is_write))) {
- iotlb.target_as = NULL;
- break;
+ goto translate_fail;
}
- addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
- | (addr & iotlb.addr_mask));
as = iotlb.target_as;
}
- return iotlb;
+ *xlat = addr;
+
+ return *section;
+
+translate_fail:
+ return (MemoryRegionSection) { .mr = &io_mem_unassigned };
}
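+/*
+ * Worked example for the IOMMU masking above (hypothetical numbers):
+ * with addr 0x12345678 and a 4 KiB mapping (addr_mask 0xfff,
+ * translated_addr 0xabcde000), the next-round address becomes
+ * (0xabcde000 & ~0xfff) | (0x12345678 & 0xfff) = 0xabcde678, and
+ * *plen is clamped to 0x988 so the access stops at the 4 KiB
+ * translation boundary.
+ */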
/* Called from RCU critical section */
-MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
- hwaddr *xlat, hwaddr *plen,
- bool is_write)
+IOMMUTLBEntry address_space_get_iotlb_entry(AddressSpace *as, hwaddr addr,
+ bool is_write)
{
- IOMMUTLBEntry iotlb;
- MemoryRegionSection *section;
- MemoryRegion *mr;
+ MemoryRegionSection section;
+ hwaddr xlat, plen;
- for (;;) {
- AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
- section = address_space_translate_internal(d, addr, &addr, plen, true);
- mr = section->mr;
+ /* Try to get the maximum page mask during translation. */
+ plen = (hwaddr)-1;
- if (!mr->iommu_ops) {
- break;
- }
+ /* This can never be MMIO. */
+ section = address_space_do_translate(as, addr, &xlat, &plen,
+ is_write, false);
- iotlb = mr->iommu_ops->translate(mr, addr, is_write);
- addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
- | (addr & iotlb.addr_mask));
- *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
- if (!(iotlb.perm & (1 << is_write))) {
- mr = &io_mem_unassigned;
- break;
- }
+ /* Illegal translation */
+ if (section.mr == &io_mem_unassigned) {
+ goto iotlb_fail;
+ }
- as = iotlb.target_as;
+ /* Convert memory region offset into address space offset */
+ xlat += section.offset_within_address_space -
+ section.offset_within_region;
+
+ if (plen == (hwaddr)-1) {
+ /*
+ * We use the default page size here; logically this only
+ * happens for identity mappings.
+ */
+ plen = TARGET_PAGE_SIZE;
}
+ /* Convert to address mask */
+ plen -= 1;
+
+ return (IOMMUTLBEntry) {
+ .target_as = section.address_space,
+ .iova = addr & ~plen,
+ .translated_addr = xlat & ~plen,
+ .addr_mask = plen,
+ /* IOTLBs are for DMAs, and DMA is only allowed on RAM. */
+ .perm = IOMMU_RW,
+ };
+
+iotlb_fail:
+ return (IOMMUTLBEntry) {0};
+}
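+/*
+ * Sketch of a caller (hypothetical, not part of this patch), e.g. a
+ * device-assignment path pre-translating a guest IOVA:
+ *
+ *     IOMMUTLBEntry entry;
+ *
+ *     rcu_read_lock();
+ *     entry = address_space_get_iotlb_entry(as, iova, true);
+ *     rcu_read_unlock();
+ *     if (entry.perm == IOMMU_NONE) {
+ *         ... translation failed, entry is all zeroes ...
+ *     }
+ */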
+
+/* Called from RCU critical section */
+MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
+ hwaddr *xlat, hwaddr *plen,
+ bool is_write)
+{
+ MemoryRegion *mr;
+ MemoryRegionSection section;
+
+ /* This can be MMIO, so set the MMIO flag. */
+ section = address_space_do_translate(as, addr, xlat, plen, is_write, true);
+ mr = section.mr;
+
if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
*plen = MIN(page, *plen);
}
- *xlat = addr;
return mr;
}
if (block && addr - block->offset < block->max_length) {
return block;
}
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
if (addr - block->offset < block->max_length) {
goto found;
}
return dirty;
}
+DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
+ (ram_addr_t start, ram_addr_t length, unsigned client)
+{
+ DirtyMemoryBlocks *blocks;
+ unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL);
+ ram_addr_t first = QEMU_ALIGN_DOWN(start, align);
+ ram_addr_t last = QEMU_ALIGN_UP(start + length, align);
+ DirtyBitmapSnapshot *snap;
+ unsigned long page, end, dest;
+
+ snap = g_malloc0(sizeof(*snap) +
+ ((last - first) >> (TARGET_PAGE_BITS + 3)));
+ snap->start = first;
+ snap->end = last;
+
+ page = first >> TARGET_PAGE_BITS;
+ end = last >> TARGET_PAGE_BITS;
+ dest = 0;
+
+ rcu_read_lock();
+
+ blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
+
+ while (page < end) {
+ unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
+ unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
+
+ assert(QEMU_IS_ALIGNED(offset, (1 << BITS_PER_LEVEL)));
+ assert(QEMU_IS_ALIGNED(num, (1 << BITS_PER_LEVEL)));
+ offset >>= BITS_PER_LEVEL;
+
+ bitmap_copy_and_clear_atomic(snap->dirty + dest,
+ blocks->blocks[idx] + offset,
+ num);
+ page += num;
+ dest += num >> BITS_PER_LEVEL;
+ }
+
+ rcu_read_unlock();
+
+ if (tcg_enabled()) {
+ tlb_reset_dirty_range_all(start, length);
+ }
+
+ return snap;
+}
+
+bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
+ ram_addr_t start,
+ ram_addr_t length)
+{
+ unsigned long page, end;
+
+ assert(start >= snap->start);
+ assert(start + length <= snap->end);
+
+ end = TARGET_PAGE_ALIGN(start + length - snap->start) >> TARGET_PAGE_BITS;
+ page = (start - snap->start) >> TARGET_PAGE_BITS;
+
+ while (page < end) {
+ if (test_bit(page, snap->dirty)) {
+ return true;
+ }
+ page++;
+ }
+ return false;
+}
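+/*
+ * Typical use, sketched (hypothetical display-update loop; everything
+ * except the two snapshot functions and DIRTY_MEMORY_VGA is
+ * illustrative):
+ *
+ *     DirtyBitmapSnapshot *snap;
+ *     int y;
+ *
+ *     snap = cpu_physical_memory_snapshot_and_clear_dirty(addr, size,
+ *                                                         DIRTY_MEMORY_VGA);
+ *     for (y = 0; y < height; y++) {
+ *         if (cpu_physical_memory_snapshot_get_dirty(snap, addr + y * stride,
+ *                                                    stride)) {
+ *             redraw_scanline(y);
+ *         }
+ *     }
+ *     g_free(snap);
+ */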
+
/* Called from RCU critical section */
hwaddr memory_region_section_get_iotlb(CPUState *cpu,
MemoryRegionSection *section,
subpage_t *subpage;
hwaddr base = section->offset_within_address_space
& TARGET_PAGE_MASK;
- MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
- d->map.nodes, d->map.sections);
+ MemoryRegionSection *existing = phys_page_find(d, base);
MemoryRegionSection subsection = {
.offset_within_address_space = base,
.size = int128_make64(TARGET_PAGE_SIZE),
qemu_mutex_unlock(&ram_list.mutex);
}
+void ram_block_dump(Monitor *mon)
+{
+ RAMBlock *block;
+ char *psize;
+
+ rcu_read_lock();
+ monitor_printf(mon, "%24s %8s %18s %18s %18s\n",
+ "Block Name", "PSize", "Offset", "Used", "Total");
+ RAMBLOCK_FOREACH(block) {
+ psize = size_to_str(block->page_size);
+ monitor_printf(mon, "%24s %8s 0x%016" PRIx64 " 0x%016" PRIx64
+ " 0x%016" PRIx64 "\n", block->idstr, psize,
+ (uint64_t)block->offset,
+ (uint64_t)block->used_length,
+ (uint64_t)block->max_length);
+ g_free(psize);
+ }
+ rcu_read_unlock();
+}
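+/*
+ * Example monitor output (illustrative values, spacing approximate):
+ *
+ *               Block Name    PSize             Offset               Used              Total
+ *                   pc.ram    4 KiB 0x0000000000000000 0x0000000008000000 0x0000000008000000
+ */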
+
+#ifdef __linux__
+/*
+ * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
+ * may or may not name the same files, or be on the same filesystem,
+ * by the time we actually open and map them. Iterate over the file
+ * descriptors instead, and use qemu_fd_getpagesize().
+ */
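+/* Note: despite the "max" in the name, this accumulates the smallest
+ * page size seen across the backends (opaque points at that minimum). */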
+static int find_max_supported_pagesize(Object *obj, void *opaque)
+{
+ char *mem_path;
+ long *hpsize_min = opaque;
+
+ if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
+ mem_path = object_property_get_str(obj, "mem-path", NULL);
+ if (mem_path) {
+ long hpsize = qemu_mempath_getpagesize(mem_path);
+ if (hpsize < *hpsize_min) {
+ *hpsize_min = hpsize;
+ }
+ } else {
+ *hpsize_min = getpagesize();
+ }
+ }
+
+ return 0;
+}
+
+long qemu_getrampagesize(void)
+{
+ long hpsize = LONG_MAX;
+ long mainrampagesize;
+ Object *memdev_root;
+
+ if (mem_path) {
+ mainrampagesize = qemu_mempath_getpagesize(mem_path);
+ } else {
+ mainrampagesize = getpagesize();
+ }
+
+ /* It's possible we have memory-backend objects with
+ * hugepage-backed RAM. These may get mapped into system
+ * address space via -numa parameters or memory hotplug
+ * hooks. We want to take these into account, but we
+ * also want to make sure the supported hugepage
+ * sizes are applicable across the entire range of memory
+ * we may boot from, so we take the min across all
+ * backends, and assume normal pages in cases where a
+ * backend isn't backed by hugepages.
+ */
+ memdev_root = object_resolve_path("/objects", NULL);
+ if (memdev_root) {
+ object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
+ }
+ if (hpsize == LONG_MAX) {
+ /* No additional memory regions found ==> Report main RAM page size */
+ return mainrampagesize;
+ }
+
+ /* If NUMA is disabled or the NUMA nodes are not backed with a
+ * memory-backend, then there is at least one node using "normal" RAM,
+ * so if its page size is smaller we have got to report that size instead.
+ */
+ if (hpsize > mainrampagesize &&
+ (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
+ static bool warned;
+ if (!warned) {
+ error_report("Huge page support disabled (n/a for main memory).");
+ warned = true;
+ }
+ return mainrampagesize;
+ }
+
+ return hpsize;
+}
+#else
+long qemu_getrampagesize(void)
+{
+ return getpagesize();
+}
+#endif
+
#ifdef __linux__
static int64_t get_file_size(int fd)
{
}
if (mem_prealloc) {
- os_mem_prealloc(fd, area, memory, errp);
+ os_mem_prealloc(fd, area, memory, smp_cpus, errp);
if (errp && *errp) {
goto error;
}
return 0;
}
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
ram_addr_t end, next = RAM_ADDR_MAX;
end = block->offset + block->max_length;
- QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(next_block) {
if (next_block->offset >= end) {
next = MIN(next, next_block->offset);
}
return offset;
}
-ram_addr_t last_ram_offset(void)
+unsigned long last_ram_page(void)
{
RAMBlock *block;
ram_addr_t last = 0;
rcu_read_lock();
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
last = MAX(last, block->offset + block->max_length);
}
rcu_read_unlock();
- return last;
+ return last >> TARGET_PAGE_BITS;
}
static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
return rb->idstr;
}
+bool qemu_ram_is_shared(RAMBlock *rb)
+{
+ return rb->flags & RAM_SHARED;
+}
+
/* Called with iothread lock held. */
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
{
pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
rcu_read_lock();
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
if (block != new_block &&
!strcmp(block->idstr, new_block->idstr)) {
fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
RAMBlock *block;
size_t largest = 0;
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
largest = MAX(largest, qemu_ram_pagesize(block));
}
ram_addr_t old_ram_size, new_ram_size;
Error *err = NULL;
- old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+ old_ram_size = last_ram_page();
qemu_mutex_lock_ramlist();
new_block->offset = find_ram_offset(new_block->max_length);
new_ram_size = MAX(old_ram_size,
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
if (new_ram_size > old_ram_size) {
- migration_bitmap_extend(old_ram_size, new_ram_size);
dirty_memory_extend(old_ram_size, new_ram_size);
}
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
*/
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
last_block = block;
if (block->max_length < new_block->max_length) {
break;
int flags;
void *area, *vaddr;
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
offset = addr - block->offset;
if (offset < block->max_length) {
vaddr = ramblock_ptr(block, offset);
* In that case just map until the end of the page.
*/
if (block->offset == 0) {
- return xen_map_cache(addr, 0, 0);
+ return xen_map_cache(addr, 0, 0, false);
}
- block->host = xen_map_cache(block->offset, block->max_length, 1);
+ block->host = xen_map_cache(block->offset, block->max_length, 1, false);
}
return ramblock_ptr(block, addr);
}
* In that case just map the requested area.
*/
if (block->offset == 0) {
- return xen_map_cache(addr, *size, 1);
+ return xen_map_cache(addr, *size, 1, true);
}
- block->host = xen_map_cache(block->offset, block->max_length, 1);
+ block->host = xen_map_cache(block->offset, block->max_length, 1, true);
}
return ramblock_ptr(block, addr);
goto found;
}
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
/* This case happens when the block is not mapped. */
if (block->host == NULL) {
continue;
{
RAMBlock *block;
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
if (!strcmp(name, block->idstr)) {
return block;
}
hwaddr len,
bool is_write)
{
- hwaddr l, xlat;
- MemoryRegion *mr;
- void *ptr;
-
- assert(len > 0);
-
- l = len;
- mr = address_space_translate(as, addr, &xlat, &l, is_write);
- if (!memory_access_is_direct(mr, is_write)) {
- return -EINVAL;
- }
-
- l = address_space_extend_translation(as, addr, len, mr, xlat, l, is_write);
- ptr = qemu_ram_ptr_length(mr->ram_block, xlat, &l);
-
- cache->xlat = xlat;
- cache->is_write = is_write;
- cache->mr = mr;
- cache->ptr = ptr;
- cache->len = l;
- memory_region_ref(cache->mr);
-
- return l;
+ cache->len = len;
+ cache->as = as;
+ cache->xlat = addr;
+ return len;
}
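+/* The cache is now just a record of the target coordinates: each access
+ * through it re-runs address_space_translate() (see the TRANSLATE macro
+ * below), so no host pointer or MemoryRegion reference is held between
+ * calls. */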
void address_space_cache_invalidate(MemoryRegionCache *cache,
hwaddr addr,
hwaddr access_len)
{
- assert(cache->is_write);
- invalidate_and_set_dirty(cache->mr, addr + cache->xlat, access_len);
}
void address_space_cache_destroy(MemoryRegionCache *cache)
{
- if (!cache->mr) {
- return;
- }
-
- if (xen_enabled()) {
- xen_invalidate_map_cache_entry(cache->ptr);
- }
- memory_region_unref(cache->mr);
- cache->mr = NULL;
-}
-
-/* Called from RCU critical section. This function has the same
- * semantics as address_space_translate, but it only works on a
- * predefined range of a MemoryRegion that was mapped with
- * address_space_cache_init.
- */
-static inline MemoryRegion *address_space_translate_cached(
- MemoryRegionCache *cache, hwaddr addr, hwaddr *xlat,
- hwaddr *plen, bool is_write)
-{
- assert(addr < cache->len && *plen <= cache->len - addr);
- *xlat = addr + cache->xlat;
- return cache->mr;
+ cache->as = NULL;
}
#define ARG1_DECL MemoryRegionCache *cache
#define ARG1 cache
#define SUFFIX _cached
-#define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__)
+#define TRANSLATE(addr, ...) \
+ address_space_translate(cache->as, cache->xlat + (addr), __VA_ARGS__)
#define IS_DIRECT(mr, is_write) true
-#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat))
-#define INVALIDATE(mr, ofs, len) ((void)0)
-#define RCU_READ_LOCK() ((void)0)
-#define RCU_READ_UNLOCK() ((void)0)
+#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs)
+#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len)
+#define RCU_READ_LOCK() rcu_read_lock()
+#define RCU_READ_UNLOCK() rcu_read_unlock()
#include "memory_ldst.inc.c"
/* virtual memory access for debug (includes writing to ROM) */
hwaddr phys_addr;
target_ulong page;
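+ /* Sync register state from the hw accelerator (e.g. KVM) so the
+ * debug page-table walk below sees the current CPU state. */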
+ cpu_synchronize_state(cpu);
while (len > 0) {
int asidx;
MemTxAttrs attrs;
* Allows code that needs to deal with migration bitmaps etc to still be built
* target independent.
*/
-size_t qemu_target_page_bits(void)
+size_t qemu_target_page_size(void)
+{
+ return TARGET_PAGE_SIZE;
+}
+
+int qemu_target_page_bits(void)
{
return TARGET_PAGE_BITS;
}
+int qemu_target_page_bits_min(void)
+{
+ return TARGET_PAGE_BITS_MIN;
+}
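+
+/*
+ * Sketch of target-independent use (hypothetical caller, e.g. migration
+ * code sizing structures without the TARGET_PAGE_* macros):
+ *
+ *     uint64_t npages = ram_bytes / qemu_target_page_size();
+ */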
#endif
/*
int ret = 0;
rcu_read_lock();
- QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
+ RAMBLOCK_FOREACH(block) {
ret = func(block->idstr, block->host, block->offset,
block->used_length, opaque);
if (ret) {