X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/b895de502717b83b4e5f089df617cb23530c4d2d..319bd5edb92addc725b2701d73381c415146c287:/exec.c diff --git a/exec.c b/exec.c index af90e914cf..4aaa14b075 100644 --- a/exec.c +++ b/exec.c @@ -16,7 +16,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ + #include "qemu/osdep.h" +#include "qemu-common.h" #include "qapi/error.h" #include "qemu/cutils.h" @@ -32,17 +34,18 @@ #endif #include "sysemu/kvm.h" #include "sysemu/sysemu.h" +#include "sysemu/tcg.h" #include "qemu/timer.h" #include "qemu/config-file.h" #include "qemu/error-report.h" +#include "qemu/qemu-print.h" #if defined(CONFIG_USER_ONLY) #include "qemu.h" #else /* !CONFIG_USER_ONLY */ -#include "hw/hw.h" #include "exec/memory.h" #include "exec/ioport.h" #include "sysemu/dma.h" -#include "sysemu/numa.h" +#include "sysemu/hostmem.h" #include "sysemu/hw_accel.h" #include "exec/address-spaces.h" #include "sysemu/xen-mapcache.h" @@ -87,26 +90,6 @@ AddressSpace address_space_memory; MemoryRegion io_mem_rom, io_mem_notdirty; static MemoryRegion io_mem_unassigned; - -/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ -#define RAM_PREALLOC (1 << 0) - -/* RAM is mmap-ed with MAP_SHARED */ -#define RAM_SHARED (1 << 1) - -/* Only a portion of RAM (used_length) is actually used, and migrated. - * This used_length size can change across reboots. - */ -#define RAM_RESIZEABLE (1 << 2) - -/* UFFDIO_ZEROPAGE is available on this RAMBlock to atomically - * zero the page and wake waiting processes. - * (Set during postcopy) - */ -#define RAM_UF_ZEROPAGE (1 << 3) - -/* RAM can be migrated */ -#define RAM_MIGRATABLE (1 << 4) #endif #ifdef TARGET_PAGE_BITS_VARY @@ -114,7 +97,8 @@ int target_page_bits; bool target_page_bits_decided; #endif -struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); +CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus); + /* current CPU in the current thread. It is only valid inside cpu_exec() */ __thread CPUState *current_cpu; @@ -402,12 +386,6 @@ static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr addr) } } -bool memory_region_is_unassigned(MemoryRegion *mr) -{ - return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device - && mr != &io_mem_watch; -} - /* Called from RCU critical section */ static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d, hwaddr addr, @@ -501,8 +479,15 @@ static MemoryRegionSection address_space_translate_iommu(IOMMUMemoryRegion *iomm do { hwaddr addr = *xlat; IOMMUMemoryRegionClass *imrc = memory_region_get_iommu_class_nocheck(iommu_mr); - IOMMUTLBEntry iotlb = imrc->translate(iommu_mr, addr, is_write ? - IOMMU_WO : IOMMU_RO); + int iommu_idx = 0; + IOMMUTLBEntry iotlb; + + if (imrc->attrs_to_index) { + iommu_idx = imrc->attrs_to_index(iommu_mr, attrs); + } + + iotlb = imrc->translate(iommu_mr, addr, is_write ? + IOMMU_WO : IOMMU_RO, iommu_idx); if (!(iotlb.perm & (1 << is_write))) { goto unassigned; @@ -646,18 +631,146 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat, return mr; } +typedef struct TCGIOMMUNotifier { + IOMMUNotifier n; + MemoryRegion *mr; + CPUState *cpu; + int iommu_idx; + bool active; +} TCGIOMMUNotifier; + +static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n); + + if (!notifier->active) { + return; + } + tlb_flush(notifier->cpu); + notifier->active = false; + /* We leave the notifier struct on the list to avoid reallocating it later. + * Generally the number of IOMMUs a CPU deals with will be small. + * In any case we can't unregister the iommu notifier from a notify + * callback. + */ +} + +static void tcg_register_iommu_notifier(CPUState *cpu, + IOMMUMemoryRegion *iommu_mr, + int iommu_idx) +{ + /* Make sure this CPU has an IOMMU notifier registered for this + * IOMMU/IOMMU index combination, so that we can flush its TLB + * when the IOMMU tells us the mappings we've cached have changed. + */ + MemoryRegion *mr = MEMORY_REGION(iommu_mr); + TCGIOMMUNotifier *notifier; + int i; + + for (i = 0; i < cpu->iommu_notifiers->len; i++) { + notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); + if (notifier->mr == mr && notifier->iommu_idx == iommu_idx) { + break; + } + } + if (i == cpu->iommu_notifiers->len) { + /* Not found, add a new entry at the end of the array */ + cpu->iommu_notifiers = g_array_set_size(cpu->iommu_notifiers, i + 1); + notifier = g_new0(TCGIOMMUNotifier, 1); + g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i) = notifier; + + notifier->mr = mr; + notifier->iommu_idx = iommu_idx; + notifier->cpu = cpu; + /* Rather than trying to register interest in the specific part + * of the iommu's address space that we've accessed and then + * expand it later as subsequent accesses touch more of it, we + * just register interest in the whole thing, on the assumption + * that iommu reconfiguration will be rare. + */ + iommu_notifier_init(¬ifier->n, + tcg_iommu_unmap_notify, + IOMMU_NOTIFIER_UNMAP, + 0, + HWADDR_MAX, + iommu_idx); + memory_region_register_iommu_notifier(notifier->mr, ¬ifier->n); + } + + if (!notifier->active) { + notifier->active = true; + } +} + +static void tcg_iommu_free_notifier_list(CPUState *cpu) +{ + /* Destroy the CPU's notifier list */ + int i; + TCGIOMMUNotifier *notifier; + + for (i = 0; i < cpu->iommu_notifiers->len; i++) { + notifier = g_array_index(cpu->iommu_notifiers, TCGIOMMUNotifier *, i); + memory_region_unregister_iommu_notifier(notifier->mr, ¬ifier->n); + g_free(notifier); + } + g_array_free(cpu->iommu_notifiers, true); +} + /* Called from RCU critical section */ MemoryRegionSection * address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr, - hwaddr *xlat, hwaddr *plen) + hwaddr *xlat, hwaddr *plen, + MemTxAttrs attrs, int *prot) { MemoryRegionSection *section; + IOMMUMemoryRegion *iommu_mr; + IOMMUMemoryRegionClass *imrc; + IOMMUTLBEntry iotlb; + int iommu_idx; AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch); - section = address_space_translate_internal(d, addr, xlat, plen, false); + for (;;) { + section = address_space_translate_internal(d, addr, &addr, plen, false); + + iommu_mr = memory_region_get_iommu(section->mr); + if (!iommu_mr) { + break; + } + + imrc = memory_region_get_iommu_class_nocheck(iommu_mr); + + iommu_idx = imrc->attrs_to_index(iommu_mr, attrs); + tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx); + /* We need all the permissions, so pass IOMMU_NONE so the IOMMU + * doesn't short-cut its translation table walk. + */ + iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx); + addr = ((iotlb.translated_addr & ~iotlb.addr_mask) + | (addr & iotlb.addr_mask)); + /* Update the caller's prot bits to remove permissions the IOMMU + * is giving us a failure response for. If we get down to no + * permissions left at all we can give up now. + */ + if (!(iotlb.perm & IOMMU_RO)) { + *prot &= ~(PAGE_READ | PAGE_EXEC); + } + if (!(iotlb.perm & IOMMU_WO)) { + *prot &= ~PAGE_WRITE; + } + + if (!*prot) { + goto translate_fail; + } + + d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as)); + } assert(!memory_region_is_iommu(section->mr)); + *xlat = addr; return section; + +translate_fail: + return &d->map.sections[PHYS_SECTION_UNASSIGNED]; } #endif @@ -816,6 +929,9 @@ void cpu_exec_unrealizefn(CPUState *cpu) if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { vmstate_unregister(NULL, &vmstate_cpu_common, cpu); } +#ifndef CONFIG_USER_ONLY + tcg_iommu_free_notifier_list(cpu); +#endif } Property cpu_common_props[] = { @@ -855,6 +971,7 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) tcg_target_initialized = true; cc->tcg_initialize(); } + tlb_init(cpu); #ifndef CONFIG_USER_ONLY if (qdev_get_vmsd(DEVICE(cpu)) == NULL) { @@ -863,17 +980,23 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) if (cc->vmsd != NULL) { vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu); } + + cpu->iommu_notifiers = g_array_new(false, true, sizeof(TCGIOMMUNotifier *)); #endif } -const char *parse_cpu_model(const char *cpu_model) +const char *parse_cpu_option(const char *cpu_option) { ObjectClass *oc; CPUClass *cc; gchar **model_pieces; const char *cpu_type; - model_pieces = g_strsplit(cpu_model, ",", 2); + model_pieces = g_strsplit(cpu_option, ",", 2); + if (!model_pieces[0]) { + error_report("-cpu option cannot be empty"); + exit(1); + } oc = cpu_class_by_name(CPU_RESOLVING_TYPE, model_pieces[0]); if (oc == NULL) { @@ -890,15 +1013,40 @@ const char *parse_cpu_model(const char *cpu_model) } #if defined(CONFIG_USER_ONLY) -static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) +void tb_invalidate_phys_addr(target_ulong addr) { mmap_lock(); - tb_lock(); - tb_invalidate_phys_page_range(pc, pc + 1, 0); - tb_unlock(); + tb_invalidate_phys_page_range(addr, addr + 1, 0); mmap_unlock(); } + +static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) +{ + tb_invalidate_phys_addr(pc); +} #else +void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs) +{ + ram_addr_t ram_addr; + MemoryRegion *mr; + hwaddr l = 1; + + if (!tcg_enabled()) { + return; + } + + rcu_read_lock(); + mr = address_space_translate(as, addr, &addr, &l, false, attrs); + if (!(memory_region_is_ram(mr) + || memory_region_is_romd(mr))) { + rcu_read_unlock(); + return; + } + ram_addr = memory_region_get_ram_addr(mr) + addr; + tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0); + rcu_read_unlock(); +} + static void breakpoint_invalidate(CPUState *cpu, target_ulong pc) { MemTxAttrs attrs; @@ -1114,7 +1262,7 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) fprintf(stderr, "qemu: fatal: "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); - cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP); + cpu_dump_state(cpu, stderr, CPU_DUMP_FPU | CPU_DUMP_CCOP); if (qemu_log_separate()) { qemu_log_lock(); qemu_log("qemu: fatal: "); @@ -1133,6 +1281,7 @@ void cpu_abort(CPUState *cpu, const char *fmt, ...) struct sigaction act; sigfillset(&act.sa_mask); act.sa_handler = SIG_DFL; + act.sa_flags = 0; sigaction(SIGABRT, &act, NULL); } #endif @@ -1186,6 +1335,7 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) RAMBlock *block; ram_addr_t end; + assert(tcg_enabled()); end = TARGET_PAGE_ALIGN(start + length); start &= TARGET_PAGE_MASK; @@ -1207,6 +1357,8 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, DirtyMemoryBlocks *blocks; unsigned long end, page; bool dirty = false; + RAMBlock *ramblock; + uint64_t mr_offset, mr_size; if (length == 0) { return false; @@ -1218,6 +1370,10 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, rcu_read_lock(); blocks = atomic_rcu_read(&ram_list.dirty_memory[client]); + ramblock = qemu_get_ram_block(start); + /* Range sanity check on the ramblock */ + assert(start >= ramblock->offset && + start + length <= ramblock->offset + ramblock->used_length); while (page < end) { unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE; @@ -1229,6 +1385,10 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, page += num; } + mr_offset = (ram_addr_t)(page << TARGET_PAGE_BITS) - ramblock->offset; + mr_size = (end - page) << TARGET_PAGE_BITS; + memory_region_clear_dirty_bitmap(ramblock->mr, mr_offset, mr_size); + rcu_read_unlock(); if (dirty && tcg_enabled()) { @@ -1239,9 +1399,10 @@ bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, } DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty - (ram_addr_t start, ram_addr_t length, unsigned client) + (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client) { DirtyMemoryBlocks *blocks; + ram_addr_t start = memory_region_get_ram_addr(mr) + offset; unsigned long align = 1UL << (TARGET_PAGE_BITS + BITS_PER_LEVEL); ram_addr_t first = QEMU_ALIGN_DOWN(start, align); ram_addr_t last = QEMU_ALIGN_UP(start + length, align); @@ -1283,6 +1444,8 @@ DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty tlb_reset_dirty_range_all(start, length); } + memory_region_clear_dirty_bitmap(mr, offset, length); + return snap; } @@ -1456,35 +1619,49 @@ static void register_multipage(FlatView *fv, phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index); } +/* + * The range in *section* may look like this: + * + * |s|PPPPPPP|s| + * + * where s stands for subpage and P for page. + */ void flatview_add_to_dispatch(FlatView *fv, MemoryRegionSection *section) { - MemoryRegionSection now = *section, remain = *section; + MemoryRegionSection remain = *section; Int128 page_size = int128_make64(TARGET_PAGE_SIZE); - if (now.offset_within_address_space & ~TARGET_PAGE_MASK) { - uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space) - - now.offset_within_address_space; + /* register first subpage */ + if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) { + uint64_t left = TARGET_PAGE_ALIGN(remain.offset_within_address_space) + - remain.offset_within_address_space; + MemoryRegionSection now = remain; now.size = int128_min(int128_make64(left), now.size); register_subpage(fv, &now); - } else { - now.size = int128_zero(); - } - while (int128_ne(remain.size, now.size)) { + if (int128_eq(remain.size, now.size)) { + return; + } remain.size = int128_sub(remain.size, now.size); remain.offset_within_address_space += int128_get64(now.size); remain.offset_within_region += int128_get64(now.size); - now = remain; - if (int128_lt(remain.size, page_size)) { - register_subpage(fv, &now); - } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) { - now.size = page_size; - register_subpage(fv, &now); - } else { - now.size = int128_and(now.size, int128_neg(page_size)); - register_multipage(fv, &now); + } + + /* register whole pages */ + if (int128_ge(remain.size, page_size)) { + MemoryRegionSection now = remain; + now.size = int128_and(now.size, int128_neg(page_size)); + register_multipage(fv, &now); + if (int128_eq(remain.size, now.size)) { + return; } + remain.size = int128_sub(remain.size, now.size); + remain.offset_within_address_space += int128_get64(now.size); + remain.offset_within_region += int128_get64(now.size); } + + /* register last subpage */ + register_subpage(fv, &remain); } void qemu_flush_coalesced_mmio_buffer(void) @@ -1530,14 +1707,15 @@ void ram_block_dump(Monitor *mon) * when we actually open and map them. Iterate over the file * descriptors instead, and use qemu_fd_getpagesize(). */ -static int find_max_supported_pagesize(Object *obj, void *opaque) +static int find_min_backend_pagesize(Object *obj, void *opaque) { long *hpsize_min = opaque; if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { - long hpsize = host_memory_backend_pagesize(MEMORY_BACKEND(obj)); + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + long hpsize = host_memory_backend_pagesize(backend); - if (hpsize < *hpsize_min) { + if (host_memory_backend_is_mapped(backend) && (hpsize < *hpsize_min)) { *hpsize_min = hpsize; } } @@ -1545,7 +1723,27 @@ static int find_max_supported_pagesize(Object *obj, void *opaque) return 0; } -long qemu_getrampagesize(void) +static int find_max_backend_pagesize(Object *obj, void *opaque) +{ + long *hpsize_max = opaque; + + if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) { + HostMemoryBackend *backend = MEMORY_BACKEND(obj); + long hpsize = host_memory_backend_pagesize(backend); + + if (host_memory_backend_is_mapped(backend) && (hpsize > *hpsize_max)) { + *hpsize_max = hpsize; + } + } + + return 0; +} + +/* + * TODO: We assume right now that all mapped host memory backends are + * used as RAM, however some might be used for different purposes. + */ +long qemu_minrampagesize(void) { long hpsize = LONG_MAX; long mainrampagesize; @@ -1565,7 +1763,7 @@ long qemu_getrampagesize(void) */ memdev_root = object_resolve_path("/objects", NULL); if (memdev_root) { - object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize); + object_child_foreach(memdev_root, find_min_backend_pagesize, &hpsize); } if (hpsize == LONG_MAX) { /* No additional memory regions found ==> Report main RAM page size */ @@ -1588,14 +1786,30 @@ long qemu_getrampagesize(void) return hpsize; } + +long qemu_maxrampagesize(void) +{ + long pagesize = qemu_mempath_getpagesize(mem_path); + Object *memdev_root = object_resolve_path("/objects", NULL); + + if (memdev_root) { + object_child_foreach(memdev_root, find_max_backend_pagesize, + &pagesize); + } + return pagesize; +} #else -long qemu_getrampagesize(void) +long qemu_minrampagesize(void) +{ + return getpagesize(); +} +long qemu_maxrampagesize(void) { return getpagesize(); } #endif -#ifdef __linux__ +#ifdef CONFIG_POSIX static int64_t get_file_size(int fd) { int64_t size = lseek(fd, 0, SEEK_END); @@ -1672,6 +1886,7 @@ static void *file_ram_alloc(RAMBlock *block, bool truncate, Error **errp) { + MachineState *ms = MACHINE(qdev_get_machine()); void *area; block->page_size = qemu_fd_getpagesize(fd); @@ -1680,6 +1895,10 @@ static void *file_ram_alloc(RAMBlock *block, " must be multiples of page size 0x%zx", block->mr->align, block->page_size); return NULL; + } else if (block->mr->align && !is_power_of_2(block->mr->align)) { + error_setg(errp, "alignment 0x%" PRIx64 + " must be a power of two", block->mr->align); + return NULL; } block->mr->align = MAX(block->page_size, block->mr->align); #if defined(__s390x__) @@ -1716,7 +1935,7 @@ static void *file_ram_alloc(RAMBlock *block, } area = qemu_ram_mmap(fd, memory, block->mr->align, - block->flags & RAM_SHARED); + block->flags & RAM_SHARED, block->flags & RAM_PMEM); if (area == MAP_FAILED) { error_setg_errno(errp, errno, "unable to map backing store for guest RAM"); @@ -1724,9 +1943,9 @@ static void *file_ram_alloc(RAMBlock *block, } if (mem_prealloc) { - os_mem_prealloc(fd, area, memory, smp_cpus, errp); + os_mem_prealloc(fd, area, memory, ms->smp.cpus, errp); if (errp && *errp) { - qemu_ram_munmap(area, memory); + qemu_ram_munmap(fd, area, memory); return NULL; } } @@ -1792,7 +2011,7 @@ static ram_addr_t find_ram_offset(ram_addr_t size) return offset; } -unsigned long last_ram_page(void) +static unsigned long last_ram_page(void) { RAMBlock *block; ram_addr_t last = 0; @@ -1825,6 +2044,21 @@ const char *qemu_ram_get_idstr(RAMBlock *rb) return rb->idstr; } +void *qemu_ram_get_host_addr(RAMBlock *rb) +{ + return rb->host; +} + +ram_addr_t qemu_ram_get_offset(RAMBlock *rb) +{ + return rb->offset; +} + +ram_addr_t qemu_ram_get_used_length(RAMBlock *rb) +{ + return rb->used_length; +} + bool qemu_ram_is_shared(RAMBlock *rb) { return rb->flags & RAM_SHARED; @@ -2087,15 +2321,18 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared) } } -#ifdef __linux__ +#ifdef CONFIG_POSIX RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, - bool share, int fd, + uint32_t ram_flags, int fd, Error **errp) { RAMBlock *new_block; Error *local_err = NULL; int64_t file_size; + /* Just support these ram flags by now. */ + assert((ram_flags & ~(RAM_SHARED | RAM_PMEM)) == 0); + if (xen_enabled()) { error_setg(errp, "-mem-path not supported with Xen"); return NULL; @@ -2131,14 +2368,14 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, new_block->mr = mr; new_block->used_length = size; new_block->max_length = size; - new_block->flags = share ? RAM_SHARED : 0; + new_block->flags = ram_flags; new_block->host = file_ram_alloc(new_block, size, fd, !file_size, errp); if (!new_block->host) { g_free(new_block); return NULL; } - ram_block_add(new_block, &local_err, share); + ram_block_add(new_block, &local_err, ram_flags & RAM_SHARED); if (local_err) { g_free(new_block); error_propagate(errp, local_err); @@ -2150,7 +2387,7 @@ RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr, RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, - bool share, const char *mem_path, + uint32_t ram_flags, const char *mem_path, Error **errp) { int fd; @@ -2162,7 +2399,7 @@ RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr, return NULL; } - block = qemu_ram_alloc_from_fd(size, mr, share, fd, errp); + block = qemu_ram_alloc_from_fd(size, mr, ram_flags, fd, errp); if (!block) { if (created) { unlink(mem_path); @@ -2244,7 +2481,7 @@ static void reclaim_ramblock(RAMBlock *block) xen_invalidate_map_cache_entry(block->host); #ifndef _WIN32 } else if (block->fd >= 0) { - qemu_ram_munmap(block->host, block->max_length); + qemu_ram_munmap(block->fd, block->host, block->max_length); close(block->fd); #endif } else { @@ -2505,21 +2742,22 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi, ndi->ram_addr = ram_addr; ndi->mem_vaddr = mem_vaddr; ndi->size = size; - ndi->locked = false; + ndi->pages = NULL; assert(tcg_enabled()); if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) { - ndi->locked = true; - tb_lock(); - tb_invalidate_phys_page_fast(ram_addr, size); + ndi->pages = page_collection_lock(ram_addr, ram_addr + size); + tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size); } } /* Called within RCU critical section. */ void memory_notdirty_write_complete(NotDirtyInfo *ndi) { - if (ndi->locked) { - tb_unlock(); + if (ndi->pages) { + assert(tcg_enabled()); + page_collection_unlock(ndi->pages); + ndi->pages = NULL; } /* Set both VGA and migration bits for simplicity and to remove @@ -2543,22 +2781,7 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr, memory_notdirty_write_prepare(&ndi, current_cpu, current_cpu->mem_io_vaddr, ram_addr, size); - switch (size) { - case 1: - stb_p(qemu_map_ram_ptr(NULL, ram_addr), val); - break; - case 2: - stw_p(qemu_map_ram_ptr(NULL, ram_addr), val); - break; - case 4: - stl_p(qemu_map_ram_ptr(NULL, ram_addr), val); - break; - case 8: - stq_p(qemu_map_ram_ptr(NULL, ram_addr), val); - break; - default: - abort(); - } + stn_p(qemu_map_ram_ptr(NULL, ram_addr), size, val); memory_notdirty_write_complete(&ndi); } @@ -2621,18 +2844,16 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags) } cpu->watchpoint_hit = wp; - /* Both tb_lock and iothread_mutex will be reset when - * cpu_loop_exit or cpu_loop_exit_noexc longjmp - * back into the cpu_exec main loop. - */ - tb_lock(); + mmap_lock(); tb_check_watchpoint(cpu); if (wp->flags & BP_STOP_BEFORE_ACCESS) { cpu->exception_index = EXCP_DEBUG; + mmap_unlock(); cpu_loop_exit(cpu); } else { /* Force execution of one insn next time. */ cpu->cflags_next_tb = 1 | curr_cflags(); + mmap_unlock(); cpu_loop_exit_noexc(cpu); } } @@ -2717,10 +2938,10 @@ static const MemoryRegionOps watch_mem_ops = { }; static MemTxResult flatview_read(FlatView *fv, hwaddr addr, - MemTxAttrs attrs, uint8_t *buf, int len); + MemTxAttrs attrs, uint8_t *buf, hwaddr len); static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, - const uint8_t *buf, int len); -static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len, + const uint8_t *buf, hwaddr len); +static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, bool is_write, MemTxAttrs attrs); static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data, @@ -2738,22 +2959,8 @@ static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data, if (res) { return res; } - switch (len) { - case 1: - *data = ldub_p(buf); - return MEMTX_OK; - case 2: - *data = lduw_p(buf); - return MEMTX_OK; - case 4: - *data = ldl_p(buf); - return MEMTX_OK; - case 8: - *data = ldq_p(buf); - return MEMTX_OK; - default: - abort(); - } + *data = ldn_p(buf, len); + return MEMTX_OK; } static MemTxResult subpage_write(void *opaque, hwaddr addr, @@ -2767,22 +2974,7 @@ static MemTxResult subpage_write(void *opaque, hwaddr addr, " value %"PRIx64"\n", __func__, subpage, len, addr, value); #endif - switch (len) { - case 1: - stb_p(buf, value); - break; - case 2: - stw_p(buf, value); - break; - case 4: - stl_p(buf, value); - break; - case 8: - stq_p(buf, value); - break; - default: - abort(); - } + stn_p(buf, len, value); return flatview_write(subpage->fv, addr + subpage->base, attrs, buf, len); } @@ -2896,14 +3088,15 @@ static const MemoryRegionOps readonly_mem_ops = { }, }; -MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs) +MemoryRegionSection *iotlb_to_section(CPUState *cpu, + hwaddr index, MemTxAttrs attrs) { int asidx = cpu_asidx_from_attrs(cpu, attrs); CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx]; AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch); MemoryRegionSection *sections = d->map.sections; - return sections[index & ~TARGET_PAGE_MASK].mr; + return §ions[index & ~TARGET_PAGE_MASK]; } static void io_mem_init(void) @@ -2954,6 +3147,7 @@ static void tcg_commit(MemoryListener *listener) CPUAddressSpace *cpuas; AddressSpaceDispatch *d; + assert(tcg_enabled()); /* since each CPU stores ram addresses in its TLB cache, we must reset the modified entries */ cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener); @@ -2995,10 +3189,10 @@ MemoryRegion *get_system_io(void) /* physical memory access (slow version, mainly for debug) */ #if defined(CONFIG_USER_ONLY) int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, - uint8_t *buf, int len, int is_write) + uint8_t *buf, target_ulong len, int is_write) { - int l, flags; - target_ulong page; + int flags; + target_ulong l, page; void * p; while (len > 0) { @@ -3051,14 +3245,25 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, } if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { assert(tcg_enabled()); - tb_lock(); tb_invalidate_phys_range(addr, addr + length); - tb_unlock(); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); } cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); } +void memory_region_flush_rom_device(MemoryRegion *mr, hwaddr addr, hwaddr size) +{ + /* + * In principle this function would work on other memory region types too, + * but the ROM device use case is the only one where this operation is + * necessary. Other memory regions should use the + * address_space_read/write() APIs. + */ + assert(memory_region_is_romd(mr)); + + invalidate_and_set_dirty(mr, addr, size); +} + static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) { unsigned access_size_max = mr->ops->valid.max_access_size; @@ -3113,7 +3318,7 @@ static bool prepare_mmio_access(MemoryRegion *mr) static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, MemTxAttrs attrs, const uint8_t *buf, - int len, hwaddr addr1, + hwaddr len, hwaddr addr1, hwaddr l, MemoryRegion *mr) { uint8_t *ptr; @@ -3127,34 +3332,8 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, l = memory_access_size(mr, l, addr1); /* XXX: could force current_cpu to NULL to avoid potential bugs */ - switch (l) { - case 8: - /* 64 bit write access */ - val = ldq_p(buf); - result |= memory_region_dispatch_write(mr, addr1, val, 8, - attrs); - break; - case 4: - /* 32 bit write access */ - val = (uint32_t)ldl_p(buf); - result |= memory_region_dispatch_write(mr, addr1, val, 4, - attrs); - break; - case 2: - /* 16 bit write access */ - val = lduw_p(buf); - result |= memory_region_dispatch_write(mr, addr1, val, 2, - attrs); - break; - case 1: - /* 8 bit write access */ - val = ldub_p(buf); - result |= memory_region_dispatch_write(mr, addr1, val, 1, - attrs); - break; - default: - abort(); - } + val = ldn_p(buf, l); + result |= memory_region_dispatch_write(mr, addr1, val, l, attrs); } else { /* RAM case */ ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); @@ -3184,7 +3363,7 @@ static MemTxResult flatview_write_continue(FlatView *fv, hwaddr addr, /* Called from RCU critical section. */ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, - const uint8_t *buf, int len) + const uint8_t *buf, hwaddr len) { hwaddr l; hwaddr addr1; @@ -3202,7 +3381,7 @@ static MemTxResult flatview_write(FlatView *fv, hwaddr addr, MemTxAttrs attrs, /* Called within RCU critical section. */ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, MemTxAttrs attrs, uint8_t *buf, - int len, hwaddr addr1, hwaddr l, + hwaddr len, hwaddr addr1, hwaddr l, MemoryRegion *mr) { uint8_t *ptr; @@ -3215,34 +3394,8 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, /* I/O case */ release_lock |= prepare_mmio_access(mr); l = memory_access_size(mr, l, addr1); - switch (l) { - case 8: - /* 64 bit read access */ - result |= memory_region_dispatch_read(mr, addr1, &val, 8, - attrs); - stq_p(buf, val); - break; - case 4: - /* 32 bit read access */ - result |= memory_region_dispatch_read(mr, addr1, &val, 4, - attrs); - stl_p(buf, val); - break; - case 2: - /* 16 bit read access */ - result |= memory_region_dispatch_read(mr, addr1, &val, 2, - attrs); - stw_p(buf, val); - break; - case 1: - /* 8 bit read access */ - result |= memory_region_dispatch_read(mr, addr1, &val, 1, - attrs); - stb_p(buf, val); - break; - default: - abort(); - } + result |= memory_region_dispatch_read(mr, addr1, &val, l, attrs); + stn_p(buf, l, val); } else { /* RAM case */ ptr = qemu_ram_ptr_length(mr->ram_block, addr1, &l, false); @@ -3271,7 +3424,7 @@ MemTxResult flatview_read_continue(FlatView *fv, hwaddr addr, /* Called from RCU critical section. */ static MemTxResult flatview_read(FlatView *fv, hwaddr addr, - MemTxAttrs attrs, uint8_t *buf, int len) + MemTxAttrs attrs, uint8_t *buf, hwaddr len) { hwaddr l; hwaddr addr1; @@ -3284,7 +3437,7 @@ static MemTxResult flatview_read(FlatView *fv, hwaddr addr, } MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, - MemTxAttrs attrs, uint8_t *buf, int len) + MemTxAttrs attrs, uint8_t *buf, hwaddr len) { MemTxResult result = MEMTX_OK; FlatView *fv; @@ -3301,7 +3454,7 @@ MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr, MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, - const uint8_t *buf, int len) + const uint8_t *buf, hwaddr len) { MemTxResult result = MEMTX_OK; FlatView *fv; @@ -3317,7 +3470,7 @@ MemTxResult address_space_write(AddressSpace *as, hwaddr addr, } MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, - uint8_t *buf, int len, bool is_write) + uint8_t *buf, hwaddr len, bool is_write) { if (is_write) { return address_space_write(as, addr, attrs, buf, len); @@ -3327,7 +3480,7 @@ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, } void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, - int len, int is_write) + hwaddr len, int is_write) { address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED, buf, len, is_write); @@ -3338,8 +3491,12 @@ enum write_rom_type { FLUSH_CACHE, }; -static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as, - hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type) +static inline MemTxResult address_space_write_rom_internal(AddressSpace *as, + hwaddr addr, + MemTxAttrs attrs, + const uint8_t *buf, + hwaddr len, + enum write_rom_type type) { hwaddr l; uint8_t *ptr; @@ -3349,8 +3506,7 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as, rcu_read_lock(); while (len > 0) { l = len; - mr = address_space_translate(as, addr, &addr1, &l, true, - MEMTXATTRS_UNSPECIFIED); + mr = address_space_translate(as, addr, &addr1, &l, true, attrs); if (!(memory_region_is_ram(mr) || memory_region_is_romd(mr))) { @@ -3373,16 +3529,19 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as, addr += l; } rcu_read_unlock(); + return MEMTX_OK; } /* used for ROM loading : can write in RAM and ROM */ -void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr, - const uint8_t *buf, int len) +MemTxResult address_space_write_rom(AddressSpace *as, hwaddr addr, + MemTxAttrs attrs, + const uint8_t *buf, hwaddr len) { - cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA); + return address_space_write_rom_internal(as, addr, attrs, + buf, len, WRITE_DATA); } -void cpu_flush_icache_range(hwaddr start, int len) +void cpu_flush_icache_range(hwaddr start, hwaddr len) { /* * This function should do the same thing as an icache flush that was @@ -3394,8 +3553,9 @@ void cpu_flush_icache_range(hwaddr start, int len) return; } - cpu_physical_memory_write_rom_internal(&address_space_memory, - start, NULL, len, FLUSH_CACHE); + address_space_write_rom_internal(&address_space_memory, + start, MEMTXATTRS_UNSPECIFIED, + NULL, len, FLUSH_CACHE); } typedef struct { @@ -3414,7 +3574,7 @@ typedef struct MapClient { } MapClient; QemuMutex map_client_list_lock; -static QLIST_HEAD(map_client_list, MapClient) map_client_list +static QLIST_HEAD(, MapClient) map_client_list = QLIST_HEAD_INITIALIZER(map_client_list); static void cpu_unregister_map_client_do(MapClient *client) @@ -3484,7 +3644,7 @@ static void cpu_notify_map_clients(void) qemu_mutex_unlock(&map_client_list_lock); } -static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len, +static bool flatview_access_valid(FlatView *fv, hwaddr addr, hwaddr len, bool is_write, MemTxAttrs attrs) { MemoryRegion *mr; @@ -3507,7 +3667,7 @@ static bool flatview_access_valid(FlatView *fv, hwaddr addr, int len, } bool address_space_access_valid(AddressSpace *as, hwaddr addr, - int len, bool is_write, + hwaddr len, bool is_write, MemTxAttrs attrs) { FlatView *fv; @@ -3659,9 +3819,6 @@ void cpu_physical_memory_unmap(void *buffer, hwaddr len, #define ARG1 as #define SUFFIX #define TRANSLATE(...) address_space_translate(as, __VA_ARGS__) -#define IS_DIRECT(mr, is_write) memory_access_is_direct(mr, is_write) -#define MAP_RAM(mr, ofs) qemu_map_ram_ptr((mr)->ram_block, ofs) -#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len) #define RCU_READ_LOCK(...) rcu_read_lock() #define RCU_READ_UNLOCK(...) rcu_read_unlock() #include "memory_ldst.inc.c" @@ -3763,7 +3920,7 @@ static inline MemoryRegion *address_space_translate_cached( */ void address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, - void *buf, int len) + void *buf, hwaddr len) { hwaddr addr1, l; MemoryRegion *mr; @@ -3781,7 +3938,7 @@ address_space_read_cached_slow(MemoryRegionCache *cache, hwaddr addr, */ void address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, - const void *buf, int len) + const void *buf, hwaddr len) { hwaddr addr1, l; MemoryRegion *mr; @@ -3798,20 +3955,16 @@ address_space_write_cached_slow(MemoryRegionCache *cache, hwaddr addr, #define ARG1 cache #define SUFFIX _cached_slow #define TRANSLATE(...) address_space_translate_cached(cache, __VA_ARGS__) -#define IS_DIRECT(mr, is_write) memory_access_is_direct(mr, is_write) -#define MAP_RAM(mr, ofs) (cache->ptr + (ofs - cache->xlat)) -#define INVALIDATE(mr, ofs, len) invalidate_and_set_dirty(mr, ofs, len) #define RCU_READ_LOCK() ((void)0) #define RCU_READ_UNLOCK() ((void)0) #include "memory_ldst.inc.c" /* virtual memory access for debug (includes writing to ROM) */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, - uint8_t *buf, int len, int is_write) + uint8_t *buf, target_ulong len, int is_write) { - int l; hwaddr phys_addr; - target_ulong page; + target_ulong l, page; cpu_synchronize_state(cpu); while (len > 0) { @@ -3829,12 +3982,11 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, l = len; phys_addr += (addr & ~TARGET_PAGE_MASK); if (is_write) { - cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as, - phys_addr, buf, l); + address_space_write_rom(cpu->cpu_ases[asidx].as, phys_addr, + attrs, buf, l); } else { address_space_rw(cpu->cpu_ases[asidx].as, phys_addr, - MEMTXATTRS_UNSPECIFIED, - buf, l, 0); + attrs, buf, l, 0); } len -= l; buf += l; @@ -3863,11 +4015,6 @@ int qemu_target_page_bits_min(void) } #endif -/* - * A helper function for the _utterly broken_ virtio device model to find out if - * it's running on a big endian machine. Don't do this at home kids! - */ -bool target_words_bigendian(void); bool target_words_bigendian(void) { #if defined(TARGET_WORDS_BIGENDIAN) @@ -3901,28 +4048,7 @@ int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque) rcu_read_lock(); RAMBLOCK_FOREACH(block) { - ret = func(block->idstr, block->host, block->offset, - block->used_length, opaque); - if (ret) { - break; - } - } - rcu_read_unlock(); - return ret; -} - -int qemu_ram_foreach_migratable_block(RAMBlockIterFunc func, void *opaque) -{ - RAMBlock *block; - int ret = 0; - - rcu_read_lock(); - RAMBLOCK_FOREACH(block) { - if (!qemu_ram_is_migratable(block)) { - continue; - } - ret = func(block->idstr, block->host, block->offset, - block->used_length, opaque); + ret = func(block, opaque); if (ret) { break; } @@ -4026,6 +4152,11 @@ err: return ret; } +bool ramblock_is_pmem(RAMBlock *rb) +{ + return rb->flags & RAM_PMEM; +} + #endif void page_size_init(void) @@ -4043,42 +4174,41 @@ void page_size_init(void) #if !defined(CONFIG_USER_ONLY) -static void mtree_print_phys_entries(fprintf_function mon, void *f, - int start, int end, int skip, int ptr) +static void mtree_print_phys_entries(int start, int end, int skip, int ptr) { if (start == end - 1) { - mon(f, "\t%3d ", start); + qemu_printf("\t%3d ", start); } else { - mon(f, "\t%3d..%-3d ", start, end - 1); + qemu_printf("\t%3d..%-3d ", start, end - 1); } - mon(f, " skip=%d ", skip); + qemu_printf(" skip=%d ", skip); if (ptr == PHYS_MAP_NODE_NIL) { - mon(f, " ptr=NIL"); + qemu_printf(" ptr=NIL"); } else if (!skip) { - mon(f, " ptr=#%d", ptr); + qemu_printf(" ptr=#%d", ptr); } else { - mon(f, " ptr=[%d]", ptr); + qemu_printf(" ptr=[%d]", ptr); } - mon(f, "\n"); + qemu_printf("\n"); } #define MR_SIZE(size) (int128_nz(size) ? (hwaddr)int128_get64( \ int128_sub((size), int128_one())) : 0) -void mtree_print_dispatch(fprintf_function mon, void *f, - AddressSpaceDispatch *d, MemoryRegion *root) +void mtree_print_dispatch(AddressSpaceDispatch *d, MemoryRegion *root) { int i; - mon(f, " Dispatch\n"); - mon(f, " Physical sections\n"); + qemu_printf(" Dispatch\n"); + qemu_printf(" Physical sections\n"); for (i = 0; i < d->map.sections_nb; ++i) { MemoryRegionSection *s = d->map.sections + i; const char *names[] = { " [unassigned]", " [not dirty]", " [ROM]", " [watch]" }; - mon(f, " #%d @" TARGET_FMT_plx ".." TARGET_FMT_plx " %s%s%s%s%s", + qemu_printf(" #%d @" TARGET_FMT_plx ".." TARGET_FMT_plx + " %s%s%s%s%s", i, s->offset_within_address_space, s->offset_within_address_space + MR_SIZE(s->mr->size), @@ -4089,20 +4219,20 @@ void mtree_print_dispatch(fprintf_function mon, void *f, s->mr->is_iommu ? " [iommu]" : ""); if (s->mr->alias) { - mon(f, " alias=%s", s->mr->alias->name ? + qemu_printf(" alias=%s", s->mr->alias->name ? s->mr->alias->name : "noname"); } - mon(f, "\n"); + qemu_printf("\n"); } - mon(f, " Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n", + qemu_printf(" Nodes (%d bits per level, %d levels) ptr=[%d] skip=%d\n", P_L2_BITS, P_L2_LEVELS, d->phys_map.ptr, d->phys_map.skip); for (i = 0; i < d->map.nodes_nb; ++i) { int j, jprev; PhysPageEntry prev; Node *n = d->map.nodes + i; - mon(f, " [%d]\n", i); + qemu_printf(" [%d]\n", i); for (j = 0, jprev = 0, prev = *n[0]; j < ARRAY_SIZE(*n); ++j) { PhysPageEntry *pe = *n + j; @@ -4111,14 +4241,14 @@ void mtree_print_dispatch(fprintf_function mon, void *f, continue; } - mtree_print_phys_entries(mon, f, jprev, j, prev.skip, prev.ptr); + mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr); jprev = j; prev = *pe; } if (jprev != ARRAY_SIZE(*n)) { - mtree_print_phys_entries(mon, f, jprev, j, prev.skip, prev.ptr); + mtree_print_phys_entries(jprev, j, prev.skip, prev.ptr); } } }