4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
53 #include "exec/ram_addr.h"
54 #include "qemu/cache-utils.h"
56 #include "qemu/range.h"
58 //#define DEBUG_SUBPAGE
60 #if !defined(CONFIG_USER_ONLY)
61 static bool in_migration;
63 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
65 static MemoryRegion *system_memory;
66 static MemoryRegion *system_io;
68 AddressSpace address_space_io;
69 AddressSpace address_space_memory;
71 MemoryRegion io_mem_rom, io_mem_notdirty;
72 static MemoryRegion io_mem_unassigned;
76 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
77 /* current CPU in the current thread. It is only valid inside
79 DEFINE_TLS(CPUState *, current_cpu);
80 /* 0 = Do not count executed instructions.
81 1 = Precise instruction counting.
82 2 = Adaptive rate instruction counting. */
85 #if !defined(CONFIG_USER_ONLY)
87 typedef struct PhysPageEntry PhysPageEntry;
89 struct PhysPageEntry {
90 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
92 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
96 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
98 /* Size of the L2 (and L3, etc) page tables. */
99 #define ADDR_SPACE_BITS 64
102 #define P_L2_SIZE (1 << P_L2_BITS)
104 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
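/* Illustrative sketch (not part of the original file): how the level count
 * falls out of the formula above. Assuming, for example, TARGET_PAGE_BITS of
 * 12 and 9 bits per level (P_L2_BITS, defined earlier in the full source),
 * the 64-bit address space needs:
 *
 *     P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = (51 / 9) + 1 = 5 + 1 = 6
 *
 * i.e. six radix-tree levels, each indexed by a 9-bit slice of the physical
 * page number, cover every possible page index.
 */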
106 typedef PhysPageEntry Node[P_L2_SIZE];
108 typedef struct PhysPageMap {
109 unsigned sections_nb;
110 unsigned sections_nb_alloc;
112 unsigned nodes_nb_alloc;
114 MemoryRegionSection *sections;
117 struct AddressSpaceDispatch {
118 /* This is a multi-level map on the physical address space.
119 * The bottom level has pointers to MemoryRegionSections.
121 PhysPageEntry phys_map;
126 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
127 typedef struct subpage_t {
131 uint16_t sub_section[TARGET_PAGE_SIZE];
134 #define PHYS_SECTION_UNASSIGNED 0
135 #define PHYS_SECTION_NOTDIRTY 1
136 #define PHYS_SECTION_ROM 2
137 #define PHYS_SECTION_WATCH 3
139 static void io_mem_init(void);
140 static void memory_map_init(void);
142 static MemoryRegion io_mem_watch;
145 #if !defined(CONFIG_USER_ONLY)
147 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
149 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
151 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
152 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
156 static uint32_t phys_map_node_alloc(PhysPageMap *map)
161 ret = map->nodes_nb++;
162 assert(ret != PHYS_MAP_NODE_NIL);
163 assert(ret != map->nodes_nb_alloc);
164 for (i = 0; i < P_L2_SIZE; ++i) {
165 map->nodes[ret][i].skip = 1;
166 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
171 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
172 hwaddr *index, hwaddr *nb, uint16_t leaf,
177 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
179 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
180 lp->ptr = phys_map_node_alloc(map);
181 p = map->nodes[lp->ptr];
183 for (i = 0; i < P_L2_SIZE; i++) {
185 p[i].ptr = PHYS_SECTION_UNASSIGNED;
189 p = map->nodes[lp->ptr];
191 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
193 while (*nb && lp < &p[P_L2_SIZE]) {
194 if ((*index & (step - 1)) == 0 && *nb >= step) {
200 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
206 static void phys_page_set(AddressSpaceDispatch *d,
207 hwaddr index, hwaddr nb,
210 /* Wildly overreserve - it doesn't matter much. */
211 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
213 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
216 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
217 * and update our entry so we can skip it and go directly to the destination.
219 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
221 unsigned valid_ptr = P_L2_SIZE;
226 if (lp->ptr == PHYS_MAP_NODE_NIL) {
231 for (i = 0; i < P_L2_SIZE; i++) {
232 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
239 phys_page_compact(&p[i], nodes, compacted);
243 /* We can only compress if there's only one child. */
248 assert(valid_ptr < P_L2_SIZE);
250 /* Don't compress if it won't fit in the # of bits we have. */
251 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
255 lp->ptr = p[valid_ptr].ptr;
256 if (!p[valid_ptr].skip) {
257 /* If our only child is a leaf, make this a leaf. */
258 /* By design, we should have made this node a leaf to begin with so we
259 * should never reach here.
260 * But since it's so simple to handle this, let's do it just in case we
265 lp->skip += p[valid_ptr].skip;
269 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
271 DECLARE_BITMAP(compacted, nodes_nb);
273 if (d->phys_map.skip) {
274 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
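/* Illustrative sketch (an assumption-laden example, not part of the original
 * file): what compaction buys us. Before compaction, a lookup in a sparsely
 * populated map walks one node per level:
 *
 *     root(skip=1) -> node(skip=1) -> node(skip=1) -> leaf
 *
 * If the two intermediate nodes each have a single valid child, the parent
 * entry absorbs their skip counts and the same lookup becomes:
 *
 *     root(skip=3) -> leaf
 *
 * phys_page_find() below honours this by subtracting lp.skip from the level
 * counter instead of decrementing it by one.
 */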
278 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
279 Node *nodes, MemoryRegionSection *sections)
282 hwaddr index = addr >> TARGET_PAGE_BITS;
285 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
286 if (lp.ptr == PHYS_MAP_NODE_NIL) {
287 return &sections[PHYS_SECTION_UNASSIGNED];
290 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
293 if (sections[lp.ptr].size.hi ||
294 range_covers_byte(sections[lp.ptr].offset_within_address_space,
295 sections[lp.ptr].size.lo, addr)) {
296 return &sections[lp.ptr];
298 return &sections[PHYS_SECTION_UNASSIGNED];
302 bool memory_region_is_unassigned(MemoryRegion *mr)
304 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
305 && mr != &io_mem_watch;
308 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
310 bool resolve_subpage)
312 MemoryRegionSection *section;
315 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
316 if (resolve_subpage && section->mr->subpage) {
317 subpage = container_of(section->mr, subpage_t, iomem);
318 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
323 static MemoryRegionSection *
324 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
325 hwaddr *plen, bool resolve_subpage)
327 MemoryRegionSection *section;
328 Int128 diff, diff_page;
330 section = address_space_lookup_region(d, addr, resolve_subpage);
331 /* Compute offset within MemoryRegionSection */
332 addr -= section->offset_within_address_space;
334 /* Compute offset within MemoryRegion */
335 *xlat = addr + section->offset_within_region;
337 diff_page = int128_make64(((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr);
338 diff = int128_sub(section->mr->size, int128_make64(addr));
339 diff = int128_min(diff, diff_page);
340 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
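/* Illustrative sketch (hypothetical numbers, not part of the original file):
 * how *plen is clamped above. With 4 KiB pages, suppose the adjusted addr
 * sits 0x100 bytes into a page, the section's MemoryRegion has another
 * 0x2000 bytes beyond that offset, and the caller asked for *plen = 0x5000:
 *
 *     diff_page = 0x1000 - 0x100      = 0xF00
 *     diff      = min(0x2000, 0xF00)  = 0xF00
 *     *plen     = min(0xF00, 0x5000)  = 0xF00
 *
 * so a single translation step never spans a page or region boundary.
 */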
344 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
345 hwaddr *xlat, hwaddr *plen,
349 MemoryRegionSection *section;
354 section = address_space_translate_internal(as->dispatch, addr, &addr, &len, true);
357 if (!mr->iommu_ops) {
361 iotlb = mr->iommu_ops->translate(mr, addr);
362 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
363 | (addr & iotlb.addr_mask));
364 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
365 if (!(iotlb.perm & (1 << is_write))) {
366 mr = &io_mem_unassigned;
370 as = iotlb.target_as;
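/* Illustrative sketch (hypothetical values, not part of the original file):
 * how an IOMMU hop rewrites the address above. Suppose the IOMMU maps 4 KiB
 * pages and returns, for addr = 0x12340567:
 *
 *     iotlb.translated_addr = 0x80000000
 *     iotlb.addr_mask       = 0x00000FFF
 *
 * Then the next iteration continues with
 *
 *     addr = (0x80000000 & ~0xFFF) | (0x12340567 & 0xFFF) = 0x80000567
 *
 * and len is clamped so the access stays inside that translated page; the
 * loop repeats in iotlb.target_as until a non-IOMMU region is reached.
 */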
378 MemoryRegionSection *
379 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
382 MemoryRegionSection *section;
383 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
385 assert(!section->mr->iommu_ops);
390 void cpu_exec_init_all(void)
392 #if !defined(CONFIG_USER_ONLY)
393 qemu_mutex_init(&ram_list.mutex);
399 #if !defined(CONFIG_USER_ONLY)
401 static int cpu_common_post_load(void *opaque, int version_id)
403 CPUState *cpu = opaque;
405 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
406 version_id is increased. */
407 cpu->interrupt_request &= ~0x01;
408 tlb_flush(cpu->env_ptr, 1);
413 const VMStateDescription vmstate_cpu_common = {
414 .name = "cpu_common",
416 .minimum_version_id = 1,
417 .minimum_version_id_old = 1,
418 .post_load = cpu_common_post_load,
419 .fields = (VMStateField []) {
420 VMSTATE_UINT32(halted, CPUState),
421 VMSTATE_UINT32(interrupt_request, CPUState),
422 VMSTATE_END_OF_LIST()
428 CPUState *qemu_get_cpu(int index)
433 if (cpu->cpu_index == index) {
441 void cpu_exec_init(CPUArchState *env)
443 CPUState *cpu = ENV_GET_CPU(env);
444 CPUClass *cc = CPU_GET_CLASS(cpu);
448 #if defined(CONFIG_USER_ONLY)
452 CPU_FOREACH(some_cpu) {
455 cpu->cpu_index = cpu_index;
457 QTAILQ_INIT(&env->breakpoints);
458 QTAILQ_INIT(&env->watchpoints);
459 #ifndef CONFIG_USER_ONLY
460 cpu->thread_id = qemu_get_thread_id();
462 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
463 #if defined(CONFIG_USER_ONLY)
466 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
467 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
469 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
470 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
471 cpu_save, cpu_load, env);
472 assert(cc->vmsd == NULL);
473 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
475 if (cc->vmsd != NULL) {
476 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
480 #if defined(TARGET_HAS_ICE)
481 #if defined(CONFIG_USER_ONLY)
482 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
484 tb_invalidate_phys_page_range(pc, pc + 1, 0);
487 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
489 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
491 tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
495 #endif /* TARGET_HAS_ICE */
497 #if defined(CONFIG_USER_ONLY)
498 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
503 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
504 int flags, CPUWatchpoint **watchpoint)
509 /* Add a watchpoint. */
510 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
511 int flags, CPUWatchpoint **watchpoint)
513 target_ulong len_mask = ~(len - 1);
516 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
517 if ((len & (len - 1)) || (addr & ~len_mask) ||
518 len == 0 || len > TARGET_PAGE_SIZE) {
519 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
520 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
523 wp = g_malloc(sizeof(*wp));
526 wp->len_mask = len_mask;
529 /* keep all GDB-injected watchpoints in front */
531 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
533 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
535 tlb_flush_page(env, addr);
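/* Illustrative sketch (not part of the original file): what the sanity check
 * above accepts and rejects. len must be a power of two no larger than one
 * page, and addr must be aligned to it, e.g.:
 *
 *     addr = 0x1000, len = 4  -> len_mask = ~3, addr & ~len_mask == 0, OK
 *     addr = 0x1002, len = 4  -> addr & ~len_mask == 2, rejected
 *     addr = 0x1000, len = 3  -> 3 & 2 != 0 (not a power of two), rejected
 */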
542 /* Remove a specific watchpoint. */
543 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
546 target_ulong len_mask = ~(len - 1);
549 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
550 if (addr == wp->vaddr && len_mask == wp->len_mask
551 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
552 cpu_watchpoint_remove_by_ref(env, wp);
559 /* Remove a specific watchpoint by reference. */
560 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
562 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
564 tlb_flush_page(env, watchpoint->vaddr);
569 /* Remove all matching watchpoints. */
570 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
572 CPUWatchpoint *wp, *next;
574 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
575 if (wp->flags & mask)
576 cpu_watchpoint_remove_by_ref(env, wp);
581 /* Add a breakpoint. */
582 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
583 CPUBreakpoint **breakpoint)
585 #if defined(TARGET_HAS_ICE)
588 bp = g_malloc(sizeof(*bp));
593 /* keep all GDB-injected breakpoints in front */
594 if (flags & BP_GDB) {
595 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
597 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
600 breakpoint_invalidate(ENV_GET_CPU(env), pc);
611 /* Remove a specific breakpoint. */
612 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
614 #if defined(TARGET_HAS_ICE)
617 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
618 if (bp->pc == pc && bp->flags == flags) {
619 cpu_breakpoint_remove_by_ref(env, bp);
629 /* Remove a specific breakpoint by reference. */
630 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
632 #if defined(TARGET_HAS_ICE)
633 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
635 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
641 /* Remove all matching breakpoints. */
642 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
644 #if defined(TARGET_HAS_ICE)
645 CPUBreakpoint *bp, *next;
647 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
648 if (bp->flags & mask)
649 cpu_breakpoint_remove_by_ref(env, bp);
654 /* enable or disable single step mode. EXCP_DEBUG is returned by the
655 CPU loop after each instruction */
656 void cpu_single_step(CPUState *cpu, int enabled)
658 #if defined(TARGET_HAS_ICE)
659 if (cpu->singlestep_enabled != enabled) {
660 cpu->singlestep_enabled = enabled;
662 kvm_update_guest_debug(cpu, 0);
664 /* must flush all the translated code to avoid inconsistencies */
665 /* XXX: only flush what is necessary */
666 CPUArchState *env = cpu->env_ptr;
673 void cpu_abort(CPUArchState *env, const char *fmt, ...)
675 CPUState *cpu = ENV_GET_CPU(env);
681 fprintf(stderr, "qemu: fatal: ");
682 vfprintf(stderr, fmt, ap);
683 fprintf(stderr, "\n");
684 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
685 if (qemu_log_enabled()) {
686 qemu_log("qemu: fatal: ");
687 qemu_log_vprintf(fmt, ap2);
689 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
695 #if defined(CONFIG_USER_ONLY)
697 struct sigaction act;
698 sigfillset(&act.sa_mask);
699 act.sa_handler = SIG_DFL;
700 sigaction(SIGABRT, &act, NULL);
706 #if !defined(CONFIG_USER_ONLY)
707 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
711 /* The list is protected by the iothread lock here. */
712 block = ram_list.mru_block;
713 if (block && addr - block->offset < block->length) {
716 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
717 if (addr - block->offset < block->length) {
722 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
726 ram_list.mru_block = block;
730 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
736 end = TARGET_PAGE_ALIGN(start + length);
737 start &= TARGET_PAGE_MASK;
739 block = qemu_get_ram_block(start);
740 assert(block == qemu_get_ram_block(end - 1));
741 start1 = (uintptr_t)block->host + (start - block->offset);
742 cpu_tlb_reset_dirty_all(start1, length);
745 /* Note: start and end must be within the same ram block. */
746 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
751 cpu_physical_memory_clear_dirty_range(start, length, client);
754 tlb_reset_dirty_range_all(start, length);
758 static void cpu_physical_memory_set_dirty_tracking(bool enable)
760 in_migration = enable;
763 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
764 MemoryRegionSection *section,
766 hwaddr paddr, hwaddr xlat,
768 target_ulong *address)
773 if (memory_region_is_ram(section->mr)) {
775 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
777 if (!section->readonly) {
778 iotlb |= PHYS_SECTION_NOTDIRTY;
780 iotlb |= PHYS_SECTION_ROM;
783 iotlb = section - address_space_memory.dispatch->map.sections;
787 /* Make accesses to pages with watchpoints go via the
788 watchpoint trap routines. */
789 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
790 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
791 /* Avoid trapping reads of pages with a write breakpoint. */
792 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
793 iotlb = PHYS_SECTION_WATCH + paddr;
794 *address |= TLB_MMIO;
802 #endif /* defined(CONFIG_USER_ONLY) */
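/* Illustrative sketch (hypothetical numbers, not part of the original file):
 * how memory_region_section_get_iotlb() above encodes its result. For RAM the
 * returned value is a page-aligned ram_addr_t with a small section index
 * ORed into the low bits, e.g.
 *
 *     ram addr 0x00042000 | PHYS_SECTION_NOTDIRTY (1) -> 0x00042001
 *
 * For pure MMIO the value is just the section index itself. This is why
 * phys_section_add() below asserts that the section count stays below
 * TARGET_PAGE_SIZE: the index must never spill into the page-aligned part.
 */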
804 #if !defined(CONFIG_USER_ONLY)
806 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
808 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
810 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
813 * Set a custom physical guest memory allocator.
814 * Accelerators with unusual needs may need this. Hopefully, we can
815 * get rid of it eventually.
817 void phys_mem_set_alloc(void *(*alloc)(size_t))
819 phys_mem_alloc = alloc;
822 static uint16_t phys_section_add(PhysPageMap *map,
823 MemoryRegionSection *section)
825 /* The physical section number is ORed with a page-aligned
826 * pointer to produce the iotlb entries. Thus it should
827 * never overflow into the page-aligned value.
829 assert(map->sections_nb < TARGET_PAGE_SIZE);
831 if (map->sections_nb == map->sections_nb_alloc) {
832 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
833 map->sections = g_renew(MemoryRegionSection, map->sections,
834 map->sections_nb_alloc);
836 map->sections[map->sections_nb] = *section;
837 memory_region_ref(section->mr);
838 return map->sections_nb++;
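/* Illustrative sketch (not part of the original file): how the sections array
 * grows. sections_nb_alloc doubles, starting from a floor of 16, so repeated
 * phys_section_add() calls reallocate at 16, 32, 64, ... entries:
 *
 *     nb: 0..15   -> alloc = 16
 *     nb: 16..31  -> alloc = 32
 *     nb: 32..63  -> alloc = 64
 *
 * and the assert above guarantees the returned index always fits inside the
 * sub-page bits of an iotlb entry (i.e. it stays below TARGET_PAGE_SIZE).
 */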
841 static void phys_section_destroy(MemoryRegion *mr)
843 memory_region_unref(mr);
846 subpage_t *subpage = container_of(mr, subpage_t, iomem);
847 memory_region_destroy(&subpage->iomem);
852 static void phys_sections_free(PhysPageMap *map)
854 while (map->sections_nb > 0) {
855 MemoryRegionSection *section = &map->sections[--map->sections_nb];
856 phys_section_destroy(section->mr);
858 g_free(map->sections);
862 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
865 hwaddr base = section->offset_within_address_space
867 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
868 d->map.nodes, d->map.sections);
869 MemoryRegionSection subsection = {
870 .offset_within_address_space = base,
871 .size = int128_make64(TARGET_PAGE_SIZE),
875 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
877 if (!(existing->mr->subpage)) {
878 subpage = subpage_init(d->as, base);
879 subsection.mr = &subpage->iomem;
880 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
881 phys_section_add(&d->map, &subsection));
883 subpage = container_of(existing->mr, subpage_t, iomem);
885 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
886 end = start + int128_get64(section->size) - 1;
887 subpage_register(subpage, start, end,
888 phys_section_add(&d->map, section));
892 static void register_multipage(AddressSpaceDispatch *d,
893 MemoryRegionSection *section)
895 hwaddr start_addr = section->offset_within_address_space;
896 uint16_t section_index = phys_section_add(&d->map, section);
897 uint64_t num_pages = int128_get64(int128_rshift(section->size,
901 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
904 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
906 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
907 AddressSpaceDispatch *d = as->next_dispatch;
908 MemoryRegionSection now = *section, remain = *section;
909 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
911 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
912 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
913 - now.offset_within_address_space;
915 now.size = int128_min(int128_make64(left), now.size);
916 register_subpage(d, &now);
918 now.size = int128_zero();
920 while (int128_ne(remain.size, now.size)) {
921 remain.size = int128_sub(remain.size, now.size);
922 remain.offset_within_address_space += int128_get64(now.size);
923 remain.offset_within_region += int128_get64(now.size);
925 if (int128_lt(remain.size, page_size)) {
926 register_subpage(d, &now);
927 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
928 now.size = page_size;
929 register_subpage(d, &now);
931 now.size = int128_and(now.size, int128_neg(page_size));
932 register_multipage(d, &now);
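/* Illustrative sketch (hypothetical layout, not part of the original file):
 * how mem_add() above carves up a section that is not page aligned. With
 * 4 KiB pages, a section covering [0x1800, 0x5400) is registered as:
 *
 *     [0x1800, 0x2000)  head, partial page  -> register_subpage()
 *     [0x2000, 0x5000)  whole pages         -> register_multipage()
 *     [0x5000, 0x5400)  tail, partial page  -> register_subpage()
 *
 * so only the unaligned head and tail pay the cost of sub-page dispatch.
 */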
937 void qemu_flush_coalesced_mmio_buffer(void)
940 kvm_flush_coalesced_mmio_buffer();
943 void qemu_mutex_lock_ramlist(void)
945 qemu_mutex_lock(&ram_list.mutex);
948 void qemu_mutex_unlock_ramlist(void)
950 qemu_mutex_unlock(&ram_list.mutex);
957 #define HUGETLBFS_MAGIC 0x958458f6
959 static long gethugepagesize(const char *path)
965 ret = statfs(path, &fs);
966 } while (ret != 0 && errno == EINTR);
973 if (fs.f_type != HUGETLBFS_MAGIC)
974 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
979 static sigjmp_buf sigjump;
981 static void sigbus_handler(int signal)
983 siglongjmp(sigjump, 1);
986 static void *file_ram_alloc(RAMBlock *block,
991 char *sanitized_name;
995 unsigned long hpagesize;
997 hpagesize = gethugepagesize(path);
1002 if (memory < hpagesize) {
1006 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1007 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1011 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1012 sanitized_name = g_strdup(block->mr->name);
1013 for (c = sanitized_name; *c != '\0'; c++) {
1018 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1020 g_free(sanitized_name);
1022 fd = mkstemp(filename);
1024 perror("unable to create backing store for hugepages");
1031 memory = (memory+hpagesize-1) & ~(hpagesize-1);
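/* Illustrative sketch (not part of the original file): the line above rounds
 * the requested size up to a whole number of huge pages, e.g. with 2 MiB
 * pages a request of 5 MiB becomes (5M + 2M - 1) & ~(2M - 1) = 6 MiB. */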
1034 * ftruncate is not supported by hugetlbfs in older
1035 * hosts, so don't bother bailing out on errors.
1036 * If anything goes wrong with it under other filesystems,
1039 if (ftruncate(fd, memory))
1040 perror("ftruncate");
1042 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1043 if (area == MAP_FAILED) {
1044 perror("file_ram_alloc: can't mmap RAM pages");
1051 struct sigaction act, oldact;
1052 sigset_t set, oldset;
1054 memset(&act, 0, sizeof(act));
1055 act.sa_handler = &sigbus_handler;
1058 ret = sigaction(SIGBUS, &act, &oldact);
1060 perror("file_ram_alloc: failed to install signal handler");
1064 /* unblock SIGBUS */
1066 sigaddset(&set, SIGBUS);
1067 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1069 if (sigsetjmp(sigjump, 1)) {
1070 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1074 /* MAP_POPULATE silently ignores failures */
1075 for (i = 0; i < (memory/hpagesize); i++) {
1076 memset(area + (hpagesize*i), 0, 1);
1079 ret = sigaction(SIGBUS, &oldact, NULL);
1081 perror("file_ram_alloc: failed to reinstall signal handler");
1085 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1092 static void *file_ram_alloc(RAMBlock *block,
1096 fprintf(stderr, "-mem-path not supported on this host\n");
1101 static ram_addr_t find_ram_offset(ram_addr_t size)
1103 RAMBlock *block, *next_block;
1104 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1106 assert(size != 0); /* it would hand out the same offset multiple times */
1108 if (QTAILQ_EMPTY(&ram_list.blocks))
1111 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1112 ram_addr_t end, next = RAM_ADDR_MAX;
1114 end = block->offset + block->length;
1116 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1117 if (next_block->offset >= end) {
1118 next = MIN(next, next_block->offset);
1121 if (next - end >= size && next - end < mingap) {
1123 mingap = next - end;
1127 if (offset == RAM_ADDR_MAX) {
1128 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1136 ram_addr_t last_ram_offset(void)
1139 ram_addr_t last = 0;
1141 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1142 last = MAX(last, block->offset + block->length);
1147 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1151 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1152 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1153 "dump-guest-core", true)) {
1154 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1156 perror("qemu_madvise");
1157 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1158 "but dump_guest_core=off specified\n");
1163 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1165 RAMBlock *new_block, *block;
1168 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1169 if (block->offset == addr) {
1175 assert(!new_block->idstr[0]);
1178 char *id = qdev_get_dev_path(dev);
1180 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1184 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1186 /* This assumes the iothread lock is taken here too. */
1187 qemu_mutex_lock_ramlist();
1188 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1189 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1190 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1195 qemu_mutex_unlock_ramlist();
1198 static int memory_try_enable_merging(void *addr, size_t len)
1200 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1201 /* disabled by the user */
1205 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1208 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1211 RAMBlock *block, *new_block;
1212 ram_addr_t old_ram_size, new_ram_size;
1214 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1216 size = TARGET_PAGE_ALIGN(size);
1217 new_block = g_malloc0(sizeof(*new_block));
1220 /* This assumes the iothread lock is taken here too. */
1221 qemu_mutex_lock_ramlist();
1223 new_block->offset = find_ram_offset(size);
1225 new_block->host = host;
1226 new_block->flags |= RAM_PREALLOC_MASK;
1227 } else if (xen_enabled()) {
1229 fprintf(stderr, "-mem-path not supported with Xen\n");
1232 xen_ram_alloc(new_block->offset, size, mr);
1235 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1237 * file_ram_alloc() needs to allocate just like
1238 * phys_mem_alloc, but we haven't bothered to provide
1242 "-mem-path not supported with this accelerator\n");
1245 new_block->host = file_ram_alloc(new_block, size, mem_path);
1247 if (!new_block->host) {
1248 new_block->host = phys_mem_alloc(size);
1249 if (!new_block->host) {
1250 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1251 new_block->mr->name, strerror(errno));
1254 memory_try_enable_merging(new_block->host, size);
1257 new_block->length = size;
1259 /* Keep the list sorted from biggest to smallest block. */
1260 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1261 if (block->length < new_block->length) {
1266 QTAILQ_INSERT_BEFORE(block, new_block, next);
1268 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1270 ram_list.mru_block = NULL;
1273 qemu_mutex_unlock_ramlist();
1275 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1277 if (new_ram_size > old_ram_size) {
1279 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1280 ram_list.dirty_memory[i] =
1281 bitmap_zero_extend(ram_list.dirty_memory[i],
1282 old_ram_size, new_ram_size);
1285 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1287 qemu_ram_setup_dump(new_block->host, size);
1288 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1289 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1292 kvm_setup_guest_memory(new_block->host, size);
1294 return new_block->offset;
1297 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1299 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1302 void qemu_ram_free_from_ptr(ram_addr_t addr)
1306 /* This assumes the iothread lock is taken here too. */
1307 qemu_mutex_lock_ramlist();
1308 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1309 if (addr == block->offset) {
1310 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1311 ram_list.mru_block = NULL;
1317 qemu_mutex_unlock_ramlist();
1320 void qemu_ram_free(ram_addr_t addr)
1324 /* This assumes the iothread lock is taken here too. */
1325 qemu_mutex_lock_ramlist();
1326 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1327 if (addr == block->offset) {
1328 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1329 ram_list.mru_block = NULL;
1331 if (block->flags & RAM_PREALLOC_MASK) {
1333 } else if (xen_enabled()) {
1334 xen_invalidate_map_cache_entry(block->host);
1336 } else if (block->fd >= 0) {
1337 munmap(block->host, block->length);
1341 qemu_anon_ram_free(block->host, block->length);
1347 qemu_mutex_unlock_ramlist();
1352 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1359 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1360 offset = addr - block->offset;
1361 if (offset < block->length) {
1362 vaddr = block->host + offset;
1363 if (block->flags & RAM_PREALLOC_MASK) {
1365 } else if (xen_enabled()) {
1369 munmap(vaddr, length);
1370 if (block->fd >= 0) {
1372 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1375 flags |= MAP_PRIVATE;
1377 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1378 flags, block->fd, offset);
1381 * Remap needs to match alloc. Accelerators that
1382 * set phys_mem_alloc never remap. If they did,
1383 * we'd need a remap hook here.
1385 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1387 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1388 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1391 if (area != vaddr) {
1392 fprintf(stderr, "Could not remap addr: "
1393 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1397 memory_try_enable_merging(vaddr, length);
1398 qemu_ram_setup_dump(vaddr, length);
1404 #endif /* !_WIN32 */
1406 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1407 With the exception of the softmmu code in this file, this should
1408 only be used for local memory (e.g. video ram) that the device owns,
1409 and knows it isn't going to access beyond the end of the block.
1411 It should not be used for general purpose DMA.
1412 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1414 void *qemu_get_ram_ptr(ram_addr_t addr)
1416 RAMBlock *block = qemu_get_ram_block(addr);
1418 if (xen_enabled()) {
1419 /* We need to check if the requested address is in RAM
1420 * because we don't want to map the entire memory in QEMU.
1421 * In that case just map until the end of the page.
1423 if (block->offset == 0) {
1424 return xen_map_cache(addr, 0, 0);
1425 } else if (block->host == NULL) {
1427 xen_map_cache(block->offset, block->length, 1);
1430 return block->host + (addr - block->offset);
1433 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1434 * but takes a size argument */
1435 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1440 if (xen_enabled()) {
1441 return xen_map_cache(addr, *size, 1);
1445 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1446 if (addr - block->offset < block->length) {
1447 if (addr - block->offset + *size > block->length)
1448 *size = block->length - addr + block->offset;
1449 return block->host + (addr - block->offset);
1453 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1458 /* Some of the softmmu routines need to translate from a host pointer
1459 (typically a TLB entry) back to a ram offset. */
1460 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1463 uint8_t *host = ptr;
1465 if (xen_enabled()) {
1466 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1467 return qemu_get_ram_block(*ram_addr)->mr;
1470 block = ram_list.mru_block;
1471 if (block && block->host && host - block->host < block->length) {
1475 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1476 /* This case appears when the block is not mapped. */
1477 if (block->host == NULL) {
1480 if (host - block->host < block->length) {
1488 *ram_addr = block->offset + (host - block->host);
1492 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1493 uint64_t val, unsigned size)
1495 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1496 tb_invalidate_phys_page_fast(ram_addr, size);
1500 stb_p(qemu_get_ram_ptr(ram_addr), val);
1503 stw_p(qemu_get_ram_ptr(ram_addr), val);
1506 stl_p(qemu_get_ram_ptr(ram_addr), val);
1511 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1512 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1513 /* we remove the notdirty callback only if the code has been
1515 if (!cpu_physical_memory_is_clean(ram_addr)) {
1516 CPUArchState *env = current_cpu->env_ptr;
1517 tlb_set_dirty(env, env->mem_io_vaddr);
1521 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1522 unsigned size, bool is_write)
1527 static const MemoryRegionOps notdirty_mem_ops = {
1528 .write = notdirty_mem_write,
1529 .valid.accepts = notdirty_mem_accepts,
1530 .endianness = DEVICE_NATIVE_ENDIAN,
1533 /* Generate a debug exception if a watchpoint has been hit. */
1534 static void check_watchpoint(int offset, int len_mask, int flags)
1536 CPUArchState *env = current_cpu->env_ptr;
1537 target_ulong pc, cs_base;
1542 if (env->watchpoint_hit) {
1543 /* We re-entered the check after replacing the TB. Now raise
1544 * the debug interrupt so that it will trigger after the
1545 * current instruction. */
1546 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1549 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1550 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1551 if ((vaddr == (wp->vaddr & len_mask) ||
1552 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1553 wp->flags |= BP_WATCHPOINT_HIT;
1554 if (!env->watchpoint_hit) {
1555 env->watchpoint_hit = wp;
1556 tb_check_watchpoint(env);
1557 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1558 env->exception_index = EXCP_DEBUG;
1561 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1562 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1563 cpu_resume_from_signal(env, NULL);
1567 wp->flags &= ~BP_WATCHPOINT_HIT;
1572 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1573 so these check for a hit then pass through to the normal out-of-line
1575 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1578 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1580 case 1: return ldub_phys(addr);
1581 case 2: return lduw_phys(addr);
1582 case 4: return ldl_phys(addr);
1587 static void watch_mem_write(void *opaque, hwaddr addr,
1588 uint64_t val, unsigned size)
1590 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1593 stb_phys(addr, val);
1596 stw_phys(addr, val);
1599 stl_phys(addr, val);
1605 static const MemoryRegionOps watch_mem_ops = {
1606 .read = watch_mem_read,
1607 .write = watch_mem_write,
1608 .endianness = DEVICE_NATIVE_ENDIAN,
1611 static uint64_t subpage_read(void *opaque, hwaddr addr,
1614 subpage_t *subpage = opaque;
1617 #if defined(DEBUG_SUBPAGE)
1618 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1619 subpage, len, addr);
1621 address_space_read(subpage->as, addr + subpage->base, buf, len);
1634 static void subpage_write(void *opaque, hwaddr addr,
1635 uint64_t value, unsigned len)
1637 subpage_t *subpage = opaque;
1640 #if defined(DEBUG_SUBPAGE)
1641 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1642 " value %"PRIx64"\n",
1643 __func__, subpage, len, addr, value);
1658 address_space_write(subpage->as, addr + subpage->base, buf, len);
1661 static bool subpage_accepts(void *opaque, hwaddr addr,
1662 unsigned len, bool is_write)
1664 subpage_t *subpage = opaque;
1665 #if defined(DEBUG_SUBPAGE)
1666 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1667 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1670 return address_space_access_valid(subpage->as, addr + subpage->base,
1674 static const MemoryRegionOps subpage_ops = {
1675 .read = subpage_read,
1676 .write = subpage_write,
1677 .valid.accepts = subpage_accepts,
1678 .endianness = DEVICE_NATIVE_ENDIAN,
1681 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1686 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1688 idx = SUBPAGE_IDX(start);
1689 eidx = SUBPAGE_IDX(end);
1690 #if defined(DEBUG_SUBPAGE)
1691 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1692 __func__, mmio, start, end, idx, eidx, section);
1694 for (; idx <= eidx; idx++) {
1695 mmio->sub_section[idx] = section;
1701 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1705 mmio = g_malloc0(sizeof(subpage_t));
1709 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1710 "subpage", TARGET_PAGE_SIZE);
1711 mmio->iomem.subpage = true;
1712 #if defined(DEBUG_SUBPAGE)
1713 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1714 mmio, base, TARGET_PAGE_SIZE);
1716 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1721 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1723 MemoryRegionSection section = {
1725 .offset_within_address_space = 0,
1726 .offset_within_region = 0,
1727 .size = int128_2_64(),
1730 return phys_section_add(map, &section);
1733 MemoryRegion *iotlb_to_region(hwaddr index)
1735 return address_space_memory.dispatch->map.sections[
1736 index & ~TARGET_PAGE_MASK].mr;
1739 static void io_mem_init(void)
1741 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1742 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1743 "unassigned", UINT64_MAX);
1744 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1745 "notdirty", UINT64_MAX);
1746 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1747 "watch", UINT64_MAX);
1750 static void mem_begin(MemoryListener *listener)
1752 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1753 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1756 n = dummy_section(&d->map, &io_mem_unassigned);
1757 assert(n == PHYS_SECTION_UNASSIGNED);
1758 n = dummy_section(&d->map, &io_mem_notdirty);
1759 assert(n == PHYS_SECTION_NOTDIRTY);
1760 n = dummy_section(&d->map, &io_mem_rom);
1761 assert(n == PHYS_SECTION_ROM);
1762 n = dummy_section(&d->map, &io_mem_watch);
1763 assert(n == PHYS_SECTION_WATCH);
1765 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1767 as->next_dispatch = d;
1770 static void mem_commit(MemoryListener *listener)
1772 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1773 AddressSpaceDispatch *cur = as->dispatch;
1774 AddressSpaceDispatch *next = as->next_dispatch;
1776 phys_page_compact_all(next, next->map.nodes_nb);
1778 as->dispatch = next;
1781 phys_sections_free(&cur->map);
1786 static void tcg_commit(MemoryListener *listener)
1790 /* since each CPU stores ram addresses in its TLB cache, we must
1791 reset the modified entries */
1794 CPUArchState *env = cpu->env_ptr;
1800 static void core_log_global_start(MemoryListener *listener)
1802 cpu_physical_memory_set_dirty_tracking(true);
1805 static void core_log_global_stop(MemoryListener *listener)
1807 cpu_physical_memory_set_dirty_tracking(false);
1810 static MemoryListener core_memory_listener = {
1811 .log_global_start = core_log_global_start,
1812 .log_global_stop = core_log_global_stop,
1816 static MemoryListener tcg_memory_listener = {
1817 .commit = tcg_commit,
1820 void address_space_init_dispatch(AddressSpace *as)
1822 as->dispatch = NULL;
1823 as->dispatch_listener = (MemoryListener) {
1825 .commit = mem_commit,
1826 .region_add = mem_add,
1827 .region_nop = mem_add,
1830 memory_listener_register(&as->dispatch_listener, as);
1833 void address_space_destroy_dispatch(AddressSpace *as)
1835 AddressSpaceDispatch *d = as->dispatch;
1837 memory_listener_unregister(&as->dispatch_listener);
1839 as->dispatch = NULL;
1842 static void memory_map_init(void)
1844 system_memory = g_malloc(sizeof(*system_memory));
1846 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1847 address_space_init(&address_space_memory, system_memory, "memory");
1849 system_io = g_malloc(sizeof(*system_io));
1850 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1852 address_space_init(&address_space_io, system_io, "I/O");
1854 memory_listener_register(&core_memory_listener, &address_space_memory);
1855 if (tcg_enabled()) {
1856 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1860 MemoryRegion *get_system_memory(void)
1862 return system_memory;
1865 MemoryRegion *get_system_io(void)
1870 #endif /* !defined(CONFIG_USER_ONLY) */
1872 /* physical memory access (slow version, mainly for debug) */
1873 #if defined(CONFIG_USER_ONLY)
1874 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1875 uint8_t *buf, int len, int is_write)
1882 page = addr & TARGET_PAGE_MASK;
1883 l = (page + TARGET_PAGE_SIZE) - addr;
1886 flags = page_get_flags(page);
1887 if (!(flags & PAGE_VALID))
1890 if (!(flags & PAGE_WRITE))
1892 /* XXX: this code should not depend on lock_user */
1893 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1896 unlock_user(p, addr, l);
1898 if (!(flags & PAGE_READ))
1900 /* XXX: this code should not depend on lock_user */
1901 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1904 unlock_user(p, addr, 0);
1915 static void invalidate_and_set_dirty(hwaddr addr,
1918 if (cpu_physical_memory_is_clean(addr)) {
1919 /* invalidate code */
1920 tb_invalidate_phys_page_range(addr, addr + length, 0);
1922 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1923 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1925 xen_modified_memory(addr, length);
1928 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1930 if (memory_region_is_ram(mr)) {
1931 return !(is_write && mr->readonly);
1933 if (memory_region_is_romd(mr)) {
1940 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1942 unsigned access_size_max = mr->ops->valid.max_access_size;
1944 /* Regions are assumed to support 1-4 byte accesses unless
1945 otherwise specified. */
1946 if (access_size_max == 0) {
1947 access_size_max = 4;
1950 /* Bound the maximum access by the alignment of the address. */
1951 if (!mr->ops->impl.unaligned) {
1952 unsigned align_size_max = addr & -addr;
1953 if (align_size_max != 0 && align_size_max < access_size_max) {
1954 access_size_max = align_size_max;
1958 /* Don't attempt accesses larger than the maximum. */
1959 if (l > access_size_max) {
1960 l = access_size_max;
1963 l = 1 << (qemu_fls(l) - 1);
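/* Illustrative sketch (hypothetical values, not part of the original file):
 * the bounding steps above in action. For addr = 0x1006 on a region whose
 * maximum access size is 4 and which does not allow unaligned accesses:
 *
 *     addr & -addr    = 0x2   (lowest set bit: the address is 2-byte aligned)
 *     access_size_max = MIN(4, 2) = 2
 *     requested l = 3 -> capped to 2 -> rounded down to a power of two -> 2
 *
 * so the 3-byte request is issued as a 2-byte access and the caller in
 * address_space_rw() loops for the remainder.
 */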
1969 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1970 int len, bool is_write)
1981 mr = address_space_translate(as, addr, &addr1, &l, is_write);
1984 if (!memory_access_is_direct(mr, is_write)) {
1985 l = memory_access_size(mr, l, addr1);
1986 /* XXX: could force current_cpu to NULL to avoid
1990 /* 64 bit write access */
1992 error |= io_mem_write(mr, addr1, val, 8);
1995 /* 32 bit write access */
1997 error |= io_mem_write(mr, addr1, val, 4);
2000 /* 16 bit write access */
2002 error |= io_mem_write(mr, addr1, val, 2);
2005 /* 8 bit write access */
2007 error |= io_mem_write(mr, addr1, val, 1);
2013 addr1 += memory_region_get_ram_addr(mr);
2015 ptr = qemu_get_ram_ptr(addr1);
2016 memcpy(ptr, buf, l);
2017 invalidate_and_set_dirty(addr1, l);
2020 if (!memory_access_is_direct(mr, is_write)) {
2022 l = memory_access_size(mr, l, addr1);
2025 /* 64 bit read access */
2026 error |= io_mem_read(mr, addr1, &val, 8);
2030 /* 32 bit read access */
2031 error |= io_mem_read(mr, addr1, &val, 4);
2035 /* 16 bit read access */
2036 error |= io_mem_read(mr, addr1, &val, 2);
2040 /* 8 bit read access */
2041 error |= io_mem_read(mr, addr1, &val, 1);
2049 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2050 memcpy(buf, ptr, l);
2061 bool address_space_write(AddressSpace *as, hwaddr addr,
2062 const uint8_t *buf, int len)
2064 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2067 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2069 return address_space_rw(as, addr, buf, len, false);
2073 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2074 int len, int is_write)
2076 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2079 enum write_rom_type {
2084 static inline void cpu_physical_memory_write_rom_internal(
2085 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2094 mr = address_space_translate(&address_space_memory,
2095 addr, &addr1, &l, true);
2097 if (!(memory_region_is_ram(mr) ||
2098 memory_region_is_romd(mr))) {
2101 addr1 += memory_region_get_ram_addr(mr);
2103 ptr = qemu_get_ram_ptr(addr1);
2106 memcpy(ptr, buf, l);
2107 invalidate_and_set_dirty(addr1, l);
2110 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2120 /* used for ROM loading: can write in RAM and ROM */
2121 void cpu_physical_memory_write_rom(hwaddr addr,
2122 const uint8_t *buf, int len)
2124 cpu_physical_memory_write_rom_internal(addr, buf, len, WRITE_DATA);
2127 void cpu_flush_icache_range(hwaddr start, int len)
2130 * This function should do the same thing as an icache flush that was
2131 * triggered from within the guest. For TCG we are always cache coherent,
2132 * so there is no need to flush anything. For KVM / Xen we need to flush
2133 * the host's instruction cache at least.
2135 if (tcg_enabled()) {
2139 cpu_physical_memory_write_rom_internal(start, NULL, len, FLUSH_CACHE);
2149 static BounceBuffer bounce;
2151 typedef struct MapClient {
2153 void (*callback)(void *opaque);
2154 QLIST_ENTRY(MapClient) link;
2157 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2158 = QLIST_HEAD_INITIALIZER(map_client_list);
2160 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2162 MapClient *client = g_malloc(sizeof(*client));
2164 client->opaque = opaque;
2165 client->callback = callback;
2166 QLIST_INSERT_HEAD(&map_client_list, client, link);
2170 static void cpu_unregister_map_client(void *_client)
2172 MapClient *client = (MapClient *)_client;
2174 QLIST_REMOVE(client, link);
2178 static void cpu_notify_map_clients(void)
2182 while (!QLIST_EMPTY(&map_client_list)) {
2183 client = QLIST_FIRST(&map_client_list);
2184 client->callback(client->opaque);
2185 cpu_unregister_map_client(client);
2189 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2196 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2197 if (!memory_access_is_direct(mr, is_write)) {
2198 l = memory_access_size(mr, l, addr);
2199 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2210 /* Map a physical memory region into a host virtual address.
2211 * May map a subset of the requested range, given by and returned in *plen.
2212 * May return NULL if resources needed to perform the mapping are exhausted.
2213 * Use only for reads OR writes - not for read-modify-write operations.
2214 * Use cpu_register_map_client() to know when retrying the map operation is
2215 * likely to succeed.
2217 void *address_space_map(AddressSpace *as,
2224 hwaddr l, xlat, base;
2225 MemoryRegion *mr, *this_mr;
2233 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2234 if (!memory_access_is_direct(mr, is_write)) {
2235 if (bounce.buffer) {
2238 /* Avoid unbounded allocations */
2239 l = MIN(l, TARGET_PAGE_SIZE);
2240 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2244 memory_region_ref(mr);
2247 address_space_read(as, addr, bounce.buffer, l);
2251 return bounce.buffer;
2255 raddr = memory_region_get_ram_addr(mr);
2266 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2267 if (this_mr != mr || xlat != base + done) {
2272 memory_region_ref(mr);
2274 return qemu_ram_ptr_length(raddr + base, plen);
2277 /* Unmaps a memory region previously mapped by address_space_map().
2278 * Will also mark the memory as dirty if is_write == 1. access_len gives
2279 * the amount of memory that was actually read or written by the caller.
2281 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2282 int is_write, hwaddr access_len)
2284 if (buffer != bounce.buffer) {
2288 mr = qemu_ram_addr_from_host(buffer, &addr1);
2291 while (access_len) {
2293 l = TARGET_PAGE_SIZE;
2296 invalidate_and_set_dirty(addr1, l);
2301 if (xen_enabled()) {
2302 xen_invalidate_map_cache_entry(buffer);
2304 memory_region_unref(mr);
2308 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2310 qemu_vfree(bounce.buffer);
2311 bounce.buffer = NULL;
2312 memory_region_unref(bounce.mr);
2313 cpu_notify_map_clients();
2316 void *cpu_physical_memory_map(hwaddr addr,
2320 return address_space_map(&address_space_memory, addr, plen, is_write);
2323 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2324 int is_write, hwaddr access_len)
2326 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
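/* Illustrative usage sketch (hypothetical device code, not part of the
 * original file): the intended map/unmap pairing for a DMA-style transfer.
 *
 *     hwaddr done = 0;
 *     while (done < size) {
 *         hwaddr l = size - done;
 *         void *p = cpu_physical_memory_map(addr + done, &l, 1);
 *         if (!p) {
 *             break;                  // resources exhausted, retry later
 *         }
 *         memcpy(p, src + done, l);   // fill the mapped window
 *         cpu_physical_memory_unmap(p, l, 1, l);
 *         done += l;
 *     }
 *
 * Note that the mapping may cover less than the requested length and may be
 * backed by the bounce buffer, in which case the unmap call is what actually
 * flushes a write.
 */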
2329 /* warning: addr must be aligned */
2330 static inline uint32_t ldl_phys_internal(hwaddr addr,
2331 enum device_endian endian)
2339 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2341 if (l < 4 || !memory_access_is_direct(mr, false)) {
2343 io_mem_read(mr, addr1, &val, 4);
2344 #if defined(TARGET_WORDS_BIGENDIAN)
2345 if (endian == DEVICE_LITTLE_ENDIAN) {
2349 if (endian == DEVICE_BIG_ENDIAN) {
2355 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2359 case DEVICE_LITTLE_ENDIAN:
2360 val = ldl_le_p(ptr);
2362 case DEVICE_BIG_ENDIAN:
2363 val = ldl_be_p(ptr);
2373 uint32_t ldl_phys(hwaddr addr)
2375 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2378 uint32_t ldl_le_phys(hwaddr addr)
2380 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2383 uint32_t ldl_be_phys(hwaddr addr)
2385 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2388 /* warning: addr must be aligned */
2389 static inline uint64_t ldq_phys_internal(hwaddr addr,
2390 enum device_endian endian)
2398 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2400 if (l < 8 || !memory_access_is_direct(mr, false)) {
2402 io_mem_read(mr, addr1, &val, 8);
2403 #if defined(TARGET_WORDS_BIGENDIAN)
2404 if (endian == DEVICE_LITTLE_ENDIAN) {
2408 if (endian == DEVICE_BIG_ENDIAN) {
2414 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2418 case DEVICE_LITTLE_ENDIAN:
2419 val = ldq_le_p(ptr);
2421 case DEVICE_BIG_ENDIAN:
2422 val = ldq_be_p(ptr);
2432 uint64_t ldq_phys(hwaddr addr)
2434 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2437 uint64_t ldq_le_phys(hwaddr addr)
2439 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2442 uint64_t ldq_be_phys(hwaddr addr)
2444 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2448 uint32_t ldub_phys(hwaddr addr)
2451 cpu_physical_memory_read(addr, &val, 1);
2455 /* warning: addr must be aligned */
2456 static inline uint32_t lduw_phys_internal(hwaddr addr,
2457 enum device_endian endian)
2465 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2467 if (l < 2 || !memory_access_is_direct(mr, false)) {
2469 io_mem_read(mr, addr1, &val, 2);
2470 #if defined(TARGET_WORDS_BIGENDIAN)
2471 if (endian == DEVICE_LITTLE_ENDIAN) {
2475 if (endian == DEVICE_BIG_ENDIAN) {
2481 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2485 case DEVICE_LITTLE_ENDIAN:
2486 val = lduw_le_p(ptr);
2488 case DEVICE_BIG_ENDIAN:
2489 val = lduw_be_p(ptr);
2499 uint32_t lduw_phys(hwaddr addr)
2501 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2504 uint32_t lduw_le_phys(hwaddr addr)
2506 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2509 uint32_t lduw_be_phys(hwaddr addr)
2511 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2514 /* warning: addr must be aligned. The ram page is not marked as dirty
2515 and the code inside is not invalidated. It is useful if the dirty
2516 bits are used to track modified PTEs */
2517 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2524 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2526 if (l < 4 || !memory_access_is_direct(mr, true)) {
2527 io_mem_write(mr, addr1, val, 4);
2529 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2530 ptr = qemu_get_ram_ptr(addr1);
2533 if (unlikely(in_migration)) {
2534 if (cpu_physical_memory_is_clean(addr1)) {
2535 /* invalidate code */
2536 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2538 cpu_physical_memory_set_dirty_flag(addr1,
2539 DIRTY_MEMORY_MIGRATION);
2540 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2546 /* warning: addr must be aligned */
2547 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2548 enum device_endian endian)
2555 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2557 if (l < 4 || !memory_access_is_direct(mr, true)) {
2558 #if defined(TARGET_WORDS_BIGENDIAN)
2559 if (endian == DEVICE_LITTLE_ENDIAN) {
2563 if (endian == DEVICE_BIG_ENDIAN) {
2567 io_mem_write(mr, addr1, val, 4);
2570 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2571 ptr = qemu_get_ram_ptr(addr1);
2573 case DEVICE_LITTLE_ENDIAN:
2576 case DEVICE_BIG_ENDIAN:
2583 invalidate_and_set_dirty(addr1, 4);
2587 void stl_phys(hwaddr addr, uint32_t val)
2589 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2592 void stl_le_phys(hwaddr addr, uint32_t val)
2594 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2597 void stl_be_phys(hwaddr addr, uint32_t val)
2599 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2603 void stb_phys(hwaddr addr, uint32_t val)
2606 cpu_physical_memory_write(addr, &v, 1);
2609 /* warning: addr must be aligned */
2610 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2611 enum device_endian endian)
2618 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2620 if (l < 2 || !memory_access_is_direct(mr, true)) {
2621 #if defined(TARGET_WORDS_BIGENDIAN)
2622 if (endian == DEVICE_LITTLE_ENDIAN) {
2626 if (endian == DEVICE_BIG_ENDIAN) {
2630 io_mem_write(mr, addr1, val, 2);
2633 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2634 ptr = qemu_get_ram_ptr(addr1);
2636 case DEVICE_LITTLE_ENDIAN:
2639 case DEVICE_BIG_ENDIAN:
2646 invalidate_and_set_dirty(addr1, 2);
2650 void stw_phys(hwaddr addr, uint32_t val)
2652 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2655 void stw_le_phys(hwaddr addr, uint32_t val)
2657 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2660 void stw_be_phys(hwaddr addr, uint32_t val)
2662 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2666 void stq_phys(hwaddr addr, uint64_t val)
2669 cpu_physical_memory_write(addr, &val, 8);
2672 void stq_le_phys(hwaddr addr, uint64_t val)
2674 val = cpu_to_le64(val);
2675 cpu_physical_memory_write(addr, &val, 8);
2678 void stq_be_phys(hwaddr addr, uint64_t val)
2680 val = cpu_to_be64(val);
2681 cpu_physical_memory_write(addr, &val, 8);
2684 /* virtual memory access for debug (includes writing to ROM) */
2685 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2686 uint8_t *buf, int len, int is_write)
2693 page = addr & TARGET_PAGE_MASK;
2694 phys_addr = cpu_get_phys_page_debug(cpu, page);
2695 /* if no physical page mapped, return an error */
2696 if (phys_addr == -1)
2698 l = (page + TARGET_PAGE_SIZE) - addr;
2701 phys_addr += (addr & ~TARGET_PAGE_MASK);
2703 cpu_physical_memory_write_rom(phys_addr, buf, l);
2705 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2714 #if !defined(CONFIG_USER_ONLY)
2717 * A helper function for the _utterly broken_ virtio device model to find out if
2718 * it's running on a big endian machine. Don't do this at home kids!
2720 bool virtio_is_big_endian(void);
2721 bool virtio_is_big_endian(void)
2723 #if defined(TARGET_WORDS_BIGENDIAN)
2732 #ifndef CONFIG_USER_ONLY
2733 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2738 mr = address_space_translate(&address_space_memory,
2739 phys_addr, &phys_addr, &l, false);
2741 return !(memory_region_is_ram(mr) ||
2742 memory_region_is_romd(mr));
2745 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2749 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2750 func(block->host, block->offset, block->length, opaque);