4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "hw/xen/xen.h"
36 #include "qemu/timer.h"
37 #include "qemu/config-file.h"
38 #include "exec/memory.h"
39 #include "sysemu/dma.h"
40 #include "exec/address-spaces.h"
41 #if defined(CONFIG_USER_ONLY)
43 #else /* !CONFIG_USER_ONLY */
44 #include "sysemu/xen-mapcache.h"
47 #include "exec/cpu-all.h"
49 #include "exec/cputlb.h"
50 #include "translate-all.h"
52 #include "exec/memory-internal.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static int in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
74 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
75 /* current CPU in the current thread. It is only valid inside cpu_exec(). */
77 DEFINE_TLS(CPUState *, current_cpu);
78 /* 0 = Do not count executed instructions.
79 1 = Precise instruction counting.
80 2 = Adaptive rate instruction counting. */
83 #if !defined(CONFIG_USER_ONLY)
85 typedef struct PhysPageEntry PhysPageEntry;
87 struct PhysPageEntry {
88 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
90 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
94 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96 /* Size of the L2 (and L3, etc) page tables. */
97 #define ADDR_SPACE_BITS 64
100 #define P_L2_SIZE (1 << P_L2_BITS)
102 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
104 typedef PhysPageEntry Node[P_L2_SIZE];
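/*
 * A minimal sketch of how the multi-level phys_map below is indexed: the
 * page offset is stripped with TARGET_PAGE_BITS, and the remaining page
 * frame number is consumed P_L2_BITS at a time from the most significant
 * chunk downwards, which is exactly the indexing phys_page_find() performs.
 * phys_map_level_index() is an illustrative helper only, not used elsewhere.
 */
static inline uint32_t phys_map_level_index(hwaddr addr, int level)
{
    hwaddr index = addr >> TARGET_PAGE_BITS;   /* page frame number */

    return (index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
}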
106 struct AddressSpaceDispatch {
107 /* This is a multi-level map on the physical address space.
108 * The bottom level has pointers to MemoryRegionSections.
110 PhysPageEntry phys_map;
112 MemoryRegionSection *sections;
116 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
117 typedef struct subpage_t {
121 uint16_t sub_section[TARGET_PAGE_SIZE];
124 #define PHYS_SECTION_UNASSIGNED 0
125 #define PHYS_SECTION_NOTDIRTY 1
126 #define PHYS_SECTION_ROM 2
127 #define PHYS_SECTION_WATCH 3
129 typedef struct PhysPageMap {
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
133 unsigned nodes_nb_alloc;
135 MemoryRegionSection *sections;
138 static PhysPageMap *prev_map;
139 static PhysPageMap next_map;
141 static void io_mem_init(void);
142 static void memory_map_init(void);
144 static MemoryRegion io_mem_watch;
147 #if !defined(CONFIG_USER_ONLY)
149 static void phys_map_node_reserve(unsigned nodes)
151 if (next_map.nodes_nb + nodes > next_map.nodes_nb_alloc) {
152 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc * 2,
154 next_map.nodes_nb_alloc = MAX(next_map.nodes_nb_alloc,
155 next_map.nodes_nb + nodes);
156 next_map.nodes = g_renew(Node, next_map.nodes,
157 next_map.nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(void)
166 ret = next_map.nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != next_map.nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 next_map.nodes[ret][i].skip = 1;
171 next_map.nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
176 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
177 hwaddr *nb, uint16_t leaf,
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc();
186 p = next_map.nodes[lp->ptr];
188 for (i = 0; i < P_L2_SIZE; i++) {
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
194 p = next_map.nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 phys_page_set_level(lp, index, nb, leaf, level - 1);
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(3 * P_L2_LEVELS);
218 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
287 hwaddr index = addr >> TARGET_PAGE_BITS;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
320 section = phys_page_find(d->phys_map, addr, d->nodes, d->sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
347 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
348 hwaddr *xlat, hwaddr *plen,
352 MemoryRegionSection *section;
357 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
360 if (!mr->iommu_ops) {
364 iotlb = mr->iommu_ops->translate(mr, addr);
365 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
366 | (addr & iotlb.addr_mask));
367 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
368 if (!(iotlb.perm & (1 << is_write))) {
369 mr = &io_mem_unassigned;
373 as = iotlb.target_as;
381 MemoryRegionSection *
382 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
385 MemoryRegionSection *section;
386 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
388 assert(!section->mr->iommu_ops);
393 void cpu_exec_init_all(void)
395 #if !defined(CONFIG_USER_ONLY)
396 qemu_mutex_init(&ram_list.mutex);
402 #if !defined(CONFIG_USER_ONLY)
404 static int cpu_common_post_load(void *opaque, int version_id)
406 CPUState *cpu = opaque;
408 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
409 version_id is increased. */
410 cpu->interrupt_request &= ~0x01;
411 tlb_flush(cpu->env_ptr, 1);
416 const VMStateDescription vmstate_cpu_common = {
417 .name = "cpu_common",
419 .minimum_version_id = 1,
420 .minimum_version_id_old = 1,
421 .post_load = cpu_common_post_load,
422 .fields = (VMStateField []) {
423 VMSTATE_UINT32(halted, CPUState),
424 VMSTATE_UINT32(interrupt_request, CPUState),
425 VMSTATE_END_OF_LIST()
431 CPUState *qemu_get_cpu(int index)
436 if (cpu->cpu_index == index) {
444 void cpu_exec_init(CPUArchState *env)
446 CPUState *cpu = ENV_GET_CPU(env);
447 CPUClass *cc = CPU_GET_CLASS(cpu);
451 #if defined(CONFIG_USER_ONLY)
455 CPU_FOREACH(some_cpu) {
458 cpu->cpu_index = cpu_index;
460 QTAILQ_INIT(&env->breakpoints);
461 QTAILQ_INIT(&env->watchpoints);
462 #ifndef CONFIG_USER_ONLY
463 cpu->thread_id = qemu_get_thread_id();
465 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
466 #if defined(CONFIG_USER_ONLY)
469 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
470 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
472 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
473 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
474 cpu_save, cpu_load, env);
475 assert(cc->vmsd == NULL);
476 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
478 if (cc->vmsd != NULL) {
479 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
483 #if defined(TARGET_HAS_ICE)
484 #if defined(CONFIG_USER_ONLY)
485 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
487 tb_invalidate_phys_page_range(pc, pc + 1, 0);
490 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
492 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
494 tb_invalidate_phys_addr(phys | (pc & ~TARGET_PAGE_MASK));
498 #endif /* TARGET_HAS_ICE */
500 #if defined(CONFIG_USER_ONLY)
501 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
506 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
507 int flags, CPUWatchpoint **watchpoint)
512 /* Add a watchpoint. */
513 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
514 int flags, CPUWatchpoint **watchpoint)
516 target_ulong len_mask = ~(len - 1);
519 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
520 if ((len & (len - 1)) || (addr & ~len_mask) ||
521 len == 0 || len > TARGET_PAGE_SIZE) {
522 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
523 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
526 wp = g_malloc(sizeof(*wp));
529 wp->len_mask = len_mask;
532 /* keep all GDB-injected watchpoints in front */
534 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
536 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
538 tlb_flush_page(env, addr);
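/*
 * A minimal sketch of the length/alignment rule checked above: len must be
 * a non-zero power of two no larger than a page, and addr must be aligned
 * to len, i.e. (addr & ~len_mask) == 0 with len_mask = ~(len - 1).  For
 * example, len == 4 gives len_mask == ~3, so addr may not have either of
 * its two low bits set.  watchpoint_args_valid() is an illustrative helper
 * only, not used elsewhere.
 */
static inline bool watchpoint_args_valid(target_ulong addr, target_ulong len)
{
    target_ulong len_mask = ~(len - 1);

    return len != 0
        && len <= TARGET_PAGE_SIZE
        && (len & (len - 1)) == 0       /* power of two */
        && (addr & ~len_mask) == 0;     /* aligned to len */
}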
545 /* Remove a specific watchpoint. */
546 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
549 target_ulong len_mask = ~(len - 1);
552 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
553 if (addr == wp->vaddr && len_mask == wp->len_mask
554 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
555 cpu_watchpoint_remove_by_ref(env, wp);
562 /* Remove a specific watchpoint by reference. */
563 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
565 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
567 tlb_flush_page(env, watchpoint->vaddr);
572 /* Remove all matching watchpoints. */
573 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
575 CPUWatchpoint *wp, *next;
577 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
578 if (wp->flags & mask)
579 cpu_watchpoint_remove_by_ref(env, wp);
584 /* Add a breakpoint. */
585 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
586 CPUBreakpoint **breakpoint)
588 #if defined(TARGET_HAS_ICE)
591 bp = g_malloc(sizeof(*bp));
596 /* keep all GDB-injected breakpoints in front */
597 if (flags & BP_GDB) {
598 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
600 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
603 breakpoint_invalidate(ENV_GET_CPU(env), pc);
614 /* Remove a specific breakpoint. */
615 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
617 #if defined(TARGET_HAS_ICE)
620 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
621 if (bp->pc == pc && bp->flags == flags) {
622 cpu_breakpoint_remove_by_ref(env, bp);
632 /* Remove a specific breakpoint by reference. */
633 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
635 #if defined(TARGET_HAS_ICE)
636 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
638 breakpoint_invalidate(ENV_GET_CPU(env), breakpoint->pc);
644 /* Remove all matching breakpoints. */
645 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
647 #if defined(TARGET_HAS_ICE)
648 CPUBreakpoint *bp, *next;
650 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
651 if (bp->flags & mask)
652 cpu_breakpoint_remove_by_ref(env, bp);
657 /* enable or disable single step mode. EXCP_DEBUG is returned by the
658 CPU loop after each instruction */
659 void cpu_single_step(CPUState *cpu, int enabled)
661 #if defined(TARGET_HAS_ICE)
662 if (cpu->singlestep_enabled != enabled) {
663 cpu->singlestep_enabled = enabled;
665 kvm_update_guest_debug(cpu, 0);
667 /* must flush all the translated code to avoid inconsistencies */
668 /* XXX: only flush what is necessary */
669 CPUArchState *env = cpu->env_ptr;
676 void cpu_abort(CPUArchState *env, const char *fmt, ...)
678 CPUState *cpu = ENV_GET_CPU(env);
684 fprintf(stderr, "qemu: fatal: ");
685 vfprintf(stderr, fmt, ap);
686 fprintf(stderr, "\n");
687 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
688 if (qemu_log_enabled()) {
689 qemu_log("qemu: fatal: ");
690 qemu_log_vprintf(fmt, ap2);
692 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
698 #if defined(CONFIG_USER_ONLY)
700 struct sigaction act;
701 sigfillset(&act.sa_mask);
702 act.sa_handler = SIG_DFL;
703 sigaction(SIGABRT, &act, NULL);
709 #if !defined(CONFIG_USER_ONLY)
710 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
714 /* The list is protected by the iothread lock here. */
715 block = ram_list.mru_block;
716 if (block && addr - block->offset < block->length) {
719 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
720 if (addr - block->offset < block->length) {
725 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
729 ram_list.mru_block = block;
733 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
739 block = qemu_get_ram_block(start);
740 assert(block == qemu_get_ram_block(end - 1));
741 start1 = (uintptr_t)block->host + (start - block->offset);
742 cpu_tlb_reset_dirty_all(start1, length);
745 /* Note: start and end must be within the same ram block. */
746 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
751 start &= TARGET_PAGE_MASK;
752 end = TARGET_PAGE_ALIGN(end);
754 length = end - start;
757 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
760 tlb_reset_dirty_range_all(start, end, length);
764 static int cpu_physical_memory_set_dirty_tracking(int enable)
767 in_migration = enable;
771 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
772 MemoryRegionSection *section,
774 hwaddr paddr, hwaddr xlat,
776 target_ulong *address)
781 if (memory_region_is_ram(section->mr)) {
783 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
785 if (!section->readonly) {
786 iotlb |= PHYS_SECTION_NOTDIRTY;
788 iotlb |= PHYS_SECTION_ROM;
791 iotlb = section - address_space_memory.dispatch->sections;
795 /* Make accesses to pages with watchpoints go via the
796 watchpoint trap routines. */
797 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
798 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
799 /* Avoid trapping reads of pages with a write breakpoint. */
800 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
801 iotlb = PHYS_SECTION_WATCH + paddr;
802 *address |= TLB_MMIO;
810 #endif /* defined(CONFIG_USER_ONLY) */
812 #if !defined(CONFIG_USER_ONLY)
814 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
816 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
818 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
821 * Set a custom physical guest memory allocator.
822 * Accelerators with unusual needs may need this. Hopefully, we can
823 * get rid of it eventually.
825 void phys_mem_set_alloc(void *(*alloc)(size_t))
827 phys_mem_alloc = alloc;
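/*
 * A usage sketch for phys_mem_set_alloc() above.  example_accel_ram_alloc()
 * and example_accel_init() are hypothetical names; a real allocator must
 * behave like qemu_anon_ram_alloc(), handing back page-aligned anonymous
 * memory for the whole requested size.
 */
#if 0   /* illustrative sketch, not compiled */
static void *example_accel_ram_alloc(size_t size)
{
    /* obtain guest RAM in whatever way the accelerator requires */
    return qemu_anon_ram_alloc(size);
}

static void example_accel_init(void)
{
    phys_mem_set_alloc(example_accel_ram_alloc);
}
#endif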
830 static uint16_t phys_section_add(MemoryRegionSection *section)
832 /* The physical section number is ORed with a page-aligned
833 * pointer to produce the iotlb entries. Thus it should
834 * never overflow into the page-aligned value.
836 assert(next_map.sections_nb < TARGET_PAGE_SIZE);
838 if (next_map.sections_nb == next_map.sections_nb_alloc) {
839 next_map.sections_nb_alloc = MAX(next_map.sections_nb_alloc * 2,
841 next_map.sections = g_renew(MemoryRegionSection, next_map.sections,
842 next_map.sections_nb_alloc);
844 next_map.sections[next_map.sections_nb] = *section;
845 memory_region_ref(section->mr);
846 return next_map.sections_nb++;
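/*
 * A minimal sketch of why the assertion above bounds the section count by
 * TARGET_PAGE_SIZE: memory_region_section_get_iotlb() builds iotlb entries
 * by ORing a section number into the sub-page bits of a page-aligned value,
 * and iotlb_to_region() recovers it with "index & ~TARGET_PAGE_MASK", so a
 * section number must stay below TARGET_PAGE_SIZE.  These two helpers are
 * illustrative only, not used elsewhere.
 */
static inline hwaddr iotlb_encode_section(hwaddr page_aligned, uint16_t section)
{
    return (page_aligned & TARGET_PAGE_MASK) | section;
}

static inline uint16_t iotlb_decode_section(hwaddr iotlb_entry)
{
    return iotlb_entry & ~TARGET_PAGE_MASK;
}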
849 static void phys_section_destroy(MemoryRegion *mr)
851 memory_region_unref(mr);
854 subpage_t *subpage = container_of(mr, subpage_t, iomem);
855 memory_region_destroy(&subpage->iomem);
860 static void phys_sections_free(PhysPageMap *map)
862 while (map->sections_nb > 0) {
863 MemoryRegionSection *section = &map->sections[--map->sections_nb];
864 phys_section_destroy(section->mr);
866 g_free(map->sections);
871 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
874 hwaddr base = section->offset_within_address_space
876 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
877 next_map.nodes, next_map.sections);
878 MemoryRegionSection subsection = {
879 .offset_within_address_space = base,
880 .size = int128_make64(TARGET_PAGE_SIZE),
884 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
886 if (!(existing->mr->subpage)) {
887 subpage = subpage_init(d->as, base);
888 subsection.mr = &subpage->iomem;
889 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
890 phys_section_add(&subsection));
892 subpage = container_of(existing->mr, subpage_t, iomem);
894 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
895 end = start + int128_get64(section->size) - 1;
896 subpage_register(subpage, start, end, phys_section_add(section));
900 static void register_multipage(AddressSpaceDispatch *d,
901 MemoryRegionSection *section)
903 hwaddr start_addr = section->offset_within_address_space;
904 uint16_t section_index = phys_section_add(section);
905 uint64_t num_pages = int128_get64(int128_rshift(section->size,
909 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
912 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
914 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
915 AddressSpaceDispatch *d = as->next_dispatch;
916 MemoryRegionSection now = *section, remain = *section;
917 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
919 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
920 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
921 - now.offset_within_address_space;
923 now.size = int128_min(int128_make64(left), now.size);
924 register_subpage(d, &now);
926 now.size = int128_zero();
928 while (int128_ne(remain.size, now.size)) {
929 remain.size = int128_sub(remain.size, now.size);
930 remain.offset_within_address_space += int128_get64(now.size);
931 remain.offset_within_region += int128_get64(now.size);
933 if (int128_lt(remain.size, page_size)) {
934 register_subpage(d, &now);
935 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
936 now.size = page_size;
937 register_subpage(d, &now);
939 now.size = int128_and(now.size, int128_neg(page_size));
940 register_multipage(d, &now);
945 void qemu_flush_coalesced_mmio_buffer(void)
948 kvm_flush_coalesced_mmio_buffer();
951 void qemu_mutex_lock_ramlist(void)
953 qemu_mutex_lock(&ram_list.mutex);
956 void qemu_mutex_unlock_ramlist(void)
958 qemu_mutex_unlock(&ram_list.mutex);
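/*
 * A minimal sketch of the locking convention around the RAM block list:
 * walks of ram_list.blocks that may race with block creation or deletion
 * take the ramlist mutex via the helpers above, as qemu_ram_alloc_from_ptr()
 * and qemu_ram_free() do below.  example_total_ram_size() is an illustrative
 * helper only, not used elsewhere.
 */
#if 0   /* illustrative sketch, not compiled */
static ram_addr_t example_total_ram_size(void)
{
    RAMBlock *block;
    ram_addr_t total = 0;

    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        total += block->length;
    }
    qemu_mutex_unlock_ramlist();
    return total;
}
#endif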
965 #define HUGETLBFS_MAGIC 0x958458f6
967 static long gethugepagesize(const char *path)
973 ret = statfs(path, &fs);
974 } while (ret != 0 && errno == EINTR);
981 if (fs.f_type != HUGETLBFS_MAGIC)
982 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
987 static sigjmp_buf sigjump;
989 static void sigbus_handler(int signal)
991 siglongjmp(sigjump, 1);
994 static void *file_ram_alloc(RAMBlock *block,
999 char *sanitized_name;
1003 unsigned long hpagesize;
1005 hpagesize = gethugepagesize(path);
1010 if (memory < hpagesize) {
1014 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1015 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1019 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1020 sanitized_name = g_strdup(block->mr->name);
1021 for (c = sanitized_name; *c != '\0'; c++) {
1026 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1028 g_free(sanitized_name);
1030 fd = mkstemp(filename);
1032 perror("unable to create backing store for hugepages");
1039 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1042 * ftruncate is not supported by hugetlbfs in older
1043 * hosts, so don't bother bailing out on errors.
1044 * If anything goes wrong with it under other filesystems,
1047 if (ftruncate(fd, memory))
1048 perror("ftruncate");
1050 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1051 if (area == MAP_FAILED) {
1052 perror("file_ram_alloc: can't mmap RAM pages");
1059 struct sigaction act, oldact;
1060 sigset_t set, oldset;
1062 memset(&act, 0, sizeof(act));
1063 act.sa_handler = &sigbus_handler;
1066 ret = sigaction(SIGBUS, &act, &oldact);
1068 perror("file_ram_alloc: failed to install signal handler");
1072 /* unblock SIGBUS */
1074 sigaddset(&set, SIGBUS);
1075 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1077 if (sigsetjmp(sigjump, 1)) {
1078 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1082 /* MAP_POPULATE silently ignores failures */
1083 for (i = 0; i < (memory/hpagesize)-1; i++) {
1084 memset(area + (hpagesize*i), 0, 1);
1087 ret = sigaction(SIGBUS, &oldact, NULL);
1089 perror("file_ram_alloc: failed to reinstall signal handler");
1093 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1100 static void *file_ram_alloc(RAMBlock *block,
1104 fprintf(stderr, "-mem-path not supported on this host\n");
1109 static ram_addr_t find_ram_offset(ram_addr_t size)
1111 RAMBlock *block, *next_block;
1112 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1114 assert(size != 0); /* it would hand out the same offset multiple times */
1116 if (QTAILQ_EMPTY(&ram_list.blocks))
1119 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1120 ram_addr_t end, next = RAM_ADDR_MAX;
1122 end = block->offset + block->length;
1124 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1125 if (next_block->offset >= end) {
1126 next = MIN(next, next_block->offset);
1129 if (next - end >= size && next - end < mingap) {
1131 mingap = next - end;
1135 if (offset == RAM_ADDR_MAX) {
1136 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1144 ram_addr_t last_ram_offset(void)
1147 ram_addr_t last = 0;
1149 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1150 last = MAX(last, block->offset + block->length);
1155 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1159 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1160 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1161 "dump-guest-core", true)) {
1162 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1164 perror("qemu_madvise");
1165 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1166 "but dump_guest_core=off specified\n");
1171 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1173 RAMBlock *new_block, *block;
1176 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1177 if (block->offset == addr) {
1183 assert(!new_block->idstr[0]);
1186 char *id = qdev_get_dev_path(dev);
1188 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1192 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1194 /* This assumes the iothread lock is taken here too. */
1195 qemu_mutex_lock_ramlist();
1196 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1197 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1198 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1203 qemu_mutex_unlock_ramlist();
1206 static int memory_try_enable_merging(void *addr, size_t len)
1208 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1209 /* disabled by the user */
1213 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1216 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1219 RAMBlock *block, *new_block;
1221 size = TARGET_PAGE_ALIGN(size);
1222 new_block = g_malloc0(sizeof(*new_block));
1225 /* This assumes the iothread lock is taken here too. */
1226 qemu_mutex_lock_ramlist();
1228 new_block->offset = find_ram_offset(size);
1230 new_block->host = host;
1231 new_block->flags |= RAM_PREALLOC_MASK;
1232 } else if (xen_enabled()) {
1234 fprintf(stderr, "-mem-path not supported with Xen\n");
1237 xen_ram_alloc(new_block->offset, size, mr);
1240 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1242 * file_ram_alloc() needs to allocate just like
1243 * phys_mem_alloc, but we haven't bothered to provide
1247 "-mem-path not supported with this accelerator\n");
1250 new_block->host = file_ram_alloc(new_block, size, mem_path);
1252 if (!new_block->host) {
1253 new_block->host = phys_mem_alloc(size);
1254 if (!new_block->host) {
1255 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1256 new_block->mr->name, strerror(errno));
1259 memory_try_enable_merging(new_block->host, size);
1262 new_block->length = size;
1264 /* Keep the list sorted from biggest to smallest block. */
1265 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1266 if (block->length < new_block->length) {
1271 QTAILQ_INSERT_BEFORE(block, new_block, next);
1273 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1275 ram_list.mru_block = NULL;
1278 qemu_mutex_unlock_ramlist();
1280 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1281 last_ram_offset() >> TARGET_PAGE_BITS);
1282 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1283 0, size >> TARGET_PAGE_BITS);
1284 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1286 qemu_ram_setup_dump(new_block->host, size);
1287 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1288 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1291 kvm_setup_guest_memory(new_block->host, size);
1293 return new_block->offset;
1296 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1298 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1301 void qemu_ram_free_from_ptr(ram_addr_t addr)
1305 /* This assumes the iothread lock is taken here too. */
1306 qemu_mutex_lock_ramlist();
1307 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1308 if (addr == block->offset) {
1309 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1310 ram_list.mru_block = NULL;
1316 qemu_mutex_unlock_ramlist();
1319 void qemu_ram_free(ram_addr_t addr)
1323 /* This assumes the iothread lock is taken here too. */
1324 qemu_mutex_lock_ramlist();
1325 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1326 if (addr == block->offset) {
1327 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1328 ram_list.mru_block = NULL;
1330 if (block->flags & RAM_PREALLOC_MASK) {
1332 } else if (xen_enabled()) {
1333 xen_invalidate_map_cache_entry(block->host);
1335 } else if (block->fd >= 0) {
1336 munmap(block->host, block->length);
1340 qemu_anon_ram_free(block->host, block->length);
1346 qemu_mutex_unlock_ramlist();
1351 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1358 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1359 offset = addr - block->offset;
1360 if (offset < block->length) {
1361 vaddr = block->host + offset;
1362 if (block->flags & RAM_PREALLOC_MASK) {
1364 } else if (xen_enabled()) {
1368 munmap(vaddr, length);
1369 if (block->fd >= 0) {
1371 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1374 flags |= MAP_PRIVATE;
1376 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1377 flags, block->fd, offset);
1380 * Remap needs to match alloc. Accelerators that
1381 * set phys_mem_alloc never remap. If they did,
1382 * we'd need a remap hook here.
1384 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1386 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1387 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1390 if (area != vaddr) {
1391 fprintf(stderr, "Could not remap addr: "
1392 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1396 memory_try_enable_merging(vaddr, length);
1397 qemu_ram_setup_dump(vaddr, length);
1403 #endif /* !_WIN32 */
1405 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1406 With the exception of the softmmu code in this file, this should
1407 only be used for local memory (e.g. video ram) that the device owns,
1408 and knows it isn't going to access beyond the end of the block.
1410 It should not be used for general purpose DMA.
1411 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1413 void *qemu_get_ram_ptr(ram_addr_t addr)
1415 RAMBlock *block = qemu_get_ram_block(addr);
1417 if (xen_enabled()) {
1418 /* We need to check if the requested address is in the RAM
1419 * because we don't want to map the entire memory in QEMU.
1420 * In that case just map until the end of the page.
1422 if (block->offset == 0) {
1423 return xen_map_cache(addr, 0, 0);
1424 } else if (block->host == NULL) {
1426 xen_map_cache(block->offset, block->length, 1);
1429 return block->host + (addr - block->offset);
1432 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1433 * but takes a size argument */
1434 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1439 if (xen_enabled()) {
1440 return xen_map_cache(addr, *size, 1);
1444 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1445 if (addr - block->offset < block->length) {
1446 if (addr - block->offset + *size > block->length)
1447 *size = block->length - addr + block->offset;
1448 return block->host + (addr - block->offset);
1452 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1457 /* Some of the softmmu routines need to translate from a host pointer
1458 (typically a TLB entry) back to a ram offset. */
1459 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1462 uint8_t *host = ptr;
1464 if (xen_enabled()) {
1465 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1466 return qemu_get_ram_block(*ram_addr)->mr;
1469 block = ram_list.mru_block;
1470 if (block && block->host && host - block->host < block->length) {
1474 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1475 /* This case happens when the block is not mapped. */
1476 if (block->host == NULL) {
1479 if (host - block->host < block->length) {
1487 *ram_addr = block->offset + (host - block->host);
1491 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1492 uint64_t val, unsigned size)
1495 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1496 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1497 tb_invalidate_phys_page_fast(ram_addr, size);
1498 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1502 stb_p(qemu_get_ram_ptr(ram_addr), val);
1505 stw_p(qemu_get_ram_ptr(ram_addr), val);
1508 stl_p(qemu_get_ram_ptr(ram_addr), val);
1513 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1514 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1515 /* we remove the notdirty callback only if the code has been
1517 if (dirty_flags == 0xff) {
1518 CPUArchState *env = current_cpu->env_ptr;
1519 tlb_set_dirty(env, env->mem_io_vaddr);
1523 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1524 unsigned size, bool is_write)
1529 static const MemoryRegionOps notdirty_mem_ops = {
1530 .write = notdirty_mem_write,
1531 .valid.accepts = notdirty_mem_accepts,
1532 .endianness = DEVICE_NATIVE_ENDIAN,
1535 /* Generate a debug exception if a watchpoint has been hit. */
1536 static void check_watchpoint(int offset, int len_mask, int flags)
1538 CPUArchState *env = current_cpu->env_ptr;
1539 target_ulong pc, cs_base;
1544 if (env->watchpoint_hit) {
1545 /* We re-entered the check after replacing the TB. Now raise
1546 * the debug interrupt so that it will trigger after the
1547 * current instruction. */
1548 cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1551 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1552 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1553 if ((vaddr == (wp->vaddr & len_mask) ||
1554 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1555 wp->flags |= BP_WATCHPOINT_HIT;
1556 if (!env->watchpoint_hit) {
1557 env->watchpoint_hit = wp;
1558 tb_check_watchpoint(env);
1559 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1560 env->exception_index = EXCP_DEBUG;
1563 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1564 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1565 cpu_resume_from_signal(env, NULL);
1569 wp->flags &= ~BP_WATCHPOINT_HIT;
1574 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1575 so these check for a hit then pass through to the normal out-of-line
1577 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1580 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1582 case 1: return ldub_phys(addr);
1583 case 2: return lduw_phys(addr);
1584 case 4: return ldl_phys(addr);
1589 static void watch_mem_write(void *opaque, hwaddr addr,
1590 uint64_t val, unsigned size)
1592 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1595 stb_phys(addr, val);
1598 stw_phys(addr, val);
1601 stl_phys(addr, val);
1607 static const MemoryRegionOps watch_mem_ops = {
1608 .read = watch_mem_read,
1609 .write = watch_mem_write,
1610 .endianness = DEVICE_NATIVE_ENDIAN,
1613 static uint64_t subpage_read(void *opaque, hwaddr addr,
1616 subpage_t *subpage = opaque;
1619 #if defined(DEBUG_SUBPAGE)
1620 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1621 subpage, len, addr);
1623 address_space_read(subpage->as, addr + subpage->base, buf, len);
1636 static void subpage_write(void *opaque, hwaddr addr,
1637 uint64_t value, unsigned len)
1639 subpage_t *subpage = opaque;
1642 #if defined(DEBUG_SUBPAGE)
1643 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1644 " value %"PRIx64"\n",
1645 __func__, subpage, len, addr, value);
1660 address_space_write(subpage->as, addr + subpage->base, buf, len);
1663 static bool subpage_accepts(void *opaque, hwaddr addr,
1664 unsigned len, bool is_write)
1666 subpage_t *subpage = opaque;
1667 #if defined(DEBUG_SUBPAGE)
1668 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1669 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1672 return address_space_access_valid(subpage->as, addr + subpage->base,
1676 static const MemoryRegionOps subpage_ops = {
1677 .read = subpage_read,
1678 .write = subpage_write,
1679 .valid.accepts = subpage_accepts,
1680 .endianness = DEVICE_NATIVE_ENDIAN,
1683 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1688 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1690 idx = SUBPAGE_IDX(start);
1691 eidx = SUBPAGE_IDX(end);
1692 #if defined(DEBUG_SUBPAGE)
1693 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1694 __func__, mmio, start, end, idx, eidx, section);
1696 for (; idx <= eidx; idx++) {
1697 mmio->sub_section[idx] = section;
1703 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1707 mmio = g_malloc0(sizeof(subpage_t));
1711 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1712 "subpage", TARGET_PAGE_SIZE);
1713 mmio->iomem.subpage = true;
1714 #if defined(DEBUG_SUBPAGE)
1715 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1716 mmio, base, TARGET_PAGE_SIZE);
1718 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1723 static uint16_t dummy_section(MemoryRegion *mr)
1725 MemoryRegionSection section = {
1727 .offset_within_address_space = 0,
1728 .offset_within_region = 0,
1729 .size = int128_2_64(),
1732 return phys_section_add(&section);
1735 MemoryRegion *iotlb_to_region(hwaddr index)
1737 return address_space_memory.dispatch->sections[index & ~TARGET_PAGE_MASK].mr;
1740 static void io_mem_init(void)
1742 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1743 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1744 "unassigned", UINT64_MAX);
1745 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1746 "notdirty", UINT64_MAX);
1747 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1748 "watch", UINT64_MAX);
1751 static void mem_begin(MemoryListener *listener)
1753 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1754 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1756 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1758 as->next_dispatch = d;
1761 static void mem_commit(MemoryListener *listener)
1763 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1764 AddressSpaceDispatch *cur = as->dispatch;
1765 AddressSpaceDispatch *next = as->next_dispatch;
1767 next->nodes = next_map.nodes;
1768 next->sections = next_map.sections;
1770 phys_page_compact_all(next, next_map.nodes_nb);
1772 as->dispatch = next;
1776 static void core_begin(MemoryListener *listener)
1780 prev_map = g_new(PhysPageMap, 1);
1781 *prev_map = next_map;
1783 memset(&next_map, 0, sizeof(next_map));
1784 n = dummy_section(&io_mem_unassigned);
1785 assert(n == PHYS_SECTION_UNASSIGNED);
1786 n = dummy_section(&io_mem_notdirty);
1787 assert(n == PHYS_SECTION_NOTDIRTY);
1788 n = dummy_section(&io_mem_rom);
1789 assert(n == PHYS_SECTION_ROM);
1790 n = dummy_section(&io_mem_watch);
1791 assert(n == PHYS_SECTION_WATCH);
1794 /* This listener's commit runs after the other AddressSpaceDispatch listeners'.
1795 * All AddressSpaceDispatch instances have switched to the next map.
1797 static void core_commit(MemoryListener *listener)
1799 phys_sections_free(prev_map);
1802 static void tcg_commit(MemoryListener *listener)
1806 /* since each CPU stores ram addresses in its TLB cache, we must
1807 reset the modified entries */
1810 CPUArchState *env = cpu->env_ptr;
1816 static void core_log_global_start(MemoryListener *listener)
1818 cpu_physical_memory_set_dirty_tracking(1);
1821 static void core_log_global_stop(MemoryListener *listener)
1823 cpu_physical_memory_set_dirty_tracking(0);
1826 static MemoryListener core_memory_listener = {
1827 .begin = core_begin,
1828 .commit = core_commit,
1829 .log_global_start = core_log_global_start,
1830 .log_global_stop = core_log_global_stop,
1834 static MemoryListener tcg_memory_listener = {
1835 .commit = tcg_commit,
1838 void address_space_init_dispatch(AddressSpace *as)
1840 as->dispatch = NULL;
1841 as->dispatch_listener = (MemoryListener) {
1843 .commit = mem_commit,
1844 .region_add = mem_add,
1845 .region_nop = mem_add,
1848 memory_listener_register(&as->dispatch_listener, as);
1851 void address_space_destroy_dispatch(AddressSpace *as)
1853 AddressSpaceDispatch *d = as->dispatch;
1855 memory_listener_unregister(&as->dispatch_listener);
1857 as->dispatch = NULL;
1860 static void memory_map_init(void)
1862 system_memory = g_malloc(sizeof(*system_memory));
1864 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1865 address_space_init(&address_space_memory, system_memory, "memory");
1867 system_io = g_malloc(sizeof(*system_io));
1868 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1870 address_space_init(&address_space_io, system_io, "I/O");
1872 memory_listener_register(&core_memory_listener, &address_space_memory);
1873 if (tcg_enabled()) {
1874 memory_listener_register(&tcg_memory_listener, &address_space_memory);
1878 MemoryRegion *get_system_memory(void)
1880 return system_memory;
1883 MemoryRegion *get_system_io(void)
1888 #endif /* !defined(CONFIG_USER_ONLY) */
1890 /* physical memory access (slow version, mainly for debug) */
1891 #if defined(CONFIG_USER_ONLY)
1892 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1893 uint8_t *buf, int len, int is_write)
1900 page = addr & TARGET_PAGE_MASK;
1901 l = (page + TARGET_PAGE_SIZE) - addr;
1904 flags = page_get_flags(page);
1905 if (!(flags & PAGE_VALID))
1908 if (!(flags & PAGE_WRITE))
1910 /* XXX: this code should not depend on lock_user */
1911 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1914 unlock_user(p, addr, l);
1916 if (!(flags & PAGE_READ))
1918 /* XXX: this code should not depend on lock_user */
1919 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1922 unlock_user(p, addr, 0);
1933 static void invalidate_and_set_dirty(hwaddr addr,
1936 if (!cpu_physical_memory_is_dirty(addr)) {
1937 /* invalidate code */
1938 tb_invalidate_phys_page_range(addr, addr + length, 0);
1940 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1942 xen_modified_memory(addr, length);
1945 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
1947 if (memory_region_is_ram(mr)) {
1948 return !(is_write && mr->readonly);
1950 if (memory_region_is_romd(mr)) {
1957 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1959 unsigned access_size_max = mr->ops->valid.max_access_size;
1961 /* Regions are assumed to support 1-4 byte accesses unless
1962 otherwise specified. */
1963 if (access_size_max == 0) {
1964 access_size_max = 4;
1967 /* Bound the maximum access by the alignment of the address. */
1968 if (!mr->ops->impl.unaligned) {
1969 unsigned align_size_max = addr & -addr;
1970 if (align_size_max != 0 && align_size_max < access_size_max) {
1971 access_size_max = align_size_max;
1975 /* Don't attempt accesses larger than the maximum. */
1976 if (l > access_size_max) {
1977 l = access_size_max;
1980 l = 1 << (qemu_fls(l) - 1);
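/*
 * A worked example of the clamping above, with concrete numbers that are
 * illustrative only: an 8-byte request at address 0x1006 against a region
 * whose max_access_size is 4 ends up as a 2-byte access, because the address
 * alignment (addr & -addr == 2) is the tightest bound; address_space_rw()
 * then simply loops for the remainder.
 */
static inline unsigned memory_access_size_example(void)
{
    unsigned access_size_max = 4;           /* region's declared maximum */
    hwaddr addr = 0x1006;
    unsigned l = 8;                         /* requested length */

    unsigned align_size_max = addr & -addr; /* == 2 */
    if (align_size_max != 0 && align_size_max < access_size_max) {
        access_size_max = align_size_max;   /* == 2 */
    }
    if (l > access_size_max) {
        l = access_size_max;                /* == 2 */
    }
    return 1 << (qemu_fls(l) - 1);          /* == 2 */
}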
1986 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1987 int len, bool is_write)
1998 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2001 if (!memory_access_is_direct(mr, is_write)) {
2002 l = memory_access_size(mr, l, addr1);
2003 /* XXX: could force current_cpu to NULL to avoid
2007 /* 64 bit write access */
2009 error |= io_mem_write(mr, addr1, val, 8);
2012 /* 32 bit write access */
2014 error |= io_mem_write(mr, addr1, val, 4);
2017 /* 16 bit write access */
2019 error |= io_mem_write(mr, addr1, val, 2);
2022 /* 8 bit write access */
2024 error |= io_mem_write(mr, addr1, val, 1);
2030 addr1 += memory_region_get_ram_addr(mr);
2032 ptr = qemu_get_ram_ptr(addr1);
2033 memcpy(ptr, buf, l);
2034 invalidate_and_set_dirty(addr1, l);
2037 if (!memory_access_is_direct(mr, is_write)) {
2039 l = memory_access_size(mr, l, addr1);
2042 /* 64 bit read access */
2043 error |= io_mem_read(mr, addr1, &val, 8);
2047 /* 32 bit read access */
2048 error |= io_mem_read(mr, addr1, &val, 4);
2052 /* 16 bit read access */
2053 error |= io_mem_read(mr, addr1, &val, 2);
2057 /* 8 bit read access */
2058 error |= io_mem_read(mr, addr1, &val, 1);
2066 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2067 memcpy(buf, ptr, l);
2078 bool address_space_write(AddressSpace *as, hwaddr addr,
2079 const uint8_t *buf, int len)
2081 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2084 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2086 return address_space_rw(as, addr, buf, len, false);
2090 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2091 int len, int is_write)
2093 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2096 /* used for ROM loading: can write in RAM and ROM */
2097 void cpu_physical_memory_write_rom(hwaddr addr,
2098 const uint8_t *buf, int len)
2107 mr = address_space_translate(&address_space_memory,
2108 addr, &addr1, &l, true);
2110 if (!(memory_region_is_ram(mr) ||
2111 memory_region_is_romd(mr))) {
2114 addr1 += memory_region_get_ram_addr(mr);
2116 ptr = qemu_get_ram_ptr(addr1);
2117 memcpy(ptr, buf, l);
2118 invalidate_and_set_dirty(addr1, l);
2133 static BounceBuffer bounce;
2135 typedef struct MapClient {
2137 void (*callback)(void *opaque);
2138 QLIST_ENTRY(MapClient) link;
2141 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2142 = QLIST_HEAD_INITIALIZER(map_client_list);
2144 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2146 MapClient *client = g_malloc(sizeof(*client));
2148 client->opaque = opaque;
2149 client->callback = callback;
2150 QLIST_INSERT_HEAD(&map_client_list, client, link);
2154 static void cpu_unregister_map_client(void *_client)
2156 MapClient *client = (MapClient *)_client;
2158 QLIST_REMOVE(client, link);
2162 static void cpu_notify_map_clients(void)
2166 while (!QLIST_EMPTY(&map_client_list)) {
2167 client = QLIST_FIRST(&map_client_list);
2168 client->callback(client->opaque);
2169 cpu_unregister_map_client(client);
2173 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2180 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2181 if (!memory_access_is_direct(mr, is_write)) {
2182 l = memory_access_size(mr, l, addr);
2183 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2194 /* Map a physical memory region into a host virtual address.
2195 * May map a subset of the requested range, given by and returned in *plen.
2196 * May return NULL if resources needed to perform the mapping are exhausted.
2197 * Use only for reads OR writes - not for read-modify-write operations.
2198 * Use cpu_register_map_client() to know when retrying the map operation is
2199 * likely to succeed.
2201 void *address_space_map(AddressSpace *as,
2208 hwaddr l, xlat, base;
2209 MemoryRegion *mr, *this_mr;
2217 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2218 if (!memory_access_is_direct(mr, is_write)) {
2219 if (bounce.buffer) {
2222 /* Avoid unbounded allocations */
2223 l = MIN(l, TARGET_PAGE_SIZE);
2224 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2228 memory_region_ref(mr);
2231 address_space_read(as, addr, bounce.buffer, l);
2235 return bounce.buffer;
2239 raddr = memory_region_get_ram_addr(mr);
2250 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2251 if (this_mr != mr || xlat != base + done) {
2256 memory_region_ref(mr);
2258 return qemu_ram_ptr_length(raddr + base, plen);
2261 /* Unmaps a memory region previously mapped by address_space_map().
2262 * Will also mark the memory as dirty if is_write == 1. access_len gives
2263 * the amount of memory that was actually read or written by the caller.
2265 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2266 int is_write, hwaddr access_len)
2268 if (buffer != bounce.buffer) {
2272 mr = qemu_ram_addr_from_host(buffer, &addr1);
2275 while (access_len) {
2277 l = TARGET_PAGE_SIZE;
2280 invalidate_and_set_dirty(addr1, l);
2285 if (xen_enabled()) {
2286 xen_invalidate_map_cache_entry(buffer);
2288 memory_region_unref(mr);
2292 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2294 qemu_vfree(bounce.buffer);
2295 bounce.buffer = NULL;
2296 memory_region_unref(bounce.mr);
2297 cpu_notify_map_clients();
2300 void *cpu_physical_memory_map(hwaddr addr,
2304 return address_space_map(&address_space_memory, addr, plen, is_write);
2307 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2308 int is_write, hwaddr access_len)
2310 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
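/*
 * A minimal usage sketch for the map/unmap pair above.  example_read_guest()
 * is a hypothetical caller: it must cope with *plen coming back smaller than
 * requested and with the mapping failing outright (e.g. when the single
 * bounce buffer is already in use), in which case cpu_register_map_client()
 * can be used to retry later.
 */
#if 0   /* illustrative sketch, not compiled */
static void example_read_guest(hwaddr gpa, uint8_t *dst, hwaddr len)
{
    while (len > 0) {
        hwaddr plen = len;
        void *p = cpu_physical_memory_map(gpa, &plen, 0 /* is_write */);

        if (!p) {
            break;              /* resources exhausted, retry later */
        }
        memcpy(dst, p, plen);
        cpu_physical_memory_unmap(p, plen, 0 /* is_write */, plen);
        gpa += plen;
        dst += plen;
        len -= plen;
    }
}
#endif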
2313 /* warning: addr must be aligned */
2314 static inline uint32_t ldl_phys_internal(hwaddr addr,
2315 enum device_endian endian)
2323 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2325 if (l < 4 || !memory_access_is_direct(mr, false)) {
2327 io_mem_read(mr, addr1, &val, 4);
2328 #if defined(TARGET_WORDS_BIGENDIAN)
2329 if (endian == DEVICE_LITTLE_ENDIAN) {
2333 if (endian == DEVICE_BIG_ENDIAN) {
2339 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2343 case DEVICE_LITTLE_ENDIAN:
2344 val = ldl_le_p(ptr);
2346 case DEVICE_BIG_ENDIAN:
2347 val = ldl_be_p(ptr);
2357 uint32_t ldl_phys(hwaddr addr)
2359 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2362 uint32_t ldl_le_phys(hwaddr addr)
2364 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2367 uint32_t ldl_be_phys(hwaddr addr)
2369 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
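/*
 * A minimal sketch of the endian fixup elided in the MMIO branch of
 * ldl_phys_internal() above: io_mem_read() returns the value in the target's
 * byte order, so it is byteswapped whenever the requested device endianness
 * differs from the target's (the 2- and 8-byte variants below do the same
 * with bswap16()/bswap64()).  fixup_endian32() is an illustrative helper
 * only, not used elsewhere.
 */
static inline uint32_t fixup_endian32(uint32_t val, enum device_endian endian)
{
#if defined(TARGET_WORDS_BIGENDIAN)
    if (endian == DEVICE_LITTLE_ENDIAN) {
        val = bswap32(val);
    }
#else
    if (endian == DEVICE_BIG_ENDIAN) {
        val = bswap32(val);
    }
#endif
    return val;
}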
2372 /* warning: addr must be aligned */
2373 static inline uint64_t ldq_phys_internal(hwaddr addr,
2374 enum device_endian endian)
2382 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2384 if (l < 8 || !memory_access_is_direct(mr, false)) {
2386 io_mem_read(mr, addr1, &val, 8);
2387 #if defined(TARGET_WORDS_BIGENDIAN)
2388 if (endian == DEVICE_LITTLE_ENDIAN) {
2392 if (endian == DEVICE_BIG_ENDIAN) {
2398 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2402 case DEVICE_LITTLE_ENDIAN:
2403 val = ldq_le_p(ptr);
2405 case DEVICE_BIG_ENDIAN:
2406 val = ldq_be_p(ptr);
2416 uint64_t ldq_phys(hwaddr addr)
2418 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2421 uint64_t ldq_le_phys(hwaddr addr)
2423 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2426 uint64_t ldq_be_phys(hwaddr addr)
2428 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2432 uint32_t ldub_phys(hwaddr addr)
2435 cpu_physical_memory_read(addr, &val, 1);
2439 /* warning: addr must be aligned */
2440 static inline uint32_t lduw_phys_internal(hwaddr addr,
2441 enum device_endian endian)
2449 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2451 if (l < 2 || !memory_access_is_direct(mr, false)) {
2453 io_mem_read(mr, addr1, &val, 2);
2454 #if defined(TARGET_WORDS_BIGENDIAN)
2455 if (endian == DEVICE_LITTLE_ENDIAN) {
2459 if (endian == DEVICE_BIG_ENDIAN) {
2465 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2469 case DEVICE_LITTLE_ENDIAN:
2470 val = lduw_le_p(ptr);
2472 case DEVICE_BIG_ENDIAN:
2473 val = lduw_be_p(ptr);
2483 uint32_t lduw_phys(hwaddr addr)
2485 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2488 uint32_t lduw_le_phys(hwaddr addr)
2490 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2493 uint32_t lduw_be_phys(hwaddr addr)
2495 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2498 /* warning: addr must be aligned. The ram page is not masked as dirty
2499 and the code inside is not invalidated. It is useful if the dirty
2500 bits are used to track modified PTEs */
2501 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2508 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2510 if (l < 4 || !memory_access_is_direct(mr, true)) {
2511 io_mem_write(mr, addr1, val, 4);
2513 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2514 ptr = qemu_get_ram_ptr(addr1);
2517 if (unlikely(in_migration)) {
2518 if (!cpu_physical_memory_is_dirty(addr1)) {
2519 /* invalidate code */
2520 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2522 cpu_physical_memory_set_dirty_flags(
2523 addr1, (0xff & ~CODE_DIRTY_FLAG));
2529 /* warning: addr must be aligned */
2530 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2531 enum device_endian endian)
2538 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2540 if (l < 4 || !memory_access_is_direct(mr, true)) {
2541 #if defined(TARGET_WORDS_BIGENDIAN)
2542 if (endian == DEVICE_LITTLE_ENDIAN) {
2546 if (endian == DEVICE_BIG_ENDIAN) {
2550 io_mem_write(mr, addr1, val, 4);
2553 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2554 ptr = qemu_get_ram_ptr(addr1);
2556 case DEVICE_LITTLE_ENDIAN:
2559 case DEVICE_BIG_ENDIAN:
2566 invalidate_and_set_dirty(addr1, 4);
2570 void stl_phys(hwaddr addr, uint32_t val)
2572 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2575 void stl_le_phys(hwaddr addr, uint32_t val)
2577 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2580 void stl_be_phys(hwaddr addr, uint32_t val)
2582 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2586 void stb_phys(hwaddr addr, uint32_t val)
2589 cpu_physical_memory_write(addr, &v, 1);
2592 /* warning: addr must be aligned */
2593 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2594 enum device_endian endian)
2601 mr = address_space_translate(&address_space_memory, addr, &addr1, &l,
2603 if (l < 2 || !memory_access_is_direct(mr, true)) {
2604 #if defined(TARGET_WORDS_BIGENDIAN)
2605 if (endian == DEVICE_LITTLE_ENDIAN) {
2609 if (endian == DEVICE_BIG_ENDIAN) {
2613 io_mem_write(mr, addr1, val, 2);
2616 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2617 ptr = qemu_get_ram_ptr(addr1);
2619 case DEVICE_LITTLE_ENDIAN:
2622 case DEVICE_BIG_ENDIAN:
2629 invalidate_and_set_dirty(addr1, 2);
2633 void stw_phys(hwaddr addr, uint32_t val)
2635 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2638 void stw_le_phys(hwaddr addr, uint32_t val)
2640 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2643 void stw_be_phys(hwaddr addr, uint32_t val)
2645 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2649 void stq_phys(hwaddr addr, uint64_t val)
2652 cpu_physical_memory_write(addr, &val, 8);
2655 void stq_le_phys(hwaddr addr, uint64_t val)
2657 val = cpu_to_le64(val);
2658 cpu_physical_memory_write(addr, &val, 8);
2661 void stq_be_phys(hwaddr addr, uint64_t val)
2663 val = cpu_to_be64(val);
2664 cpu_physical_memory_write(addr, &val, 8);
2667 /* virtual memory access for debug (includes writing to ROM) */
2668 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2669 uint8_t *buf, int len, int is_write)
2676 page = addr & TARGET_PAGE_MASK;
2677 phys_addr = cpu_get_phys_page_debug(cpu, page);
2678 /* if no physical page mapped, return an error */
2679 if (phys_addr == -1)
2681 l = (page + TARGET_PAGE_SIZE) - addr;
2684 phys_addr += (addr & ~TARGET_PAGE_MASK);
2686 cpu_physical_memory_write_rom(phys_addr, buf, l);
2688 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2697 #if !defined(CONFIG_USER_ONLY)
2700 * A helper function for the _utterly broken_ virtio device model to find out if
2701 * it's running on a big endian machine. Don't do this at home kids!
2703 bool virtio_is_big_endian(void);
2704 bool virtio_is_big_endian(void)
2706 #if defined(TARGET_WORDS_BIGENDIAN)
2715 #ifndef CONFIG_USER_ONLY
2716 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2721 mr = address_space_translate(&address_space_memory,
2722 phys_addr, &phys_addr, &l, false);
2724 return !(memory_region_is_ram(mr) ||
2725 memory_region_is_romd(mr));
2728 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2732 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2733 func(block->host, block->offset, block->length, opaque);