4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
72 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
73 #define RAM_PREALLOC (1 << 0)
75 /* RAM is mmap-ed with MAP_SHARED */
76 #define RAM_SHARED (1 << 1)
80 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
81 /* current CPU in the current thread. It is only valid inside
83 DEFINE_TLS(CPUState *, current_cpu);
84 /* 0 = Do not count executed instructions.
85 1 = Precise instruction counting.
86 2 = Adaptive rate instruction counting. */
89 #if !defined(CONFIG_USER_ONLY)
91 typedef struct PhysPageEntry PhysPageEntry;
93 struct PhysPageEntry {
94 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
96 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
100 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
102 /* Size of the L2 (and L3, etc) page tables. */
103 #define ADDR_SPACE_BITS 64
106 #define P_L2_SIZE (1 << P_L2_BITS)
108 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
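/* Worked example (not in the original file): with ADDR_SPACE_BITS = 64 and,
 * say, TARGET_PAGE_BITS = 12 and P_L2_BITS = 9 (both are target/build
 * dependent), P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6, i.e. the physical
 * map is a six-level radix tree whose nodes each hold P_L2_SIZE = 512
 * entries. */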
110 typedef PhysPageEntry Node[P_L2_SIZE];
112 typedef struct PhysPageMap {
113 unsigned sections_nb;
114 unsigned sections_nb_alloc;
116 unsigned nodes_nb_alloc;
118 MemoryRegionSection *sections;
121 struct AddressSpaceDispatch {
122 /* This is a multi-level map on the physical address space.
123 * The bottom level has pointers to MemoryRegionSections.
125 PhysPageEntry phys_map;
130 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
131 typedef struct subpage_t {
135 uint16_t sub_section[TARGET_PAGE_SIZE];
138 #define PHYS_SECTION_UNASSIGNED 0
139 #define PHYS_SECTION_NOTDIRTY 1
140 #define PHYS_SECTION_ROM 2
141 #define PHYS_SECTION_WATCH 3
143 static void io_mem_init(void);
144 static void memory_map_init(void);
145 static void tcg_commit(MemoryListener *listener);
147 static MemoryRegion io_mem_watch;
150 #if !defined(CONFIG_USER_ONLY)
152 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
154 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
155 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
156 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
157 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
161 static uint32_t phys_map_node_alloc(PhysPageMap *map)
166 ret = map->nodes_nb++;
167 assert(ret != PHYS_MAP_NODE_NIL);
168 assert(ret != map->nodes_nb_alloc);
169 for (i = 0; i < P_L2_SIZE; ++i) {
170 map->nodes[ret][i].skip = 1;
171 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
176 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
177 hwaddr *index, hwaddr *nb, uint16_t leaf,
182 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
184 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
185 lp->ptr = phys_map_node_alloc(map);
186 p = map->nodes[lp->ptr];
188 for (i = 0; i < P_L2_SIZE; i++) {
190 p[i].ptr = PHYS_SECTION_UNASSIGNED;
194 p = map->nodes[lp->ptr];
196 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
198 while (*nb && lp < &p[P_L2_SIZE]) {
199 if ((*index & (step - 1)) == 0 && *nb >= step) {
205 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
211 static void phys_page_set(AddressSpaceDispatch *d,
212 hwaddr index, hwaddr nb,
215 /* Wildly overreserve - it doesn't matter much. */
216 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
218 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
221 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
222 * and update our entry so we can skip it and go directly to the destination.
224 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
226 unsigned valid_ptr = P_L2_SIZE;
231 if (lp->ptr == PHYS_MAP_NODE_NIL) {
236 for (i = 0; i < P_L2_SIZE; i++) {
237 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
244 phys_page_compact(&p[i], nodes, compacted);
248 /* We can only compress if there's only one child. */
253 assert(valid_ptr < P_L2_SIZE);
255 /* Don't compress if it won't fit in the # of bits we have. */
256 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
260 lp->ptr = p[valid_ptr].ptr;
261 if (!p[valid_ptr].skip) {
262 /* If our only child is a leaf, make this a leaf. */
263 /* By design, we should have made this node a leaf to begin with so we
264 * should never reach here.
265 * But since it's so simple to handle this, let's do it just in case we
270 lp->skip += p[valid_ptr].skip;
274 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
276 DECLARE_BITMAP(compacted, nodes_nb);
278 if (d->phys_map.skip) {
279 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
283 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
284 Node *nodes, MemoryRegionSection *sections)
287 hwaddr index = addr >> TARGET_PAGE_BITS;
290 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
291 if (lp.ptr == PHYS_MAP_NODE_NIL) {
292 return &sections[PHYS_SECTION_UNASSIGNED];
295 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
298 if (sections[lp.ptr].size.hi ||
299 range_covers_byte(sections[lp.ptr].offset_within_address_space,
300 sections[lp.ptr].size.lo, addr)) {
301 return &sections[lp.ptr];
303 return &sections[PHYS_SECTION_UNASSIGNED];
307 bool memory_region_is_unassigned(MemoryRegion *mr)
309 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
310 && mr != &io_mem_watch;
313 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
315 bool resolve_subpage)
317 MemoryRegionSection *section;
320 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
321 if (resolve_subpage && section->mr->subpage) {
322 subpage = container_of(section->mr, subpage_t, iomem);
323 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
328 static MemoryRegionSection *
329 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
330 hwaddr *plen, bool resolve_subpage)
332 MemoryRegionSection *section;
335 section = address_space_lookup_region(d, addr, resolve_subpage);
336 /* Compute offset within MemoryRegionSection */
337 addr -= section->offset_within_address_space;
339 /* Compute offset within MemoryRegion */
340 *xlat = addr + section->offset_within_region;
342 diff = int128_sub(section->mr->size, int128_make64(addr));
343 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
347 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
349 if (memory_region_is_ram(mr)) {
350 return !(is_write && mr->readonly);
352 if (memory_region_is_romd(mr)) {
359 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
360 hwaddr *xlat, hwaddr *plen,
364 MemoryRegionSection *section;
369 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
372 if (!mr->iommu_ops) {
376 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
377 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
378 | (addr & iotlb.addr_mask));
379 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
380 if (!(iotlb.perm & (1 << is_write))) {
381 mr = &io_mem_unassigned;
385 as = iotlb.target_as;
388 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
389 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
390 len = MIN(page, len);
398 MemoryRegionSection *
399 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
402 MemoryRegionSection *section;
403 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
405 assert(!section->mr->iommu_ops);
410 void cpu_exec_init_all(void)
412 #if !defined(CONFIG_USER_ONLY)
413 qemu_mutex_init(&ram_list.mutex);
419 #if !defined(CONFIG_USER_ONLY)
421 static int cpu_common_post_load(void *opaque, int version_id)
423 CPUState *cpu = opaque;
425 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
426 version_id is increased. */
427 cpu->interrupt_request &= ~0x01;
433 static int cpu_common_pre_load(void *opaque)
435 CPUState *cpu = opaque;
437 cpu->exception_index = 0;
442 static bool cpu_common_exception_index_needed(void *opaque)
444 CPUState *cpu = opaque;
446 return cpu->exception_index != 0;
449 static const VMStateDescription vmstate_cpu_common_exception_index = {
450 .name = "cpu_common/exception_index",
452 .minimum_version_id = 1,
453 .fields = (VMStateField[]) {
454 VMSTATE_INT32(exception_index, CPUState),
455 VMSTATE_END_OF_LIST()
459 const VMStateDescription vmstate_cpu_common = {
460 .name = "cpu_common",
462 .minimum_version_id = 1,
463 .pre_load = cpu_common_pre_load,
464 .post_load = cpu_common_post_load,
465 .fields = (VMStateField[]) {
466 VMSTATE_UINT32(halted, CPUState),
467 VMSTATE_UINT32(interrupt_request, CPUState),
468 VMSTATE_END_OF_LIST()
470 .subsections = (VMStateSubsection[]) {
472 .vmsd = &vmstate_cpu_common_exception_index,
473 .needed = cpu_common_exception_index_needed,
482 CPUState *qemu_get_cpu(int index)
487 if (cpu->cpu_index == index) {
495 #if !defined(CONFIG_USER_ONLY)
496 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
498 /* We only support one address space per cpu at the moment. */
499 assert(cpu->as == as);
501 if (cpu->tcg_as_listener) {
502 memory_listener_unregister(cpu->tcg_as_listener);
504 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
506 cpu->tcg_as_listener->commit = tcg_commit;
507 memory_listener_register(cpu->tcg_as_listener, as);
511 void cpu_exec_init(CPUArchState *env)
513 CPUState *cpu = ENV_GET_CPU(env);
514 CPUClass *cc = CPU_GET_CLASS(cpu);
518 #if defined(CONFIG_USER_ONLY)
522 CPU_FOREACH(some_cpu) {
525 cpu->cpu_index = cpu_index;
527 QTAILQ_INIT(&cpu->breakpoints);
528 QTAILQ_INIT(&cpu->watchpoints);
529 #ifndef CONFIG_USER_ONLY
530 cpu->as = &address_space_memory;
531 cpu->thread_id = qemu_get_thread_id();
533 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
534 #if defined(CONFIG_USER_ONLY)
537 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
538 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
540 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
541 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
542 cpu_save, cpu_load, env);
543 assert(cc->vmsd == NULL);
544 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
546 if (cc->vmsd != NULL) {
547 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
551 #if defined(TARGET_HAS_ICE)
552 #if defined(CONFIG_USER_ONLY)
553 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
555 tb_invalidate_phys_page_range(pc, pc + 1, 0);
558 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
560 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
562 tb_invalidate_phys_addr(cpu->as,
563 phys | (pc & ~TARGET_PAGE_MASK));
567 #endif /* TARGET_HAS_ICE */
569 #if defined(CONFIG_USER_ONLY)
570 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
575 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
576 int flags, CPUWatchpoint **watchpoint)
581 /* Add a watchpoint. */
582 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
583 int flags, CPUWatchpoint **watchpoint)
585 vaddr len_mask = ~(len - 1);
588 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
589 if ((len & (len - 1)) || (addr & ~len_mask) ||
590 len == 0 || len > TARGET_PAGE_SIZE) {
591 error_report("tried to set invalid watchpoint at %"
592 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
595 wp = g_malloc(sizeof(*wp));
598 wp->len_mask = len_mask;
601 /* keep all GDB-injected watchpoints in front */
602 if (flags & BP_GDB) {
603 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
605 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
608 tlb_flush_page(cpu, addr);
615 /* Remove a specific watchpoint. */
616 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
619 vaddr len_mask = ~(len - 1);
622 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
623 if (addr == wp->vaddr && len_mask == wp->len_mask
624 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
625 cpu_watchpoint_remove_by_ref(cpu, wp);
632 /* Remove a specific watchpoint by reference. */
633 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
635 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
637 tlb_flush_page(cpu, watchpoint->vaddr);
642 /* Remove all matching watchpoints. */
643 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
645 CPUWatchpoint *wp, *next;
647 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
648 if (wp->flags & mask) {
649 cpu_watchpoint_remove_by_ref(cpu, wp);
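/* Illustrative sketch, not part of the original file: how a caller such as
 * the gdbstub might use the watchpoint API above.  The length must be a
 * power of two and the address aligned to it, per the sanity checks in
 * cpu_watchpoint_insert(). */
#if 0
static int example_set_write_watch(CPUState *cpu, vaddr addr)
{
    CPUWatchpoint *wp;
    int ret;

    /* Watch a naturally aligned 4-byte region for guest writes. */
    ret = cpu_watchpoint_insert(cpu, addr, 4, BP_MEM_WRITE | BP_GDB, &wp);
    if (ret < 0) {
        return ret;
    }
    /* ... run the guest; hits are reported via check_watchpoint() ... */
    cpu_watchpoint_remove_by_ref(cpu, wp);
    return 0;
}
#endif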
655 /* Add a breakpoint. */
656 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
657 CPUBreakpoint **breakpoint)
659 #if defined(TARGET_HAS_ICE)
662 bp = g_malloc(sizeof(*bp));
667 /* keep all GDB-injected breakpoints in front */
668 if (flags & BP_GDB) {
669 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
671 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
674 breakpoint_invalidate(cpu, pc);
685 /* Remove a specific breakpoint. */
686 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
688 #if defined(TARGET_HAS_ICE)
691 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
692 if (bp->pc == pc && bp->flags == flags) {
693 cpu_breakpoint_remove_by_ref(cpu, bp);
703 /* Remove a specific breakpoint by reference. */
704 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
706 #if defined(TARGET_HAS_ICE)
707 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
709 breakpoint_invalidate(cpu, breakpoint->pc);
715 /* Remove all matching breakpoints. */
716 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
718 #if defined(TARGET_HAS_ICE)
719 CPUBreakpoint *bp, *next;
721 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
722 if (bp->flags & mask) {
723 cpu_breakpoint_remove_by_ref(cpu, bp);
729 /* enable or disable single step mode. EXCP_DEBUG is returned by the
730 CPU loop after each instruction */
731 void cpu_single_step(CPUState *cpu, int enabled)
733 #if defined(TARGET_HAS_ICE)
734 if (cpu->singlestep_enabled != enabled) {
735 cpu->singlestep_enabled = enabled;
737 kvm_update_guest_debug(cpu, 0);
739 /* must flush all the translated code to avoid inconsistencies */
740 /* XXX: only flush what is necessary */
741 CPUArchState *env = cpu->env_ptr;
748 void cpu_abort(CPUState *cpu, const char *fmt, ...)
755 fprintf(stderr, "qemu: fatal: ");
756 vfprintf(stderr, fmt, ap);
757 fprintf(stderr, "\n");
758 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
759 if (qemu_log_enabled()) {
760 qemu_log("qemu: fatal: ");
761 qemu_log_vprintf(fmt, ap2);
763 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
769 #if defined(CONFIG_USER_ONLY)
771 struct sigaction act;
772 sigfillset(&act.sa_mask);
773 act.sa_handler = SIG_DFL;
774 sigaction(SIGABRT, &act, NULL);
780 #if !defined(CONFIG_USER_ONLY)
781 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
785 /* The list is protected by the iothread lock here. */
786 block = ram_list.mru_block;
787 if (block && addr - block->offset < block->length) {
790 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
791 if (addr - block->offset < block->length) {
796 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
800 ram_list.mru_block = block;
804 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
810 end = TARGET_PAGE_ALIGN(start + length);
811 start &= TARGET_PAGE_MASK;
813 block = qemu_get_ram_block(start);
814 assert(block == qemu_get_ram_block(end - 1));
815 start1 = (uintptr_t)block->host + (start - block->offset);
816 cpu_tlb_reset_dirty_all(start1, length);
819 /* Note: start and end must be within the same ram block. */
820 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
825 cpu_physical_memory_clear_dirty_range(start, length, client);
828 tlb_reset_dirty_range_all(start, length);
832 static void cpu_physical_memory_set_dirty_tracking(bool enable)
834 in_migration = enable;
837 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
838 MemoryRegionSection *section,
840 hwaddr paddr, hwaddr xlat,
842 target_ulong *address)
847 if (memory_region_is_ram(section->mr)) {
849 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
851 if (!section->readonly) {
852 iotlb |= PHYS_SECTION_NOTDIRTY;
854 iotlb |= PHYS_SECTION_ROM;
857 iotlb = section - section->address_space->dispatch->map.sections;
861 /* Make accesses to pages with watchpoints go via the
862 watchpoint trap routines. */
863 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
864 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
865 /* Avoid trapping reads of pages with a write breakpoint. */
866 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
867 iotlb = PHYS_SECTION_WATCH + paddr;
868 *address |= TLB_MMIO;
876 #endif /* defined(CONFIG_USER_ONLY) */
878 #if !defined(CONFIG_USER_ONLY)
880 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
882 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
884 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
887 * Set a custom physical guest memory allocator.
888 * Accelerators with unusual needs may need this. Hopefully, we can
889 * get rid of it eventually.
891 void phys_mem_set_alloc(void *(*alloc)(size_t))
893 phys_mem_alloc = alloc;
896 static uint16_t phys_section_add(PhysPageMap *map,
897 MemoryRegionSection *section)
899 /* The physical section number is ORed with a page-aligned
900 * pointer to produce the iotlb entries. Thus it should
901 * never overflow into the page-aligned value.
903 assert(map->sections_nb < TARGET_PAGE_SIZE);
905 if (map->sections_nb == map->sections_nb_alloc) {
906 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
907 map->sections = g_renew(MemoryRegionSection, map->sections,
908 map->sections_nb_alloc);
910 map->sections[map->sections_nb] = *section;
911 memory_region_ref(section->mr);
912 return map->sections_nb++;
915 static void phys_section_destroy(MemoryRegion *mr)
917 memory_region_unref(mr);
920 subpage_t *subpage = container_of(mr, subpage_t, iomem);
921 object_unref(OBJECT(&subpage->iomem));
926 static void phys_sections_free(PhysPageMap *map)
928 while (map->sections_nb > 0) {
929 MemoryRegionSection *section = &map->sections[--map->sections_nb];
930 phys_section_destroy(section->mr);
932 g_free(map->sections);
936 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
939 hwaddr base = section->offset_within_address_space
941 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
942 d->map.nodes, d->map.sections);
943 MemoryRegionSection subsection = {
944 .offset_within_address_space = base,
945 .size = int128_make64(TARGET_PAGE_SIZE),
949 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
951 if (!(existing->mr->subpage)) {
952 subpage = subpage_init(d->as, base);
953 subsection.address_space = d->as;
954 subsection.mr = &subpage->iomem;
955 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
956 phys_section_add(&d->map, &subsection));
958 subpage = container_of(existing->mr, subpage_t, iomem);
960 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
961 end = start + int128_get64(section->size) - 1;
962 subpage_register(subpage, start, end,
963 phys_section_add(&d->map, section));
967 static void register_multipage(AddressSpaceDispatch *d,
968 MemoryRegionSection *section)
970 hwaddr start_addr = section->offset_within_address_space;
971 uint16_t section_index = phys_section_add(&d->map, section);
972 uint64_t num_pages = int128_get64(int128_rshift(section->size,
976 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
979 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
981 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
982 AddressSpaceDispatch *d = as->next_dispatch;
983 MemoryRegionSection now = *section, remain = *section;
984 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
986 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
987 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
988 - now.offset_within_address_space;
990 now.size = int128_min(int128_make64(left), now.size);
991 register_subpage(d, &now);
993 now.size = int128_zero();
995 while (int128_ne(remain.size, now.size)) {
996 remain.size = int128_sub(remain.size, now.size);
997 remain.offset_within_address_space += int128_get64(now.size);
998 remain.offset_within_region += int128_get64(now.size);
1000 if (int128_lt(remain.size, page_size)) {
1001 register_subpage(d, &now);
1002 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1003 now.size = page_size;
1004 register_subpage(d, &now);
1006 now.size = int128_and(now.size, int128_neg(page_size));
1007 register_multipage(d, &now);
1012 void qemu_flush_coalesced_mmio_buffer(void)
1015 kvm_flush_coalesced_mmio_buffer();
1018 void qemu_mutex_lock_ramlist(void)
1020 qemu_mutex_lock(&ram_list.mutex);
1023 void qemu_mutex_unlock_ramlist(void)
1025 qemu_mutex_unlock(&ram_list.mutex);
1030 #include <sys/vfs.h>
1032 #define HUGETLBFS_MAGIC 0x958458f6
1034 static long gethugepagesize(const char *path, Error **errp)
1040 ret = statfs(path, &fs);
1041 } while (ret != 0 && errno == EINTR);
1044 error_setg_errno(errp, errno, "failed to get page size of file %s",
1049 if (fs.f_type != HUGETLBFS_MAGIC)
1050 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1055 static void *file_ram_alloc(RAMBlock *block,
1061 char *sanitized_name;
1066 Error *local_err = NULL;
1068 hpagesize = gethugepagesize(path, &local_err);
1070 error_propagate(errp, local_err);
1074 if (memory < hpagesize) {
1075 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1076 "or larger than huge page size 0x%" PRIx64,
1081 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1083 "host lacks kvm mmu notifiers, -mem-path unsupported");
1087 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1088 sanitized_name = g_strdup(memory_region_name(block->mr));
1089 for (c = sanitized_name; *c != '\0'; c++) {
1094 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1096 g_free(sanitized_name);
1098 fd = mkstemp(filename);
1100 error_setg_errno(errp, errno,
1101 "unable to create backing store for hugepages");
1108 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1111 * ftruncate is not supported by hugetlbfs in older
1112 * hosts, so don't bother bailing out on errors.
1113 * If anything goes wrong with it under other filesystems,
1116 if (ftruncate(fd, memory)) {
1117 perror("ftruncate");
1120 area = mmap(0, memory, PROT_READ | PROT_WRITE,
1121 (block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE),
1123 if (area == MAP_FAILED) {
1124 error_setg_errno(errp, errno,
1125 "unable to map backing store for hugepages");
1131 os_mem_prealloc(fd, area, memory);
1145 static ram_addr_t find_ram_offset(ram_addr_t size)
1147 RAMBlock *block, *next_block;
1148 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1150 assert(size != 0); /* it would hand out same offset multiple times */
1152 if (QTAILQ_EMPTY(&ram_list.blocks))
1155 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1156 ram_addr_t end, next = RAM_ADDR_MAX;
1158 end = block->offset + block->length;
1160 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1161 if (next_block->offset >= end) {
1162 next = MIN(next, next_block->offset);
1165 if (next - end >= size && next - end < mingap) {
1167 mingap = next - end;
1171 if (offset == RAM_ADDR_MAX) {
1172 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1180 ram_addr_t last_ram_offset(void)
1183 ram_addr_t last = 0;
1185 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1186 last = MAX(last, block->offset + block->length);
1191 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1195 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1196 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1197 "dump-guest-core", true)) {
1198 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1200 perror("qemu_madvise");
1201 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1202 "but dump_guest_core=off specified\n");
1207 static RAMBlock *find_ram_block(ram_addr_t addr)
1211 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1212 if (block->offset == addr) {
1220 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1222 RAMBlock *new_block = find_ram_block(addr);
1226 assert(!new_block->idstr[0]);
1229 char *id = qdev_get_dev_path(dev);
1231 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1235 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1237 /* This assumes the iothread lock is taken here too. */
1238 qemu_mutex_lock_ramlist();
1239 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1240 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1241 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1246 qemu_mutex_unlock_ramlist();
1249 void qemu_ram_unset_idstr(ram_addr_t addr)
1251 RAMBlock *block = find_ram_block(addr);
1254 memset(block->idstr, 0, sizeof(block->idstr));
1258 static int memory_try_enable_merging(void *addr, size_t len)
1260 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1261 /* disabled by the user */
1265 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1268 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1271 ram_addr_t old_ram_size, new_ram_size;
1273 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1275 /* This assumes the iothread lock is taken here too. */
1276 qemu_mutex_lock_ramlist();
1277 new_block->offset = find_ram_offset(new_block->length);
1279 if (!new_block->host) {
1280 if (xen_enabled()) {
1281 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1283 new_block->host = phys_mem_alloc(new_block->length);
1284 if (!new_block->host) {
1285 error_setg_errno(errp, errno,
1286 "cannot set up guest memory '%s'",
1287 memory_region_name(new_block->mr));
1288 qemu_mutex_unlock_ramlist();
1291 memory_try_enable_merging(new_block->host, new_block->length);
1295 /* Keep the list sorted from biggest to smallest block. */
1296 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1297 if (block->length < new_block->length) {
1302 QTAILQ_INSERT_BEFORE(block, new_block, next);
1304 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1306 ram_list.mru_block = NULL;
1309 qemu_mutex_unlock_ramlist();
1311 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1313 if (new_ram_size > old_ram_size) {
1315 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1316 ram_list.dirty_memory[i] =
1317 bitmap_zero_extend(ram_list.dirty_memory[i],
1318 old_ram_size, new_ram_size);
1321 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1323 qemu_ram_setup_dump(new_block->host, new_block->length);
1324 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1325 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1327 if (kvm_enabled()) {
1328 kvm_setup_guest_memory(new_block->host, new_block->length);
1331 return new_block->offset;
1335 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1336 bool share, const char *mem_path,
1339 RAMBlock *new_block;
1341 Error *local_err = NULL;
1343 if (xen_enabled()) {
1344 error_setg(errp, "-mem-path not supported with Xen");
1348 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1350 * file_ram_alloc() needs to allocate just like
1351 * phys_mem_alloc, but we haven't bothered to provide
1355 "-mem-path not supported with this accelerator");
1359 size = TARGET_PAGE_ALIGN(size);
1360 new_block = g_malloc0(sizeof(*new_block));
1362 new_block->length = size;
1363 new_block->flags = share ? RAM_SHARED : 0;
1364 new_block->host = file_ram_alloc(new_block, size,
1366 if (!new_block->host) {
1371 addr = ram_block_add(new_block, &local_err);
1374 error_propagate(errp, local_err);
1381 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1382 MemoryRegion *mr, Error **errp)
1384 RAMBlock *new_block;
1386 Error *local_err = NULL;
1388 size = TARGET_PAGE_ALIGN(size);
1389 new_block = g_malloc0(sizeof(*new_block));
1391 new_block->length = size;
1393 new_block->host = host;
1395 new_block->flags |= RAM_PREALLOC;
1397 addr = ram_block_add(new_block, &local_err);
1400 error_propagate(errp, local_err);
1406 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1408 return qemu_ram_alloc_from_ptr(size, NULL, mr, errp);
1411 void qemu_ram_free_from_ptr(ram_addr_t addr)
1415 /* This assumes the iothread lock is taken here too. */
1416 qemu_mutex_lock_ramlist();
1417 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1418 if (addr == block->offset) {
1419 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1420 ram_list.mru_block = NULL;
1426 qemu_mutex_unlock_ramlist();
1429 void qemu_ram_free(ram_addr_t addr)
1433 /* This assumes the iothread lock is taken here too. */
1434 qemu_mutex_lock_ramlist();
1435 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1436 if (addr == block->offset) {
1437 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1438 ram_list.mru_block = NULL;
1440 if (block->flags & RAM_PREALLOC) {
1442 } else if (xen_enabled()) {
1443 xen_invalidate_map_cache_entry(block->host);
1445 } else if (block->fd >= 0) {
1446 munmap(block->host, block->length);
1450 qemu_anon_ram_free(block->host, block->length);
1456 qemu_mutex_unlock_ramlist();
1461 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1468 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1469 offset = addr - block->offset;
1470 if (offset < block->length) {
1471 vaddr = block->host + offset;
1472 if (block->flags & RAM_PREALLOC) {
1474 } else if (xen_enabled()) {
1478 munmap(vaddr, length);
1479 if (block->fd >= 0) {
1480 flags |= (block->flags & RAM_SHARED ?
1481 MAP_SHARED : MAP_PRIVATE);
1482 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1483 flags, block->fd, offset);
1486 * Remap needs to match alloc. Accelerators that
1487 * set phys_mem_alloc never remap. If they did,
1488 * we'd need a remap hook here.
1490 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1492 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1493 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1496 if (area != vaddr) {
1497 fprintf(stderr, "Could not remap addr: "
1498 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1502 memory_try_enable_merging(vaddr, length);
1503 qemu_ram_setup_dump(vaddr, length);
1509 #endif /* !_WIN32 */
1511 int qemu_get_ram_fd(ram_addr_t addr)
1513 RAMBlock *block = qemu_get_ram_block(addr);
1518 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1520 RAMBlock *block = qemu_get_ram_block(addr);
1525 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1526 With the exception of the softmmu code in this file, this should
1527 only be used for local memory (e.g. video ram) that the device owns,
1528 and knows it isn't going to access beyond the end of the block.
1530 It should not be used for general purpose DMA.
1531 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1533 void *qemu_get_ram_ptr(ram_addr_t addr)
1535 RAMBlock *block = qemu_get_ram_block(addr);
1537 if (xen_enabled()) {
1538 /* We need to check if the requested address is in the RAM
1539 * because we don't want to map the entire memory in QEMU.
1540 * In that case just map until the end of the page.
1542 if (block->offset == 0) {
1543 return xen_map_cache(addr, 0, 0);
1544 } else if (block->host == NULL) {
1546 xen_map_cache(block->offset, block->length, 1);
1549 return block->host + (addr - block->offset);
1552 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1553 * but takes a size argument */
1554 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1559 if (xen_enabled()) {
1560 return xen_map_cache(addr, *size, 1);
1564 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1565 if (addr - block->offset < block->length) {
1566 if (addr - block->offset + *size > block->length)
1567 *size = block->length - addr + block->offset;
1568 return block->host + (addr - block->offset);
1572 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1577 /* Some of the softmmu routines need to translate from a host pointer
1578 (typically a TLB entry) back to a ram offset. */
1579 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1582 uint8_t *host = ptr;
1584 if (xen_enabled()) {
1585 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1586 return qemu_get_ram_block(*ram_addr)->mr;
1589 block = ram_list.mru_block;
1590 if (block && block->host && host - block->host < block->length) {
1594 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1595 /* This case appears when the block is not mapped. */
1596 if (block->host == NULL) {
1599 if (host - block->host < block->length) {
1607 *ram_addr = block->offset + (host - block->host);
1611 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1612 uint64_t val, unsigned size)
1614 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1615 tb_invalidate_phys_page_fast(ram_addr, size);
1619 stb_p(qemu_get_ram_ptr(ram_addr), val);
1622 stw_p(qemu_get_ram_ptr(ram_addr), val);
1625 stl_p(qemu_get_ram_ptr(ram_addr), val);
1630 cpu_physical_memory_set_dirty_range_nocode(ram_addr, size);
1631 /* we remove the notdirty callback only if the code has been
1633 if (!cpu_physical_memory_is_clean(ram_addr)) {
1634 CPUArchState *env = current_cpu->env_ptr;
1635 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1639 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1640 unsigned size, bool is_write)
1645 static const MemoryRegionOps notdirty_mem_ops = {
1646 .write = notdirty_mem_write,
1647 .valid.accepts = notdirty_mem_accepts,
1648 .endianness = DEVICE_NATIVE_ENDIAN,
1651 /* Generate a debug exception if a watchpoint has been hit. */
1652 static void check_watchpoint(int offset, int len_mask, int flags)
1654 CPUState *cpu = current_cpu;
1655 CPUArchState *env = cpu->env_ptr;
1656 target_ulong pc, cs_base;
1661 if (cpu->watchpoint_hit) {
1662 /* We re-entered the check after replacing the TB. Now raise
1663 * the debug interrupt so that it will trigger after the
1664 * current instruction. */
1665 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1668 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1669 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1670 if ((vaddr == (wp->vaddr & len_mask) ||
1671 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1672 wp->flags |= BP_WATCHPOINT_HIT;
1673 if (!cpu->watchpoint_hit) {
1674 cpu->watchpoint_hit = wp;
1675 tb_check_watchpoint(cpu);
1676 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1677 cpu->exception_index = EXCP_DEBUG;
1680 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1681 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1682 cpu_resume_from_signal(cpu, NULL);
1686 wp->flags &= ~BP_WATCHPOINT_HIT;
1691 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1692 so these check for a hit then pass through to the normal out-of-line
1694 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1697 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1699 case 1: return ldub_phys(&address_space_memory, addr);
1700 case 2: return lduw_phys(&address_space_memory, addr);
1701 case 4: return ldl_phys(&address_space_memory, addr);
1706 static void watch_mem_write(void *opaque, hwaddr addr,
1707 uint64_t val, unsigned size)
1709 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1712 stb_phys(&address_space_memory, addr, val);
1715 stw_phys(&address_space_memory, addr, val);
1718 stl_phys(&address_space_memory, addr, val);
1724 static const MemoryRegionOps watch_mem_ops = {
1725 .read = watch_mem_read,
1726 .write = watch_mem_write,
1727 .endianness = DEVICE_NATIVE_ENDIAN,
1730 static uint64_t subpage_read(void *opaque, hwaddr addr,
1733 subpage_t *subpage = opaque;
1736 #if defined(DEBUG_SUBPAGE)
1737 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1738 subpage, len, addr);
1740 address_space_read(subpage->as, addr + subpage->base, buf, len);
1753 static void subpage_write(void *opaque, hwaddr addr,
1754 uint64_t value, unsigned len)
1756 subpage_t *subpage = opaque;
1759 #if defined(DEBUG_SUBPAGE)
1760 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1761 " value %"PRIx64"\n",
1762 __func__, subpage, len, addr, value);
1777 address_space_write(subpage->as, addr + subpage->base, buf, len);
1780 static bool subpage_accepts(void *opaque, hwaddr addr,
1781 unsigned len, bool is_write)
1783 subpage_t *subpage = opaque;
1784 #if defined(DEBUG_SUBPAGE)
1785 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1786 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1789 return address_space_access_valid(subpage->as, addr + subpage->base,
1793 static const MemoryRegionOps subpage_ops = {
1794 .read = subpage_read,
1795 .write = subpage_write,
1796 .valid.accepts = subpage_accepts,
1797 .endianness = DEVICE_NATIVE_ENDIAN,
1800 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1805 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1807 idx = SUBPAGE_IDX(start);
1808 eidx = SUBPAGE_IDX(end);
1809 #if defined(DEBUG_SUBPAGE)
1810 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1811 __func__, mmio, start, end, idx, eidx, section);
1813 for (; idx <= eidx; idx++) {
1814 mmio->sub_section[idx] = section;
1820 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1824 mmio = g_malloc0(sizeof(subpage_t));
1828 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1829 NULL, TARGET_PAGE_SIZE);
1830 mmio->iomem.subpage = true;
1831 #if defined(DEBUG_SUBPAGE)
1832 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1833 mmio, base, TARGET_PAGE_SIZE);
1835 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1840 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1844 MemoryRegionSection section = {
1845 .address_space = as,
1847 .offset_within_address_space = 0,
1848 .offset_within_region = 0,
1849 .size = int128_2_64(),
1852 return phys_section_add(map, &section);
1855 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1857 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1860 static void io_mem_init(void)
1862 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
1863 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1865 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1867 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1871 static void mem_begin(MemoryListener *listener)
1873 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1874 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1877 n = dummy_section(&d->map, as, &io_mem_unassigned);
1878 assert(n == PHYS_SECTION_UNASSIGNED);
1879 n = dummy_section(&d->map, as, &io_mem_notdirty);
1880 assert(n == PHYS_SECTION_NOTDIRTY);
1881 n = dummy_section(&d->map, as, &io_mem_rom);
1882 assert(n == PHYS_SECTION_ROM);
1883 n = dummy_section(&d->map, as, &io_mem_watch);
1884 assert(n == PHYS_SECTION_WATCH);
1886 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1888 as->next_dispatch = d;
1891 static void mem_commit(MemoryListener *listener)
1893 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1894 AddressSpaceDispatch *cur = as->dispatch;
1895 AddressSpaceDispatch *next = as->next_dispatch;
1897 phys_page_compact_all(next, next->map.nodes_nb);
1899 as->dispatch = next;
1902 phys_sections_free(&cur->map);
1907 static void tcg_commit(MemoryListener *listener)
1911 /* since each CPU stores ram addresses in its TLB cache, we must
1912 reset the modified entries */
1915 /* FIXME: Disentangle the cpu.h circular files deps so we can
1916 directly get the right CPU from listener. */
1917 if (cpu->tcg_as_listener != listener) {
1924 static void core_log_global_start(MemoryListener *listener)
1926 cpu_physical_memory_set_dirty_tracking(true);
1929 static void core_log_global_stop(MemoryListener *listener)
1931 cpu_physical_memory_set_dirty_tracking(false);
1934 static MemoryListener core_memory_listener = {
1935 .log_global_start = core_log_global_start,
1936 .log_global_stop = core_log_global_stop,
1940 void address_space_init_dispatch(AddressSpace *as)
1942 as->dispatch = NULL;
1943 as->dispatch_listener = (MemoryListener) {
1945 .commit = mem_commit,
1946 .region_add = mem_add,
1947 .region_nop = mem_add,
1950 memory_listener_register(&as->dispatch_listener, as);
1953 void address_space_destroy_dispatch(AddressSpace *as)
1955 AddressSpaceDispatch *d = as->dispatch;
1957 memory_listener_unregister(&as->dispatch_listener);
1959 as->dispatch = NULL;
1962 static void memory_map_init(void)
1964 system_memory = g_malloc(sizeof(*system_memory));
1966 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1967 address_space_init(&address_space_memory, system_memory, "memory");
1969 system_io = g_malloc(sizeof(*system_io));
1970 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1972 address_space_init(&address_space_io, system_io, "I/O");
1974 memory_listener_register(&core_memory_listener, &address_space_memory);
1977 MemoryRegion *get_system_memory(void)
1979 return system_memory;
1982 MemoryRegion *get_system_io(void)
1987 #endif /* !defined(CONFIG_USER_ONLY) */
1989 /* physical memory access (slow version, mainly for debug) */
1990 #if defined(CONFIG_USER_ONLY)
1991 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1992 uint8_t *buf, int len, int is_write)
1999 page = addr & TARGET_PAGE_MASK;
2000 l = (page + TARGET_PAGE_SIZE) - addr;
2003 flags = page_get_flags(page);
2004 if (!(flags & PAGE_VALID))
2007 if (!(flags & PAGE_WRITE))
2009 /* XXX: this code should not depend on lock_user */
2010 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2013 unlock_user(p, addr, l);
2015 if (!(flags & PAGE_READ))
2017 /* XXX: this code should not depend on lock_user */
2018 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2021 unlock_user(p, addr, 0);
2032 static void invalidate_and_set_dirty(hwaddr addr,
2035 if (cpu_physical_memory_is_clean(addr)) {
2036 /* invalidate code */
2037 tb_invalidate_phys_page_range(addr, addr + length, 0);
2039 cpu_physical_memory_set_dirty_range_nocode(addr, length);
2041 xen_modified_memory(addr, length);
2044 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2046 unsigned access_size_max = mr->ops->valid.max_access_size;
2048 /* Regions are assumed to support 1-4 byte accesses unless
2049 otherwise specified. */
2050 if (access_size_max == 0) {
2051 access_size_max = 4;
2054 /* Bound the maximum access by the alignment of the address. */
2055 if (!mr->ops->impl.unaligned) {
2056 unsigned align_size_max = addr & -addr;
2057 if (align_size_max != 0 && align_size_max < access_size_max) {
2058 access_size_max = align_size_max;
2062 /* Don't attempt accesses larger than the maximum. */
2063 if (l > access_size_max) {
2064 l = access_size_max;
2067 l = 1 << (qemu_fls(l) - 1);
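/* Worked example (not in the original file): for a region whose
 * valid.max_access_size is 4 and which does not allow unaligned accesses, a
 * 6-byte request at address 0x1002 is first capped by the address alignment
 * (0x1002 & -0x1002 == 2) and then rounded down to a power of two, so the
 * caller issues a 2-byte access and loops for the remainder. */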
2073 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2074 int len, bool is_write)
2085 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2088 if (!memory_access_is_direct(mr, is_write)) {
2089 l = memory_access_size(mr, l, addr1);
2090 /* XXX: could force current_cpu to NULL to avoid
2094 /* 64 bit write access */
2096 error |= io_mem_write(mr, addr1, val, 8);
2099 /* 32 bit write access */
2101 error |= io_mem_write(mr, addr1, val, 4);
2104 /* 16 bit write access */
2106 error |= io_mem_write(mr, addr1, val, 2);
2109 /* 8 bit write access */
2111 error |= io_mem_write(mr, addr1, val, 1);
2117 addr1 += memory_region_get_ram_addr(mr);
2119 ptr = qemu_get_ram_ptr(addr1);
2120 memcpy(ptr, buf, l);
2121 invalidate_and_set_dirty(addr1, l);
2124 if (!memory_access_is_direct(mr, is_write)) {
2126 l = memory_access_size(mr, l, addr1);
2129 /* 64 bit read access */
2130 error |= io_mem_read(mr, addr1, &val, 8);
2134 /* 32 bit read access */
2135 error |= io_mem_read(mr, addr1, &val, 4);
2139 /* 16 bit read access */
2140 error |= io_mem_read(mr, addr1, &val, 2);
2144 /* 8 bit read access */
2145 error |= io_mem_read(mr, addr1, &val, 1);
2153 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2154 memcpy(buf, ptr, l);
2165 bool address_space_write(AddressSpace *as, hwaddr addr,
2166 const uint8_t *buf, int len)
2168 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2171 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2173 return address_space_rw(as, addr, buf, len, false);
2177 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2178 int len, int is_write)
2180 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2183 enum write_rom_type {
2188 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2189 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2198 mr = address_space_translate(as, addr, &addr1, &l, true);
2200 if (!(memory_region_is_ram(mr) ||
2201 memory_region_is_romd(mr))) {
2204 addr1 += memory_region_get_ram_addr(mr);
2206 ptr = qemu_get_ram_ptr(addr1);
2209 memcpy(ptr, buf, l);
2210 invalidate_and_set_dirty(addr1, l);
2213 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2223 /* used for ROM loading: can write to RAM and ROM */
2224 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2225 const uint8_t *buf, int len)
2227 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2230 void cpu_flush_icache_range(hwaddr start, int len)
2233 * This function should do the same thing as an icache flush that was
2234 * triggered from within the guest. For TCG we are always cache coherent,
2235 * so there is no need to flush anything. For KVM / Xen we need to flush
2236 * the host's instruction cache at least.
2238 if (tcg_enabled()) {
2242 cpu_physical_memory_write_rom_internal(&address_space_memory,
2243 start, NULL, len, FLUSH_CACHE);
2253 static BounceBuffer bounce;
2255 typedef struct MapClient {
2257 void (*callback)(void *opaque);
2258 QLIST_ENTRY(MapClient) link;
2261 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2262 = QLIST_HEAD_INITIALIZER(map_client_list);
2264 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2266 MapClient *client = g_malloc(sizeof(*client));
2268 client->opaque = opaque;
2269 client->callback = callback;
2270 QLIST_INSERT_HEAD(&map_client_list, client, link);
2274 static void cpu_unregister_map_client(void *_client)
2276 MapClient *client = (MapClient *)_client;
2278 QLIST_REMOVE(client, link);
2282 static void cpu_notify_map_clients(void)
2286 while (!QLIST_EMPTY(&map_client_list)) {
2287 client = QLIST_FIRST(&map_client_list);
2288 client->callback(client->opaque);
2289 cpu_unregister_map_client(client);
2293 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2300 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2301 if (!memory_access_is_direct(mr, is_write)) {
2302 l = memory_access_size(mr, l, addr);
2303 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2314 /* Map a physical memory region into a host virtual address.
2315 * May map a subset of the requested range, given by and returned in *plen.
2316 * May return NULL if resources needed to perform the mapping are exhausted.
2317 * Use only for reads OR writes - not for read-modify-write operations.
2318 * Use cpu_register_map_client() to know when retrying the map operation is
2319 * likely to succeed.
2321 void *address_space_map(AddressSpace *as,
2328 hwaddr l, xlat, base;
2329 MemoryRegion *mr, *this_mr;
2337 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2338 if (!memory_access_is_direct(mr, is_write)) {
2339 if (bounce.buffer) {
2342 /* Avoid unbounded allocations */
2343 l = MIN(l, TARGET_PAGE_SIZE);
2344 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2348 memory_region_ref(mr);
2351 address_space_read(as, addr, bounce.buffer, l);
2355 return bounce.buffer;
2359 raddr = memory_region_get_ram_addr(mr);
2370 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2371 if (this_mr != mr || xlat != base + done) {
2376 memory_region_ref(mr);
2378 return qemu_ram_ptr_length(raddr + base, plen);
2381 /* Unmaps a memory region previously mapped by address_space_map().
2382 * Will also mark the memory as dirty if is_write == 1. access_len gives
2383 * the amount of memory that was actually read or written by the caller.
2385 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2386 int is_write, hwaddr access_len)
2388 if (buffer != bounce.buffer) {
2392 mr = qemu_ram_addr_from_host(buffer, &addr1);
2395 invalidate_and_set_dirty(addr1, access_len);
2397 if (xen_enabled()) {
2398 xen_invalidate_map_cache_entry(buffer);
2400 memory_region_unref(mr);
2404 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2406 qemu_vfree(bounce.buffer);
2407 bounce.buffer = NULL;
2408 memory_region_unref(bounce.mr);
2409 cpu_notify_map_clients();
2412 void *cpu_physical_memory_map(hwaddr addr,
2416 return address_space_map(&address_space_memory, addr, plen, is_write);
2419 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2420 int is_write, hwaddr access_len)
2422 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
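/* Illustrative sketch, not part of the original file: the map/access/unmap
 * pattern for device DMA recommended by the comments above.  A NULL return
 * means resources (e.g. the single bounce buffer) are exhausted; the caller
 * can retry later, for instance after cpu_register_map_client() fires. */
#if 0
static void example_dma_read(hwaddr addr, uint8_t *dest, hwaddr len)
{
    hwaddr plen = len;
    void *host = cpu_physical_memory_map(addr, &plen, 0 /* is_write */);

    if (!host) {
        return; /* retry later */
    }
    memcpy(dest, host, plen);  /* plen may be shorter than requested */
    cpu_physical_memory_unmap(host, plen, 0 /* is_write */, plen);
}
#endif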
2425 /* warning: addr must be aligned */
2426 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2427 enum device_endian endian)
2435 mr = address_space_translate(as, addr, &addr1, &l, false);
2436 if (l < 4 || !memory_access_is_direct(mr, false)) {
2438 io_mem_read(mr, addr1, &val, 4);
2439 #if defined(TARGET_WORDS_BIGENDIAN)
2440 if (endian == DEVICE_LITTLE_ENDIAN) {
2444 if (endian == DEVICE_BIG_ENDIAN) {
2450 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2454 case DEVICE_LITTLE_ENDIAN:
2455 val = ldl_le_p(ptr);
2457 case DEVICE_BIG_ENDIAN:
2458 val = ldl_be_p(ptr);
2468 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2470 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2473 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2475 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2478 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2480 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2483 /* warning: addr must be aligned */
2484 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2485 enum device_endian endian)
2493 mr = address_space_translate(as, addr, &addr1, &l,
2495 if (l < 8 || !memory_access_is_direct(mr, false)) {
2497 io_mem_read(mr, addr1, &val, 8);
2498 #if defined(TARGET_WORDS_BIGENDIAN)
2499 if (endian == DEVICE_LITTLE_ENDIAN) {
2503 if (endian == DEVICE_BIG_ENDIAN) {
2509 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2513 case DEVICE_LITTLE_ENDIAN:
2514 val = ldq_le_p(ptr);
2516 case DEVICE_BIG_ENDIAN:
2517 val = ldq_be_p(ptr);
2527 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2529 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2532 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2534 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2537 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2539 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2543 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2546 address_space_rw(as, addr, &val, 1, 0);
2550 /* warning: addr must be aligned */
2551 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2552 enum device_endian endian)
2560 mr = address_space_translate(as, addr, &addr1, &l,
2562 if (l < 2 || !memory_access_is_direct(mr, false)) {
2564 io_mem_read(mr, addr1, &val, 2);
2565 #if defined(TARGET_WORDS_BIGENDIAN)
2566 if (endian == DEVICE_LITTLE_ENDIAN) {
2570 if (endian == DEVICE_BIG_ENDIAN) {
2576 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2580 case DEVICE_LITTLE_ENDIAN:
2581 val = lduw_le_p(ptr);
2583 case DEVICE_BIG_ENDIAN:
2584 val = lduw_be_p(ptr);
2594 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2596 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2599 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2601 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2604 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2606 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2609 /* warning: addr must be aligned. The ram page is not masked as dirty
2610 and the code inside is not invalidated. It is useful if the dirty
2611 bits are used to track modified PTEs */
2612 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2619 mr = address_space_translate(as, addr, &addr1, &l,
2621 if (l < 4 || !memory_access_is_direct(mr, true)) {
2622 io_mem_write(mr, addr1, val, 4);
2624 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2625 ptr = qemu_get_ram_ptr(addr1);
2628 if (unlikely(in_migration)) {
2629 if (cpu_physical_memory_is_clean(addr1)) {
2630 /* invalidate code */
2631 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2633 cpu_physical_memory_set_dirty_range_nocode(addr1, 4);
2639 /* warning: addr must be aligned */
2640 static inline void stl_phys_internal(AddressSpace *as,
2641 hwaddr addr, uint32_t val,
2642 enum device_endian endian)
2649 mr = address_space_translate(as, addr, &addr1, &l,
2651 if (l < 4 || !memory_access_is_direct(mr, true)) {
2652 #if defined(TARGET_WORDS_BIGENDIAN)
2653 if (endian == DEVICE_LITTLE_ENDIAN) {
2657 if (endian == DEVICE_BIG_ENDIAN) {
2661 io_mem_write(mr, addr1, val, 4);
2664 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2665 ptr = qemu_get_ram_ptr(addr1);
2667 case DEVICE_LITTLE_ENDIAN:
2670 case DEVICE_BIG_ENDIAN:
2677 invalidate_and_set_dirty(addr1, 4);
2681 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2683 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2686 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2688 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2691 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2693 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2697 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2700 address_space_rw(as, addr, &v, 1, 1);
2703 /* warning: addr must be aligned */
2704 static inline void stw_phys_internal(AddressSpace *as,
2705 hwaddr addr, uint32_t val,
2706 enum device_endian endian)
2713 mr = address_space_translate(as, addr, &addr1, &l, true);
2714 if (l < 2 || !memory_access_is_direct(mr, true)) {
2715 #if defined(TARGET_WORDS_BIGENDIAN)
2716 if (endian == DEVICE_LITTLE_ENDIAN) {
2720 if (endian == DEVICE_BIG_ENDIAN) {
2724 io_mem_write(mr, addr1, val, 2);
2727 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2728 ptr = qemu_get_ram_ptr(addr1);
2730 case DEVICE_LITTLE_ENDIAN:
2733 case DEVICE_BIG_ENDIAN:
2740 invalidate_and_set_dirty(addr1, 2);
2744 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2746 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2749 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2751 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2754 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2756 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2760 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2763 address_space_rw(as, addr, (void *) &val, 8, 1);
2766 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2768 val = cpu_to_le64(val);
2769 address_space_rw(as, addr, (void *) &val, 8, 1);
2772 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2774 val = cpu_to_be64(val);
2775 address_space_rw(as, addr, (void *) &val, 8, 1);
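/* Illustrative sketch, not part of the original file: the fixed-endian
 * helpers above let device models access guest-physical memory without
 * open-coding address_space_rw(), e.g. against the global
 * address_space_memory. */
#if 0
static void example_patch_descriptor(hwaddr desc)
{
    uint32_t flags = ldl_le_phys(&address_space_memory, desc);

    stl_le_phys(&address_space_memory, desc, flags | 1u);
}
#endif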
2778 /* virtual memory access for debug (includes writing to ROM) */
2779 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2780 uint8_t *buf, int len, int is_write)
2787 page = addr & TARGET_PAGE_MASK;
2788 phys_addr = cpu_get_phys_page_debug(cpu, page);
2789 /* if no physical page mapped, return an error */
2790 if (phys_addr == -1)
2792 l = (page + TARGET_PAGE_SIZE) - addr;
2795 phys_addr += (addr & ~TARGET_PAGE_MASK);
2797 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2799 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2810 * A helper function for the _utterly broken_ virtio device model to find out if
2811 * it's running on a big endian machine. Don't do this at home kids!
2813 bool target_words_bigendian(void);
2814 bool target_words_bigendian(void)
2816 #if defined(TARGET_WORDS_BIGENDIAN)
2823 #ifndef CONFIG_USER_ONLY
2824 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2829 mr = address_space_translate(&address_space_memory,
2830 phys_addr, &phys_addr, &l, false);
2832 return !(memory_region_is_ram(mr) ||
2833 memory_region_is_romd(mr));
2836 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2840 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2841 func(block->host, block->offset, block->length, opaque);
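/* Illustrative sketch, not part of the original file: a RAMBlockIterFunc
 * callback that sums the length of all RAM blocks when passed to
 * qemu_ram_foreach_block(), e.g.
 *   ram_addr_t total = 0;
 *   qemu_ram_foreach_block(example_count_ram, &total); */
#if 0
static void example_count_ram(void *host_addr, ram_addr_t offset,
                              ram_addr_t length, void *opaque)
{
    ram_addr_t *total = opaque;

    *total += length;
}
#endif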