4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "qemu/error-report.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
46 #include "exec/cpu-all.h"
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
51 #include "exec/memory-internal.h"
52 #include "exec/ram_addr.h"
53 #include "qemu/cache-utils.h"
55 #include "qemu/range.h"
57 //#define DEBUG_SUBPAGE
59 #if !defined(CONFIG_USER_ONLY)
60 static bool in_migration;
62 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
64 static MemoryRegion *system_memory;
65 static MemoryRegion *system_io;
67 AddressSpace address_space_io;
68 AddressSpace address_space_memory;
70 MemoryRegion io_mem_rom, io_mem_notdirty;
71 static MemoryRegion io_mem_unassigned;
73 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
74 #define RAM_PREALLOC (1 << 0)
78 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
79 /* current CPU in the current thread. It is only valid inside
81 DEFINE_TLS(CPUState *, current_cpu);
82 /* 0 = Do not count executed instructions.
83 1 = Precise instruction counting.
84 2 = Adaptive rate instruction counting. */
87 #if !defined(CONFIG_USER_ONLY)
89 typedef struct PhysPageEntry PhysPageEntry;
91 struct PhysPageEntry {
92 /* How many bits to skip to reach the next level (in units of L2_SIZE). 0 for a leaf. */
94 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
98 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
100 /* Size of the L2 (and L3, etc) page tables. */
101 #define ADDR_SPACE_BITS 64
104 #define P_L2_SIZE (1 << P_L2_BITS)
106 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
108 typedef PhysPageEntry Node[P_L2_SIZE];
110 typedef struct PhysPageMap {
111 unsigned sections_nb;
112 unsigned sections_nb_alloc;
114 unsigned nodes_nb_alloc;
116 MemoryRegionSection *sections;
119 struct AddressSpaceDispatch {
120 /* This is a multi-level map on the physical address space.
121 * The bottom level has pointers to MemoryRegionSections.
123 PhysPageEntry phys_map;
128 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
129 typedef struct subpage_t {
133 uint16_t sub_section[TARGET_PAGE_SIZE];
136 #define PHYS_SECTION_UNASSIGNED 0
137 #define PHYS_SECTION_NOTDIRTY 1
138 #define PHYS_SECTION_ROM 2
139 #define PHYS_SECTION_WATCH 3
141 static void io_mem_init(void);
142 static void memory_map_init(void);
143 static void tcg_commit(MemoryListener *listener);
145 static MemoryRegion io_mem_watch;
148 #if !defined(CONFIG_USER_ONLY)
150 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
152 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
153 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
154 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
155 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
159 static uint32_t phys_map_node_alloc(PhysPageMap *map)
164 ret = map->nodes_nb++;
165 assert(ret != PHYS_MAP_NODE_NIL);
166 assert(ret != map->nodes_nb_alloc);
167 for (i = 0; i < P_L2_SIZE; ++i) {
168 map->nodes[ret][i].skip = 1;
169 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
174 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
175 hwaddr *index, hwaddr *nb, uint16_t leaf,
180 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
182 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
183 lp->ptr = phys_map_node_alloc(map);
184 p = map->nodes[lp->ptr];
186 for (i = 0; i < P_L2_SIZE; i++) {
188 p[i].ptr = PHYS_SECTION_UNASSIGNED;
192 p = map->nodes[lp->ptr];
194 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
196 while (*nb && lp < &p[P_L2_SIZE]) {
197 if ((*index & (step - 1)) == 0 && *nb >= step) {
203 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
209 static void phys_page_set(AddressSpaceDispatch *d,
210 hwaddr index, hwaddr nb,
213 /* Wildly overreserve - it doesn't matter much. */
214 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
216 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
219 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
220 * and update our entry so we can skip it and go directly to the destination.
222 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
224 unsigned valid_ptr = P_L2_SIZE;
229 if (lp->ptr == PHYS_MAP_NODE_NIL) {
234 for (i = 0; i < P_L2_SIZE; i++) {
235 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
242 phys_page_compact(&p[i], nodes, compacted);
246 /* We can only compress if there's only one child. */
251 assert(valid_ptr < P_L2_SIZE);
253 /* Don't compress if it won't fit in the # of bits we have. */
254 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
258 lp->ptr = p[valid_ptr].ptr;
259 if (!p[valid_ptr].skip) {
260 /* If our only child is a leaf, make this a leaf. */
261 /* By design, we should have made this node a leaf to begin with so we
262 * should never reach here.
263 * But since it's so simple to handle this, let's do it just in case we
268 lp->skip += p[valid_ptr].skip;
272 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
274 DECLARE_BITMAP(compacted, nodes_nb);
276 if (d->phys_map.skip) {
277 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
281 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
282 Node *nodes, MemoryRegionSection *sections)
285 hwaddr index = addr >> TARGET_PAGE_BITS;
288 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
289 if (lp.ptr == PHYS_MAP_NODE_NIL) {
290 return §ions[PHYS_SECTION_UNASSIGNED];
293 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
296 if (sections[lp.ptr].size.hi ||
297 range_covers_byte(sections[lp.ptr].offset_within_address_space,
298 sections[lp.ptr].size.lo, addr)) {
299 return §ions[lp.ptr];
301 return §ions[PHYS_SECTION_UNASSIGNED];
305 bool memory_region_is_unassigned(MemoryRegion *mr)
307 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
308 && mr != &io_mem_watch;
311 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
313 bool resolve_subpage)
315 MemoryRegionSection *section;
318 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
319 if (resolve_subpage && section->mr->subpage) {
320 subpage = container_of(section->mr, subpage_t, iomem);
321 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
326 static MemoryRegionSection *
327 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
328 hwaddr *plen, bool resolve_subpage)
330 MemoryRegionSection *section;
333 section = address_space_lookup_region(d, addr, resolve_subpage);
334 /* Compute offset within MemoryRegionSection */
335 addr -= section->offset_within_address_space;
337 /* Compute offset within MemoryRegion */
338 *xlat = addr + section->offset_within_region;
340 diff = int128_sub(section->mr->size, int128_make64(addr));
341 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
345 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
347 if (memory_region_is_ram(mr)) {
348 return !(is_write && mr->readonly);
350 if (memory_region_is_romd(mr)) {
357 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
358 hwaddr *xlat, hwaddr *plen,
362 MemoryRegionSection *section;
367 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
370 if (!mr->iommu_ops) {
374 iotlb = mr->iommu_ops->translate(mr, addr);
375 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
376 | (addr & iotlb.addr_mask));
377 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
378 if (!(iotlb.perm & (1 << is_write))) {
379 mr = &io_mem_unassigned;
383 as = iotlb.target_as;
386 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
387 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
388 len = MIN(page, len);
396 MemoryRegionSection *
397 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
400 MemoryRegionSection *section;
401 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
403 assert(!section->mr->iommu_ops);
408 void cpu_exec_init_all(void)
410 #if !defined(CONFIG_USER_ONLY)
411 qemu_mutex_init(&ram_list.mutex);
417 #if !defined(CONFIG_USER_ONLY)
419 static int cpu_common_post_load(void *opaque, int version_id)
421 CPUState *cpu = opaque;
423 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
424 version_id is increased. */
425 cpu->interrupt_request &= ~0x01;
431 const VMStateDescription vmstate_cpu_common = {
432 .name = "cpu_common",
434 .minimum_version_id = 1,
435 .post_load = cpu_common_post_load,
436 .fields = (VMStateField[]) {
437 VMSTATE_UINT32(halted, CPUState),
438 VMSTATE_UINT32(interrupt_request, CPUState),
439 VMSTATE_END_OF_LIST()
445 CPUState *qemu_get_cpu(int index)
450 if (cpu->cpu_index == index) {
458 #if !defined(CONFIG_USER_ONLY)
459 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
461 /* We only support one address space per cpu at the moment. */
462 assert(cpu->as == as);
464 if (cpu->tcg_as_listener) {
465 memory_listener_unregister(cpu->tcg_as_listener);
467 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
469 cpu->tcg_as_listener->commit = tcg_commit;
470 memory_listener_register(cpu->tcg_as_listener, as);
474 void cpu_exec_init(CPUArchState *env)
476 CPUState *cpu = ENV_GET_CPU(env);
477 CPUClass *cc = CPU_GET_CLASS(cpu);
481 #if defined(CONFIG_USER_ONLY)
485 CPU_FOREACH(some_cpu) {
488 cpu->cpu_index = cpu_index;
490 QTAILQ_INIT(&cpu->breakpoints);
491 QTAILQ_INIT(&cpu->watchpoints);
492 #ifndef CONFIG_USER_ONLY
493 cpu->as = &address_space_memory;
494 cpu->thread_id = qemu_get_thread_id();
496 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
497 #if defined(CONFIG_USER_ONLY)
500 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
501 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
503 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
504 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
505 cpu_save, cpu_load, env);
506 assert(cc->vmsd == NULL);
507 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
509 if (cc->vmsd != NULL) {
510 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
514 #if defined(TARGET_HAS_ICE)
515 #if defined(CONFIG_USER_ONLY)
516 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
518 tb_invalidate_phys_page_range(pc, pc + 1, 0);
521 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
523 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
525 tb_invalidate_phys_addr(cpu->as,
526 phys | (pc & ~TARGET_PAGE_MASK));
530 #endif /* TARGET_HAS_ICE */
532 #if defined(CONFIG_USER_ONLY)
533 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
538 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
539 int flags, CPUWatchpoint **watchpoint)
544 /* Add a watchpoint. */
545 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
546 int flags, CPUWatchpoint **watchpoint)
548 vaddr len_mask = ~(len - 1);
551 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
552 if ((len & (len - 1)) || (addr & ~len_mask) ||
553 len == 0 || len > TARGET_PAGE_SIZE) {
554 error_report("tried to set invalid watchpoint at %"
555 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
558 wp = g_malloc(sizeof(*wp));
561 wp->len_mask = len_mask;
564 /* keep all GDB-injected watchpoints in front */
565 if (flags & BP_GDB) {
566 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
568 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
571 tlb_flush_page(cpu, addr);
578 /* Remove a specific watchpoint. */
579 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
582 vaddr len_mask = ~(len - 1);
585 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
586 if (addr == wp->vaddr && len_mask == wp->len_mask
587 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
588 cpu_watchpoint_remove_by_ref(cpu, wp);
595 /* Remove a specific watchpoint by reference. */
596 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
598 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
600 tlb_flush_page(cpu, watchpoint->vaddr);
605 /* Remove all matching watchpoints. */
606 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
608 CPUWatchpoint *wp, *next;
610 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
611 if (wp->flags & mask) {
612 cpu_watchpoint_remove_by_ref(cpu, wp);
618 /* Add a breakpoint. */
619 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
620 CPUBreakpoint **breakpoint)
622 #if defined(TARGET_HAS_ICE)
625 bp = g_malloc(sizeof(*bp));
630 /* keep all GDB-injected breakpoints in front */
631 if (flags & BP_GDB) {
632 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
634 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
637 breakpoint_invalidate(cpu, pc);
648 /* Remove a specific breakpoint. */
649 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
651 #if defined(TARGET_HAS_ICE)
654 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
655 if (bp->pc == pc && bp->flags == flags) {
656 cpu_breakpoint_remove_by_ref(cpu, bp);
666 /* Remove a specific breakpoint by reference. */
667 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
669 #if defined(TARGET_HAS_ICE)
670 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
672 breakpoint_invalidate(cpu, breakpoint->pc);
678 /* Remove all matching breakpoints. */
679 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
681 #if defined(TARGET_HAS_ICE)
682 CPUBreakpoint *bp, *next;
684 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
685 if (bp->flags & mask) {
686 cpu_breakpoint_remove_by_ref(cpu, bp);
692 /* enable or disable single step mode. EXCP_DEBUG is returned by the
693 CPU loop after each instruction */
694 void cpu_single_step(CPUState *cpu, int enabled)
696 #if defined(TARGET_HAS_ICE)
697 if (cpu->singlestep_enabled != enabled) {
698 cpu->singlestep_enabled = enabled;
700 kvm_update_guest_debug(cpu, 0);
702 /* must flush all the translated code to avoid inconsistencies */
703 /* XXX: only flush what is necessary */
704 CPUArchState *env = cpu->env_ptr;
711 void cpu_abort(CPUState *cpu, const char *fmt, ...)
718 fprintf(stderr, "qemu: fatal: ");
719 vfprintf(stderr, fmt, ap);
720 fprintf(stderr, "\n");
721 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
722 if (qemu_log_enabled()) {
723 qemu_log("qemu: fatal: ");
724 qemu_log_vprintf(fmt, ap2);
726 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
732 #if defined(CONFIG_USER_ONLY)
734 struct sigaction act;
735 sigfillset(&act.sa_mask);
736 act.sa_handler = SIG_DFL;
737 sigaction(SIGABRT, &act, NULL);
743 #if !defined(CONFIG_USER_ONLY)
744 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
748 /* The list is protected by the iothread lock here. */
749 block = ram_list.mru_block;
750 if (block && addr - block->offset < block->length) {
753 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
754 if (addr - block->offset < block->length) {
759 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
763 ram_list.mru_block = block;
767 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
773 end = TARGET_PAGE_ALIGN(start + length);
774 start &= TARGET_PAGE_MASK;
776 block = qemu_get_ram_block(start);
777 assert(block == qemu_get_ram_block(end - 1));
778 start1 = (uintptr_t)block->host + (start - block->offset);
779 cpu_tlb_reset_dirty_all(start1, length);
782 /* Note: start and end must be within the same ram block. */
783 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
788 cpu_physical_memory_clear_dirty_range(start, length, client);
791 tlb_reset_dirty_range_all(start, length);
795 static void cpu_physical_memory_set_dirty_tracking(bool enable)
797 in_migration = enable;
800 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
801 MemoryRegionSection *section,
803 hwaddr paddr, hwaddr xlat,
805 target_ulong *address)
810 if (memory_region_is_ram(section->mr)) {
812 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
814 if (!section->readonly) {
815 iotlb |= PHYS_SECTION_NOTDIRTY;
817 iotlb |= PHYS_SECTION_ROM;
820 iotlb = section - section->address_space->dispatch->map.sections;
824 /* Make accesses to pages with watchpoints go via the
825 watchpoint trap routines. */
826 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
827 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
828 /* Avoid trapping reads of pages with a write breakpoint. */
829 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
830 iotlb = PHYS_SECTION_WATCH + paddr;
831 *address |= TLB_MMIO;
839 #endif /* defined(CONFIG_USER_ONLY) */
841 #if !defined(CONFIG_USER_ONLY)
843 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
845 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
847 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
850 * Set a custom physical guest memory alloator.
851 * Accelerators with unusual needs may need this. Hopefully, we can
852 * get rid of it eventually.
854 void phys_mem_set_alloc(void *(*alloc)(size_t))
856 phys_mem_alloc = alloc;
859 static uint16_t phys_section_add(PhysPageMap *map,
860 MemoryRegionSection *section)
862 /* The physical section number is ORed with a page-aligned
863 * pointer to produce the iotlb entries. Thus it should
864 * never overflow into the page-aligned value.
866 assert(map->sections_nb < TARGET_PAGE_SIZE);
868 if (map->sections_nb == map->sections_nb_alloc) {
869 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
870 map->sections = g_renew(MemoryRegionSection, map->sections,
871 map->sections_nb_alloc);
873 map->sections[map->sections_nb] = *section;
874 memory_region_ref(section->mr);
875 return map->sections_nb++;
878 static void phys_section_destroy(MemoryRegion *mr)
880 memory_region_unref(mr);
883 subpage_t *subpage = container_of(mr, subpage_t, iomem);
884 memory_region_destroy(&subpage->iomem);
889 static void phys_sections_free(PhysPageMap *map)
891 while (map->sections_nb > 0) {
892 MemoryRegionSection *section = &map->sections[--map->sections_nb];
893 phys_section_destroy(section->mr);
895 g_free(map->sections);
899 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
902 hwaddr base = section->offset_within_address_space
904 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
905 d->map.nodes, d->map.sections);
906 MemoryRegionSection subsection = {
907 .offset_within_address_space = base,
908 .size = int128_make64(TARGET_PAGE_SIZE),
912 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
914 if (!(existing->mr->subpage)) {
915 subpage = subpage_init(d->as, base);
916 subsection.address_space = d->as;
917 subsection.mr = &subpage->iomem;
918 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
919 phys_section_add(&d->map, &subsection));
921 subpage = container_of(existing->mr, subpage_t, iomem);
923 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
924 end = start + int128_get64(section->size) - 1;
925 subpage_register(subpage, start, end,
926 phys_section_add(&d->map, section));
930 static void register_multipage(AddressSpaceDispatch *d,
931 MemoryRegionSection *section)
933 hwaddr start_addr = section->offset_within_address_space;
934 uint16_t section_index = phys_section_add(&d->map, section);
935 uint64_t num_pages = int128_get64(int128_rshift(section->size,
939 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
942 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
944 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
945 AddressSpaceDispatch *d = as->next_dispatch;
946 MemoryRegionSection now = *section, remain = *section;
947 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
949 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
950 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
951 - now.offset_within_address_space;
953 now.size = int128_min(int128_make64(left), now.size);
954 register_subpage(d, &now);
956 now.size = int128_zero();
958 while (int128_ne(remain.size, now.size)) {
959 remain.size = int128_sub(remain.size, now.size);
960 remain.offset_within_address_space += int128_get64(now.size);
961 remain.offset_within_region += int128_get64(now.size);
963 if (int128_lt(remain.size, page_size)) {
964 register_subpage(d, &now);
965 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
966 now.size = page_size;
967 register_subpage(d, &now);
969 now.size = int128_and(now.size, int128_neg(page_size));
970 register_multipage(d, &now);
975 void qemu_flush_coalesced_mmio_buffer(void)
978 kvm_flush_coalesced_mmio_buffer();
981 void qemu_mutex_lock_ramlist(void)
983 qemu_mutex_lock(&ram_list.mutex);
986 void qemu_mutex_unlock_ramlist(void)
988 qemu_mutex_unlock(&ram_list.mutex);
995 #define HUGETLBFS_MAGIC 0x958458f6
997 static long gethugepagesize(const char *path)
1003 ret = statfs(path, &fs);
1004 } while (ret != 0 && errno == EINTR);
1011 if (fs.f_type != HUGETLBFS_MAGIC)
1012 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1017 static void *file_ram_alloc(RAMBlock *block,
1022 char *sanitized_name;
1026 unsigned long hpagesize;
1028 hpagesize = gethugepagesize(path);
1033 if (memory < hpagesize) {
1037 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1038 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1042 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1043 sanitized_name = g_strdup(block->mr->name);
1044 for (c = sanitized_name; *c != '\0'; c++) {
1049 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1051 g_free(sanitized_name);
1053 fd = mkstemp(filename);
1055 perror("unable to create backing store for hugepages");
1062 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1065 * ftruncate is not supported by hugetlbfs in older
1066 * hosts, so don't bother bailing out on errors.
1067 * If anything goes wrong with it under other filesystems,
1070 if (ftruncate(fd, memory))
1071 perror("ftruncate");
1073 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1074 if (area == MAP_FAILED) {
1075 perror("file_ram_alloc: can't mmap RAM pages");
1081 os_mem_prealloc(fd, area, memory);
1095 static ram_addr_t find_ram_offset(ram_addr_t size)
1097 RAMBlock *block, *next_block;
1098 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1100 assert(size != 0); /* it would hand out same offset multiple times */
1102 if (QTAILQ_EMPTY(&ram_list.blocks))
1105 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1106 ram_addr_t end, next = RAM_ADDR_MAX;
1108 end = block->offset + block->length;
1110 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1111 if (next_block->offset >= end) {
1112 next = MIN(next, next_block->offset);
1115 if (next - end >= size && next - end < mingap) {
1117 mingap = next - end;
1121 if (offset == RAM_ADDR_MAX) {
1122 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1130 ram_addr_t last_ram_offset(void)
1133 ram_addr_t last = 0;
1135 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1136 last = MAX(last, block->offset + block->length);
1141 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1145 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1146 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1147 "dump-guest-core", true)) {
1148 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1150 perror("qemu_madvise");
1151 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1152 "but dump_guest_core=off specified\n");
1157 static RAMBlock *find_ram_block(ram_addr_t addr)
1161 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1162 if (block->offset == addr) {
1170 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1172 RAMBlock *new_block = find_ram_block(addr);
1176 assert(!new_block->idstr[0]);
1179 char *id = qdev_get_dev_path(dev);
1181 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1185 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1187 /* This assumes the iothread lock is taken here too. */
1188 qemu_mutex_lock_ramlist();
1189 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1190 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1191 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1196 qemu_mutex_unlock_ramlist();
1199 void qemu_ram_unset_idstr(ram_addr_t addr)
1201 RAMBlock *block = find_ram_block(addr);
1204 memset(block->idstr, 0, sizeof(block->idstr));
1208 static int memory_try_enable_merging(void *addr, size_t len)
1210 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1211 /* disabled by the user */
1215 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1218 static ram_addr_t ram_block_add(RAMBlock *new_block)
1221 ram_addr_t old_ram_size, new_ram_size;
1223 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1225 /* This assumes the iothread lock is taken here too. */
1226 qemu_mutex_lock_ramlist();
1227 new_block->offset = find_ram_offset(new_block->length);
1229 if (!new_block->host) {
1230 if (xen_enabled()) {
1231 xen_ram_alloc(new_block->offset, new_block->length, new_block->mr);
1233 new_block->host = phys_mem_alloc(new_block->length);
1234 if (!new_block->host) {
1235 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1236 new_block->mr->name, strerror(errno));
1239 memory_try_enable_merging(new_block->host, new_block->length);
1243 /* Keep the list sorted from biggest to smallest block. */
1244 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1245 if (block->length < new_block->length) {
1250 QTAILQ_INSERT_BEFORE(block, new_block, next);
1252 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1254 ram_list.mru_block = NULL;
1257 qemu_mutex_unlock_ramlist();
1259 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1261 if (new_ram_size > old_ram_size) {
1263 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1264 ram_list.dirty_memory[i] =
1265 bitmap_zero_extend(ram_list.dirty_memory[i],
1266 old_ram_size, new_ram_size);
1269 cpu_physical_memory_set_dirty_range(new_block->offset, new_block->length);
1271 qemu_ram_setup_dump(new_block->host, new_block->length);
1272 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_HUGEPAGE);
1273 qemu_madvise(new_block->host, new_block->length, QEMU_MADV_DONTFORK);
1275 if (kvm_enabled()) {
1276 kvm_setup_guest_memory(new_block->host, new_block->length);
1279 return new_block->offset;
1283 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1284 const char *mem_path)
1286 RAMBlock *new_block;
1288 if (xen_enabled()) {
1289 fprintf(stderr, "-mem-path not supported with Xen\n");
1293 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1295 * file_ram_alloc() needs to allocate just like
1296 * phys_mem_alloc, but we haven't bothered to provide
1300 "-mem-path not supported with this accelerator\n");
1304 size = TARGET_PAGE_ALIGN(size);
1305 new_block = g_malloc0(sizeof(*new_block));
1307 new_block->length = size;
1308 new_block->host = file_ram_alloc(new_block, size, mem_path);
1309 return ram_block_add(new_block);
1313 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1316 RAMBlock *new_block;
1318 size = TARGET_PAGE_ALIGN(size);
1319 new_block = g_malloc0(sizeof(*new_block));
1321 new_block->length = size;
1323 new_block->host = host;
1325 new_block->flags |= RAM_PREALLOC;
1327 return ram_block_add(new_block);
1330 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1332 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1335 void qemu_ram_free_from_ptr(ram_addr_t addr)
1339 /* This assumes the iothread lock is taken here too. */
1340 qemu_mutex_lock_ramlist();
1341 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1342 if (addr == block->offset) {
1343 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1344 ram_list.mru_block = NULL;
1350 qemu_mutex_unlock_ramlist();
1353 void qemu_ram_free(ram_addr_t addr)
1357 /* This assumes the iothread lock is taken here too. */
1358 qemu_mutex_lock_ramlist();
1359 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1360 if (addr == block->offset) {
1361 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1362 ram_list.mru_block = NULL;
1364 if (block->flags & RAM_PREALLOC) {
1366 } else if (xen_enabled()) {
1367 xen_invalidate_map_cache_entry(block->host);
1369 } else if (block->fd >= 0) {
1370 munmap(block->host, block->length);
1374 qemu_anon_ram_free(block->host, block->length);
1380 qemu_mutex_unlock_ramlist();
1385 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1392 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1393 offset = addr - block->offset;
1394 if (offset < block->length) {
1395 vaddr = block->host + offset;
1396 if (block->flags & RAM_PREALLOC) {
1398 } else if (xen_enabled()) {
1402 munmap(vaddr, length);
1403 if (block->fd >= 0) {
1405 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1408 flags |= MAP_PRIVATE;
1410 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1411 flags, block->fd, offset);
1414 * Remap needs to match alloc. Accelerators that
1415 * set phys_mem_alloc never remap. If they did,
1416 * we'd need a remap hook here.
1418 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1420 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1421 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1424 if (area != vaddr) {
1425 fprintf(stderr, "Could not remap addr: "
1426 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1430 memory_try_enable_merging(vaddr, length);
1431 qemu_ram_setup_dump(vaddr, length);
1437 #endif /* !_WIN32 */
1439 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1440 With the exception of the softmmu code in this file, this should
1441 only be used for local memory (e.g. video ram) that the device owns,
1442 and knows it isn't going to access beyond the end of the block.
1444 It should not be used for general purpose DMA.
1445 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1447 void *qemu_get_ram_ptr(ram_addr_t addr)
1449 RAMBlock *block = qemu_get_ram_block(addr);
1451 if (xen_enabled()) {
1452 /* We need to check if the requested address is in the RAM
1453 * because we don't want to map the entire memory in QEMU.
1454 * In that case just map until the end of the page.
1456 if (block->offset == 0) {
1457 return xen_map_cache(addr, 0, 0);
1458 } else if (block->host == NULL) {
1460 xen_map_cache(block->offset, block->length, 1);
1463 return block->host + (addr - block->offset);
1466 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1467 * but takes a size argument */
/*
 * @addr: ram offset to resolve.
 * @size: in/out byte count; it is shrunk when addr + *size would run past
 *        the end of the containing block.
 *
 * Under Xen the request is handed to xen_map_cache(addr, *size, 1).
 * Otherwise ram_list.blocks is searched for the block containing addr and
 * block->host + (addr - block->offset) is returned.  If no block matches,
 * "Bad ram offset" is printed to stderr.
 */
1468 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1473 if (xen_enabled()) {
1474 return xen_map_cache(addr, *size, 1);
1478 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1479 if (addr - block->offset < block->length) {
1480 if (addr - block->offset + *size > block->length)
1481 *size = block->length - addr + block->offset;
1482 return block->host + (addr - block->offset);
1486 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1491 /* Some of the softmmu routines need to translate from a host pointer
1492 (typically a TLB entry) back to a ram offset. */
/*
 * @ptr:      host pointer to look up.
 * @ram_addr: out; receives the ram offset that corresponds to @ptr.
 *
 * Under Xen the offset comes from xen_ram_addr_from_mapcache() and the
 * region returned is the ->mr of the block found by qemu_get_ram_block().
 * Otherwise ram_list.mru_block is tested first, then every entry of
 * ram_list.blocks; a block matches when @ptr lies less than block->length
 * bytes past block->host.  The offset stored is
 * block->offset + (host - block->host).
 */
1493 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1496 uint8_t *host = ptr;
1498 if (xen_enabled()) {
1499 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1500 return qemu_get_ram_block(*ram_addr)->mr;
1503 block = ram_list.mru_block;
1504 if (block && block->host && host - block->host < block->length) {
1508 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1509 /* This case happens when the block is not mapped. */
1510 if (block->host == NULL) {
1513 if (host - block->host < block->length) {
1521 *ram_addr = block->offset + (host - block->host);
/*
 * Write handler of notdirty_mem_ops.
 *
 * If the DIRTY_MEMORY_CODE flag is not set for ram_addr,
 * tb_invalidate_phys_page_fast() is called for the written range.  The
 * value is then stored into RAM through qemu_get_ram_ptr() using
 * stb_p/stw_p/stl_p, the MIGRATION and VGA dirty flags are set, and once
 * the page is no longer clean tlb_set_dirty() is applied to
 * current_cpu->mem_io_vaddr.
 */
1525 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1526 uint64_t val, unsigned size)
1528 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1529 tb_invalidate_phys_page_fast(ram_addr, size);
1533 stb_p(qemu_get_ram_ptr(ram_addr), val);
1536 stw_p(qemu_get_ram_ptr(ram_addr), val);
1539 stl_p(qemu_get_ram_ptr(ram_addr), val);
1544 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1545 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1546 /* we remove the notdirty callback only if the code has been
1548 if (!cpu_physical_memory_is_clean(ram_addr)) {
1549 CPUArchState *env = current_cpu->env_ptr;
1550 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
/*
 * valid.accepts hook of notdirty_mem_ops; it is given the access address,
 * size and direction (is_write).
 */
1554 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1555 unsigned size, bool is_write)
/*
 * Callback table used for io_mem_notdirty (installed by io_mem_init()).
 * It provides a write handler and an accepts filter but no read handler;
 * accesses are declared DEVICE_NATIVE_ENDIAN.
 */
1560 static const MemoryRegionOps notdirty_mem_ops = {
1561 .write = notdirty_mem_write,
1562 .valid.accepts = notdirty_mem_accepts,
1563 .endianness = DEVICE_NATIVE_ENDIAN,
1566 /* Generate a debug exception if a watchpoint has been hit. */
/*
 * @offset:   added to the page base of cpu->mem_io_vaddr to form the
 *            accessed virtual address.
 * @len_mask: mask applied to each watchpoint's vaddr before comparing it
 *            with the accessed address.
 * @flags:    access kind; only watchpoints with (wp->flags & flags) match.
 *
 * Operates on current_cpu.  If cpu->watchpoint_hit is already set, a
 * CPU_INTERRUPT_DEBUG interrupt is raised.  Otherwise the CPU's watchpoint
 * list is scanned: a matching entry gets BP_WATCHPOINT_HIT, and the first
 * match is recorded in cpu->watchpoint_hit and passed through
 * tb_check_watchpoint().  Non-matching entries have BP_WATCHPOINT_HIT
 * cleared.
 */
1567 static void check_watchpoint(int offset, int len_mask, int flags)
1569 CPUState *cpu = current_cpu;
1570 CPUArchState *env = cpu->env_ptr;
1571 target_ulong pc, cs_base;
1576 if (cpu->watchpoint_hit) {
1577 /* We re-entered the check after replacing the TB. Now raise
1578 * the debug interrupt so that it will trigger after the
1579 * current instruction. */
1580 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1583 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1584 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1585 if ((vaddr == (wp->vaddr & len_mask) ||
1586 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1587 wp->flags |= BP_WATCHPOINT_HIT;
1588 if (!cpu->watchpoint_hit) {
1589 cpu->watchpoint_hit = wp;
1590 tb_check_watchpoint(cpu);
1591 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1592 cpu->exception_index = EXCP_DEBUG;
1595 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1596 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1597 cpu_resume_from_signal(cpu, NULL);
1601 wp->flags &= ~BP_WATCHPOINT_HIT;
1606 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1607 so these check for a hit then pass through to the normal out-of-line
/*
 * Read handler of watch_mem_ops.  Runs check_watchpoint() with the in-page
 * offset of addr, a mask derived from the access size and BP_MEM_READ, then
 * performs the load on address_space_memory with ldub_phys/lduw_phys/
 * ldl_phys for sizes 1/2/4.
 */
1609 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1612 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1614 case 1: return ldub_phys(&address_space_memory, addr);
1615 case 2: return lduw_phys(&address_space_memory, addr);
1616 case 4: return ldl_phys(&address_space_memory, addr);
/*
 * Write handler of watch_mem_ops.  Runs check_watchpoint() with the in-page
 * offset of addr, a mask derived from the access size and BP_MEM_WRITE,
 * then stores val to address_space_memory with stb_phys/stw_phys/stl_phys.
 */
1621 static void watch_mem_write(void *opaque, hwaddr addr,
1622 uint64_t val, unsigned size)
1624 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1627 stb_phys(&address_space_memory, addr, val);
1630 stw_phys(&address_space_memory, addr, val);
1633 stl_phys(&address_space_memory, addr, val);
/*
 * Callback table used for io_mem_watch (installed by io_mem_init()):
 * watchpoint-checking read and write handlers, DEVICE_NATIVE_ENDIAN.
 */
1639 static const MemoryRegionOps watch_mem_ops = {
1640 .read = watch_mem_read,
1641 .write = watch_mem_write,
1642 .endianness = DEVICE_NATIVE_ENDIAN,
/*
 * Read handler of subpage_ops.  @opaque is the subpage_t.  The access is
 * redirected to subpage->as at addr + subpage->base through
 * address_space_read(), filling a local buffer.  A trace line is printed
 * when DEBUG_SUBPAGE is defined.
 */
1645 static uint64_t subpage_read(void *opaque, hwaddr addr,
1648 subpage_t *subpage = opaque;
1651 #if defined(DEBUG_SUBPAGE)
1652 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1653 subpage, len, addr);
1655 address_space_read(subpage->as, addr + subpage->base, buf, len);
/*
 * Write handler of subpage_ops.  @opaque is the subpage_t.  The access is
 * redirected to subpage->as at addr + subpage->base through
 * address_space_write(), using a local buffer of @len bytes.  A trace line
 * is printed when DEBUG_SUBPAGE is defined.
 */
1668 static void subpage_write(void *opaque, hwaddr addr,
1669 uint64_t value, unsigned len)
1671 subpage_t *subpage = opaque;
1674 #if defined(DEBUG_SUBPAGE)
1675 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1676 " value %"PRIx64"\n",
1677 __func__, subpage, len, addr, value);
1692 address_space_write(subpage->as, addr + subpage->base, buf, len);
/*
 * valid.accepts hook of subpage_ops.  The decision is delegated to
 * address_space_access_valid() on subpage->as at addr + subpage->base.
 * A trace line is printed when DEBUG_SUBPAGE is defined.
 */
1695 static bool subpage_accepts(void *opaque, hwaddr addr,
1696 unsigned len, bool is_write)
1698 subpage_t *subpage = opaque;
1699 #if defined(DEBUG_SUBPAGE)
1700 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1701 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1704 return address_space_access_valid(subpage->as, addr + subpage->base,
/*
 * Callback table given to memory_region_init_io() in subpage_init():
 * read, write and accepts handlers that forward to the owning address
 * space, DEVICE_NATIVE_ENDIAN.
 */
1708 static const MemoryRegionOps subpage_ops = {
1709 .read = subpage_read,
1710 .write = subpage_write,
1711 .valid.accepts = subpage_accepts,
1712 .endianness = DEVICE_NATIVE_ENDIAN,
/*
 * Assign `section` to every mmio->sub_section[] slot from
 * SUBPAGE_IDX(start) through SUBPAGE_IDX(end), inclusive.
 * @start and @end are byte offsets and are checked against
 * TARGET_PAGE_SIZE before use.  A trace line is printed when
 * DEBUG_SUBPAGE is defined.
 */
1715 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1720 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1722 idx = SUBPAGE_IDX(start);
1723 eidx = SUBPAGE_IDX(end);
1724 #if defined(DEBUG_SUBPAGE)
1725 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1726 __func__, mmio, start, end, idx, eidx, section);
1728 for (; idx <= eidx; idx++) {
1729 mmio->sub_section[idx] = section;
/*
 * Allocate a zeroed subpage_t with g_malloc0(), initialise its iomem
 * region as a TARGET_PAGE_SIZE I/O region named "subpage" backed by
 * subpage_ops (opaque = the subpage itself), flag the region as a subpage,
 * and mark the whole page range [0, TARGET_PAGE_SIZE-1] as
 * PHYS_SECTION_UNASSIGNED via subpage_register().
 */
1735 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1739 mmio = g_malloc0(sizeof(subpage_t));
1743 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1744 "subpage", TARGET_PAGE_SIZE);
1745 mmio->iomem.subpage = true;
1746 #if defined(DEBUG_SUBPAGE)
1747 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1748 mmio, base, TARGET_PAGE_SIZE);
1750 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
/*
 * Build a MemoryRegionSection for address space @as with both offsets set
 * to 0 and size int128_2_64(), append it to @map with phys_section_add()
 * and return the index that call yields.  mem_begin() uses this to create
 * the fixed PHYS_SECTION_* entries.
 *
 * The argument of phys_section_add() is the address of the local
 * `section`; the text previously read "§ion", a mis-decoded "&section".
 */
1755 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
1759 MemoryRegionSection section = {
1760 .address_space = as,
1762 .offset_within_address_space = 0,
1763 .offset_within_region = 0,
1764 .size = int128_2_64(),
1767 return phys_section_add(map, &section);
1770 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1772 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1775 static void io_mem_init(void)
1777 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1778 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1779 "unassigned", UINT64_MAX);
1780 memory_region_init_io(&io_mem_notdirty, NULL, ¬dirty_mem_ops, NULL,
1781 "notdirty", UINT64_MAX);
1782 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1783 "watch", UINT64_MAX);
/*
 * Listener hook; @listener is the dispatch_listener embedded in an
 * AddressSpace.  Allocates a zeroed AddressSpaceDispatch, registers the
 * four fixed dummy sections (unassigned, notdirty, rom, watch) and asserts
 * that each lands on its PHYS_SECTION_* index, resets phys_map to an empty
 * node, and stores the result in as->next_dispatch.
 */
1786 static void mem_begin(MemoryListener *listener)
1788 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1789 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1792 n = dummy_section(&d->map, as, &io_mem_unassigned);
1793 assert(n == PHYS_SECTION_UNASSIGNED);
1794 n = dummy_section(&d->map, as, &io_mem_notdirty);
1795 assert(n == PHYS_SECTION_NOTDIRTY);
1796 n = dummy_section(&d->map, as, &io_mem_rom);
1797 assert(n == PHYS_SECTION_ROM);
1798 n = dummy_section(&d->map, as, &io_mem_watch);
1799 assert(n == PHYS_SECTION_WATCH);
1801 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1803 as->next_dispatch = d;
/*
 * .commit hook of the dispatch listener.  Compacts the pending dispatch
 * (as->next_dispatch) with phys_page_compact_all(), installs it as
 * as->dispatch, and releases the section map of the dispatch that was
 * current before the switch.
 */
1806 static void mem_commit(MemoryListener *listener)
1808 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1809 AddressSpaceDispatch *cur = as->dispatch;
1810 AddressSpaceDispatch *next = as->next_dispatch;
1812 phys_page_compact_all(next, next->map.nodes_nb);
1814 as->dispatch = next;
1817 phys_sections_free(&cur->map);
/*
 * Listener hook taking a MemoryListener.  The visible logic compares
 * cpu->tcg_as_listener with @listener to pick out the CPU that owns it.
 * NOTE(review): presumably the owning CPU's TLB is then flushed, as the
 * comment below suggests - confirm against the full function.
 */
1822 static void tcg_commit(MemoryListener *listener)
1826 /* since each CPU stores ram addresses in its TLB cache, we must
1827 reset the modified entries */
1830 /* FIXME: Disentangle the cpu.h circular files deps so we can
1831 directly get the right CPU from listener. */
1832 if (cpu->tcg_as_listener != listener) {
1839 static void core_log_global_start(MemoryListener *listener)
1841 cpu_physical_memory_set_dirty_tracking(true);
1844 static void core_log_global_stop(MemoryListener *listener)
1846 cpu_physical_memory_set_dirty_tracking(false);
/*
 * Listener registered on address_space_memory by memory_map_init(); its
 * visible hooks switch dirty tracking on and off.
 */
1849 static MemoryListener core_memory_listener = {
1850 .log_global_start = core_log_global_start,
1851 .log_global_stop = core_log_global_stop,
/*
 * Prepare dispatch for @as: clear as->dispatch, fill in
 * as->dispatch_listener (mem_commit on commit, mem_add for both
 * region_add and region_nop) and register that listener for @as.
 */
1855 void address_space_init_dispatch(AddressSpace *as)
1857 as->dispatch = NULL;
1858 as->dispatch_listener = (MemoryListener) {
1860 .commit = mem_commit,
1861 .region_add = mem_add,
1862 .region_nop = mem_add,
1865 memory_listener_register(&as->dispatch_listener, as);
/*
 * Tear down dispatch for @as: unregister as->dispatch_listener and reset
 * as->dispatch to NULL.  The previous dispatch pointer is captured in `d`
 * before the reset.
 */
1868 void address_space_destroy_dispatch(AddressSpace *as)
1870 AddressSpaceDispatch *d = as->dispatch;
1872 memory_listener_unregister(&as->dispatch_listener);
1874 as->dispatch = NULL;
/*
 * Create the two root regions and their address spaces:
 *   system_memory - a UINT64_MAX-sized region named "system", root of
 *                   address_space_memory ("memory");
 *   system_io     - an I/O region named "io" backed by unassigned_io_ops,
 *                   root of address_space_io ("I/O").
 * Finally core_memory_listener is registered on address_space_memory.
 */
1877 static void memory_map_init(void)
1879 system_memory = g_malloc(sizeof(*system_memory));
1881 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1882 address_space_init(&address_space_memory, system_memory, "memory");
1884 system_io = g_malloc(sizeof(*system_io));
1885 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1887 address_space_init(&address_space_io, system_io, "I/O");
1889 memory_listener_register(&core_memory_listener, &address_space_memory);
1892 MemoryRegion *get_system_memory(void)
1894 return system_memory;
/*
 * Accessor returning a MemoryRegion pointer.
 * NOTE(review): presumably returns system_io, mirroring
 * get_system_memory() - confirm against the function body.
 */
1897 MemoryRegion *get_system_io(void)
1902 #endif /* !defined(CONFIG_USER_ONLY) */
1904 /* physical memory access (slow version, mainly for debug) */
1905 #if defined(CONFIG_USER_ONLY)
/*
 * CONFIG_USER_ONLY variant of the debug memory accessor.
 *
 * Works one target page at a time: `page` is addr rounded down to a page
 * boundary and `l` the byte count up to the end of that page.  The page's
 * flags are fetched with page_get_flags() and tested for PAGE_VALID, then
 * PAGE_WRITE or PAGE_READ depending on the direction.  Guest memory is
 * reached through lock_user()/unlock_user(); the write path unlocks with
 * length l, the read path with length 0.
 */
1906 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1907 uint8_t *buf, int len, int is_write)
1914 page = addr & TARGET_PAGE_MASK;
1915 l = (page + TARGET_PAGE_SIZE) - addr;
1918 flags = page_get_flags(page);
1919 if (!(flags & PAGE_VALID))
1922 if (!(flags & PAGE_WRITE))
1924 /* XXX: this code should not depend on lock_user */
1925 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1928 unlock_user(p, addr, l);
1930 if (!(flags & PAGE_READ))
1932 /* XXX: this code should not depend on lock_user */
1933 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1936 unlock_user(p, addr, 0);
/*
 * Bookkeeping after a direct write of `length` bytes at ram address @addr.
 * If the page is still clean, translated code covering
 * [addr, addr + length) is invalidated; the VGA and MIGRATION dirty flags
 * are then set for addr, and xen_modified_memory() is told about the
 * range.
 */
1947 static void invalidate_and_set_dirty(hwaddr addr,
1950 if (cpu_physical_memory_is_clean(addr)) {
1951 /* invalidate code */
1952 tb_invalidate_phys_page_range(addr, addr + length, 0);
1954 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1955 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1957 xen_modified_memory(addr, length);
/*
 * Pick the size of the next I/O access to @mr.
 *
 * @l:    number of bytes still wanted.
 * @addr: address of the access; its lowest set bit bounds the size when
 *        the region does not implement unaligned accesses.
 *
 * The upper bound starts at mr->ops->valid.max_access_size (4 when that
 * field is 0) and is lowered to the address alignment if needed; @l is
 * clamped to that bound.  The statement `1 << (qemu_fls(l) - 1)` reduces
 * a value to a power of two.
 */
1960 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1962 unsigned access_size_max = mr->ops->valid.max_access_size;
1964 /* Regions are assumed to support 1-4 byte accesses unless
1965 otherwise specified. */
1966 if (access_size_max == 0) {
1967 access_size_max = 4;
1970 /* Bound the maximum access by the alignment of the address. */
1971 if (!mr->ops->impl.unaligned) {
1972 unsigned align_size_max = addr & -addr;
1973 if (align_size_max != 0 && align_size_max < access_size_max) {
1974 access_size_max = align_size_max;
1978 /* Don't attempt accesses larger than the maximum. */
1979 if (l > access_size_max) {
1980 l = access_size_max;
1983 l = 1 << (qemu_fls(l) - 1);
/*
 * Copy @len bytes between @buf and address space @as starting at @addr;
 * @is_write selects the direction (true: buf -> guest memory).
 *
 * Each piece is resolved with address_space_translate().  Regions that are
 * not directly accessible are driven through io_mem_write()/io_mem_read()
 * in 8-, 4-, 2- or 1-byte units sized by memory_access_size(), and the
 * callbacks' results are OR-ed into `error`.  Directly accessible memory
 * is copied with memcpy() through qemu_get_ram_ptr(); the write path also
 * calls invalidate_and_set_dirty().
 * NOTE(review): the return value is presumably the accumulated `error`
 * flag - confirm against the full function.
 */
1989 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1990 int len, bool is_write)
2001 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2004 if (!memory_access_is_direct(mr, is_write)) {
2005 l = memory_access_size(mr, l, addr1);
2006 /* XXX: could force current_cpu to NULL to avoid
2010 /* 64 bit write access */
2012 error |= io_mem_write(mr, addr1, val, 8);
2015 /* 32 bit write access */
2017 error |= io_mem_write(mr, addr1, val, 4);
2020 /* 16 bit write access */
2022 error |= io_mem_write(mr, addr1, val, 2);
2025 /* 8 bit write access */
2027 error |= io_mem_write(mr, addr1, val, 1);
2033 addr1 += memory_region_get_ram_addr(mr);
2035 ptr = qemu_get_ram_ptr(addr1);
2036 memcpy(ptr, buf, l);
2037 invalidate_and_set_dirty(addr1, l);
2040 if (!memory_access_is_direct(mr, is_write)) {
2042 l = memory_access_size(mr, l, addr1);
2045 /* 64 bit read access */
2046 error |= io_mem_read(mr, addr1, &val, 8);
2050 /* 32 bit read access */
2051 error |= io_mem_read(mr, addr1, &val, 4);
2055 /* 16 bit read access */
2056 error |= io_mem_read(mr, addr1, &val, 2);
2060 /* 8 bit read access */
2061 error |= io_mem_read(mr, addr1, &val, 1);
2069 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2070 memcpy(buf, ptr, l);
2081 bool address_space_write(AddressSpace *as, hwaddr addr,
2082 const uint8_t *buf, int len)
2084 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2087 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2089 return address_space_rw(as, addr, buf, len, false);
2093 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2094 int len, int is_write)
2096 address_space_rw(&address_space_memory, addr, buf, len, is_write);
/*
 * Operation selector for cpu_physical_memory_write_rom_internal().
 * Callers in this file pass WRITE_DATA (ROM loading) and FLUSH_CACHE
 * (instruction-cache flush).
 */
2099 enum write_rom_type {
/*
 * Shared worker for ROM writes and icache flushes over [addr, addr + len)
 * in @as; @type selects which operation is performed.
 *
 * Each piece is resolved with address_space_translate() (as a write).
 * Only regions that are RAM or ROMD are touched: the host pointer is
 * obtained with qemu_get_ram_ptr(), then either the data is copied with
 * memcpy() followed by invalidate_and_set_dirty(), or
 * flush_icache_range() is applied to the host range.
 */
2104 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2105 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2114 mr = address_space_translate(as, addr, &addr1, &l, true);
2116 if (!(memory_region_is_ram(mr) ||
2117 memory_region_is_romd(mr))) {
2120 addr1 += memory_region_get_ram_addr(mr);
2122 ptr = qemu_get_ram_ptr(addr1);
2125 memcpy(ptr, buf, l);
2126 invalidate_and_set_dirty(addr1, l);
2129 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2139 /* used for ROM loading : can write in RAM and ROM */
2140 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2141 const uint8_t *buf, int len)
2143 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
/*
 * Flush the host instruction cache for @len bytes of guest physical memory
 * starting at @start.  tcg_enabled() is checked first; the flush itself is
 * carried out by cpu_physical_memory_write_rom_internal() on
 * address_space_memory in FLUSH_CACHE mode with a NULL data buffer.
 */
2146 void cpu_flush_icache_range(hwaddr start, int len)
2149 * This function should do the same thing as an icache flush that was
2150 * triggered from within the guest. For TCG we are always cache coherent,
2151 * so there is no need to flush anything. For KVM / Xen we need to flush
2152 * the host's instruction cache at least.
2154 if (tcg_enabled()) {
2158 cpu_physical_memory_write_rom_internal(&address_space_memory,
2159 start, NULL, len, FLUSH_CACHE);
/*
 * The single bounce buffer used by address_space_map() and
 * address_space_unmap() for regions that cannot be accessed directly.
 * bounce.buffer is non-NULL while the buffer is in use.
 */
2169 static BounceBuffer bounce;
/*
 * One registered map client: a callback (invoked with the client's opaque
 * pointer by cpu_notify_map_clients()) plus its linkage in
 * map_client_list.
 */
2171 typedef struct MapClient {
2173 void (*callback)(void *opaque);
2174 QLIST_ENTRY(MapClient) link;
/* List of clients registered through cpu_register_map_client(). */
2177 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2178 = QLIST_HEAD_INITIALIZER(map_client_list);
/*
 * Register @callback, to be invoked with @opaque by
 * cpu_notify_map_clients().  A MapClient is allocated with g_malloc(),
 * filled in and inserted at the head of map_client_list.
 */
2180 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2182 MapClient *client = g_malloc(sizeof(*client));
2184 client->opaque = opaque;
2185 client->callback = callback;
2186 QLIST_INSERT_HEAD(&map_client_list, client, link);
/*
 * Remove a client from map_client_list.  @_client is the MapClient,
 * passed as a void pointer.
 */
2190 static void cpu_unregister_map_client(void *_client)
2192 MapClient *client = (MapClient *)_client;
2194 QLIST_REMOVE(client, link);
/*
 * Drain map_client_list: repeatedly take the first client, invoke its
 * callback with its opaque pointer, then unregister it, until the list is
 * empty.  Called from address_space_unmap() after the bounce buffer has
 * been released.
 */
2198 static void cpu_notify_map_clients(void)
2202 while (!QLIST_EMPTY(&map_client_list)) {
2203 client = QLIST_FIRST(&map_client_list);
2204 client->callback(client->opaque);
2205 cpu_unregister_map_client(client);
/*
 * Check whether an access of @len bytes at @addr in @as would be accepted.
 * Each piece is resolved with address_space_translate(); for regions that
 * are not directly accessible, the access size is chosen with
 * memory_access_size() and validated with memory_region_access_valid().
 */
2209 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2216 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2217 if (!memory_access_is_direct(mr, is_write)) {
2218 l = memory_access_size(mr, l, addr);
2219 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2230 /* Map a physical memory region into a host virtual address.
2231 * May map a subset of the requested range, given by and returned in *plen.
2232 * May return NULL if resources needed to perform the mapping are exhausted.
2233 * Use only for reads OR writes - not for read-modify-write operations.
2234 * Use cpu_register_map_client() to know when retrying the map operation is
2235 * likely to succeed.
/*
 * Two paths are visible below:
 *  - Region not directly accessible: the global bounce buffer is used.
 *    Its size is capped at TARGET_PAGE_SIZE, it is allocated with
 *    qemu_memalign(), the region is referenced, and the buffer may be
 *    pre-filled with address_space_read().  bounce.buffer is returned.
 *  - Directly accessible: further pieces are translated for as long as
 *    they stay in the same region and contiguous (xlat == base + done);
 *    the region is referenced and the host pointer comes from
 *    qemu_ram_ptr_length(raddr + base, plen).
 */
2237 void *address_space_map(AddressSpace *as,
2244 hwaddr l, xlat, base;
2245 MemoryRegion *mr, *this_mr;
2253 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2254 if (!memory_access_is_direct(mr, is_write)) {
2255 if (bounce.buffer) {
2258 /* Avoid unbounded allocations */
2259 l = MIN(l, TARGET_PAGE_SIZE);
2260 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2264 memory_region_ref(mr);
2267 address_space_read(as, addr, bounce.buffer, l);
2271 return bounce.buffer;
2275 raddr = memory_region_get_ram_addr(mr);
2286 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2287 if (this_mr != mr || xlat != base + done) {
2292 memory_region_ref(mr);
2294 return qemu_ram_ptr_length(raddr + base, plen);
2297 /* Unmaps a memory region previously mapped by address_space_map().
2298 * Will also mark the memory as dirty if is_write == 1. access_len gives
2299 * the amount of memory that was actually read or written by the caller.
/*
 * Two paths are visible below:
 *  - @buffer is not the bounce buffer: the region and ram address are
 *    recovered with qemu_ram_addr_from_host(), the accessed range is
 *    passed to invalidate_and_set_dirty() in chunks of at most
 *    TARGET_PAGE_SIZE, the Xen map-cache entry is invalidated when Xen is
 *    enabled, and the region reference is dropped.
 *  - @buffer is the bounce buffer: its contents may be written back with
 *    address_space_write() at bounce.addr, the buffer is freed and
 *    cleared, the region reference is dropped, and waiting map clients
 *    are notified.
 */
2301 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2302 int is_write, hwaddr access_len)
2304 if (buffer != bounce.buffer) {
2308 mr = qemu_ram_addr_from_host(buffer, &addr1);
2311 while (access_len) {
2313 l = TARGET_PAGE_SIZE;
2316 invalidate_and_set_dirty(addr1, l);
2321 if (xen_enabled()) {
2322 xen_invalidate_map_cache_entry(buffer);
2324 memory_region_unref(mr);
2328 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2330 qemu_vfree(bounce.buffer);
2331 bounce.buffer = NULL;
2332 memory_region_unref(bounce.mr);
2333 cpu_notify_map_clients();
/*
 * Map guest physical memory at @addr into host address space.
 * Thin wrapper that forwards to address_space_map() on
 * address_space_memory and returns its result.
 */
2336 void *cpu_physical_memory_map(hwaddr addr,
2340 return address_space_map(&address_space_memory, addr, plen, is_write);
2343 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2344 int is_write, hwaddr access_len)
2346 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2349 /* warning: addr must be aligned */
/*
 * Load a 32-bit value from @as at @addr with the byte order given by
 * @endian.
 *
 * If fewer than 4 bytes translate contiguously, or the region is not
 * directly accessible, the value is fetched with io_mem_read(..., 4); the
 * TARGET_WORDS_BIGENDIAN conditional then tests @endian for the byte order
 * opposite to the target's.  Otherwise the value is read from RAM through
 * qemu_get_ram_ptr() with ldl_le_p() or ldl_be_p() for the explicit
 * little/big-endian cases.
 */
2350 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2351 enum device_endian endian)
2359 mr = address_space_translate(as, addr, &addr1, &l, false);
2360 if (l < 4 || !memory_access_is_direct(mr, false)) {
2362 io_mem_read(mr, addr1, &val, 4);
2363 #if defined(TARGET_WORDS_BIGENDIAN)
2364 if (endian == DEVICE_LITTLE_ENDIAN) {
2368 if (endian == DEVICE_BIG_ENDIAN) {
2374 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2378 case DEVICE_LITTLE_ENDIAN:
2379 val = ldl_le_p(ptr);
2381 case DEVICE_BIG_ENDIAN:
2382 val = ldl_be_p(ptr);
2392 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2394 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2397 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2399 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2402 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2404 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2407 /* warning: addr must be aligned */
/*
 * Load a 64-bit value from @as at @addr with the byte order given by
 * @endian.
 *
 * If fewer than 8 bytes translate contiguously, or the region is not
 * directly accessible, the value is fetched with io_mem_read(..., 8); the
 * TARGET_WORDS_BIGENDIAN conditional then tests @endian for the byte order
 * opposite to the target's.  Otherwise the value is read from RAM through
 * qemu_get_ram_ptr() with ldq_le_p() or ldq_be_p() for the explicit
 * little/big-endian cases.
 */
2408 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2409 enum device_endian endian)
2417 mr = address_space_translate(as, addr, &addr1, &l,
2419 if (l < 8 || !memory_access_is_direct(mr, false)) {
2421 io_mem_read(mr, addr1, &val, 8);
2422 #if defined(TARGET_WORDS_BIGENDIAN)
2423 if (endian == DEVICE_LITTLE_ENDIAN) {
2427 if (endian == DEVICE_BIG_ENDIAN) {
2433 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2437 case DEVICE_LITTLE_ENDIAN:
2438 val = ldq_le_p(ptr);
2440 case DEVICE_BIG_ENDIAN:
2441 val = ldq_be_p(ptr);
2451 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2453 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2456 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2458 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2461 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2463 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
/*
 * Load one byte from @as at @addr.  The byte is read into a local through
 * address_space_rw() with a length of 1 and is_write = 0.
 */
2467 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2470 address_space_rw(as, addr, &val, 1, 0);
2474 /* warning: addr must be aligned */
/*
 * Load a 16-bit value from @as at @addr with the byte order given by
 * @endian.
 *
 * If fewer than 2 bytes translate contiguously, or the region is not
 * directly accessible, the value is fetched with io_mem_read(..., 2); the
 * TARGET_WORDS_BIGENDIAN conditional then tests @endian for the byte order
 * opposite to the target's.  Otherwise the value is read from RAM through
 * qemu_get_ram_ptr() with lduw_le_p() or lduw_be_p() for the explicit
 * little/big-endian cases.
 */
2475 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2476 enum device_endian endian)
2484 mr = address_space_translate(as, addr, &addr1, &l,
2486 if (l < 2 || !memory_access_is_direct(mr, false)) {
2488 io_mem_read(mr, addr1, &val, 2);
2489 #if defined(TARGET_WORDS_BIGENDIAN)
2490 if (endian == DEVICE_LITTLE_ENDIAN) {
2494 if (endian == DEVICE_BIG_ENDIAN) {
2500 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2504 case DEVICE_LITTLE_ENDIAN:
2505 val = lduw_le_p(ptr);
2507 case DEVICE_BIG_ENDIAN:
2508 val = lduw_be_p(ptr);
2518 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2520 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2523 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2525 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2528 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2530 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2533 /* warning: addr must be aligned. The ram page is not marked as dirty
2534 and the code inside is not invalidated. It is useful if the dirty
2535 bits are used to track modified PTEs */
/*
 * Store the 32-bit @val to @as at @addr.
 *
 * If fewer than 4 bytes translate contiguously, or the region is not
 * directly accessible, the store goes through io_mem_write(..., 4).
 * Otherwise the RAM address is computed and the host pointer obtained with
 * qemu_get_ram_ptr().  Only while in_migration is set does the visible
 * code touch dirty state: translated code is invalidated if the page is
 * clean, and the MIGRATION and VGA dirty flags are set.
 */
2536 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2543 mr = address_space_translate(as, addr, &addr1, &l,
2545 if (l < 4 || !memory_access_is_direct(mr, true)) {
2546 io_mem_write(mr, addr1, val, 4);
2548 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2549 ptr = qemu_get_ram_ptr(addr1);
2552 if (unlikely(in_migration)) {
2553 if (cpu_physical_memory_is_clean(addr1)) {
2554 /* invalidate code */
2555 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2557 cpu_physical_memory_set_dirty_flag(addr1,
2558 DIRTY_MEMORY_MIGRATION);
2559 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2565 /* warning: addr must be aligned */
/*
 * Store the 32-bit @val to @as at @addr with the byte order given by
 * @endian.
 *
 * If fewer than 4 bytes translate contiguously, or the region is not
 * directly accessible, @endian is tested against the byte order opposite
 * to the target's (TARGET_WORDS_BIGENDIAN conditional) and the value is
 * written with io_mem_write(..., 4).  Otherwise the RAM address is
 * computed, the host pointer obtained with qemu_get_ram_ptr(), the store
 * is chosen by @endian, and invalidate_and_set_dirty(addr1, 4) records
 * the modification.
 */
2566 static inline void stl_phys_internal(AddressSpace *as,
2567 hwaddr addr, uint32_t val,
2568 enum device_endian endian)
2575 mr = address_space_translate(as, addr, &addr1, &l,
2577 if (l < 4 || !memory_access_is_direct(mr, true)) {
2578 #if defined(TARGET_WORDS_BIGENDIAN)
2579 if (endian == DEVICE_LITTLE_ENDIAN) {
2583 if (endian == DEVICE_BIG_ENDIAN) {
2587 io_mem_write(mr, addr1, val, 4);
2590 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2591 ptr = qemu_get_ram_ptr(addr1);
2593 case DEVICE_LITTLE_ENDIAN:
2596 case DEVICE_BIG_ENDIAN:
2603 invalidate_and_set_dirty(addr1, 4);
2607 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2609 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2612 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2614 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2617 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2619 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
/*
 * Store one byte to @as at @addr.  The byte is written from a local
 * through address_space_rw() with a length of 1 and is_write = 1.
 */
2623 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2626 address_space_rw(as, addr, &v, 1, 1);
2629 /* warning: addr must be aligned */
/*
 * Store the 16-bit @val to @as at @addr with the byte order given by
 * @endian.
 *
 * If fewer than 2 bytes translate contiguously, or the region is not
 * directly accessible, @endian is tested against the byte order opposite
 * to the target's (TARGET_WORDS_BIGENDIAN conditional) and the value is
 * written with io_mem_write(..., 2).  Otherwise the RAM address is
 * computed, the host pointer obtained with qemu_get_ram_ptr(), the store
 * is chosen by @endian, and invalidate_and_set_dirty(addr1, 2) records
 * the modification.
 */
2630 static inline void stw_phys_internal(AddressSpace *as,
2631 hwaddr addr, uint32_t val,
2632 enum device_endian endian)
2639 mr = address_space_translate(as, addr, &addr1, &l, true);
2640 if (l < 2 || !memory_access_is_direct(mr, true)) {
2641 #if defined(TARGET_WORDS_BIGENDIAN)
2642 if (endian == DEVICE_LITTLE_ENDIAN) {
2646 if (endian == DEVICE_BIG_ENDIAN) {
2650 io_mem_write(mr, addr1, val, 2);
2653 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2654 ptr = qemu_get_ram_ptr(addr1);
2656 case DEVICE_LITTLE_ENDIAN:
2659 case DEVICE_BIG_ENDIAN:
2666 invalidate_and_set_dirty(addr1, 2);
2670 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2672 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2675 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2677 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2680 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2682 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
/*
 * 64-bit store of @val to @as at @addr: the 8 bytes of the local copy of
 * val are written with address_space_rw() (is_write = 1).
 * NOTE(review): presumably val is converted to target byte order before
 * the write, as the _le/_be variants do - confirm.
 */
2686 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2689 address_space_rw(as, addr, (void *) &val, 8, 1);
2692 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2694 val = cpu_to_le64(val);
2695 address_space_rw(as, addr, (void *) &val, 8, 1);
2698 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2700 val = cpu_to_be64(val);
2701 address_space_rw(as, addr, (void *) &val, 8, 1);
2704 /* virtual memory access for debug (includes writing to ROM) */
/*
 * System-emulation variant of the debug memory accessor, operating on
 * guest virtual addresses of @cpu.
 *
 * Works one target page at a time: the page containing @addr is translated
 * with cpu_get_phys_page_debug() (a result of -1 means no physical page is
 * mapped), `l` is the byte count up to the end of the page, and the
 * in-page offset is added to the physical address.  Writes go through
 * cpu_physical_memory_write_rom() on cpu->as, so ROM can be modified too;
 * reads use address_space_rw() on cpu->as.
 */
2705 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2706 uint8_t *buf, int len, int is_write)
2713 page = addr & TARGET_PAGE_MASK;
2714 phys_addr = cpu_get_phys_page_debug(cpu, page);
2715 /* if no physical page mapped, return an error */
2716 if (phys_addr == -1)
2718 l = (page + TARGET_PAGE_SIZE) - addr;
2721 phys_addr += (addr & ~TARGET_PAGE_MASK);
2723 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2725 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2735 #if !defined(CONFIG_USER_ONLY)
2738 * A helper function for the _utterly broken_ virtio device model to find out if
2739 * it's running on a big endian machine. Don't do this at home kids!
/*
 * The prototype is declared immediately before the definition.  The
 * result is selected at compile time by TARGET_WORDS_BIGENDIAN.
 */
2741 bool virtio_is_big_endian(void);
2742 bool virtio_is_big_endian(void)
2744 #if defined(TARGET_WORDS_BIGENDIAN)
2753 #ifndef CONFIG_USER_ONLY
/*
 * Report whether @phys_addr in address_space_memory is I/O.  The address
 * is translated (as a read) and the result is true unless the region found
 * is RAM or ROMD.
 */
2754 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2759 mr = address_space_translate(&address_space_memory,
2760 phys_addr, &phys_addr, &l, false);
2762 return !(memory_region_is_ram(mr) ||
2763 memory_region_is_romd(mr));
2766 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2770 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2771 func(block->host, block->offset, block->length, opaque);