4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
30 #include "qemu/osdep.h"
31 #include "sysemu/kvm.h"
32 #include "sysemu/sysemu.h"
33 #include "hw/xen/xen.h"
34 #include "qemu/timer.h"
35 #include "qemu/config-file.h"
36 #include "exec/memory.h"
37 #include "sysemu/dma.h"
38 #include "exec/address-spaces.h"
39 #if defined(CONFIG_USER_ONLY)
41 #else /* !CONFIG_USER_ONLY */
42 #include "sysemu/xen-mapcache.h"
45 #include "exec/cpu-all.h"
47 #include "exec/cputlb.h"
48 #include "translate-all.h"
50 #include "exec/memory-internal.h"
51 #include "exec/ram_addr.h"
52 #include "qemu/cache-utils.h"
54 #include "qemu/range.h"
56 //#define DEBUG_SUBPAGE
58 #if !defined(CONFIG_USER_ONLY)
59 static bool in_migration;
61 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
63 static MemoryRegion *system_memory;
64 static MemoryRegion *system_io;
66 AddressSpace address_space_io;
67 AddressSpace address_space_memory;
69 MemoryRegion io_mem_rom, io_mem_notdirty;
70 static MemoryRegion io_mem_unassigned;
74 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
75 /* current CPU in the current thread. It is only valid inside
77 DEFINE_TLS(CPUState *, current_cpu);
78 /* 0 = Do not count executed instructions.
79 1 = Precise instruction counting.
80 2 = Adaptive rate instruction counting. */
83 #if !defined(CONFIG_USER_ONLY)
85 typedef struct PhysPageEntry PhysPageEntry;
87 struct PhysPageEntry {
88 /* How many bits to skip to get to the next level (in units of P_L2_SIZE). 0 for a leaf. */
90 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
94 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
96 /* Size of the L2 (and L3, etc) page tables. */
97 #define ADDR_SPACE_BITS 64
100 #define P_L2_SIZE (1 << P_L2_BITS)
102 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
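/*
 * Illustrative sketch, kept out of the build: how phys_page_set_level() and
 * phys_page_find() below slice a page index into one slot per level.  The
 * helper name "example_level_index" is hypothetical.
 */
#if 0
static inline unsigned example_level_index(hwaddr index, int level)
{
    /* each level consumes P_L2_BITS bits, topmost level in the high bits */
    return (index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1);
}
#endif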
104 typedef PhysPageEntry Node[P_L2_SIZE];
106 typedef struct PhysPageMap {
107 unsigned sections_nb;
108 unsigned sections_nb_alloc;
110 unsigned nodes_nb_alloc;
112 MemoryRegionSection *sections;
115 struct AddressSpaceDispatch {
116 /* This is a multi-level map on the physical address space.
117 * The bottom level has pointers to MemoryRegionSections.
119 PhysPageEntry phys_map;
124 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
125 typedef struct subpage_t {
129 uint16_t sub_section[TARGET_PAGE_SIZE];
132 #define PHYS_SECTION_UNASSIGNED 0
133 #define PHYS_SECTION_NOTDIRTY 1
134 #define PHYS_SECTION_ROM 2
135 #define PHYS_SECTION_WATCH 3
137 static void io_mem_init(void);
138 static void memory_map_init(void);
139 static void tcg_commit(MemoryListener *listener);
141 static MemoryRegion io_mem_watch;
144 #if !defined(CONFIG_USER_ONLY)
146 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
148 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
149 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
150 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
151 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
155 static uint32_t phys_map_node_alloc(PhysPageMap *map)
160 ret = map->nodes_nb++;
161 assert(ret != PHYS_MAP_NODE_NIL);
162 assert(ret != map->nodes_nb_alloc);
163 for (i = 0; i < P_L2_SIZE; ++i) {
164 map->nodes[ret][i].skip = 1;
165 map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
170 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
171 hwaddr *index, hwaddr *nb, uint16_t leaf,
176 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
178 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
179 lp->ptr = phys_map_node_alloc(map);
180 p = map->nodes[lp->ptr];
182 for (i = 0; i < P_L2_SIZE; i++) {
184 p[i].ptr = PHYS_SECTION_UNASSIGNED;
188 p = map->nodes[lp->ptr];
190 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
192 while (*nb && lp < &p[P_L2_SIZE]) {
193 if ((*index & (step - 1)) == 0 && *nb >= step) {
199 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
205 static void phys_page_set(AddressSpaceDispatch *d,
206 hwaddr index, hwaddr nb,
209 /* Wildly overreserve - it doesn't matter much. */
210 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
212 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
215 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
216 * and update our entry so we can skip it and go directly to the destination.
218 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
220 unsigned valid_ptr = P_L2_SIZE;
225 if (lp->ptr == PHYS_MAP_NODE_NIL) {
230 for (i = 0; i < P_L2_SIZE; i++) {
231 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
238 phys_page_compact(&p[i], nodes, compacted);
242 /* We can only compress if there's only one child. */
247 assert(valid_ptr < P_L2_SIZE);
249 /* Don't compress if it won't fit in the # of bits we have. */
250 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
254 lp->ptr = p[valid_ptr].ptr;
255 if (!p[valid_ptr].skip) {
256 /* If our only child is a leaf, make this a leaf. */
257 /* By design, we should have made this node a leaf to begin with so we
258 * should never reach here.
259 * But since it's so simple to handle this, let's do it just in case we
264 lp->skip += p[valid_ptr].skip;
268 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
270 DECLARE_BITMAP(compacted, nodes_nb);
272 if (d->phys_map.skip) {
273 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
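/*
 * Worked example of the compaction above (values illustrative): a node with
 * skip == 1 whose only valid child also has skip == 1 takes over that child's
 * ptr and ends up with skip == 2, so phys_page_find() below reaches the
 * grandchild in a single step instead of two.
 */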
277 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
278 Node *nodes, MemoryRegionSection *sections)
281 hwaddr index = addr >> TARGET_PAGE_BITS;
284 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
285 if (lp.ptr == PHYS_MAP_NODE_NIL) {
286 return &sections[PHYS_SECTION_UNASSIGNED];
289 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
292 if (sections[lp.ptr].size.hi ||
293 range_covers_byte(sections[lp.ptr].offset_within_address_space,
294 sections[lp.ptr].size.lo, addr)) {
295 return &sections[lp.ptr];
297 return &sections[PHYS_SECTION_UNASSIGNED];
301 bool memory_region_is_unassigned(MemoryRegion *mr)
303 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
304 && mr != &io_mem_watch;
307 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
309 bool resolve_subpage)
311 MemoryRegionSection *section;
314 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
315 if (resolve_subpage && section->mr->subpage) {
316 subpage = container_of(section->mr, subpage_t, iomem);
317 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
322 static MemoryRegionSection *
323 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
324 hwaddr *plen, bool resolve_subpage)
326 MemoryRegionSection *section;
329 section = address_space_lookup_region(d, addr, resolve_subpage);
330 /* Compute offset within MemoryRegionSection */
331 addr -= section->offset_within_address_space;
333 /* Compute offset within MemoryRegion */
334 *xlat = addr + section->offset_within_region;
336 diff = int128_sub(section->mr->size, int128_make64(addr));
337 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
341 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
343 if (memory_region_is_ram(mr)) {
344 return !(is_write && mr->readonly);
346 if (memory_region_is_romd(mr)) {
353 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
354 hwaddr *xlat, hwaddr *plen,
358 MemoryRegionSection *section;
363 section = address_space_translate_internal(as->dispatch, addr, &addr, plen, true);
366 if (!mr->iommu_ops) {
370 iotlb = mr->iommu_ops->translate(mr, addr);
371 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
372 | (addr & iotlb.addr_mask));
373 len = MIN(len, (addr | iotlb.addr_mask) - addr + 1);
374 if (!(iotlb.perm & (1 << is_write))) {
375 mr = &io_mem_unassigned;
379 as = iotlb.target_as;
382 if (memory_access_is_direct(mr, is_write)) {
383 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
384 len = MIN(page, len);
392 MemoryRegionSection *
393 address_space_translate_for_iotlb(AddressSpace *as, hwaddr addr, hwaddr *xlat,
396 MemoryRegionSection *section;
397 section = address_space_translate_internal(as->dispatch, addr, xlat, plen, false);
399 assert(!section->mr->iommu_ops);
404 void cpu_exec_init_all(void)
406 #if !defined(CONFIG_USER_ONLY)
407 qemu_mutex_init(&ram_list.mutex);
413 #if !defined(CONFIG_USER_ONLY)
415 static int cpu_common_post_load(void *opaque, int version_id)
417 CPUState *cpu = opaque;
419 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
420 version_id is increased. */
421 cpu->interrupt_request &= ~0x01;
422 tlb_flush(cpu->env_ptr, 1);
427 const VMStateDescription vmstate_cpu_common = {
428 .name = "cpu_common",
430 .minimum_version_id = 1,
431 .minimum_version_id_old = 1,
432 .post_load = cpu_common_post_load,
433 .fields = (VMStateField []) {
434 VMSTATE_UINT32(halted, CPUState),
435 VMSTATE_UINT32(interrupt_request, CPUState),
436 VMSTATE_END_OF_LIST()
442 CPUState *qemu_get_cpu(int index)
447 if (cpu->cpu_index == index) {
455 #if !defined(CONFIG_USER_ONLY)
456 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
458 /* We only support one address space per cpu at the moment. */
459 assert(cpu->as == as);
461 if (cpu->tcg_as_listener) {
462 memory_listener_unregister(cpu->tcg_as_listener);
464 cpu->tcg_as_listener = g_new0(MemoryListener, 1);
466 cpu->tcg_as_listener->commit = tcg_commit;
467 memory_listener_register(cpu->tcg_as_listener, as);
471 void cpu_exec_init(CPUArchState *env)
473 CPUState *cpu = ENV_GET_CPU(env);
474 CPUClass *cc = CPU_GET_CLASS(cpu);
478 #if defined(CONFIG_USER_ONLY)
482 CPU_FOREACH(some_cpu) {
485 cpu->cpu_index = cpu_index;
487 QTAILQ_INIT(&cpu->breakpoints);
488 QTAILQ_INIT(&cpu->watchpoints);
489 #ifndef CONFIG_USER_ONLY
490 cpu->as = &address_space_memory;
491 cpu->thread_id = qemu_get_thread_id();
493 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
494 #if defined(CONFIG_USER_ONLY)
497 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
498 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
500 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
501 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
502 cpu_save, cpu_load, env);
503 assert(cc->vmsd == NULL);
504 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
506 if (cc->vmsd != NULL) {
507 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
511 #if defined(TARGET_HAS_ICE)
512 #if defined(CONFIG_USER_ONLY)
513 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
515 tb_invalidate_phys_page_range(pc, pc + 1, 0);
518 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
520 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
522 tb_invalidate_phys_addr(cpu->as,
523 phys | (pc & ~TARGET_PAGE_MASK));
527 #endif /* TARGET_HAS_ICE */
529 #if defined(CONFIG_USER_ONLY)
530 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
535 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
536 int flags, CPUWatchpoint **watchpoint)
541 /* Add a watchpoint. */
542 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
543 int flags, CPUWatchpoint **watchpoint)
545 CPUState *cpu = ENV_GET_CPU(env);
546 target_ulong len_mask = ~(len - 1);
549 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
550 if ((len & (len - 1)) || (addr & ~len_mask) ||
551 len == 0 || len > TARGET_PAGE_SIZE) {
552 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
553 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
556 wp = g_malloc(sizeof(*wp));
559 wp->len_mask = len_mask;
562 /* keep all GDB-injected watchpoints in front */
563 if (flags & BP_GDB) {
564 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
566 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
569 tlb_flush_page(env, addr);
576 /* Remove a specific watchpoint. */
577 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
580 CPUState *cpu = ENV_GET_CPU(env);
581 target_ulong len_mask = ~(len - 1);
584 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
585 if (addr == wp->vaddr && len_mask == wp->len_mask
586 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
587 cpu_watchpoint_remove_by_ref(env, wp);
594 /* Remove a specific watchpoint by reference. */
595 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
597 CPUState *cpu = ENV_GET_CPU(env);
599 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
601 tlb_flush_page(env, watchpoint->vaddr);
606 /* Remove all matching watchpoints. */
607 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
609 CPUState *cpu = ENV_GET_CPU(env);
610 CPUWatchpoint *wp, *next;
612 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
613 if (wp->flags & mask)
614 cpu_watchpoint_remove_by_ref(env, wp);
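/*
 * Usage sketch for the watchpoint helpers above, assuming a valid
 * CPUArchState: the length must be a power of two and the address aligned to
 * it, or cpu_watchpoint_insert() rejects the request.  The function name is
 * hypothetical.
 */
#if 0
static void example_watch_guest_word(CPUArchState *env, target_ulong addr)
{
    CPUWatchpoint *wp;

    /* 4-byte, naturally aligned write watchpoint, queued like GDB would */
    if (cpu_watchpoint_insert(env, addr & ~(target_ulong)3, 4,
                              BP_MEM_WRITE | BP_GDB, &wp) == 0) {
        /* ... run the guest ... */
        cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif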
619 /* Add a breakpoint. */
620 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
621 CPUBreakpoint **breakpoint)
623 #if defined(TARGET_HAS_ICE)
624 CPUState *cpu = ENV_GET_CPU(env);
627 bp = g_malloc(sizeof(*bp));
632 /* keep all GDB-injected breakpoints in front */
633 if (flags & BP_GDB) {
634 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
636 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
639 breakpoint_invalidate(cpu, pc);
650 /* Remove a specific breakpoint. */
651 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
653 #if defined(TARGET_HAS_ICE)
654 CPUState *cpu = ENV_GET_CPU(env);
657 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
658 if (bp->pc == pc && bp->flags == flags) {
659 cpu_breakpoint_remove_by_ref(env, bp);
669 /* Remove a specific breakpoint by reference. */
670 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
672 #if defined(TARGET_HAS_ICE)
673 CPUState *cpu = ENV_GET_CPU(env);
675 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
677 breakpoint_invalidate(cpu, breakpoint->pc);
683 /* Remove all matching breakpoints. */
684 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
686 #if defined(TARGET_HAS_ICE)
687 CPUState *cpu = ENV_GET_CPU(env);
688 CPUBreakpoint *bp, *next;
690 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
691 if (bp->flags & mask)
692 cpu_breakpoint_remove_by_ref(env, bp);
697 /* enable or disable single step mode. EXCP_DEBUG is returned by the
698 CPU loop after each instruction */
699 void cpu_single_step(CPUState *cpu, int enabled)
701 #if defined(TARGET_HAS_ICE)
702 if (cpu->singlestep_enabled != enabled) {
703 cpu->singlestep_enabled = enabled;
705 kvm_update_guest_debug(cpu, 0);
707 /* must flush all the translated code to avoid inconsistencies */
708 /* XXX: only flush what is necessary */
709 CPUArchState *env = cpu->env_ptr;
716 void cpu_abort(CPUArchState *env, const char *fmt, ...)
718 CPUState *cpu = ENV_GET_CPU(env);
724 fprintf(stderr, "qemu: fatal: ");
725 vfprintf(stderr, fmt, ap);
726 fprintf(stderr, "\n");
727 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
728 if (qemu_log_enabled()) {
729 qemu_log("qemu: fatal: ");
730 qemu_log_vprintf(fmt, ap2);
732 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
738 #if defined(CONFIG_USER_ONLY)
740 struct sigaction act;
741 sigfillset(&act.sa_mask);
742 act.sa_handler = SIG_DFL;
743 sigaction(SIGABRT, &act, NULL);
749 #if !defined(CONFIG_USER_ONLY)
750 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
754 /* The list is protected by the iothread lock here. */
755 block = ram_list.mru_block;
756 if (block && addr - block->offset < block->length) {
759 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
760 if (addr - block->offset < block->length) {
765 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
769 ram_list.mru_block = block;
773 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
779 end = TARGET_PAGE_ALIGN(start + length);
780 start &= TARGET_PAGE_MASK;
782 block = qemu_get_ram_block(start);
783 assert(block == qemu_get_ram_block(end - 1));
784 start1 = (uintptr_t)block->host + (start - block->offset);
785 cpu_tlb_reset_dirty_all(start1, length);
788 /* Note: start and end must be within the same ram block. */
789 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
794 cpu_physical_memory_clear_dirty_range(start, length, client);
797 tlb_reset_dirty_range_all(start, length);
801 static void cpu_physical_memory_set_dirty_tracking(bool enable)
803 in_migration = enable;
806 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
807 MemoryRegionSection *section,
809 hwaddr paddr, hwaddr xlat,
811 target_ulong *address)
813 CPUState *cpu = ENV_GET_CPU(env);
817 if (memory_region_is_ram(section->mr)) {
819 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
821 if (!section->readonly) {
822 iotlb |= PHYS_SECTION_NOTDIRTY;
824 iotlb |= PHYS_SECTION_ROM;
827 iotlb = section - section->address_space->dispatch->map.sections;
831 /* Make accesses to pages with watchpoints go via the
832 watchpoint trap routines. */
833 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
834 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
835 /* Avoid trapping reads of pages with a write breakpoint. */
836 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
837 iotlb = PHYS_SECTION_WATCH + paddr;
838 *address |= TLB_MMIO;
846 #endif /* defined(CONFIG_USER_ONLY) */
848 #if !defined(CONFIG_USER_ONLY)
850 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
852 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
854 static void *(*phys_mem_alloc)(size_t size) = qemu_anon_ram_alloc;
857 * Set a custom physical guest memory allocator.
858 * Accelerators with unusual needs may need this. Hopefully, we can
859 * get rid of it eventually.
861 void phys_mem_set_alloc(void *(*alloc)(size_t))
863 phys_mem_alloc = alloc;
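/*
 * Minimal sketch of how an accelerator might install its own allocator; the
 * names "my_accel_ram_alloc" and "my_accel_init" are hypothetical, and the
 * fallback simply reuses the default anonymous allocation.
 */
#if 0
static void *my_accel_ram_alloc(size_t size)
{
    return qemu_anon_ram_alloc(size);
}

static void my_accel_init(void)
{
    /* must run before any RAM block is allocated */
    phys_mem_set_alloc(my_accel_ram_alloc);
}
#endif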
866 static uint16_t phys_section_add(PhysPageMap *map,
867 MemoryRegionSection *section)
869 /* The physical section number is ORed with a page-aligned
870 * pointer to produce the iotlb entries. Thus it should
871 * never overflow into the page-aligned value.
873 assert(map->sections_nb < TARGET_PAGE_SIZE);
875 if (map->sections_nb == map->sections_nb_alloc) {
876 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
877 map->sections = g_renew(MemoryRegionSection, map->sections,
878 map->sections_nb_alloc);
880 map->sections[map->sections_nb] = *section;
881 memory_region_ref(section->mr);
882 return map->sections_nb++;
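/*
 * Sketch of how a section index added above is later recovered from an iotlb
 * entry (compare iotlb_to_region() further down): the bits below
 * TARGET_PAGE_SIZE hold the section number, the rest the page address.  The
 * helper name is hypothetical.
 */
#if 0
static MemoryRegionSection *example_iotlb_to_section(AddressSpaceDispatch *d,
                                                     hwaddr iotlb)
{
    return &d->map.sections[iotlb & ~TARGET_PAGE_MASK];
}
#endif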
885 static void phys_section_destroy(MemoryRegion *mr)
887 memory_region_unref(mr);
890 subpage_t *subpage = container_of(mr, subpage_t, iomem);
891 memory_region_destroy(&subpage->iomem);
896 static void phys_sections_free(PhysPageMap *map)
898 while (map->sections_nb > 0) {
899 MemoryRegionSection *section = &map->sections[--map->sections_nb];
900 phys_section_destroy(section->mr);
902 g_free(map->sections);
906 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
909 hwaddr base = section->offset_within_address_space
911 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
912 d->map.nodes, d->map.sections);
913 MemoryRegionSection subsection = {
914 .offset_within_address_space = base,
915 .size = int128_make64(TARGET_PAGE_SIZE),
919 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
921 if (!(existing->mr->subpage)) {
922 subpage = subpage_init(d->as, base);
923 subsection.address_space = d->as;
924 subsection.mr = &subpage->iomem;
925 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
926 phys_section_add(&d->map, &subsection));
928 subpage = container_of(existing->mr, subpage_t, iomem);
930 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
931 end = start + int128_get64(section->size) - 1;
932 subpage_register(subpage, start, end,
933 phys_section_add(&d->map, section));
937 static void register_multipage(AddressSpaceDispatch *d,
938 MemoryRegionSection *section)
940 hwaddr start_addr = section->offset_within_address_space;
941 uint16_t section_index = phys_section_add(&d->map, section);
942 uint64_t num_pages = int128_get64(int128_rshift(section->size,
946 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
949 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
951 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
952 AddressSpaceDispatch *d = as->next_dispatch;
953 MemoryRegionSection now = *section, remain = *section;
954 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
956 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
957 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
958 - now.offset_within_address_space;
960 now.size = int128_min(int128_make64(left), now.size);
961 register_subpage(d, &now);
963 now.size = int128_zero();
965 while (int128_ne(remain.size, now.size)) {
966 remain.size = int128_sub(remain.size, now.size);
967 remain.offset_within_address_space += int128_get64(now.size);
968 remain.offset_within_region += int128_get64(now.size);
970 if (int128_lt(remain.size, page_size)) {
971 register_subpage(d, &now);
972 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
973 now.size = page_size;
974 register_subpage(d, &now);
976 now.size = int128_and(now.size, int128_neg(page_size));
977 register_multipage(d, &now);
982 void qemu_flush_coalesced_mmio_buffer(void)
985 kvm_flush_coalesced_mmio_buffer();
988 void qemu_mutex_lock_ramlist(void)
990 qemu_mutex_lock(&ram_list.mutex);
993 void qemu_mutex_unlock_ramlist(void)
995 qemu_mutex_unlock(&ram_list.mutex);
1000 #include <sys/vfs.h>
1002 #define HUGETLBFS_MAGIC 0x958458f6
1004 static long gethugepagesize(const char *path)
1010 ret = statfs(path, &fs);
1011 } while (ret != 0 && errno == EINTR);
1018 if (fs.f_type != HUGETLBFS_MAGIC)
1019 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1024 static sigjmp_buf sigjump;
1026 static void sigbus_handler(int signal)
1028 siglongjmp(sigjump, 1);
1031 static void *file_ram_alloc(RAMBlock *block,
1036 char *sanitized_name;
1040 unsigned long hpagesize;
1042 hpagesize = gethugepagesize(path);
1047 if (memory < hpagesize) {
1051 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1052 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
1056 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1057 sanitized_name = g_strdup(block->mr->name);
1058 for (c = sanitized_name; *c != '\0'; c++) {
1063 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1065 g_free(sanitized_name);
1067 fd = mkstemp(filename);
1069 perror("unable to create backing store for hugepages");
1076 memory = (memory+hpagesize-1) & ~(hpagesize-1);
1079 * ftruncate is not supported by hugetlbfs in older
1080 * hosts, so don't bother bailing out on errors.
1081 * If anything goes wrong with it under other filesystems,
1084 if (ftruncate(fd, memory))
1085 perror("ftruncate");
1087 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
1088 if (area == MAP_FAILED) {
1089 perror("file_ram_alloc: can't mmap RAM pages");
1096 struct sigaction act, oldact;
1097 sigset_t set, oldset;
1099 memset(&act, 0, sizeof(act));
1100 act.sa_handler = &sigbus_handler;
1103 ret = sigaction(SIGBUS, &act, &oldact);
1105 perror("file_ram_alloc: failed to install signal handler");
1109 /* unblock SIGBUS */
1111 sigaddset(&set, SIGBUS);
1112 pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
1114 if (sigsetjmp(sigjump, 1)) {
1115 fprintf(stderr, "file_ram_alloc: failed to preallocate pages\n");
1119 /* MAP_POPULATE silently ignores failures */
1120 for (i = 0; i < (memory/hpagesize); i++) {
1121 memset(area + (hpagesize*i), 0, 1);
1124 ret = sigaction(SIGBUS, &oldact, NULL);
1126 perror("file_ram_alloc: failed to reinstall signal handler");
1130 pthread_sigmask(SIG_SETMASK, &oldset, NULL);
1143 static void *file_ram_alloc(RAMBlock *block,
1147 fprintf(stderr, "-mem-path not supported on this host\n");
1152 static ram_addr_t find_ram_offset(ram_addr_t size)
1154 RAMBlock *block, *next_block;
1155 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1157 assert(size != 0); /* it would hand out same offset multiple times */
1159 if (QTAILQ_EMPTY(&ram_list.blocks))
1162 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1163 ram_addr_t end, next = RAM_ADDR_MAX;
1165 end = block->offset + block->length;
1167 QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
1168 if (next_block->offset >= end) {
1169 next = MIN(next, next_block->offset);
1172 if (next - end >= size && next - end < mingap) {
1174 mingap = next - end;
1178 if (offset == RAM_ADDR_MAX) {
1179 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1187 ram_addr_t last_ram_offset(void)
1190 ram_addr_t last = 0;
1192 QTAILQ_FOREACH(block, &ram_list.blocks, next)
1193 last = MAX(last, block->offset + block->length);
1198 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1202 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1203 if (!qemu_opt_get_bool(qemu_get_machine_opts(),
1204 "dump-guest-core", true)) {
1205 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1207 perror("qemu_madvise");
1208 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1209 "but dump_guest_core=off specified\n");
1214 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1216 RAMBlock *new_block, *block;
1219 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1220 if (block->offset == addr) {
1226 assert(!new_block->idstr[0]);
1229 char *id = qdev_get_dev_path(dev);
1231 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1235 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1237 /* This assumes the iothread lock is taken here too. */
1238 qemu_mutex_lock_ramlist();
1239 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1240 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1241 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1246 qemu_mutex_unlock_ramlist();
1249 static int memory_try_enable_merging(void *addr, size_t len)
1251 if (!qemu_opt_get_bool(qemu_get_machine_opts(), "mem-merge", true)) {
1252 /* disabled by the user */
1256 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1259 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1262 RAMBlock *block, *new_block;
1263 ram_addr_t old_ram_size, new_ram_size;
1265 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1267 size = TARGET_PAGE_ALIGN(size);
1268 new_block = g_malloc0(sizeof(*new_block));
1271 /* This assumes the iothread lock is taken here too. */
1272 qemu_mutex_lock_ramlist();
1274 new_block->offset = find_ram_offset(size);
1276 new_block->host = host;
1277 new_block->flags |= RAM_PREALLOC_MASK;
1278 } else if (xen_enabled()) {
1280 fprintf(stderr, "-mem-path not supported with Xen\n");
1283 xen_ram_alloc(new_block->offset, size, mr);
1286 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1288 * file_ram_alloc() needs to allocate just like
1289 * phys_mem_alloc, but we haven't bothered to provide
1293 "-mem-path not supported with this accelerator\n");
1296 new_block->host = file_ram_alloc(new_block, size, mem_path);
1298 if (!new_block->host) {
1299 new_block->host = phys_mem_alloc(size);
1300 if (!new_block->host) {
1301 fprintf(stderr, "Cannot set up guest memory '%s': %s\n",
1302 new_block->mr->name, strerror(errno));
1305 memory_try_enable_merging(new_block->host, size);
1308 new_block->length = size;
1310 /* Keep the list sorted from biggest to smallest block. */
1311 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1312 if (block->length < new_block->length) {
1317 QTAILQ_INSERT_BEFORE(block, new_block, next);
1319 QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1321 ram_list.mru_block = NULL;
1324 qemu_mutex_unlock_ramlist();
1326 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1328 if (new_ram_size > old_ram_size) {
1330 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1331 ram_list.dirty_memory[i] =
1332 bitmap_zero_extend(ram_list.dirty_memory[i],
1333 old_ram_size, new_ram_size);
1336 cpu_physical_memory_set_dirty_range(new_block->offset, size);
1338 qemu_ram_setup_dump(new_block->host, size);
1339 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1340 qemu_madvise(new_block->host, size, QEMU_MADV_DONTFORK);
1343 kvm_setup_guest_memory(new_block->host, size);
1345 return new_block->offset;
1348 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1350 return qemu_ram_alloc_from_ptr(size, NULL, mr);
1353 void qemu_ram_free_from_ptr(ram_addr_t addr)
1357 /* This assumes the iothread lock is taken here too. */
1358 qemu_mutex_lock_ramlist();
1359 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1360 if (addr == block->offset) {
1361 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1362 ram_list.mru_block = NULL;
1368 qemu_mutex_unlock_ramlist();
1371 void qemu_ram_free(ram_addr_t addr)
1375 /* This assumes the iothread lock is taken here too. */
1376 qemu_mutex_lock_ramlist();
1377 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1378 if (addr == block->offset) {
1379 QTAILQ_REMOVE(&ram_list.blocks, block, next);
1380 ram_list.mru_block = NULL;
1382 if (block->flags & RAM_PREALLOC_MASK) {
1384 } else if (xen_enabled()) {
1385 xen_invalidate_map_cache_entry(block->host);
1387 } else if (block->fd >= 0) {
1388 munmap(block->host, block->length);
1392 qemu_anon_ram_free(block->host, block->length);
1398 qemu_mutex_unlock_ramlist();
1403 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1410 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1411 offset = addr - block->offset;
1412 if (offset < block->length) {
1413 vaddr = block->host + offset;
1414 if (block->flags & RAM_PREALLOC_MASK) {
1416 } else if (xen_enabled()) {
1420 munmap(vaddr, length);
1421 if (block->fd >= 0) {
1423 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1426 flags |= MAP_PRIVATE;
1428 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1429 flags, block->fd, offset);
1432 * Remap needs to match alloc. Accelerators that
1433 * set phys_mem_alloc never remap. If they did,
1434 * we'd need a remap hook here.
1436 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1438 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1439 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1442 if (area != vaddr) {
1443 fprintf(stderr, "Could not remap addr: "
1444 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1448 memory_try_enable_merging(vaddr, length);
1449 qemu_ram_setup_dump(vaddr, length);
1455 #endif /* !_WIN32 */
1457 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1458 With the exception of the softmmu code in this file, this should
1459 only be used for local memory (e.g. video ram) that the device owns,
1460 and knows it isn't going to access beyond the end of the block.
1462 It should not be used for general purpose DMA.
1463 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1465 void *qemu_get_ram_ptr(ram_addr_t addr)
1467 RAMBlock *block = qemu_get_ram_block(addr);
1469 if (xen_enabled()) {
1470 /* We need to check if the requested address is in the RAM
1471 * because we don't want to map the entire memory in QEMU.
1472 * In that case just map until the end of the page.
1474 if (block->offset == 0) {
1475 return xen_map_cache(addr, 0, 0);
1476 } else if (block->host == NULL) {
1478 xen_map_cache(block->offset, block->length, 1);
1481 return block->host + (addr - block->offset);
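/*
 * Usage sketch for the warning above: a device may keep the raw pointer for
 * memory it owns, while general-purpose DMA should go through
 * cpu_physical_memory_rw().  "vram_base" is a hypothetical ram_addr_t
 * returned by qemu_ram_alloc().
 */
#if 0
static void example_clear_vram(ram_addr_t vram_base)
{
    uint8_t *vram = qemu_get_ram_ptr(vram_base);    /* device-local: fine */
    memset(vram, 0, TARGET_PAGE_SIZE);
}

static void example_dma_write(hwaddr guest_paddr, const uint8_t *buf, int len)
{
    /* guest-physical DMA: let the memory API handle MMIO and dirty bits */
    cpu_physical_memory_rw(guest_paddr, (uint8_t *)buf, len, 1);
}
#endif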
1484 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1485 * but takes a size argument */
1486 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1491 if (xen_enabled()) {
1492 return xen_map_cache(addr, *size, 1);
1496 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1497 if (addr - block->offset < block->length) {
1498 if (addr - block->offset + *size > block->length)
1499 *size = block->length - addr + block->offset;
1500 return block->host + (addr - block->offset);
1504 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1509 /* Some of the softmmu routines need to translate from a host pointer
1510 (typically a TLB entry) back to a ram offset. */
1511 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1514 uint8_t *host = ptr;
1516 if (xen_enabled()) {
1517 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1518 return qemu_get_ram_block(*ram_addr)->mr;
1521 block = ram_list.mru_block;
1522 if (block && block->host && host - block->host < block->length) {
1526 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1527 /* This case appears when the block is not mapped. */
1528 if (block->host == NULL) {
1531 if (host - block->host < block->length) {
1539 *ram_addr = block->offset + (host - block->host);
1543 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1544 uint64_t val, unsigned size)
1546 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1547 tb_invalidate_phys_page_fast(ram_addr, size);
1551 stb_p(qemu_get_ram_ptr(ram_addr), val);
1554 stw_p(qemu_get_ram_ptr(ram_addr), val);
1557 stl_p(qemu_get_ram_ptr(ram_addr), val);
1562 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
1563 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
1564 /* we remove the notdirty callback only if the code has been
1566 if (!cpu_physical_memory_is_clean(ram_addr)) {
1567 CPUArchState *env = current_cpu->env_ptr;
1568 tlb_set_dirty(env, current_cpu->mem_io_vaddr);
1572 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1573 unsigned size, bool is_write)
1578 static const MemoryRegionOps notdirty_mem_ops = {
1579 .write = notdirty_mem_write,
1580 .valid.accepts = notdirty_mem_accepts,
1581 .endianness = DEVICE_NATIVE_ENDIAN,
1584 /* Generate a debug exception if a watchpoint has been hit. */
1585 static void check_watchpoint(int offset, int len_mask, int flags)
1587 CPUState *cpu = current_cpu;
1588 CPUArchState *env = cpu->env_ptr;
1589 target_ulong pc, cs_base;
1594 if (cpu->watchpoint_hit) {
1595 /* We re-entered the check after replacing the TB. Now raise
1596 * the debug interrupt so that it will trigger after the
1597 * current instruction. */
1598 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1601 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1602 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1603 if ((vaddr == (wp->vaddr & len_mask) ||
1604 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1605 wp->flags |= BP_WATCHPOINT_HIT;
1606 if (!cpu->watchpoint_hit) {
1607 cpu->watchpoint_hit = wp;
1608 tb_check_watchpoint(env);
1609 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1610 cpu->exception_index = EXCP_DEBUG;
1613 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1614 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1615 cpu_resume_from_signal(env, NULL);
1619 wp->flags &= ~BP_WATCHPOINT_HIT;
1624 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
1625 so these check for a hit then pass through to the normal out-of-line
1627 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1630 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1632 case 1: return ldub_phys(&address_space_memory, addr);
1633 case 2: return lduw_phys(&address_space_memory, addr);
1634 case 4: return ldl_phys(&address_space_memory, addr);
1639 static void watch_mem_write(void *opaque, hwaddr addr,
1640 uint64_t val, unsigned size)
1642 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1645 stb_phys(&address_space_memory, addr, val);
1648 stw_phys(&address_space_memory, addr, val);
1651 stl_phys(&address_space_memory, addr, val);
1657 static const MemoryRegionOps watch_mem_ops = {
1658 .read = watch_mem_read,
1659 .write = watch_mem_write,
1660 .endianness = DEVICE_NATIVE_ENDIAN,
1663 static uint64_t subpage_read(void *opaque, hwaddr addr,
1666 subpage_t *subpage = opaque;
1669 #if defined(DEBUG_SUBPAGE)
1670 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
1671 subpage, len, addr);
1673 address_space_read(subpage->as, addr + subpage->base, buf, len);
1686 static void subpage_write(void *opaque, hwaddr addr,
1687 uint64_t value, unsigned len)
1689 subpage_t *subpage = opaque;
1692 #if defined(DEBUG_SUBPAGE)
1693 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
1694 " value %"PRIx64"\n",
1695 __func__, subpage, len, addr, value);
1710 address_space_write(subpage->as, addr + subpage->base, buf, len);
1713 static bool subpage_accepts(void *opaque, hwaddr addr,
1714 unsigned len, bool is_write)
1716 subpage_t *subpage = opaque;
1717 #if defined(DEBUG_SUBPAGE)
1718 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
1719 __func__, subpage, is_write ? 'w' : 'r', len, addr);
1722 return address_space_access_valid(subpage->as, addr + subpage->base,
1726 static const MemoryRegionOps subpage_ops = {
1727 .read = subpage_read,
1728 .write = subpage_write,
1729 .valid.accepts = subpage_accepts,
1730 .endianness = DEVICE_NATIVE_ENDIAN,
1733 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1738 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1740 idx = SUBPAGE_IDX(start);
1741 eidx = SUBPAGE_IDX(end);
1742 #if defined(DEBUG_SUBPAGE)
1743 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1744 __func__, mmio, start, end, idx, eidx, section);
1746 for (; idx <= eidx; idx++) {
1747 mmio->sub_section[idx] = section;
1753 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
1757 mmio = g_malloc0(sizeof(subpage_t));
1761 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
1762 "subpage", TARGET_PAGE_SIZE);
1763 mmio->iomem.subpage = true;
1764 #if defined(DEBUG_SUBPAGE)
1765 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1766 mmio, base, TARGET_PAGE_SIZE);
1768 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
1773 static uint16_t dummy_section(PhysPageMap *map, MemoryRegion *mr)
1775 MemoryRegionSection section = {
1776 .address_space = &address_space_memory,
1778 .offset_within_address_space = 0,
1779 .offset_within_region = 0,
1780 .size = int128_2_64(),
1783 return phys_section_add(map, &section);
1786 MemoryRegion *iotlb_to_region(AddressSpace *as, hwaddr index)
1788 return as->dispatch->map.sections[index & ~TARGET_PAGE_MASK].mr;
1791 static void io_mem_init(void)
1793 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, "rom", UINT64_MAX);
1794 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
1795 "unassigned", UINT64_MAX);
1796 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
1797 "notdirty", UINT64_MAX);
1798 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
1799 "watch", UINT64_MAX);
1802 static void mem_begin(MemoryListener *listener)
1804 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1805 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
1808 n = dummy_section(&d->map, &io_mem_unassigned);
1809 assert(n == PHYS_SECTION_UNASSIGNED);
1810 n = dummy_section(&d->map, &io_mem_notdirty);
1811 assert(n == PHYS_SECTION_NOTDIRTY);
1812 n = dummy_section(&d->map, &io_mem_rom);
1813 assert(n == PHYS_SECTION_ROM);
1814 n = dummy_section(&d->map, &io_mem_watch);
1815 assert(n == PHYS_SECTION_WATCH);
1817 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
1819 as->next_dispatch = d;
1822 static void mem_commit(MemoryListener *listener)
1824 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1825 AddressSpaceDispatch *cur = as->dispatch;
1826 AddressSpaceDispatch *next = as->next_dispatch;
1828 phys_page_compact_all(next, next->map.nodes_nb);
1830 as->dispatch = next;
1833 phys_sections_free(&cur->map);
1838 static void tcg_commit(MemoryListener *listener)
1842 /* since each CPU stores ram addresses in its TLB cache, we must
1843 reset the modified entries */
1846 CPUArchState *env = cpu->env_ptr;
1848 /* FIXME: Disentangle the cpu.h circular files deps so we can
1849 directly get the right CPU from listener. */
1850 if (cpu->tcg_as_listener != listener) {
1857 static void core_log_global_start(MemoryListener *listener)
1859 cpu_physical_memory_set_dirty_tracking(true);
1862 static void core_log_global_stop(MemoryListener *listener)
1864 cpu_physical_memory_set_dirty_tracking(false);
1867 static MemoryListener core_memory_listener = {
1868 .log_global_start = core_log_global_start,
1869 .log_global_stop = core_log_global_stop,
1873 void address_space_init_dispatch(AddressSpace *as)
1875 as->dispatch = NULL;
1876 as->dispatch_listener = (MemoryListener) {
1878 .commit = mem_commit,
1879 .region_add = mem_add,
1880 .region_nop = mem_add,
1883 memory_listener_register(&as->dispatch_listener, as);
1886 void address_space_destroy_dispatch(AddressSpace *as)
1888 AddressSpaceDispatch *d = as->dispatch;
1890 memory_listener_unregister(&as->dispatch_listener);
1892 as->dispatch = NULL;
1895 static void memory_map_init(void)
1897 system_memory = g_malloc(sizeof(*system_memory));
1899 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
1900 address_space_init(&address_space_memory, system_memory, "memory");
1902 system_io = g_malloc(sizeof(*system_io));
1903 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
1905 address_space_init(&address_space_io, system_io, "I/O");
1907 memory_listener_register(&core_memory_listener, &address_space_memory);
1910 MemoryRegion *get_system_memory(void)
1912 return system_memory;
1915 MemoryRegion *get_system_io(void)
1920 #endif /* !defined(CONFIG_USER_ONLY) */
1922 /* physical memory access (slow version, mainly for debug) */
1923 #if defined(CONFIG_USER_ONLY)
1924 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
1925 uint8_t *buf, int len, int is_write)
1932 page = addr & TARGET_PAGE_MASK;
1933 l = (page + TARGET_PAGE_SIZE) - addr;
1936 flags = page_get_flags(page);
1937 if (!(flags & PAGE_VALID))
1940 if (!(flags & PAGE_WRITE))
1942 /* XXX: this code should not depend on lock_user */
1943 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1946 unlock_user(p, addr, l);
1948 if (!(flags & PAGE_READ))
1950 /* XXX: this code should not depend on lock_user */
1951 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1954 unlock_user(p, addr, 0);
1965 static void invalidate_and_set_dirty(hwaddr addr,
1968 if (cpu_physical_memory_is_clean(addr)) {
1969 /* invalidate code */
1970 tb_invalidate_phys_page_range(addr, addr + length, 0);
1972 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
1973 cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
1975 xen_modified_memory(addr, length);
1978 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
1980 unsigned access_size_max = mr->ops->valid.max_access_size;
1982 /* Regions are assumed to support 1-4 byte accesses unless
1983 otherwise specified. */
1984 if (access_size_max == 0) {
1985 access_size_max = 4;
1988 /* Bound the maximum access by the alignment of the address. */
1989 if (!mr->ops->impl.unaligned) {
1990 unsigned align_size_max = addr & -addr;
1991 if (align_size_max != 0 && align_size_max < access_size_max) {
1992 access_size_max = align_size_max;
1996 /* Don't attempt accesses larger than the maximum. */
1997 if (l > access_size_max) {
1998 l = access_size_max;
2001 l = 1 << (qemu_fls(l) - 1);
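/*
 * Worked example of the bounding above (values illustrative): for l = 6 at
 * addr = 0x1002 against a region with valid.max_access_size == 4, the address
 * alignment clamps access_size_max to 2, l is capped to 2, and
 * qemu_fls(2) - 1 == 1 keeps it a power of two, so the caller below loops and
 * finishes the remaining bytes with further accesses.
 */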
2007 bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
2008 int len, bool is_write)
2019 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2022 if (!memory_access_is_direct(mr, is_write)) {
2023 l = memory_access_size(mr, l, addr1);
2024 /* XXX: could force current_cpu to NULL to avoid
2028 /* 64 bit write access */
2030 error |= io_mem_write(mr, addr1, val, 8);
2033 /* 32 bit write access */
2035 error |= io_mem_write(mr, addr1, val, 4);
2038 /* 16 bit write access */
2040 error |= io_mem_write(mr, addr1, val, 2);
2043 /* 8 bit write access */
2045 error |= io_mem_write(mr, addr1, val, 1);
2051 addr1 += memory_region_get_ram_addr(mr);
2053 ptr = qemu_get_ram_ptr(addr1);
2054 memcpy(ptr, buf, l);
2055 invalidate_and_set_dirty(addr1, l);
2058 if (!memory_access_is_direct(mr, is_write)) {
2060 l = memory_access_size(mr, l, addr1);
2063 /* 64 bit read access */
2064 error |= io_mem_read(mr, addr1, &val, 8);
2068 /* 32 bit read access */
2069 error |= io_mem_read(mr, addr1, &val, 4);
2073 /* 16 bit read access */
2074 error |= io_mem_read(mr, addr1, &val, 2);
2078 /* 8 bit read access */
2079 error |= io_mem_read(mr, addr1, &val, 1);
2087 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2088 memcpy(buf, ptr, l);
2099 bool address_space_write(AddressSpace *as, hwaddr addr,
2100 const uint8_t *buf, int len)
2102 return address_space_rw(as, addr, (uint8_t *)buf, len, true);
2105 bool address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
2107 return address_space_rw(as, addr, buf, len, false);
2111 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2112 int len, int is_write)
2114 address_space_rw(&address_space_memory, addr, buf, len, is_write);
2117 enum write_rom_type {
2122 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2123 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2132 mr = address_space_translate(as, addr, &addr1, &l, true);
2134 if (!(memory_region_is_ram(mr) ||
2135 memory_region_is_romd(mr))) {
2138 addr1 += memory_region_get_ram_addr(mr);
2140 ptr = qemu_get_ram_ptr(addr1);
2143 memcpy(ptr, buf, l);
2144 invalidate_and_set_dirty(addr1, l);
2147 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2157 /* used for ROM loading : can write in RAM and ROM */
2158 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2159 const uint8_t *buf, int len)
2161 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2164 void cpu_flush_icache_range(hwaddr start, int len)
2167 * This function should do the same thing as an icache flush that was
2168 * triggered from within the guest. For TCG we are always cache coherent,
2169 * so there is no need to flush anything. For KVM / Xen we need to flush
2170 * the host's instruction cache at least.
2172 if (tcg_enabled()) {
2176 cpu_physical_memory_write_rom_internal(&address_space_memory,
2177 start, NULL, len, FLUSH_CACHE);
2187 static BounceBuffer bounce;
2189 typedef struct MapClient {
2191 void (*callback)(void *opaque);
2192 QLIST_ENTRY(MapClient) link;
2195 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2196 = QLIST_HEAD_INITIALIZER(map_client_list);
2198 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2200 MapClient *client = g_malloc(sizeof(*client));
2202 client->opaque = opaque;
2203 client->callback = callback;
2204 QLIST_INSERT_HEAD(&map_client_list, client, link);
2208 static void cpu_unregister_map_client(void *_client)
2210 MapClient *client = (MapClient *)_client;
2212 QLIST_REMOVE(client, link);
2216 static void cpu_notify_map_clients(void)
2220 while (!QLIST_EMPTY(&map_client_list)) {
2221 client = QLIST_FIRST(&map_client_list);
2222 client->callback(client->opaque);
2223 cpu_unregister_map_client(client);
2227 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2234 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2235 if (!memory_access_is_direct(mr, is_write)) {
2236 l = memory_access_size(mr, l, addr);
2237 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2248 /* Map a physical memory region into a host virtual address.
2249 * May map a subset of the requested range, given by and returned in *plen.
2250 * May return NULL if resources needed to perform the mapping are exhausted.
2251 * Use only for reads OR writes - not for read-modify-write operations.
2252 * Use cpu_register_map_client() to know when retrying the map operation is
2253 * likely to succeed.
2255 void *address_space_map(AddressSpace *as,
2262 hwaddr l, xlat, base;
2263 MemoryRegion *mr, *this_mr;
2271 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2272 if (!memory_access_is_direct(mr, is_write)) {
2273 if (bounce.buffer) {
2276 /* Avoid unbounded allocations */
2277 l = MIN(l, TARGET_PAGE_SIZE);
2278 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2282 memory_region_ref(mr);
2285 address_space_read(as, addr, bounce.buffer, l);
2289 return bounce.buffer;
2293 raddr = memory_region_get_ram_addr(mr);
2304 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2305 if (this_mr != mr || xlat != base + done) {
2310 memory_region_ref(mr);
2312 return qemu_ram_ptr_length(raddr + base, plen);
2315 /* Unmaps a memory region previously mapped by address_space_map().
2316 * Will also mark the memory as dirty if is_write == 1. access_len gives
2317 * the amount of memory that was actually read or written by the caller.
2319 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2320 int is_write, hwaddr access_len)
2322 if (buffer != bounce.buffer) {
2326 mr = qemu_ram_addr_from_host(buffer, &addr1);
2329 while (access_len) {
2331 l = TARGET_PAGE_SIZE;
2334 invalidate_and_set_dirty(addr1, l);
2339 if (xen_enabled()) {
2340 xen_invalidate_map_cache_entry(buffer);
2342 memory_region_unref(mr);
2346 address_space_write(as, bounce.addr, bounce.buffer, access_len);
2348 qemu_vfree(bounce.buffer);
2349 bounce.buffer = NULL;
2350 memory_region_unref(bounce.mr);
2351 cpu_notify_map_clients();
2354 void *cpu_physical_memory_map(hwaddr addr,
2358 return address_space_map(&address_space_memory, addr, plen, is_write);
2361 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2362 int is_write, hwaddr access_len)
2364 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
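/*
 * Usage sketch for the map/unmap pair above, assuming a caller that wants to
 * read guest-physical memory directly and falls back to
 * cpu_physical_memory_rw() when no direct mapping is available.  The function
 * name is hypothetical.
 */
#if 0
static void example_read_guest(hwaddr guest_paddr, uint8_t *dst, hwaddr len)
{
    hwaddr plen = len;
    void *p = cpu_physical_memory_map(guest_paddr, &plen, 0);

    if (p) {
        memcpy(dst, p, plen);                  /* plen may be less than len */
        cpu_physical_memory_unmap(p, plen, 0, plen);
    } else {
        cpu_physical_memory_rw(guest_paddr, dst, len, 0);
    }
}
#endif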
2367 /* warning: addr must be aligned */
2368 static inline uint32_t ldl_phys_internal(AddressSpace *as, hwaddr addr,
2369 enum device_endian endian)
2377 mr = address_space_translate(as, addr, &addr1, &l, false);
2378 if (l < 4 || !memory_access_is_direct(mr, false)) {
2380 io_mem_read(mr, addr1, &val, 4);
2381 #if defined(TARGET_WORDS_BIGENDIAN)
2382 if (endian == DEVICE_LITTLE_ENDIAN) {
2386 if (endian == DEVICE_BIG_ENDIAN) {
2392 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2396 case DEVICE_LITTLE_ENDIAN:
2397 val = ldl_le_p(ptr);
2399 case DEVICE_BIG_ENDIAN:
2400 val = ldl_be_p(ptr);
2410 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2412 return ldl_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2415 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2417 return ldl_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2420 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2422 return ldl_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2425 /* warning: addr must be aligned */
2426 static inline uint64_t ldq_phys_internal(AddressSpace *as, hwaddr addr,
2427 enum device_endian endian)
2435 mr = address_space_translate(as, addr, &addr1, &l,
2437 if (l < 8 || !memory_access_is_direct(mr, false)) {
2439 io_mem_read(mr, addr1, &val, 8);
2440 #if defined(TARGET_WORDS_BIGENDIAN)
2441 if (endian == DEVICE_LITTLE_ENDIAN) {
2445 if (endian == DEVICE_BIG_ENDIAN) {
2451 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2455 case DEVICE_LITTLE_ENDIAN:
2456 val = ldq_le_p(ptr);
2458 case DEVICE_BIG_ENDIAN:
2459 val = ldq_be_p(ptr);
2469 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
2471 return ldq_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2474 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
2476 return ldq_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2479 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
2481 return ldq_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2485 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
2488 address_space_rw(as, addr, &val, 1, 0);
2492 /* warning: addr must be aligned */
2493 static inline uint32_t lduw_phys_internal(AddressSpace *as, hwaddr addr,
2494 enum device_endian endian)
2502 mr = address_space_translate(as, addr, &addr1, &l,
2504 if (l < 2 || !memory_access_is_direct(mr, false)) {
2506 io_mem_read(mr, addr1, &val, 2);
2507 #if defined(TARGET_WORDS_BIGENDIAN)
2508 if (endian == DEVICE_LITTLE_ENDIAN) {
2512 if (endian == DEVICE_BIG_ENDIAN) {
2518 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2522 case DEVICE_LITTLE_ENDIAN:
2523 val = lduw_le_p(ptr);
2525 case DEVICE_BIG_ENDIAN:
2526 val = lduw_be_p(ptr);
2536 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
2538 return lduw_phys_internal(as, addr, DEVICE_NATIVE_ENDIAN);
2541 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
2543 return lduw_phys_internal(as, addr, DEVICE_LITTLE_ENDIAN);
2546 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
2548 return lduw_phys_internal(as, addr, DEVICE_BIG_ENDIAN);
2551 /* warning: addr must be aligned. The ram page is not masked as dirty
2552 and the code inside is not invalidated. It is useful if the dirty
2553 bits are used to track modified PTEs */
2554 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
2561 mr = address_space_translate(as, addr, &addr1, &l,
2563 if (l < 4 || !memory_access_is_direct(mr, true)) {
2564 io_mem_write(mr, addr1, val, 4);
2566 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2567 ptr = qemu_get_ram_ptr(addr1);
2570 if (unlikely(in_migration)) {
2571 if (cpu_physical_memory_is_clean(addr1)) {
2572 /* invalidate code */
2573 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2575 cpu_physical_memory_set_dirty_flag(addr1,
2576 DIRTY_MEMORY_MIGRATION);
2577 cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
2583 /* warning: addr must be aligned */
2584 static inline void stl_phys_internal(AddressSpace *as,
2585 hwaddr addr, uint32_t val,
2586 enum device_endian endian)
2593 mr = address_space_translate(as, addr, &addr1, &l,
2595 if (l < 4 || !memory_access_is_direct(mr, true)) {
2596 #if defined(TARGET_WORDS_BIGENDIAN)
2597 if (endian == DEVICE_LITTLE_ENDIAN) {
2601 if (endian == DEVICE_BIG_ENDIAN) {
2605 io_mem_write(mr, addr1, val, 4);
2608 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2609 ptr = qemu_get_ram_ptr(addr1);
2611 case DEVICE_LITTLE_ENDIAN:
2614 case DEVICE_BIG_ENDIAN:
2621 invalidate_and_set_dirty(addr1, 4);
2625 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2627 stl_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2630 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2632 stl_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2635 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2637 stl_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2641 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2644 address_space_rw(as, addr, &v, 1, 1);
2647 /* warning: addr must be aligned */
2648 static inline void stw_phys_internal(AddressSpace *as,
2649 hwaddr addr, uint32_t val,
2650 enum device_endian endian)
2657 mr = address_space_translate(as, addr, &addr1, &l, true);
2658 if (l < 2 || !memory_access_is_direct(mr, true)) {
2659 #if defined(TARGET_WORDS_BIGENDIAN)
2660 if (endian == DEVICE_LITTLE_ENDIAN) {
2664 if (endian == DEVICE_BIG_ENDIAN) {
2668 io_mem_write(mr, addr1, val, 2);
2671 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
2672 ptr = qemu_get_ram_ptr(addr1);
2674 case DEVICE_LITTLE_ENDIAN:
2677 case DEVICE_BIG_ENDIAN:
2684 invalidate_and_set_dirty(addr1, 2);
2688 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2690 stw_phys_internal(as, addr, val, DEVICE_NATIVE_ENDIAN);
2693 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2695 stw_phys_internal(as, addr, val, DEVICE_LITTLE_ENDIAN);
2698 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
2700 stw_phys_internal(as, addr, val, DEVICE_BIG_ENDIAN);
2704 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2707 address_space_rw(as, addr, (void *) &val, 8, 1);
2710 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2712 val = cpu_to_le64(val);
2713 address_space_rw(as, addr, (void *) &val, 8, 1);
2716 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
2718 val = cpu_to_be64(val);
2719 address_space_rw(as, addr, (void *) &val, 8, 1);
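/*
 * Small usage sketch for the ld/st helpers above; the function name is
 * hypothetical and the access is assumed to hit RAM.
 */
#if 0
static uint32_t example_patch_guest_u32(hwaddr guest_paddr, uint32_t value)
{
    stl_phys(&address_space_memory, guest_paddr, value);
    return ldl_phys(&address_space_memory, guest_paddr);
}
#endif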
2722 /* virtual memory access for debug (includes writing to ROM) */
2723 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2724 uint8_t *buf, int len, int is_write)
2731 page = addr & TARGET_PAGE_MASK;
2732 phys_addr = cpu_get_phys_page_debug(cpu, page);
2733 /* if no physical page mapped, return an error */
2734 if (phys_addr == -1)
2736 l = (page + TARGET_PAGE_SIZE) - addr;
2739 phys_addr += (addr & ~TARGET_PAGE_MASK);
2741 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
2743 address_space_rw(cpu->as, phys_addr, buf, l, 0);
2753 #if !defined(CONFIG_USER_ONLY)
2756 * A helper function for the _utterly broken_ virtio device model to find out if
2757 * it's running on a big endian machine. Don't do this at home kids!
2759 bool virtio_is_big_endian(void);
2760 bool virtio_is_big_endian(void)
2762 #if defined(TARGET_WORDS_BIGENDIAN)
2771 #ifndef CONFIG_USER_ONLY
2772 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2777 mr = address_space_translate(&address_space_memory,
2778 phys_addr, &phys_addr, &l, false);
2780 return !(memory_region_is_ram(mr) ||
2781 memory_region_is_romd(mr));
2784 void qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
2788 QTAILQ_FOREACH(block, &ram_list.blocks, next) {
2789 func(block->host, block->offset, block->length, opaque);
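/*
 * Usage sketch for qemu_ram_foreach_block() above; the callback and the
 * helper name are hypothetical.
 */
#if 0
static void example_sum_block(void *host_addr, ram_addr_t offset,
                              ram_addr_t length, void *opaque)
{
    ram_addr_t *total = opaque;

    *total += length;
}

static ram_addr_t example_total_ram(void)
{
    ram_addr_t total = 0;

    qemu_ram_foreach_block(example_sum_block, &total);
    return total;
}
#endif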