4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19 #include "qemu/osdep.h"
20 #include "qapi/error.h"
24 #include "qemu/cutils.h"
26 #include "exec/exec-all.h"
28 #include "hw/qdev-core.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
31 #include "hw/xen/xen.h"
33 #include "sysemu/kvm.h"
34 #include "sysemu/sysemu.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "qemu/error-report.h"
38 #if defined(CONFIG_USER_ONLY)
40 #else /* !CONFIG_USER_ONLY */
42 #include "exec/memory.h"
43 #include "exec/ioport.h"
44 #include "sysemu/dma.h"
45 #include "exec/address-spaces.h"
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
59 #include "migration/vmstate.h"
61 #include "qemu/range.h"
63 #include "qemu/mmap-alloc.h"
66 //#define DEBUG_SUBPAGE
68 #if !defined(CONFIG_USER_ONLY)
69 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
70 * are protected by the ramlist lock.
72 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
74 static MemoryRegion *system_memory;
75 static MemoryRegion *system_io;
77 AddressSpace address_space_io;
78 AddressSpace address_space_memory;
80 MemoryRegion io_mem_rom, io_mem_notdirty;
81 static MemoryRegion io_mem_unassigned;
83 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
84 #define RAM_PREALLOC (1 << 0)
86 /* RAM is mmap-ed with MAP_SHARED */
87 #define RAM_SHARED (1 << 1)
89 /* Only a portion of RAM (used_length) is actually used, and migrated.
90 * This used_length size can change across reboots.
92 #define RAM_RESIZEABLE (1 << 2)
96 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
97 /* current CPU in the current thread. It is only valid inside
99 __thread CPUState *current_cpu;
100 /* 0 = Do not count executed instructions.
101 1 = Precise instruction counting.
102 2 = Adaptive rate instruction counting. */
105 #if !defined(CONFIG_USER_ONLY)
107 typedef struct PhysPageEntry PhysPageEntry;
109 struct PhysPageEntry {
110 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
112 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
116 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
118 /* Size of the L2 (and L3, etc) page tables. */
119 #define ADDR_SPACE_BITS 64
122 #define P_L2_SIZE (1 << P_L2_BITS)
124 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
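/* Worked example: with a 4 KiB target page (TARGET_PAGE_BITS == 12) and the
 * usual P_L2_BITS of 9, this gives (64 - 12 - 1) / 9 + 1 = 6 levels, i.e. the
 * physical map is a six-level radix tree of 512-entry nodes.
 */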
126 typedef PhysPageEntry Node[P_L2_SIZE];
128 typedef struct PhysPageMap {
131 unsigned sections_nb;
132 unsigned sections_nb_alloc;
134 unsigned nodes_nb_alloc;
136 MemoryRegionSection *sections;
139 struct AddressSpaceDispatch {
142 MemoryRegionSection *mru_section;
143 /* This is a multi-level map on the physical address space.
144 * The bottom level has pointers to MemoryRegionSections.
146 PhysPageEntry phys_map;
151 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
152 typedef struct subpage_t {
156 uint16_t sub_section[TARGET_PAGE_SIZE];
159 #define PHYS_SECTION_UNASSIGNED 0
160 #define PHYS_SECTION_NOTDIRTY 1
161 #define PHYS_SECTION_ROM 2
162 #define PHYS_SECTION_WATCH 3
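/* These fixed indexes must match the order in which mem_begin() registers its
 * dummy sections; the asserts there guard that correspondence, because iotlb
 * entries encode the section index directly.
 */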
164 static void io_mem_init(void);
165 static void memory_map_init(void);
166 static void tcg_commit(MemoryListener *listener);
168 static MemoryRegion io_mem_watch;
171 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
172 * @cpu: the CPU whose AddressSpace this is
173 * @as: the AddressSpace itself
174 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
175 * @tcg_as_listener: listener for tracking changes to the AddressSpace
177 struct CPUAddressSpace {
180 struct AddressSpaceDispatch *memory_dispatch;
181 MemoryListener tcg_as_listener;
186 #if !defined(CONFIG_USER_ONLY)
188 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
190 static unsigned alloc_hint = 16;
191 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
192 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, alloc_hint);
193 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
194 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 alloc_hint = map->nodes_nb_alloc;
199 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
206 ret = map->nodes_nb++;
208 assert(ret != PHYS_MAP_NODE_NIL);
209 assert(ret != map->nodes_nb_alloc);
211 e.skip = leaf ? 0 : 1;
212 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
213 for (i = 0; i < P_L2_SIZE; ++i) {
214 memcpy(&p[i], &e, sizeof(e));
219 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
220 hwaddr *index, hwaddr *nb, uint16_t leaf,
224 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
226 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
227 lp->ptr = phys_map_node_alloc(map, level == 0);
229 p = map->nodes[lp->ptr];
230 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
232 while (*nb && lp < &p[P_L2_SIZE]) {
233 if ((*index & (step - 1)) == 0 && *nb >= step) {
239 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
245 static void phys_page_set(AddressSpaceDispatch *d,
246 hwaddr index, hwaddr nb,
249 /* Wildly overreserve - it doesn't matter much. */
250 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
252 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
255 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
256 * and update our entry so we can skip it and go directly to the destination.
258 static void phys_page_compact(PhysPageEntry *lp, Node *nodes)
260 unsigned valid_ptr = P_L2_SIZE;
265 if (lp->ptr == PHYS_MAP_NODE_NIL) {
270 for (i = 0; i < P_L2_SIZE; i++) {
271 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
278 phys_page_compact(&p[i], nodes);
282 /* We can only compress if there's only one child. */
287 assert(valid_ptr < P_L2_SIZE);
289 /* Don't compress if it won't fit in the # of bits we have. */
290 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
294 lp->ptr = p[valid_ptr].ptr;
295 if (!p[valid_ptr].skip) {
296 /* If our only child is a leaf, make this a leaf. */
297 /* By design, we should have made this node a leaf to begin with so we
298 * should never reach here.
299 * But since it's so simple to handle this, let's do it just in case we
304 lp->skip += p[valid_ptr].skip;
308 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
310 if (d->phys_map.skip) {
311 phys_page_compact(&d->phys_map, d->map.nodes);
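/* Compaction runs once per dispatch rebuild (called from mem_commit()):
 * chains of single-child nodes are folded into their parent by accumulating
 * 'skip', so phys_page_find() needs fewer hops per lookup.
 */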
315 static inline bool section_covers_addr(const MemoryRegionSection *section,
318 /* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
319 * the section must cover the entire address space.
321 return section->size.hi ||
322 range_covers_byte(section->offset_within_address_space,
323 section->size.lo, addr);
326 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
327 Node *nodes, MemoryRegionSection *sections)
330 hwaddr index = addr >> TARGET_PAGE_BITS;
333 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
334 if (lp.ptr == PHYS_MAP_NODE_NIL) {
335 return &sections[PHYS_SECTION_UNASSIGNED];
338 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
341 if (section_covers_addr(&sections[lp.ptr], addr)) {
342 return &sections[lp.ptr];
344 return &sections[PHYS_SECTION_UNASSIGNED];
348 bool memory_region_is_unassigned(MemoryRegion *mr)
350 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
351 && mr != &io_mem_watch;
354 /* Called from RCU critical section */
355 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
357 bool resolve_subpage)
359 MemoryRegionSection *section = atomic_read(&d->mru_section);
363 if (section && section != &d->map.sections[PHYS_SECTION_UNASSIGNED] &&
364 section_covers_addr(section, addr)) {
367 section = phys_page_find(d->phys_map, addr, d->map.nodes,
371 if (resolve_subpage && section->mr->subpage) {
372 subpage = container_of(section->mr, subpage_t, iomem);
373 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
376 atomic_set(&d->mru_section, section);
381 /* Called from RCU critical section */
382 static MemoryRegionSection *
383 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
384 hwaddr *plen, bool resolve_subpage)
386 MemoryRegionSection *section;
390 section = address_space_lookup_region(d, addr, resolve_subpage);
391 /* Compute offset within MemoryRegionSection */
392 addr -= section->offset_within_address_space;
394 /* Compute offset within MemoryRegion */
395 *xlat = addr + section->offset_within_region;
399 /* MMIO registers can be expected to perform full-width accesses based only
400 * on their address, without considering adjacent registers that could
401 * decode to completely different MemoryRegions. When such registers
402 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
403 * regions overlap wildly. For this reason we cannot clamp the accesses
406 * If the length is small (as is the case for address_space_ldl/stl),
407 * everything works fine. If the incoming length is large, however,
408 * the caller really has to do the clamping through memory_access_size.
410 if (memory_region_is_ram(mr)) {
411 diff = int128_sub(section->size, int128_make64(addr));
412 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
417 /* Called from RCU critical section */
418 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
419 hwaddr *xlat, hwaddr *plen,
423 MemoryRegionSection *section;
427 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
428 section = address_space_translate_internal(d, addr, &addr, plen, true);
431 if (!mr->iommu_ops) {
435 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
436 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
437 | (addr & iotlb.addr_mask));
438 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
439 if (!(iotlb.perm & (1 << is_write))) {
440 mr = &io_mem_unassigned;
444 as = iotlb.target_as;
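/* The enclosing loop keeps translating in the IOMMU's target address space
 * until a region without iommu_ops is reached, so chained IOMMUs are walked
 * on every access.
 */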
447 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
448 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
449 *plen = MIN(page, *plen);
456 /* Called from RCU critical section */
457 MemoryRegionSection *
458 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
459 hwaddr *xlat, hwaddr *plen)
461 MemoryRegionSection *section;
462 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
464 section = address_space_translate_internal(d, addr, xlat, plen, false);
466 assert(!section->mr->iommu_ops);
471 #if !defined(CONFIG_USER_ONLY)
473 static int cpu_common_post_load(void *opaque, int version_id)
475 CPUState *cpu = opaque;
477 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
478 version_id is increased. */
479 cpu->interrupt_request &= ~0x01;
485 static int cpu_common_pre_load(void *opaque)
487 CPUState *cpu = opaque;
489 cpu->exception_index = -1;
494 static bool cpu_common_exception_index_needed(void *opaque)
496 CPUState *cpu = opaque;
498 return tcg_enabled() && cpu->exception_index != -1;
501 static const VMStateDescription vmstate_cpu_common_exception_index = {
502 .name = "cpu_common/exception_index",
504 .minimum_version_id = 1,
505 .needed = cpu_common_exception_index_needed,
506 .fields = (VMStateField[]) {
507 VMSTATE_INT32(exception_index, CPUState),
508 VMSTATE_END_OF_LIST()
512 static bool cpu_common_crash_occurred_needed(void *opaque)
514 CPUState *cpu = opaque;
516 return cpu->crash_occurred;
519 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
520 .name = "cpu_common/crash_occurred",
522 .minimum_version_id = 1,
523 .needed = cpu_common_crash_occurred_needed,
524 .fields = (VMStateField[]) {
525 VMSTATE_BOOL(crash_occurred, CPUState),
526 VMSTATE_END_OF_LIST()
530 const VMStateDescription vmstate_cpu_common = {
531 .name = "cpu_common",
533 .minimum_version_id = 1,
534 .pre_load = cpu_common_pre_load,
535 .post_load = cpu_common_post_load,
536 .fields = (VMStateField[]) {
537 VMSTATE_UINT32(halted, CPUState),
538 VMSTATE_UINT32(interrupt_request, CPUState),
539 VMSTATE_END_OF_LIST()
541 .subsections = (const VMStateDescription*[]) {
542 &vmstate_cpu_common_exception_index,
543 &vmstate_cpu_common_crash_occurred,
550 CPUState *qemu_get_cpu(int index)
555 if (cpu->cpu_index == index) {
563 #if !defined(CONFIG_USER_ONLY)
564 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
566 CPUAddressSpace *newas;
568 /* Target code should have set num_ases before calling us */
569 assert(asidx < cpu->num_ases);
572 /* address space 0 gets the convenience alias */
576 /* KVM cannot currently support multiple address spaces. */
577 assert(asidx == 0 || !kvm_enabled());
579 if (!cpu->cpu_ases) {
580 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
583 newas = &cpu->cpu_ases[asidx];
587 newas->tcg_as_listener.commit = tcg_commit;
588 memory_listener_register(&newas->tcg_as_listener, as);
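/* For TCG the commit hook (tcg_commit) refreshes newas->memory_dispatch and
 * flushes this CPU's TLB whenever the address space topology changes.
 */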
592 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
594 /* Return the AddressSpace corresponding to the specified index */
595 return cpu->cpu_ases[asidx].as;
599 void cpu_exec_exit(CPUState *cpu)
601 CPUClass *cc = CPU_GET_CLASS(cpu);
603 cpu_list_remove(cpu);
605 if (cc->vmsd != NULL) {
606 vmstate_unregister(NULL, cc->vmsd, cpu);
608 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
609 vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
613 void cpu_exec_initfn(CPUState *cpu)
618 #ifndef CONFIG_USER_ONLY
619 cpu->thread_id = qemu_get_thread_id();
621 /* This is a softmmu CPU object, so create a property for it
622 * so users can wire up its memory. (This can't go in qom/cpu.c
623 * because that file is compiled only once for both user-mode
624 * and system builds.) The default if no link is set up is to use
625 * the system address space.
627 object_property_add_link(OBJECT(cpu), "memory", TYPE_MEMORY_REGION,
628 (Object **)&cpu->memory,
629 qdev_prop_allow_set_link_before_realize,
630 OBJ_PROP_LINK_UNREF_ON_RELEASE,
632 cpu->memory = system_memory;
633 object_ref(OBJECT(cpu->memory));
637 void cpu_exec_realizefn(CPUState *cpu, Error **errp)
639 CPUClass *cc ATTRIBUTE_UNUSED = CPU_GET_CLASS(cpu);
643 #ifndef CONFIG_USER_ONLY
644 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
645 vmstate_register(NULL, cpu->cpu_index, &vmstate_cpu_common, cpu);
647 if (cc->vmsd != NULL) {
648 vmstate_register(NULL, cpu->cpu_index, cc->vmsd, cpu);
653 #if defined(CONFIG_USER_ONLY)
654 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
656 tb_invalidate_phys_page_range(pc, pc + 1, 0);
659 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
662 hwaddr phys = cpu_get_phys_page_attrs_debug(cpu, pc, &attrs);
663 int asidx = cpu_asidx_from_attrs(cpu, attrs);
665 tb_invalidate_phys_addr(cpu->cpu_ases[asidx].as,
666 phys | (pc & ~TARGET_PAGE_MASK));
671 #if defined(CONFIG_USER_ONLY)
672 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
677 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
683 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
687 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
688 int flags, CPUWatchpoint **watchpoint)
693 /* Add a watchpoint. */
694 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
695 int flags, CPUWatchpoint **watchpoint)
699 /* forbid ranges which are empty or run off the end of the address space */
700 if (len == 0 || (addr + len - 1) < addr) {
701 error_report("tried to set invalid watchpoint at %"
702 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
705 wp = g_malloc(sizeof(*wp));
711 /* keep all GDB-injected watchpoints in front */
712 if (flags & BP_GDB) {
713 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
715 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
718 tlb_flush_page(cpu, addr);
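/* Flushing the TLB page forces the next access to this address back through
 * the slow path, where the watchpoint is honoured via the watch memory region.
 */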
725 /* Remove a specific watchpoint. */
726 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
731 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
732 if (addr == wp->vaddr && len == wp->len
733 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
734 cpu_watchpoint_remove_by_ref(cpu, wp);
741 /* Remove a specific watchpoint by reference. */
742 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
744 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
746 tlb_flush_page(cpu, watchpoint->vaddr);
751 /* Remove all matching watchpoints. */
752 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
754 CPUWatchpoint *wp, *next;
756 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
757 if (wp->flags & mask) {
758 cpu_watchpoint_remove_by_ref(cpu, wp);
763 /* Return true if this watchpoint address matches the specified
764 * access (ie the address range covered by the watchpoint overlaps
765 * partially or completely with the address range covered by the
768 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
772 /* We know the lengths are non-zero, but a little caution is
773 * required to avoid errors in the case where the range ends
774 * exactly at the top of the address space and so addr + len
775 * wraps round to zero.
777 vaddr wpend = wp->vaddr + wp->len - 1;
778 vaddr addrend = addr + len - 1;
780 return !(addr > wpend || wp->vaddr > addrend);
785 /* Add a breakpoint. */
786 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
787 CPUBreakpoint **breakpoint)
791 bp = g_malloc(sizeof(*bp));
796 /* keep all GDB-injected breakpoints in front */
797 if (flags & BP_GDB) {
798 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
800 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
803 breakpoint_invalidate(cpu, pc);
811 /* Remove a specific breakpoint. */
812 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
816 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
817 if (bp->pc == pc && bp->flags == flags) {
818 cpu_breakpoint_remove_by_ref(cpu, bp);
825 /* Remove a specific breakpoint by reference. */
826 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
828 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
830 breakpoint_invalidate(cpu, breakpoint->pc);
835 /* Remove all matching breakpoints. */
836 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
838 CPUBreakpoint *bp, *next;
840 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
841 if (bp->flags & mask) {
842 cpu_breakpoint_remove_by_ref(cpu, bp);
847 /* enable or disable single step mode. EXCP_DEBUG is returned by the
848 CPU loop after each instruction */
849 void cpu_single_step(CPUState *cpu, int enabled)
851 if (cpu->singlestep_enabled != enabled) {
852 cpu->singlestep_enabled = enabled;
854 kvm_update_guest_debug(cpu, 0);
856 /* must flush all the translated code to avoid inconsistencies */
857 /* XXX: only flush what is necessary */
863 void cpu_abort(CPUState *cpu, const char *fmt, ...)
870 fprintf(stderr, "qemu: fatal: ");
871 vfprintf(stderr, fmt, ap);
872 fprintf(stderr, "\n");
873 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
874 if (qemu_log_separate()) {
875 qemu_log("qemu: fatal: ");
876 qemu_log_vprintf(fmt, ap2);
878 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
885 #if defined(CONFIG_USER_ONLY)
887 struct sigaction act;
888 sigfillset(&act.sa_mask);
889 act.sa_handler = SIG_DFL;
890 sigaction(SIGABRT, &act, NULL);
896 #if !defined(CONFIG_USER_ONLY)
897 /* Called from RCU critical section */
898 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
902 block = atomic_rcu_read(&ram_list.mru_block);
903 if (block && addr - block->offset < block->max_length) {
906 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
907 if (addr - block->offset < block->max_length) {
912 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
916 /* It is safe to write mru_block outside the iothread lock. This
921 * xxx removed from list
925 * call_rcu(reclaim_ramblock, xxx);
928 * atomic_rcu_set is not needed here. The block was already published
929 * when it was placed into the list. Here we're just making an extra
930 * copy of the pointer.
932 ram_list.mru_block = block;
936 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
943 end = TARGET_PAGE_ALIGN(start + length);
944 start &= TARGET_PAGE_MASK;
947 block = qemu_get_ram_block(start);
948 assert(block == qemu_get_ram_block(end - 1));
949 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
951 tlb_reset_dirty(cpu, start1, length);
956 /* Note: start and end must be within the same ram block. */
957 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
961 DirtyMemoryBlocks *blocks;
962 unsigned long end, page;
969 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
970 page = start >> TARGET_PAGE_BITS;
974 blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);
977 unsigned long idx = page / DIRTY_MEMORY_BLOCK_SIZE;
978 unsigned long offset = page % DIRTY_MEMORY_BLOCK_SIZE;
979 unsigned long num = MIN(end - page, DIRTY_MEMORY_BLOCK_SIZE - offset);
981 dirty |= bitmap_test_and_clear_atomic(blocks->blocks[idx],
988 if (dirty && tcg_enabled()) {
989 tlb_reset_dirty_range_all(start, length);
995 /* Called from RCU critical section */
996 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
997 MemoryRegionSection *section,
999 hwaddr paddr, hwaddr xlat,
1001 target_ulong *address)
1006 if (memory_region_is_ram(section->mr)) {
1008 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
1009 if (!section->readonly) {
1010 iotlb |= PHYS_SECTION_NOTDIRTY;
1012 iotlb |= PHYS_SECTION_ROM;
1015 AddressSpaceDispatch *d;
1017 d = atomic_rcu_read(&section->address_space->dispatch);
1018 iotlb = section - d->map.sections;
1022 /* Make accesses to pages with watchpoints go via the
1023 watchpoint trap routines. */
1024 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1025 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1026 /* Avoid trapping reads of pages with a write breakpoint. */
1027 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1028 iotlb = PHYS_SECTION_WATCH + paddr;
1029 *address |= TLB_MMIO;
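/* TLB_MMIO forces every access to this page through the I/O slow path,
 * where watch_mem_read/watch_mem_write call check_watchpoint() before
 * forwarding the access to the underlying memory.
 */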
1037 #endif /* defined(CONFIG_USER_ONLY) */
1039 #if !defined(CONFIG_USER_ONLY)
1041 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1043 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1045 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1046 qemu_anon_ram_alloc;
1049 * Set a custom physical guest memory allocator.
1050 * Accelerators with unusual needs may need this. Hopefully, we can
1051 * get rid of it eventually.
1053 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1055 phys_mem_alloc = alloc;
1058 static uint16_t phys_section_add(PhysPageMap *map,
1059 MemoryRegionSection *section)
1061 /* The physical section number is ORed with a page-aligned
1062 * pointer to produce the iotlb entries. Thus it should
1063 * never overflow into the page-aligned value.
1065 assert(map->sections_nb < TARGET_PAGE_SIZE);
1067 if (map->sections_nb == map->sections_nb_alloc) {
1068 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1069 map->sections = g_renew(MemoryRegionSection, map->sections,
1070 map->sections_nb_alloc);
1072 map->sections[map->sections_nb] = *section;
1073 memory_region_ref(section->mr);
1074 return map->sections_nb++;
1077 static void phys_section_destroy(MemoryRegion *mr)
1079 bool have_sub_page = mr->subpage;
1081 memory_region_unref(mr);
1083 if (have_sub_page) {
1084 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1085 object_unref(OBJECT(&subpage->iomem));
1090 static void phys_sections_free(PhysPageMap *map)
1092 while (map->sections_nb > 0) {
1093 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1094 phys_section_destroy(section->mr);
1096 g_free(map->sections);
1100 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1103 hwaddr base = section->offset_within_address_space
1105 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1106 d->map.nodes, d->map.sections);
1107 MemoryRegionSection subsection = {
1108 .offset_within_address_space = base,
1109 .size = int128_make64(TARGET_PAGE_SIZE),
1113 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1115 if (!(existing->mr->subpage)) {
1116 subpage = subpage_init(d->as, base);
1117 subsection.address_space = d->as;
1118 subsection.mr = &subpage->iomem;
1119 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1120 phys_section_add(&d->map, &subsection));
1122 subpage = container_of(existing->mr, subpage_t, iomem);
1124 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1125 end = start + int128_get64(section->size) - 1;
1126 subpage_register(subpage, start, end,
1127 phys_section_add(&d->map, section));
1131 static void register_multipage(AddressSpaceDispatch *d,
1132 MemoryRegionSection *section)
1134 hwaddr start_addr = section->offset_within_address_space;
1135 uint16_t section_index = phys_section_add(&d->map, section);
1136 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1140 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1143 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1145 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1146 AddressSpaceDispatch *d = as->next_dispatch;
1147 MemoryRegionSection now = *section, remain = *section;
1148 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1150 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1151 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1152 - now.offset_within_address_space;
1154 now.size = int128_min(int128_make64(left), now.size);
1155 register_subpage(d, &now);
1157 now.size = int128_zero();
1159 while (int128_ne(remain.size, now.size)) {
1160 remain.size = int128_sub(remain.size, now.size);
1161 remain.offset_within_address_space += int128_get64(now.size);
1162 remain.offset_within_region += int128_get64(now.size);
1164 if (int128_lt(remain.size, page_size)) {
1165 register_subpage(d, &now);
1166 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1167 now.size = page_size;
1168 register_subpage(d, &now);
1170 now.size = int128_and(now.size, int128_neg(page_size));
1171 register_multipage(d, &now);
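/* mem_add() thus typically carves a section into at most three pieces: a
 * subpage for a partial leading page, a multipage run of whole target pages,
 * and a subpage for a partial trailing page.
 */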
1176 void qemu_flush_coalesced_mmio_buffer(void)
1179 kvm_flush_coalesced_mmio_buffer();
1182 void qemu_mutex_lock_ramlist(void)
1184 qemu_mutex_lock(&ram_list.mutex);
1187 void qemu_mutex_unlock_ramlist(void)
1189 qemu_mutex_unlock(&ram_list.mutex);
1193 static void *file_ram_alloc(RAMBlock *block,
1198 bool unlink_on_error = false;
1200 char *sanitized_name;
1202 void *area = MAP_FAILED;
1205 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1207 "host lacks kvm mmu notifiers, -mem-path unsupported");
1212 fd = open(path, O_RDWR);
1214 /* @path names an existing file, use it */
1217 if (errno == ENOENT) {
1218 /* @path names a file that doesn't exist, create it */
1219 fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
1221 unlink_on_error = true;
1224 } else if (errno == EISDIR) {
1225 /* @path names a directory, create a file there */
1226 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1227 sanitized_name = g_strdup(memory_region_name(block->mr));
1228 for (c = sanitized_name; *c != '\0'; c++) {
1234 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1236 g_free(sanitized_name);
1238 fd = mkstemp(filename);
1246 if (errno != EEXIST && errno != EINTR) {
1247 error_setg_errno(errp, errno,
1248 "can't open backing store %s for guest RAM",
1253 * Try again on EINTR and EEXIST. The latter happens when
1254 * something else creates the file between our two open().
1258 block->page_size = qemu_fd_getpagesize(fd);
1259 block->mr->align = block->page_size;
1260 #if defined(__s390x__)
1261 if (kvm_enabled()) {
1262 block->mr->align = MAX(block->mr->align, QEMU_VMALLOC_ALIGN);
1266 if (memory < block->page_size) {
1267 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1268 "or larger than page size 0x%zx",
1269 memory, block->page_size);
1273 memory = ROUND_UP(memory, block->page_size);
1276 * ftruncate is not supported by hugetlbfs in older
1277 * hosts, so don't bother bailing out on errors.
1278 * If anything goes wrong with it under other filesystems,
1281 if (ftruncate(fd, memory)) {
1282 perror("ftruncate");
1285 area = qemu_ram_mmap(fd, memory, block->mr->align,
1286 block->flags & RAM_SHARED);
1287 if (area == MAP_FAILED) {
1288 error_setg_errno(errp, errno,
1289 "unable to map backing store for guest RAM");
1294 os_mem_prealloc(fd, area, memory, errp);
1295 if (errp && *errp) {
1304 if (area != MAP_FAILED) {
1305 qemu_ram_munmap(area, memory);
1307 if (unlink_on_error) {
1317 /* Called with the ramlist lock held. */
1318 static ram_addr_t find_ram_offset(ram_addr_t size)
1320 RAMBlock *block, *next_block;
1321 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1323 assert(size != 0); /* it would hand out the same offset multiple times */
1325 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1329 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1330 ram_addr_t end, next = RAM_ADDR_MAX;
1332 end = block->offset + block->max_length;
1334 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1335 if (next_block->offset >= end) {
1336 next = MIN(next, next_block->offset);
1339 if (next - end >= size && next - end < mingap) {
1341 mingap = next - end;
1345 if (offset == RAM_ADDR_MAX) {
1346 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1354 ram_addr_t last_ram_offset(void)
1357 ram_addr_t last = 0;
1360 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1361 last = MAX(last, block->offset + block->max_length);
1367 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1371 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
1372 if (!machine_dump_guest_core(current_machine)) {
1373 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1375 perror("qemu_madvise");
1376 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1377 "but dump_guest_core=off specified\n");
1382 const char *qemu_ram_get_idstr(RAMBlock *rb)
1387 /* Called with iothread lock held. */
1388 void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
1393 assert(!new_block->idstr[0]);
1396 char *id = qdev_get_dev_path(dev);
1398 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1402 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1405 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1406 if (block != new_block &&
1407 !strcmp(block->idstr, new_block->idstr)) {
1408 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1416 /* Called with iothread lock held. */
1417 void qemu_ram_unset_idstr(RAMBlock *block)
1419 /* FIXME: arch_init.c assumes that this is not called throughout
1420 * migration. Ignore the problem since hot-unplug during migration
1421 * does not work anyway.
1424 memset(block->idstr, 0, sizeof(block->idstr));
1428 size_t qemu_ram_pagesize(RAMBlock *rb)
1430 return rb->page_size;
1433 static int memory_try_enable_merging(void *addr, size_t len)
1435 if (!machine_mem_merge(current_machine)) {
1436 /* disabled by the user */
1440 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1443 /* Only legal before guest might have detected the memory size: e.g. on
1444 * incoming migration, or right after reset.
1446 * As the memory core doesn't know how the memory is accessed, it is up to the
1447 * resize callback to update device state and/or add assertions to detect
1448 * misuse, if necessary.
1450 int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp)
1454 newsize = HOST_PAGE_ALIGN(newsize);
1456 if (block->used_length == newsize) {
1460 if (!(block->flags & RAM_RESIZEABLE)) {
1461 error_setg_errno(errp, EINVAL,
1462 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1463 " in != 0x" RAM_ADDR_FMT, block->idstr,
1464 newsize, block->used_length);
1468 if (block->max_length < newsize) {
1469 error_setg_errno(errp, EINVAL,
1470 "Length too large: %s: 0x" RAM_ADDR_FMT
1471 " > 0x" RAM_ADDR_FMT, block->idstr,
1472 newsize, block->max_length);
1476 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1477 block->used_length = newsize;
1478 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1480 memory_region_set_size(block->mr, newsize);
1481 if (block->resized) {
1482 block->resized(block->idstr, newsize, block->host);
1487 /* Called with ram_list.mutex held */
1488 static void dirty_memory_extend(ram_addr_t old_ram_size,
1489 ram_addr_t new_ram_size)
1491 ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
1492 DIRTY_MEMORY_BLOCK_SIZE);
1493 ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
1494 DIRTY_MEMORY_BLOCK_SIZE);
1497 /* Only need to extend if block count increased */
1498 if (new_num_blocks <= old_num_blocks) {
1502 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1503 DirtyMemoryBlocks *old_blocks;
1504 DirtyMemoryBlocks *new_blocks;
1507 old_blocks = atomic_rcu_read(&ram_list.dirty_memory[i]);
1508 new_blocks = g_malloc(sizeof(*new_blocks) +
1509 sizeof(new_blocks->blocks[0]) * new_num_blocks);
1511 if (old_num_blocks) {
1512 memcpy(new_blocks->blocks, old_blocks->blocks,
1513 old_num_blocks * sizeof(old_blocks->blocks[0]));
1516 for (j = old_num_blocks; j < new_num_blocks; j++) {
1517 new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
1520 atomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
1523 g_free_rcu(old_blocks, rcu);
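/* The old block array may still be in use by RCU readers of
 * ram_list.dirty_memory[], so it is copied, republished with atomic_rcu_set()
 * and only freed after a grace period.
 */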
1528 static void ram_block_add(RAMBlock *new_block, Error **errp)
1531 RAMBlock *last_block = NULL;
1532 ram_addr_t old_ram_size, new_ram_size;
1535 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1537 qemu_mutex_lock_ramlist();
1538 new_block->offset = find_ram_offset(new_block->max_length);
1540 if (!new_block->host) {
1541 if (xen_enabled()) {
1542 xen_ram_alloc(new_block->offset, new_block->max_length,
1543 new_block->mr, &err);
1545 error_propagate(errp, err);
1546 qemu_mutex_unlock_ramlist();
1550 new_block->host = phys_mem_alloc(new_block->max_length,
1551 &new_block->mr->align);
1552 if (!new_block->host) {
1553 error_setg_errno(errp, errno,
1554 "cannot set up guest memory '%s'",
1555 memory_region_name(new_block->mr));
1556 qemu_mutex_unlock_ramlist();
1559 memory_try_enable_merging(new_block->host, new_block->max_length);
1563 new_ram_size = MAX(old_ram_size,
1564 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1565 if (new_ram_size > old_ram_size) {
1566 migration_bitmap_extend(old_ram_size, new_ram_size);
1567 dirty_memory_extend(old_ram_size, new_ram_size);
1569 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1570 * QLIST (which has an RCU-friendly variant) does not have insertion at
1571 * tail, so save the last element in last_block.
1573 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1575 if (block->max_length < new_block->max_length) {
1580 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1581 } else if (last_block) {
1582 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1583 } else { /* list is empty */
1584 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1586 ram_list.mru_block = NULL;
1588 /* Write list before version */
1591 qemu_mutex_unlock_ramlist();
1593 cpu_physical_memory_set_dirty_range(new_block->offset,
1594 new_block->used_length,
1597 if (new_block->host) {
1598 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1599 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1600 /* MADV_DONTFORK is also needed by KVM in absence of synchronous MMU */
1601 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1606 RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1607 bool share, const char *mem_path,
1610 RAMBlock *new_block;
1611 Error *local_err = NULL;
1613 if (xen_enabled()) {
1614 error_setg(errp, "-mem-path not supported with Xen");
1618 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1620 * file_ram_alloc() needs to allocate just like
1621 * phys_mem_alloc, but we haven't bothered to provide
1625 "-mem-path not supported with this accelerator");
1629 size = HOST_PAGE_ALIGN(size);
1630 new_block = g_malloc0(sizeof(*new_block));
1632 new_block->used_length = size;
1633 new_block->max_length = size;
1634 new_block->flags = share ? RAM_SHARED : 0;
1635 new_block->host = file_ram_alloc(new_block, size,
1637 if (!new_block->host) {
1642 ram_block_add(new_block, &local_err);
1645 error_propagate(errp, local_err);
1653 RAMBlock *qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1654 void (*resized)(const char*,
1657 void *host, bool resizeable,
1658 MemoryRegion *mr, Error **errp)
1660 RAMBlock *new_block;
1661 Error *local_err = NULL;
1663 size = HOST_PAGE_ALIGN(size);
1664 max_size = HOST_PAGE_ALIGN(max_size);
1665 new_block = g_malloc0(sizeof(*new_block));
1667 new_block->resized = resized;
1668 new_block->used_length = size;
1669 new_block->max_length = max_size;
1670 assert(max_size >= size);
1672 new_block->page_size = getpagesize();
1673 new_block->host = host;
1675 new_block->flags |= RAM_PREALLOC;
1678 new_block->flags |= RAM_RESIZEABLE;
1680 ram_block_add(new_block, &local_err);
1683 error_propagate(errp, local_err);
1689 RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1690 MemoryRegion *mr, Error **errp)
1692 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1695 RAMBlock *qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1697 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1700 RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1701 void (*resized)(const char*,
1704 MemoryRegion *mr, Error **errp)
1706 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1709 static void reclaim_ramblock(RAMBlock *block)
1711 if (block->flags & RAM_PREALLOC) {
1713 } else if (xen_enabled()) {
1714 xen_invalidate_map_cache_entry(block->host);
1716 } else if (block->fd >= 0) {
1717 qemu_ram_munmap(block->host, block->max_length);
1721 qemu_anon_ram_free(block->host, block->max_length);
1726 void qemu_ram_free(RAMBlock *block)
1732 qemu_mutex_lock_ramlist();
1733 QLIST_REMOVE_RCU(block, next);
1734 ram_list.mru_block = NULL;
1735 /* Write list before version */
1738 call_rcu(block, reclaim_ramblock, rcu);
1739 qemu_mutex_unlock_ramlist();
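/* Freeing is deferred to an RCU callback (reclaim_ramblock) so that readers
 * still walking ram_list.blocks under rcu_read_lock() never see the host
 * memory disappear underneath them.
 */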
1743 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1750 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1751 offset = addr - block->offset;
1752 if (offset < block->max_length) {
1753 vaddr = ramblock_ptr(block, offset);
1754 if (block->flags & RAM_PREALLOC) {
1756 } else if (xen_enabled()) {
1760 if (block->fd >= 0) {
1761 flags |= (block->flags & RAM_SHARED ?
1762 MAP_SHARED : MAP_PRIVATE);
1763 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1764 flags, block->fd, offset);
1767 * Remap needs to match alloc. Accelerators that
1768 * set phys_mem_alloc never remap. If they did,
1769 * we'd need a remap hook here.
1771 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1773 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1774 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1777 if (area != vaddr) {
1778 fprintf(stderr, "Could not remap addr: "
1779 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1783 memory_try_enable_merging(vaddr, length);
1784 qemu_ram_setup_dump(vaddr, length);
1789 #endif /* !_WIN32 */
1791 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1792 * This should not be used for general purpose DMA. Use address_space_map
1793 * or address_space_rw instead. For local memory (e.g. video ram) that the
1794 * device owns, use memory_region_get_ram_ptr.
1796 * Called within RCU critical section.
1798 void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t addr)
1800 RAMBlock *block = ram_block;
1802 if (block == NULL) {
1803 block = qemu_get_ram_block(addr);
1804 addr -= block->offset;
1807 if (xen_enabled() && block->host == NULL) {
1808 /* We need to check if the requested address is in the RAM
1809 * because we don't want to map the entire memory in QEMU.
1810 * In that case just map until the end of the page.
1812 if (block->offset == 0) {
1813 return xen_map_cache(addr, 0, 0);
1816 block->host = xen_map_cache(block->offset, block->max_length, 1);
1818 return ramblock_ptr(block, addr);
1821 /* Return a host pointer to guest's ram. Similar to qemu_map_ram_ptr
1822 * but takes a size argument.
1824 * Called within RCU critical section.
1826 static void *qemu_ram_ptr_length(RAMBlock *ram_block, ram_addr_t addr,
1829 RAMBlock *block = ram_block;
1834 if (block == NULL) {
1835 block = qemu_get_ram_block(addr);
1836 addr -= block->offset;
1838 *size = MIN(*size, block->max_length - addr);
1840 if (xen_enabled() && block->host == NULL) {
1841 /* We need to check if the requested address is in the RAM
1842 * because we don't want to map the entire memory in QEMU.
1843 * In that case just map the requested area.
1845 if (block->offset == 0) {
1846 return xen_map_cache(addr, *size, 1);
1849 block->host = xen_map_cache(block->offset, block->max_length, 1);
1852 return ramblock_ptr(block, addr);
1856 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1859 * ptr: Host pointer to look up
1860 * round_offset: If true round the result offset down to a page boundary
1861 * *ram_addr: set to result ram_addr
1862 * *offset: set to result offset within the RAMBlock
1864 * Returns: RAMBlock (or NULL if not found)
1866 * By the time this function returns, the returned pointer is not protected
1867 * by RCU anymore. If the caller is not within an RCU critical section and
1868 * does not hold the iothread lock, it must have other means of protecting the
1869 * pointer, such as a reference to the region that includes the incoming
1872 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1876 uint8_t *host = ptr;
1878 if (xen_enabled()) {
1879 ram_addr_t ram_addr;
1881 ram_addr = xen_ram_addr_from_mapcache(ptr);
1882 block = qemu_get_ram_block(ram_addr);
1884 *offset = ram_addr - block->offset;
1891 block = atomic_rcu_read(&ram_list.mru_block);
1892 if (block && block->host && host - block->host < block->max_length) {
1896 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1897 /* This case appears when the block is not mapped. */
1898 if (block->host == NULL) {
1901 if (host - block->host < block->max_length) {
1910 *offset = (host - block->host);
1912 *offset &= TARGET_PAGE_MASK;
1919 * Finds the named RAMBlock
1921 * name: The name of RAMBlock to find
1923 * Returns: RAMBlock (or NULL if not found)
1925 RAMBlock *qemu_ram_block_by_name(const char *name)
1929 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1930 if (!strcmp(name, block->idstr)) {
1938 /* Some of the softmmu routines need to translate from a host pointer
1939 (typically a TLB entry) back to a ram offset. */
1940 ram_addr_t qemu_ram_addr_from_host(void *ptr)
1945 block = qemu_ram_block_from_host(ptr, false, &offset);
1947 return RAM_ADDR_INVALID;
1950 return block->offset + offset;
1953 /* Called within RCU critical section. */
1954 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1955 uint64_t val, unsigned size)
1957 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1958 tb_invalidate_phys_page_fast(ram_addr, size);
1962 stb_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1965 stw_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1968 stl_p(qemu_map_ram_ptr(NULL, ram_addr), val);
1973 /* Set both VGA and migration bits for simplicity and to remove
1974 * the notdirty callback faster.
1976 cpu_physical_memory_set_dirty_range(ram_addr, size,
1977 DIRTY_CLIENTS_NOCODE);
1978 /* we remove the notdirty callback only if the code has been
1980 if (!cpu_physical_memory_is_clean(ram_addr)) {
1981 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
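/* This is the "notdirty" trick: writes to pages holding translated code are
 * routed here so the affected TBs can be invalidated first; once the page is
 * dirty for code again, the TLB entry is switched back to a direct RAM
 * mapping and subsequent writes bypass this handler.
 */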
1985 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1986 unsigned size, bool is_write)
1991 static const MemoryRegionOps notdirty_mem_ops = {
1992 .write = notdirty_mem_write,
1993 .valid.accepts = notdirty_mem_accepts,
1994 .endianness = DEVICE_NATIVE_ENDIAN,
1997 /* Generate a debug exception if a watchpoint has been hit. */
1998 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2000 CPUState *cpu = current_cpu;
2001 CPUClass *cc = CPU_GET_CLASS(cpu);
2002 CPUArchState *env = cpu->env_ptr;
2003 target_ulong pc, cs_base;
2008 if (cpu->watchpoint_hit) {
2009 /* We re-entered the check after replacing the TB. Now raise
2010 * the debug interrupt so that it will trigger after the
2011 * current instruction. */
2012 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2015 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2016 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2017 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2018 && (wp->flags & flags)) {
2019 if (flags == BP_MEM_READ) {
2020 wp->flags |= BP_WATCHPOINT_HIT_READ;
2022 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2024 wp->hitaddr = vaddr;
2025 wp->hitattrs = attrs;
2026 if (!cpu->watchpoint_hit) {
2027 if (wp->flags & BP_CPU &&
2028 !cc->debug_check_watchpoint(cpu, wp)) {
2029 wp->flags &= ~BP_WATCHPOINT_HIT;
2032 cpu->watchpoint_hit = wp;
2033 tb_check_watchpoint(cpu);
2034 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2035 cpu->exception_index = EXCP_DEBUG;
2038 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2039 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2040 cpu_loop_exit_noexc(cpu);
2044 wp->flags &= ~BP_WATCHPOINT_HIT;
2049 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2050 so these check for a hit then pass through to the normal out-of-line
2052 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2053 unsigned size, MemTxAttrs attrs)
2057 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2058 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2060 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2063 data = address_space_ldub(as, addr, attrs, &res);
2066 data = address_space_lduw(as, addr, attrs, &res);
2069 data = address_space_ldl(as, addr, attrs, &res);
2077 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2078 uint64_t val, unsigned size,
2082 int asidx = cpu_asidx_from_attrs(current_cpu, attrs);
2083 AddressSpace *as = current_cpu->cpu_ases[asidx].as;
2085 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2088 address_space_stb(as, addr, val, attrs, &res);
2091 address_space_stw(as, addr, val, attrs, &res);
2094 address_space_stl(as, addr, val, attrs, &res);
2101 static const MemoryRegionOps watch_mem_ops = {
2102 .read_with_attrs = watch_mem_read,
2103 .write_with_attrs = watch_mem_write,
2104 .endianness = DEVICE_NATIVE_ENDIAN,
2107 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2108 unsigned len, MemTxAttrs attrs)
2110 subpage_t *subpage = opaque;
2114 #if defined(DEBUG_SUBPAGE)
2115 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2116 subpage, len, addr);
2118 res = address_space_read(subpage->as, addr + subpage->base,
2125 *data = ldub_p(buf);
2128 *data = lduw_p(buf);
2141 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2142 uint64_t value, unsigned len, MemTxAttrs attrs)
2144 subpage_t *subpage = opaque;
2147 #if defined(DEBUG_SUBPAGE)
2148 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2149 " value %"PRIx64"\n",
2150 __func__, subpage, len, addr, value);
2168 return address_space_write(subpage->as, addr + subpage->base,
2172 static bool subpage_accepts(void *opaque, hwaddr addr,
2173 unsigned len, bool is_write)
2175 subpage_t *subpage = opaque;
2176 #if defined(DEBUG_SUBPAGE)
2177 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2178 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2181 return address_space_access_valid(subpage->as, addr + subpage->base,
2185 static const MemoryRegionOps subpage_ops = {
2186 .read_with_attrs = subpage_read,
2187 .write_with_attrs = subpage_write,
2188 .impl.min_access_size = 1,
2189 .impl.max_access_size = 8,
2190 .valid.min_access_size = 1,
2191 .valid.max_access_size = 8,
2192 .valid.accepts = subpage_accepts,
2193 .endianness = DEVICE_NATIVE_ENDIAN,
2196 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2201 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2203 idx = SUBPAGE_IDX(start);
2204 eidx = SUBPAGE_IDX(end);
2205 #if defined(DEBUG_SUBPAGE)
2206 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2207 __func__, mmio, start, end, idx, eidx, section);
2209 for (; idx <= eidx; idx++) {
2210 mmio->sub_section[idx] = section;
2216 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2220 mmio = g_malloc0(sizeof(subpage_t));
2224 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2225 NULL, TARGET_PAGE_SIZE);
2226 mmio->iomem.subpage = true;
2227 #if defined(DEBUG_SUBPAGE)
2228 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2229 mmio, base, TARGET_PAGE_SIZE);
2231 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
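/* A fresh subpage starts out with every byte offset pointing at the
 * unassigned section; register_subpage() later overlays the real sections.
 */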
2236 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2240 MemoryRegionSection section = {
2241 .address_space = as,
2243 .offset_within_address_space = 0,
2244 .offset_within_region = 0,
2245 .size = int128_2_64(),
2248 return phys_section_add(map, &section);
2251 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2253 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2254 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2255 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2256 MemoryRegionSection *sections = d->map.sections;
2258 return sections[index & ~TARGET_PAGE_MASK].mr;
2261 static void io_mem_init(void)
2263 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2264 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2266 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2268 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2272 static void mem_begin(MemoryListener *listener)
2274 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2275 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2278 n = dummy_section(&d->map, as, &io_mem_unassigned);
2279 assert(n == PHYS_SECTION_UNASSIGNED);
2280 n = dummy_section(&d->map, as, &io_mem_notdirty);
2281 assert(n == PHYS_SECTION_NOTDIRTY);
2282 n = dummy_section(&d->map, as, &io_mem_rom);
2283 assert(n == PHYS_SECTION_ROM);
2284 n = dummy_section(&d->map, as, &io_mem_watch);
2285 assert(n == PHYS_SECTION_WATCH);
2287 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2289 as->next_dispatch = d;
2292 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2294 phys_sections_free(&d->map);
2298 static void mem_commit(MemoryListener *listener)
2300 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2301 AddressSpaceDispatch *cur = as->dispatch;
2302 AddressSpaceDispatch *next = as->next_dispatch;
2304 phys_page_compact_all(next, next->map.nodes_nb);
2306 atomic_rcu_set(&as->dispatch, next);
2308 call_rcu(cur, address_space_dispatch_free, rcu);
2312 static void tcg_commit(MemoryListener *listener)
2314 CPUAddressSpace *cpuas;
2315 AddressSpaceDispatch *d;
2317 /* since each CPU stores ram addresses in its TLB cache, we must
2318 reset the modified entries */
2319 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2320 cpu_reloading_memory_map();
2321 /* The CPU and TLB are protected by the iothread lock.
2322 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2323 * may have split the RCU critical section.
2325 d = atomic_rcu_read(&cpuas->as->dispatch);
2326 cpuas->memory_dispatch = d;
2327 tlb_flush(cpuas->cpu, 1);
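/* A full TLB flush is required because existing TLB entries may hold iotlb
 * indexes into the dispatch map that has just been replaced.
 */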
2330 void address_space_init_dispatch(AddressSpace *as)
2332 as->dispatch = NULL;
2333 as->dispatch_listener = (MemoryListener) {
2335 .commit = mem_commit,
2336 .region_add = mem_add,
2337 .region_nop = mem_add,
2340 memory_listener_register(&as->dispatch_listener, as);
2343 void address_space_unregister(AddressSpace *as)
2345 memory_listener_unregister(&as->dispatch_listener);
2348 void address_space_destroy_dispatch(AddressSpace *as)
2350 AddressSpaceDispatch *d = as->dispatch;
2352 atomic_rcu_set(&as->dispatch, NULL);
2354 call_rcu(d, address_space_dispatch_free, rcu);
2358 static void memory_map_init(void)
2360 system_memory = g_malloc(sizeof(*system_memory));
2362 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2363 address_space_init(&address_space_memory, system_memory, "memory");
2365 system_io = g_malloc(sizeof(*system_io));
2366 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2368 address_space_init(&address_space_io, system_io, "I/O");
2371 MemoryRegion *get_system_memory(void)
2373 return system_memory;
2376 MemoryRegion *get_system_io(void)
2381 #endif /* !defined(CONFIG_USER_ONLY) */
2383 /* physical memory access (slow version, mainly for debug) */
2384 #if defined(CONFIG_USER_ONLY)
2385 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2386 uint8_t *buf, int len, int is_write)
2393 page = addr & TARGET_PAGE_MASK;
2394 l = (page + TARGET_PAGE_SIZE) - addr;
2397 flags = page_get_flags(page);
2398 if (!(flags & PAGE_VALID))
2401 if (!(flags & PAGE_WRITE))
2403 /* XXX: this code should not depend on lock_user */
2404 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2407 unlock_user(p, addr, l);
2409 if (!(flags & PAGE_READ))
2411 /* XXX: this code should not depend on lock_user */
2412 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2415 unlock_user(p, addr, 0);
2426 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2429 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2430 addr += memory_region_get_ram_addr(mr);
2432 /* No early return if dirty_log_mask is or becomes 0, because
2433 * cpu_physical_memory_set_dirty_range will still call
2434 * xen_modified_memory.
2436 if (dirty_log_mask) {
2438 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2440 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2441 tb_invalidate_phys_range(addr, addr + length);
2442 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2444 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2447 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2449 unsigned access_size_max = mr->ops->valid.max_access_size;
2451 /* Regions are assumed to support 1-4 byte accesses unless
2452 otherwise specified. */
2453 if (access_size_max == 0) {
2454 access_size_max = 4;
2457 /* Bound the maximum access by the alignment of the address. */
2458 if (!mr->ops->impl.unaligned) {
2459 unsigned align_size_max = addr & -addr;
2460 if (align_size_max != 0 && align_size_max < access_size_max) {
2461 access_size_max = align_size_max;
2465 /* Don't attempt accesses larger than the maximum. */
2466 if (l > access_size_max) {
2467 l = access_size_max;
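/* Example: a 64-bit access at an address that is only 2-byte aligned, on a
 * region that does not advertise impl.unaligned, is limited here to 2 bytes;
 * the caller then loops, issuing several smaller accesses.
 */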
2474 static bool prepare_mmio_access(MemoryRegion *mr)
2476 bool unlocked = !qemu_mutex_iothread_locked();
2477 bool release_lock = false;
2479 if (unlocked && mr->global_locking) {
2480 qemu_mutex_lock_iothread();
2482 release_lock = true;
2484 if (mr->flush_coalesced_mmio) {
2486 qemu_mutex_lock_iothread();
2488 qemu_flush_coalesced_mmio_buffer();
2490 qemu_mutex_unlock_iothread();
2494 return release_lock;
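/* MMIO dispatch normally runs under the iothread lock; regions that opt out
 * of global locking are dispatched without it, and any pending coalesced MMIO
 * is flushed first so device accesses stay ordered.
 */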
2497 /* Called within RCU critical section. */
2498 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2501 int len, hwaddr addr1,
2502 hwaddr l, MemoryRegion *mr)
2506 MemTxResult result = MEMTX_OK;
2507 bool release_lock = false;
2510 if (!memory_access_is_direct(mr, true)) {
2511 release_lock |= prepare_mmio_access(mr);
2512 l = memory_access_size(mr, l, addr1);
2513 /* XXX: could force current_cpu to NULL to avoid
2517 /* 64 bit write access */
2519 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2523 /* 32 bit write access */
2525 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2529 /* 16 bit write access */
2531 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2535 /* 8 bit write access */
2537 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2545 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2546 memcpy(ptr, buf, l);
2547 invalidate_and_set_dirty(mr, addr1, l);
2551 qemu_mutex_unlock_iothread();
2552 release_lock = false;
2564 mr = address_space_translate(as, addr, &addr1, &l, true);
2570 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2571 const uint8_t *buf, int len)
2576 MemTxResult result = MEMTX_OK;
2581 mr = address_space_translate(as, addr, &addr1, &l, true);
2582 result = address_space_write_continue(as, addr, attrs, buf, len,
2590 /* Called within RCU critical section. */
2591 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2592 MemTxAttrs attrs, uint8_t *buf,
2593 int len, hwaddr addr1, hwaddr l,
2598 MemTxResult result = MEMTX_OK;
2599 bool release_lock = false;
2602 if (!memory_access_is_direct(mr, false)) {
2604 release_lock |= prepare_mmio_access(mr);
2605 l = memory_access_size(mr, l, addr1);
2608 /* 64 bit read access */
2609 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2614 /* 32 bit read access */
2615 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2620 /* 16 bit read access */
2621 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2626 /* 8 bit read access */
2627 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2636 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2637 memcpy(buf, ptr, l);
2641 qemu_mutex_unlock_iothread();
2642 release_lock = false;
2654 mr = address_space_translate(as, addr, &addr1, &l, false);
2660 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2661 MemTxAttrs attrs, uint8_t *buf, int len)
2666 MemTxResult result = MEMTX_OK;
2671 mr = address_space_translate(as, addr, &addr1, &l, false);
2672 result = address_space_read_continue(as, addr, attrs, buf, len,
2680 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2681 uint8_t *buf, int len, bool is_write)
2684 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2686 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2690 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2691 int len, int is_write)
2693 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2694 buf, len, is_write);
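/* Usage sketch: a device model performing DMA into guest memory and checking
 * the transaction result.  The function name and error handling are
 * assumptions of the sketch:
 *
 *     static void example_dma_to_guest(AddressSpace *as, hwaddr dst,
 *                                      const uint8_t *data, int len)
 *     {
 *         MemTxResult res;
 *
 *         res = address_space_write(as, dst, MEMTXATTRS_UNSPECIFIED, data, len);
 *         if (res != MEMTX_OK) {
 *             // the access hit unassigned or faulting memory: raise a bus error
 *         }
 *     }
 */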
2697 enum write_rom_type {
2702 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2703 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2713 mr = address_space_translate(as, addr, &addr1, &l, true);
2715 if (!(memory_region_is_ram(mr) ||
2716 memory_region_is_romd(mr))) {
2717 l = memory_access_size(mr, l, addr1);
2720 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
2723 memcpy(ptr, buf, l);
2724 invalidate_and_set_dirty(mr, addr1, l);
2727 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2739 /* used for ROM loading: can write to RAM and ROM */
2739 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2740 const uint8_t *buf, int len)
2742 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
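/* Usage sketch: a firmware loader copying a blob into a ROM region, which a
 * normal write through the address space would leave unmodified.  'rom_base',
 * 'blob' and 'blob_len' are assumptions of the sketch:
 *
 *     cpu_physical_memory_write_rom(&address_space_memory, rom_base,
 *                                   blob, blob_len);
 */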
2745 void cpu_flush_icache_range(hwaddr start, int len)
2748 * This function should do the same thing as an icache flush that was
2749 * triggered from within the guest. For TCG we are always cache coherent,
2750 * so there is no need to flush anything. For KVM / Xen we need to flush
2751 * the host's instruction cache at least.
2753 if (tcg_enabled()) {
2757 cpu_physical_memory_write_rom_internal(&address_space_memory,
2758 start, NULL, len, FLUSH_CACHE);
2769 static BounceBuffer bounce;
2771 typedef struct MapClient {
2773 QLIST_ENTRY(MapClient) link;
2776 QemuMutex map_client_list_lock;
2777 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2778 = QLIST_HEAD_INITIALIZER(map_client_list);
2780 static void cpu_unregister_map_client_do(MapClient *client)
2782 QLIST_REMOVE(client, link);
2786 static void cpu_notify_map_clients_locked(void)
2790 while (!QLIST_EMPTY(&map_client_list)) {
2791 client = QLIST_FIRST(&map_client_list);
2792 qemu_bh_schedule(client->bh);
2793 cpu_unregister_map_client_do(client);
2797 void cpu_register_map_client(QEMUBH *bh)
2799 MapClient *client = g_malloc(sizeof(*client));
2801 qemu_mutex_lock(&map_client_list_lock);
2803 QLIST_INSERT_HEAD(&map_client_list, client, link);
2804 if (!atomic_read(&bounce.in_use)) {
2805 cpu_notify_map_clients_locked();
2807 qemu_mutex_unlock(&map_client_list_lock);
2810 void cpu_exec_init_all(void)
2812 qemu_mutex_init(&ram_list.mutex);
2815 qemu_mutex_init(&map_client_list_lock);
2818 void cpu_unregister_map_client(QEMUBH *bh)
2822 qemu_mutex_lock(&map_client_list_lock);
2823 QLIST_FOREACH(client, &map_client_list, link) {
2824 if (client->bh == bh) {
2825 cpu_unregister_map_client_do(client);
2829 qemu_mutex_unlock(&map_client_list_lock);
2832 static void cpu_notify_map_clients(void)
2834 qemu_mutex_lock(&map_client_list_lock);
2835 cpu_notify_map_clients_locked();
2836 qemu_mutex_unlock(&map_client_list_lock);
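/* Usage sketch for the map-client interface: a device whose address_space_map()
 * attempt failed because the bounce buffer was busy registers a bottom half to
 * retry once it is released.  The client is removed from the list before the
 * BH runs, so the BH only has to restart the transfer.  ExampleDMAState, s->bh
 * and example_continue_dma() are hypothetical names for the sketch:
 *
 *     static void example_retry_map_bh(void *opaque)
 *     {
 *         ExampleDMAState *s = opaque;
 *
 *         example_continue_dma(s);      // calls address_space_map() again
 *     }
 *
 *     s->bh = qemu_bh_new(example_retry_map_bh, s);
 *     cpu_register_map_client(s->bh);
 */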
2839 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2847 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2848 if (!memory_access_is_direct(mr, is_write)) {
2849 l = memory_access_size(mr, l, addr);
2850 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2862 /* Map a physical memory region into a host virtual address.
2863 * May map a subset of the requested range, given by and returned in *plen.
2864 * May return NULL if resources needed to perform the mapping are exhausted.
2865 * Use only for reads OR writes - not for read-modify-write operations.
2866 * Use cpu_register_map_client() to know when retrying the map operation is
2867 * likely to succeed.
2869 void *address_space_map(AddressSpace *as,
2876 hwaddr l, xlat, base;
2877 MemoryRegion *mr, *this_mr;
2886 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2888 if (!memory_access_is_direct(mr, is_write)) {
2889 if (atomic_xchg(&bounce.in_use, true)) {
2893 /* Avoid unbounded allocations */
2894 l = MIN(l, TARGET_PAGE_SIZE);
2895 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2899 memory_region_ref(mr);
2902 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2908 return bounce.buffer;
2922 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2923 if (this_mr != mr || xlat != base + done) {
2928 memory_region_ref(mr);
2930 ptr = qemu_ram_ptr_length(mr->ram_block, base, plen);
2936 /* Unmaps a memory region previously mapped by address_space_map().
2937 * Will also mark the memory as dirty if is_write == 1. access_len gives
2938 * the amount of memory that was actually read or written by the caller.
2940 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2941 int is_write, hwaddr access_len)
2943 if (buffer != bounce.buffer) {
2947 mr = memory_region_from_host(buffer, &addr1);
2950 invalidate_and_set_dirty(mr, addr1, access_len);
2952 if (xen_enabled()) {
2953 xen_invalidate_map_cache_entry(buffer);
2955 memory_region_unref(mr);
2959 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2960 bounce.buffer, access_len);
2962 qemu_vfree(bounce.buffer);
2963 bounce.buffer = NULL;
2964 memory_region_unref(bounce.mr);
2965 atomic_mb_set(&bounce.in_use, false);
2966 cpu_notify_map_clients();
2969 void *cpu_physical_memory_map(hwaddr addr,
2973 return address_space_map(&address_space_memory, addr, plen, is_write);
2976 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2977 int is_write, hwaddr access_len)
2979 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
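/* Usage sketch: zero-copy access to guest memory via map/unmap.  The mapping
 * may be shorter than requested (*plen is updated) and may fall back to the
 * single bounce buffer, in which case NULL is returned while that buffer is
 * busy.  The helper name is an assumption of the sketch:
 *
 *     static void example_zero_copy_read(AddressSpace *as, hwaddr addr,
 *                                        hwaddr len)
 *     {
 *         hwaddr plen = len;
 *         void *host = address_space_map(as, addr, &plen, false);
 *
 *         if (!host) {
 *             // bounce buffer busy: fall back to address_space_read(),
 *             // or register a map client and retry later
 *             return;
 *         }
 *         // ... consume up to plen bytes directly from 'host' ...
 *         address_space_unmap(as, host, plen, false, plen);
 *     }
 */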
2982 /* warning: addr must be aligned */
2983 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2985 MemTxResult *result,
2986 enum device_endian endian)
2994 bool release_lock = false;
2997 mr = address_space_translate(as, addr, &addr1, &l, false);
2998 if (l < 4 || !memory_access_is_direct(mr, false)) {
2999 release_lock |= prepare_mmio_access(mr);
3002 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3003 #if defined(TARGET_WORDS_BIGENDIAN)
3004 if (endian == DEVICE_LITTLE_ENDIAN) {
3008 if (endian == DEVICE_BIG_ENDIAN) {
3014 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3016 case DEVICE_LITTLE_ENDIAN:
3017 val = ldl_le_p(ptr);
3019 case DEVICE_BIG_ENDIAN:
3020 val = ldl_be_p(ptr);
3032 qemu_mutex_unlock_iothread();
3038 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3039 MemTxAttrs attrs, MemTxResult *result)
3041 return address_space_ldl_internal(as, addr, attrs, result,
3042 DEVICE_NATIVE_ENDIAN);
3045 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3046 MemTxAttrs attrs, MemTxResult *result)
3048 return address_space_ldl_internal(as, addr, attrs, result,
3049 DEVICE_LITTLE_ENDIAN);
3052 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3053 MemTxAttrs attrs, MemTxResult *result)
3055 return address_space_ldl_internal(as, addr, attrs, result,
3059 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3061 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3064 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3066 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3069 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3071 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
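/* Usage sketch: reading a 32-bit little-endian field from a guest-physical
 * descriptor, with and without checking the transaction result.  The +8
 * offset and 'desc_addr' are assumptions of the sketch:
 *
 *     MemTxResult res;
 *     uint32_t next = address_space_ldl_le(as, desc_addr + 8,
 *                                          MEMTXATTRS_UNSPECIFIED, &res);
 *     if (res != MEMTX_OK) {
 *         // the load did not reach RAM or a device; handle the error
 *     }
 *
 *     // or, when the result does not matter:
 *     uint32_t next2 = ldl_le_phys(as, desc_addr + 8);
 */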
3074 /* warning: addr must be aligned */
3075 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3077 MemTxResult *result,
3078 enum device_endian endian)
3086 bool release_lock = false;
3089 mr = address_space_translate(as, addr, &addr1, &l,
3091 if (l < 8 || !memory_access_is_direct(mr, false)) {
3092 release_lock |= prepare_mmio_access(mr);
3095 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3096 #if defined(TARGET_WORDS_BIGENDIAN)
3097 if (endian == DEVICE_LITTLE_ENDIAN) {
3101 if (endian == DEVICE_BIG_ENDIAN) {
3107 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3109 case DEVICE_LITTLE_ENDIAN:
3110 val = ldq_le_p(ptr);
3112 case DEVICE_BIG_ENDIAN:
3113 val = ldq_be_p(ptr);
3125 qemu_mutex_unlock_iothread();
3131 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3132 MemTxAttrs attrs, MemTxResult *result)
3134 return address_space_ldq_internal(as, addr, attrs, result,
3135 DEVICE_NATIVE_ENDIAN);
3138 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3139 MemTxAttrs attrs, MemTxResult *result)
3141 return address_space_ldq_internal(as, addr, attrs, result,
3142 DEVICE_LITTLE_ENDIAN);
3145 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3146 MemTxAttrs attrs, MemTxResult *result)
3148 return address_space_ldq_internal(as, addr, attrs, result,
3152 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3154 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3157 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3159 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3162 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3164 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3168 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3169 MemTxAttrs attrs, MemTxResult *result)
3174 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3181 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3183 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3186 /* warning: addr must be aligned */
3187 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3190 MemTxResult *result,
3191 enum device_endian endian)
3199 bool release_lock = false;
3202 mr = address_space_translate(as, addr, &addr1, &l,
3204 if (l < 2 || !memory_access_is_direct(mr, false)) {
3205 release_lock |= prepare_mmio_access(mr);
3208 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3209 #if defined(TARGET_WORDS_BIGENDIAN)
3210 if (endian == DEVICE_LITTLE_ENDIAN) {
3214 if (endian == DEVICE_BIG_ENDIAN) {
3220 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3222 case DEVICE_LITTLE_ENDIAN:
3223 val = lduw_le_p(ptr);
3225 case DEVICE_BIG_ENDIAN:
3226 val = lduw_be_p(ptr);
3238 qemu_mutex_unlock_iothread();
3244 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3245 MemTxAttrs attrs, MemTxResult *result)
3247 return address_space_lduw_internal(as, addr, attrs, result,
3248 DEVICE_NATIVE_ENDIAN);
3251 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3252 MemTxAttrs attrs, MemTxResult *result)
3254 return address_space_lduw_internal(as, addr, attrs, result,
3255 DEVICE_LITTLE_ENDIAN);
3258 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3259 MemTxAttrs attrs, MemTxResult *result)
3261 return address_space_lduw_internal(as, addr, attrs, result,
3265 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3267 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3270 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3272 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3275 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3277 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3280 /* warning: addr must be aligned. The RAM page is not marked as dirty
3281 and the code inside is not invalidated. It is useful if the dirty
3282 bits are used to track modified PTEs */
3283 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3284 MemTxAttrs attrs, MemTxResult *result)
3291 uint8_t dirty_log_mask;
3292 bool release_lock = false;
3295 mr = address_space_translate(as, addr, &addr1, &l,
3297 if (l < 4 || !memory_access_is_direct(mr, true)) {
3298 release_lock |= prepare_mmio_access(mr);
3300 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3302 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3305 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3306 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3307 cpu_physical_memory_set_dirty_range(memory_region_get_ram_addr(mr) + addr,
3315 qemu_mutex_unlock_iothread();
3320 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3322 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
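/* Usage sketch: a target's page-table walker updating an accessed bit in a
 * guest PTE.  Using the _notdirty variant avoids flagging the page for TB
 * invalidation, as described in the comment above.  PTE_ACCESSED_BIT is an
 * illustrative name, not a define from this file:
 *
 *     uint32_t pte = ldl_phys(as, pte_addr);
 *     if (!(pte & PTE_ACCESSED_BIT)) {
 *         stl_phys_notdirty(as, pte_addr, pte | PTE_ACCESSED_BIT);
 *     }
 */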
3325 /* warning: addr must be aligned */
3326 static inline void address_space_stl_internal(AddressSpace *as,
3327 hwaddr addr, uint32_t val,
3329 MemTxResult *result,
3330 enum device_endian endian)
3337 bool release_lock = false;
3340 mr = address_space_translate(as, addr, &addr1, &l,
3342 if (l < 4 || !memory_access_is_direct(mr, true)) {
3343 release_lock |= prepare_mmio_access(mr);
3345 #if defined(TARGET_WORDS_BIGENDIAN)
3346 if (endian == DEVICE_LITTLE_ENDIAN) {
3350 if (endian == DEVICE_BIG_ENDIAN) {
3354 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3357 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3359 case DEVICE_LITTLE_ENDIAN:
3362 case DEVICE_BIG_ENDIAN:
3369 invalidate_and_set_dirty(mr, addr1, 4);
3376 qemu_mutex_unlock_iothread();
3381 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3382 MemTxAttrs attrs, MemTxResult *result)
3384 address_space_stl_internal(as, addr, val, attrs, result,
3385 DEVICE_NATIVE_ENDIAN);
3388 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3389 MemTxAttrs attrs, MemTxResult *result)
3391 address_space_stl_internal(as, addr, val, attrs, result,
3392 DEVICE_LITTLE_ENDIAN);
3395 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3396 MemTxAttrs attrs, MemTxResult *result)
3398 address_space_stl_internal(as, addr, val, attrs, result,
3402 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3404 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3407 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3409 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3412 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3414 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3418 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3419 MemTxAttrs attrs, MemTxResult *result)
3424 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3430 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3432 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3435 /* warning: addr must be aligned */
3436 static inline void address_space_stw_internal(AddressSpace *as,
3437 hwaddr addr, uint32_t val,
3439 MemTxResult *result,
3440 enum device_endian endian)
3447 bool release_lock = false;
3450 mr = address_space_translate(as, addr, &addr1, &l, true);
3451 if (l < 2 || !memory_access_is_direct(mr, true)) {
3452 release_lock |= prepare_mmio_access(mr);
3454 #if defined(TARGET_WORDS_BIGENDIAN)
3455 if (endian == DEVICE_LITTLE_ENDIAN) {
3459 if (endian == DEVICE_BIG_ENDIAN) {
3463 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3466 ptr = qemu_map_ram_ptr(mr->ram_block, addr1);
3468 case DEVICE_LITTLE_ENDIAN:
3471 case DEVICE_BIG_ENDIAN:
3478 invalidate_and_set_dirty(mr, addr1, 2);
3485 qemu_mutex_unlock_iothread();
3490 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3491 MemTxAttrs attrs, MemTxResult *result)
3493 address_space_stw_internal(as, addr, val, attrs, result,
3494 DEVICE_NATIVE_ENDIAN);
3497 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3498 MemTxAttrs attrs, MemTxResult *result)
3500 address_space_stw_internal(as, addr, val, attrs, result,
3501 DEVICE_LITTLE_ENDIAN);
3504 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3505 MemTxAttrs attrs, MemTxResult *result)
3507 address_space_stw_internal(as, addr, val, attrs, result,
3511 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3513 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3516 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3518 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3521 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3523 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3527 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3528 MemTxAttrs attrs, MemTxResult *result)
3532 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3538 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3539 MemTxAttrs attrs, MemTxResult *result)
3542 val = cpu_to_le64(val);
3543 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3548 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3549 MemTxAttrs attrs, MemTxResult *result)
3552 val = cpu_to_be64(val);
3553 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3559 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3561 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3564 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3566 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3569 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3571 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3574 /* virtual memory access for debug (includes writing to ROM) */
3575 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3576 uint8_t *buf, int len, int is_write)
3586 page = addr & TARGET_PAGE_MASK;
3587 phys_addr = cpu_get_phys_page_attrs_debug(cpu, page, &attrs);
3588 asidx = cpu_asidx_from_attrs(cpu, attrs);
3589 /* if no physical page mapped, return an error */
3590 if (phys_addr == -1)
3592 l = (page + TARGET_PAGE_SIZE) - addr;
3595 phys_addr += (addr & ~TARGET_PAGE_MASK);
3597 cpu_physical_memory_write_rom(cpu->cpu_ases[asidx].as,
3600 address_space_rw(cpu->cpu_ases[asidx].as, phys_addr,
3601 MEMTXATTRS_UNSPECIFIED,
3612 * Allows code that needs to deal with migration bitmaps etc. to still be
3613 * built target-independent.
3615 size_t qemu_target_page_bits(void)
3617 return TARGET_PAGE_BITS;
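/* Usage sketch: target-independent migration code deriving a page count from
 * a byte size.  'ram_bytes' is an assumption of the sketch:
 *
 *     size_t bits = qemu_target_page_bits();
 *     uint64_t pages = (ram_bytes + (1ULL << bits) - 1) >> bits;
 */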
3623 * A helper function for the _utterly broken_ virtio device model to find out if
3624 * it's running on a big endian machine. Don't do this at home kids!
3626 bool target_words_bigendian(void);
3627 bool target_words_bigendian(void)
3629 #if defined(TARGET_WORDS_BIGENDIAN)
3636 #ifndef CONFIG_USER_ONLY
3637 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3644 mr = address_space_translate(&address_space_memory,
3645 phys_addr, &phys_addr, &l, false);
3647 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
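/* Usage sketch: callers use this predicate to decide whether an address can
 * be reached through a stable host pointer or must go through the MMIO
 * dispatch path:
 *
 *     if (cpu_physical_memory_is_io(phys_addr)) {
 *         // device (MMIO) memory: use the address_space_* accessors
 *     } else {
 *         // RAM or ROMD: a direct host pointer is available
 *     }
 */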
3652 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3658 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3659 ret = func(block->idstr, block->host, block->offset,
3660 block->used_length, opaque);