4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
54 #include "exec/memory-internal.h"
55 #include "exec/ram_addr.h"
57 #include "qemu/range.h"
59 #include "qemu/mmap-alloc.h"
62 //#define DEBUG_SUBPAGE
64 #if !defined(CONFIG_USER_ONLY)
65 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
66 * are protected by the ramlist lock.
68 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
70 static MemoryRegion *system_memory;
71 static MemoryRegion *system_io;
73 AddressSpace address_space_io;
74 AddressSpace address_space_memory;
76 MemoryRegion io_mem_rom, io_mem_notdirty;
77 static MemoryRegion io_mem_unassigned;
79 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
80 #define RAM_PREALLOC (1 << 0)
82 /* RAM is mmap-ed with MAP_SHARED */
83 #define RAM_SHARED (1 << 1)
85 /* Only a portion of RAM (used_length) is actually used, and migrated.
86 * This used_length size can change across reboots.
88 #define RAM_RESIZEABLE (1 << 2)
90 /* RAM is backed by an mmapped file.
92 #define RAM_FILE (1 << 3)
95 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
96 /* current CPU in the current thread. It is only valid inside
98 __thread CPUState *current_cpu;
99 /* 0 = Do not count executed instructions.
100 1 = Precise instruction counting.
101 2 = Adaptive rate instruction counting. */
104 #if !defined(CONFIG_USER_ONLY)
106 typedef struct PhysPageEntry PhysPageEntry;
108 struct PhysPageEntry {
109 /* How many bits to skip to the next level (in units of L2_SIZE). 0 for a leaf. */
111 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
115 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
117 /* Size of the L2 (and L3, etc) page tables. */
118 #define ADDR_SPACE_BITS 64
121 #define P_L2_SIZE (1 << P_L2_BITS)
123 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
125 typedef PhysPageEntry Node[P_L2_SIZE];
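/*
 * Worked example (illustrative only; the P_L2_BITS definition is elided
 * from this excerpt, it is 9 in upstream QEMU, and TARGET_PAGE_BITS is
 * assumed to be 12 here):
 *
 *     P_L2_SIZE   = 1 << 9 = 512 entries per node
 *     P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 5 + 1 = 6
 *
 * i.e. six levels of 512-entry nodes are enough to cover a 64-bit
 * physical address space in TARGET_PAGE_SIZE units.
 */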
127 typedef struct PhysPageMap {
130 unsigned sections_nb;
131 unsigned sections_nb_alloc;
133 unsigned nodes_nb_alloc;
135 MemoryRegionSection *sections;
138 struct AddressSpaceDispatch {
141 /* This is a multi-level map on the physical address space.
142 * The bottom level has pointers to MemoryRegionSections.
144 PhysPageEntry phys_map;
149 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
150 typedef struct subpage_t {
154 uint16_t sub_section[TARGET_PAGE_SIZE];
157 #define PHYS_SECTION_UNASSIGNED 0
158 #define PHYS_SECTION_NOTDIRTY 1
159 #define PHYS_SECTION_ROM 2
160 #define PHYS_SECTION_WATCH 3
162 static void io_mem_init(void);
163 static void memory_map_init(void);
164 static void tcg_commit(MemoryListener *listener);
166 static MemoryRegion io_mem_watch;
169 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
170 * @cpu: the CPU whose AddressSpace this is
171 * @as: the AddressSpace itself
172 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
173 * @tcg_as_listener: listener for tracking changes to the AddressSpace
175 struct CPUAddressSpace {
178 struct AddressSpaceDispatch *memory_dispatch;
179 MemoryListener tcg_as_listener;
184 #if !defined(CONFIG_USER_ONLY)
186 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
188 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
189 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
190 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
191 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
195 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
202 ret = map->nodes_nb++;
204 assert(ret != PHYS_MAP_NODE_NIL);
205 assert(ret != map->nodes_nb_alloc);
207 e.skip = leaf ? 0 : 1;
208 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
209 for (i = 0; i < P_L2_SIZE; ++i) {
210 memcpy(&p[i], &e, sizeof(e));
215 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
216 hwaddr *index, hwaddr *nb, uint16_t leaf,
220 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
222 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
223 lp->ptr = phys_map_node_alloc(map, level == 0);
225 p = map->nodes[lp->ptr];
226 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
228 while (*nb && lp < &p[P_L2_SIZE]) {
229 if ((*index & (step - 1)) == 0 && *nb >= step) {
235 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
241 static void phys_page_set(AddressSpaceDispatch *d,
242 hwaddr index, hwaddr nb,
245 /* Wildly overreserve - it doesn't matter much. */
246 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
248 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
251 /* Compact a non leaf page entry. Simply detect that the entry has a single child,
252 * and update our entry so we can skip it and go directly to the destination.
254 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
256 unsigned valid_ptr = P_L2_SIZE;
261 if (lp->ptr == PHYS_MAP_NODE_NIL) {
266 for (i = 0; i < P_L2_SIZE; i++) {
267 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
274 phys_page_compact(&p[i], nodes, compacted);
278 /* We can only compress if there's only one child. */
283 assert(valid_ptr < P_L2_SIZE);
285 /* Don't compress if it won't fit in the # of bits we have. */
286 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
290 lp->ptr = p[valid_ptr].ptr;
291 if (!p[valid_ptr].skip) {
292 /* If our only child is a leaf, make this a leaf. */
293 /* By design, we should have made this node a leaf to begin with so we
294 * should never reach here.
295 * But since it's so simple to handle this, let's do it just in case we
300 lp->skip += p[valid_ptr].skip;
304 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
306 DECLARE_BITMAP(compacted, nodes_nb);
308 if (d->phys_map.skip) {
309 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
313 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
314 Node *nodes, MemoryRegionSection *sections)
317 hwaddr index = addr >> TARGET_PAGE_BITS;
320 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
321 if (lp.ptr == PHYS_MAP_NODE_NIL) {
322 return &sections[PHYS_SECTION_UNASSIGNED];
325 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
328 if (sections[lp.ptr].size.hi ||
329 range_covers_byte(sections[lp.ptr].offset_within_address_space,
330 sections[lp.ptr].size.lo, addr)) {
331 return &sections[lp.ptr];
333 return &sections[PHYS_SECTION_UNASSIGNED];
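/*
 * Usage sketch (illustrative; it mirrors the call made from
 * address_space_lookup_region() below):
 *
 *     MemoryRegionSection *s;
 *     s = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
 *
 * Each iteration of the loop above consumes lp.skip levels, indexes the
 * current node with (index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1), and
 * either descends further or falls back to PHYS_SECTION_UNASSIGNED when
 * it reaches PHYS_MAP_NODE_NIL.
 */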
337 bool memory_region_is_unassigned(MemoryRegion *mr)
339 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
340 && mr != &io_mem_watch;
343 /* Called from RCU critical section */
344 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
346 bool resolve_subpage)
348 MemoryRegionSection *section;
351 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
352 if (resolve_subpage && section->mr->subpage) {
353 subpage = container_of(section->mr, subpage_t, iomem);
354 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
359 /* Called from RCU critical section */
360 static MemoryRegionSection *
361 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
362 hwaddr *plen, bool resolve_subpage)
364 MemoryRegionSection *section;
368 section = address_space_lookup_region(d, addr, resolve_subpage);
369 /* Compute offset within MemoryRegionSection */
370 addr -= section->offset_within_address_space;
372 /* Compute offset within MemoryRegion */
373 *xlat = addr + section->offset_within_region;
377 /* MMIO registers can be expected to perform full-width accesses based only
378 * on their address, without considering adjacent registers that could
379 * decode to completely different MemoryRegions. When such registers
380 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
381 * regions overlap wildly. For this reason we cannot clamp the accesses
384 * If the length is small (as is the case for address_space_ldl/stl),
385 * everything works fine. If the incoming length is large, however,
386 * the caller really has to do the clamping through memory_access_size.
388 if (memory_region_is_ram(mr)) {
389 diff = int128_sub(section->size, int128_make64(addr));
390 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
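/*
 * Clamping example (illustrative numbers): for a 64 KiB RAM section with
 * addr == 0xf000 inside it, diff == 0x1000, so a request of
 * *plen == 0x4000 is clamped to 0x1000 and never crosses the section.
 * MMIO sections are intentionally left unclamped here; for them the
 * caller bounds each access through memory_access_size(), as the comment
 * above explains.
 */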
395 static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
397 if (memory_region_is_ram(mr)) {
398 return !(is_write && mr->readonly);
400 if (memory_region_is_romd(mr)) {
407 /* Called from RCU critical section */
408 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
409 hwaddr *xlat, hwaddr *plen,
413 MemoryRegionSection *section;
417 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
418 section = address_space_translate_internal(d, addr, &addr, plen, true);
421 if (!mr->iommu_ops) {
425 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
426 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
427 | (addr & iotlb.addr_mask));
428 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
429 if (!(iotlb.perm & (1 << is_write))) {
430 mr = &io_mem_unassigned;
434 as = iotlb.target_as;
437 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
438 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
439 *plen = MIN(page, *plen);
446 /* Called from RCU critical section */
447 MemoryRegionSection *
448 address_space_translate_for_iotlb(CPUState *cpu, hwaddr addr,
449 hwaddr *xlat, hwaddr *plen)
451 MemoryRegionSection *section;
452 section = address_space_translate_internal(cpu->cpu_ases[0].memory_dispatch,
453 addr, xlat, plen, false);
455 assert(!section->mr->iommu_ops);
460 #if !defined(CONFIG_USER_ONLY)
462 static int cpu_common_post_load(void *opaque, int version_id)
464 CPUState *cpu = opaque;
466 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
467 version_id is increased. */
468 cpu->interrupt_request &= ~0x01;
474 static int cpu_common_pre_load(void *opaque)
476 CPUState *cpu = opaque;
478 cpu->exception_index = -1;
483 static bool cpu_common_exception_index_needed(void *opaque)
485 CPUState *cpu = opaque;
487 return tcg_enabled() && cpu->exception_index != -1;
490 static const VMStateDescription vmstate_cpu_common_exception_index = {
491 .name = "cpu_common/exception_index",
493 .minimum_version_id = 1,
494 .needed = cpu_common_exception_index_needed,
495 .fields = (VMStateField[]) {
496 VMSTATE_INT32(exception_index, CPUState),
497 VMSTATE_END_OF_LIST()
501 static bool cpu_common_crash_occurred_needed(void *opaque)
503 CPUState *cpu = opaque;
505 return cpu->crash_occurred;
508 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
509 .name = "cpu_common/crash_occurred",
511 .minimum_version_id = 1,
512 .needed = cpu_common_crash_occurred_needed,
513 .fields = (VMStateField[]) {
514 VMSTATE_BOOL(crash_occurred, CPUState),
515 VMSTATE_END_OF_LIST()
519 const VMStateDescription vmstate_cpu_common = {
520 .name = "cpu_common",
522 .minimum_version_id = 1,
523 .pre_load = cpu_common_pre_load,
524 .post_load = cpu_common_post_load,
525 .fields = (VMStateField[]) {
526 VMSTATE_UINT32(halted, CPUState),
527 VMSTATE_UINT32(interrupt_request, CPUState),
528 VMSTATE_END_OF_LIST()
530 .subsections = (const VMStateDescription*[]) {
531 &vmstate_cpu_common_exception_index,
532 &vmstate_cpu_common_crash_occurred,
539 CPUState *qemu_get_cpu(int index)
544 if (cpu->cpu_index == index) {
552 #if !defined(CONFIG_USER_ONLY)
553 void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as)
555 /* We only support one address space per cpu at the moment. */
556 assert(cpu->as == as);
559 /* We've already registered the listener for our only AS */
563 cpu->cpu_ases = g_new0(CPUAddressSpace, 1);
564 cpu->cpu_ases[0].cpu = cpu;
565 cpu->cpu_ases[0].as = as;
566 cpu->cpu_ases[0].tcg_as_listener.commit = tcg_commit;
567 memory_listener_register(&cpu->cpu_ases[0].tcg_as_listener, as);
571 #ifndef CONFIG_USER_ONLY
572 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
574 static int cpu_get_free_index(Error **errp)
576 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
578 if (cpu >= MAX_CPUMASK_BITS) {
579 error_setg(errp, "Trying to use more CPUs than max of %d",
584 bitmap_set(cpu_index_map, cpu, 1);
588 void cpu_exec_exit(CPUState *cpu)
590 if (cpu->cpu_index == -1) {
591 /* cpu_index was never allocated by this @cpu or was already freed. */
595 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
600 static int cpu_get_free_index(Error **errp)
605 CPU_FOREACH(some_cpu) {
611 void cpu_exec_exit(CPUState *cpu)
616 void cpu_exec_init(CPUState *cpu, Error **errp)
618 CPUClass *cc = CPU_GET_CLASS(cpu);
620 Error *local_err = NULL;
622 #ifndef CONFIG_USER_ONLY
623 cpu->as = &address_space_memory;
624 cpu->thread_id = qemu_get_thread_id();
627 #if defined(CONFIG_USER_ONLY)
630 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
632 error_propagate(errp, local_err);
633 #if defined(CONFIG_USER_ONLY)
638 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
639 #if defined(CONFIG_USER_ONLY)
642 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
643 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
645 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
646 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
647 cpu_save, cpu_load, cpu->env_ptr);
648 assert(cc->vmsd == NULL);
649 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
651 if (cc->vmsd != NULL) {
652 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
656 #if defined(CONFIG_USER_ONLY)
657 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
659 tb_invalidate_phys_page_range(pc, pc + 1, 0);
662 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
664 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
666 tb_invalidate_phys_addr(cpu->as,
667 phys | (pc & ~TARGET_PAGE_MASK));
672 #if defined(CONFIG_USER_ONLY)
673 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
678 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
684 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
688 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
689 int flags, CPUWatchpoint **watchpoint)
694 /* Add a watchpoint. */
695 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
696 int flags, CPUWatchpoint **watchpoint)
700 /* forbid ranges which are empty or run off the end of the address space */
701 if (len == 0 || (addr + len - 1) < addr) {
702 error_report("tried to set invalid watchpoint at %"
703 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
706 wp = g_malloc(sizeof(*wp));
712 /* keep all GDB-injected watchpoints in front */
713 if (flags & BP_GDB) {
714 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
716 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
719 tlb_flush_page(cpu, addr);
726 /* Remove a specific watchpoint. */
727 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
732 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
733 if (addr == wp->vaddr && len == wp->len
734 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
735 cpu_watchpoint_remove_by_ref(cpu, wp);
742 /* Remove a specific watchpoint by reference. */
743 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
745 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
747 tlb_flush_page(cpu, watchpoint->vaddr);
752 /* Remove all matching watchpoints. */
753 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
755 CPUWatchpoint *wp, *next;
757 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
758 if (wp->flags & mask) {
759 cpu_watchpoint_remove_by_ref(cpu, wp);
764 /* Return true if this watchpoint address matches the specified
765 * access (ie the address range covered by the watchpoint overlaps
766 * partially or completely with the address range covered by the
769 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
773 /* We know the lengths are non-zero, but a little caution is
774 * required to avoid errors in the case where the range ends
775 * exactly at the top of the address space and so addr + len
776 * wraps round to zero.
778 vaddr wpend = wp->vaddr + wp->len - 1;
779 vaddr addrend = addr + len - 1;
781 return !(addr > wpend || wp->vaddr > addrend);
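/*
 * Worked example (illustrative, assuming a 64-bit vaddr): a 4-byte
 * watchpoint at 0xfffffffffffffffc has wpend == 0xffffffffffffffff;
 * computing wp->vaddr + wp->len directly would wrap to zero.  An access
 * with addr == 0xfffffffffffffffe and len == 2 gives
 * addrend == 0xffffffffffffffff, so neither addr > wpend nor
 * wp->vaddr > addrend holds and the ranges are reported as overlapping.
 */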
786 /* Add a breakpoint. */
787 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
788 CPUBreakpoint **breakpoint)
792 bp = g_malloc(sizeof(*bp));
797 /* keep all GDB-injected breakpoints in front */
798 if (flags & BP_GDB) {
799 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
801 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
804 breakpoint_invalidate(cpu, pc);
812 /* Remove a specific breakpoint. */
813 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
817 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
818 if (bp->pc == pc && bp->flags == flags) {
819 cpu_breakpoint_remove_by_ref(cpu, bp);
826 /* Remove a specific breakpoint by reference. */
827 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
829 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
831 breakpoint_invalidate(cpu, breakpoint->pc);
836 /* Remove all matching breakpoints. */
837 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
839 CPUBreakpoint *bp, *next;
841 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
842 if (bp->flags & mask) {
843 cpu_breakpoint_remove_by_ref(cpu, bp);
848 /* enable or disable single step mode. EXCP_DEBUG is returned by the
849 CPU loop after each instruction */
850 void cpu_single_step(CPUState *cpu, int enabled)
852 if (cpu->singlestep_enabled != enabled) {
853 cpu->singlestep_enabled = enabled;
855 kvm_update_guest_debug(cpu, 0);
857 /* must flush all the translated code to avoid inconsistencies */
858 /* XXX: only flush what is necessary */
864 void cpu_abort(CPUState *cpu, const char *fmt, ...)
871 fprintf(stderr, "qemu: fatal: ");
872 vfprintf(stderr, fmt, ap);
873 fprintf(stderr, "\n");
874 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
875 if (qemu_log_enabled()) {
876 qemu_log("qemu: fatal: ");
877 qemu_log_vprintf(fmt, ap2);
879 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
885 #if defined(CONFIG_USER_ONLY)
887 struct sigaction act;
888 sigfillset(&act.sa_mask);
889 act.sa_handler = SIG_DFL;
890 sigaction(SIGABRT, &act, NULL);
896 #if !defined(CONFIG_USER_ONLY)
897 /* Called from RCU critical section */
898 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
902 block = atomic_rcu_read(&ram_list.mru_block);
903 if (block && addr - block->offset < block->max_length) {
906 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
907 if (addr - block->offset < block->max_length) {
912 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
916 /* It is safe to write mru_block outside the iothread lock. This
921 * xxx removed from list
925 * call_rcu(reclaim_ramblock, xxx);
928 * atomic_rcu_set is not needed here. The block was already published
929 * when it was placed into the list. Here we're just making an extra
930 * copy of the pointer.
932 ram_list.mru_block = block;
936 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
943 end = TARGET_PAGE_ALIGN(start + length);
944 start &= TARGET_PAGE_MASK;
947 block = qemu_get_ram_block(start);
948 assert(block == qemu_get_ram_block(end - 1));
949 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
951 tlb_reset_dirty(cpu, start1, length);
956 /* Note: start and end must be within the same ram block. */
957 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
961 unsigned long end, page;
968 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
969 page = start >> TARGET_PAGE_BITS;
970 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
973 if (dirty && tcg_enabled()) {
974 tlb_reset_dirty_range_all(start, length);
980 /* Called from RCU critical section */
981 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
982 MemoryRegionSection *section,
984 hwaddr paddr, hwaddr xlat,
986 target_ulong *address)
991 if (memory_region_is_ram(section->mr)) {
993 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
995 if (!section->readonly) {
996 iotlb |= PHYS_SECTION_NOTDIRTY;
998 iotlb |= PHYS_SECTION_ROM;
1001 AddressSpaceDispatch *d;
1003 d = atomic_rcu_read(&section->address_space->dispatch);
1004 iotlb = section - d->map.sections;
1008 /* Make accesses to pages with watchpoints go via the
1009 watchpoint trap routines. */
1010 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1011 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1012 /* Avoid trapping reads of pages with a write breakpoint. */
1013 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1014 iotlb = PHYS_SECTION_WATCH + paddr;
1015 *address |= TLB_MMIO;
1023 #endif /* defined(CONFIG_USER_ONLY) */
1025 #if !defined(CONFIG_USER_ONLY)
1027 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1029 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1031 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1032 qemu_anon_ram_alloc;
1035 * Set a custom physical guest memory allocator.
1036 * Accelerators with unusual needs may need this. Hopefully, we can
1037 * get rid of it eventually.
1039 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1041 phys_mem_alloc = alloc;
1044 static uint16_t phys_section_add(PhysPageMap *map,
1045 MemoryRegionSection *section)
1047 /* The physical section number is ORed with a page-aligned
1048 * pointer to produce the iotlb entries. Thus it should
1049 * never overflow into the page-aligned value.
1051 assert(map->sections_nb < TARGET_PAGE_SIZE);
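/*
 * Encoding sketch (illustrative, assuming TARGET_PAGE_SIZE == 4096): a
 * section number such as 3 is ORed into the low bits of a page-aligned
 * value, e.g. 0x12345000 | 3 == 0x12345003, when the iotlb entry is built
 * in memory_region_section_get_iotlb().  iotlb_to_region() later recovers
 * it with (index & ~TARGET_PAGE_MASK), which is why the assert above
 * keeps sections_nb below TARGET_PAGE_SIZE.
 */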
1053 if (map->sections_nb == map->sections_nb_alloc) {
1054 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1055 map->sections = g_renew(MemoryRegionSection, map->sections,
1056 map->sections_nb_alloc);
1058 map->sections[map->sections_nb] = *section;
1059 memory_region_ref(section->mr);
1060 return map->sections_nb++;
1063 static void phys_section_destroy(MemoryRegion *mr)
1065 memory_region_unref(mr);
1068 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1069 object_unref(OBJECT(&subpage->iomem));
1074 static void phys_sections_free(PhysPageMap *map)
1076 while (map->sections_nb > 0) {
1077 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1078 phys_section_destroy(section->mr);
1080 g_free(map->sections);
1084 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1087 hwaddr base = section->offset_within_address_space
1089 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1090 d->map.nodes, d->map.sections);
1091 MemoryRegionSection subsection = {
1092 .offset_within_address_space = base,
1093 .size = int128_make64(TARGET_PAGE_SIZE),
1097 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1099 if (!(existing->mr->subpage)) {
1100 subpage = subpage_init(d->as, base);
1101 subsection.address_space = d->as;
1102 subsection.mr = &subpage->iomem;
1103 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1104 phys_section_add(&d->map, &subsection));
1106 subpage = container_of(existing->mr, subpage_t, iomem);
1108 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1109 end = start + int128_get64(section->size) - 1;
1110 subpage_register(subpage, start, end,
1111 phys_section_add(&d->map, section));
1115 static void register_multipage(AddressSpaceDispatch *d,
1116 MemoryRegionSection *section)
1118 hwaddr start_addr = section->offset_within_address_space;
1119 uint16_t section_index = phys_section_add(&d->map, section);
1120 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1124 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1127 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1129 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1130 AddressSpaceDispatch *d = as->next_dispatch;
1131 MemoryRegionSection now = *section, remain = *section;
1132 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1134 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1135 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1136 - now.offset_within_address_space;
1138 now.size = int128_min(int128_make64(left), now.size);
1139 register_subpage(d, &now);
1141 now.size = int128_zero();
1143 while (int128_ne(remain.size, now.size)) {
1144 remain.size = int128_sub(remain.size, now.size);
1145 remain.offset_within_address_space += int128_get64(now.size);
1146 remain.offset_within_region += int128_get64(now.size);
1148 if (int128_lt(remain.size, page_size)) {
1149 register_subpage(d, &now);
1150 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1151 now.size = page_size;
1152 register_subpage(d, &now);
1154 now.size = int128_and(now.size, int128_neg(page_size));
1155 register_multipage(d, &now);
1160 void qemu_flush_coalesced_mmio_buffer(void)
1163 kvm_flush_coalesced_mmio_buffer();
1166 void qemu_mutex_lock_ramlist(void)
1168 qemu_mutex_lock(&ram_list.mutex);
1171 void qemu_mutex_unlock_ramlist(void)
1173 qemu_mutex_unlock(&ram_list.mutex);
1178 #include <sys/vfs.h>
1180 #define HUGETLBFS_MAGIC 0x958458f6
1182 static long gethugepagesize(const char *path, Error **errp)
1188 ret = statfs(path, &fs);
1189 } while (ret != 0 && errno == EINTR);
1192 error_setg_errno(errp, errno, "failed to get page size of file %s",
1197 if (fs.f_type != HUGETLBFS_MAGIC)
1198 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
1203 static void *file_ram_alloc(RAMBlock *block,
1209 char *sanitized_name;
1214 Error *local_err = NULL;
1216 hpagesize = gethugepagesize(path, &local_err);
1218 error_propagate(errp, local_err);
1221 block->mr->align = hpagesize;
1223 if (memory < hpagesize) {
1224 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1225 "or larger than huge page size 0x%" PRIx64,
1230 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1232 "host lacks kvm mmu notifiers, -mem-path unsupported");
1236 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1237 sanitized_name = g_strdup(memory_region_name(block->mr));
1238 for (c = sanitized_name; *c != '\0'; c++) {
1243 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1245 g_free(sanitized_name);
1247 fd = mkstemp(filename);
1249 error_setg_errno(errp, errno,
1250 "unable to create backing store for hugepages");
1257 memory = ROUND_UP(memory, hpagesize);
1260 * ftruncate is not supported by hugetlbfs in older
1261 * hosts, so don't bother bailing out on errors.
1262 * If anything goes wrong with it under other filesystems,
1265 if (ftruncate(fd, memory)) {
1266 perror("ftruncate");
1269 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1270 if (area == MAP_FAILED) {
1271 error_setg_errno(errp, errno,
1272 "unable to map backing store for hugepages");
1278 os_mem_prealloc(fd, area, memory);
1289 /* Called with the ramlist lock held. */
1290 static ram_addr_t find_ram_offset(ram_addr_t size)
1292 RAMBlock *block, *next_block;
1293 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1295 assert(size != 0); /* it would hand out same offset multiple times */
1297 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1301 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1302 ram_addr_t end, next = RAM_ADDR_MAX;
1304 end = block->offset + block->max_length;
1306 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1307 if (next_block->offset >= end) {
1308 next = MIN(next, next_block->offset);
1311 if (next - end >= size && next - end < mingap) {
1313 mingap = next - end;
1317 if (offset == RAM_ADDR_MAX) {
1318 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1326 ram_addr_t last_ram_offset(void)
1329 ram_addr_t last = 0;
1332 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1333 last = MAX(last, block->offset + block->max_length);
1339 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1343 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1344 if (!machine_dump_guest_core(current_machine)) {
1345 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1347 perror("qemu_madvise");
1348 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1349 "but dump_guest_core=off specified\n");
1354 /* Called within an RCU critical section, or while the ramlist lock
1357 static RAMBlock *find_ram_block(ram_addr_t addr)
1361 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1362 if (block->offset == addr) {
1370 /* Called with iothread lock held. */
1371 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1373 RAMBlock *new_block, *block;
1376 new_block = find_ram_block(addr);
1378 assert(!new_block->idstr[0]);
1381 char *id = qdev_get_dev_path(dev);
1383 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1387 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1389 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1390 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1391 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1399 /* Called with iothread lock held. */
1400 void qemu_ram_unset_idstr(ram_addr_t addr)
1404 /* FIXME: arch_init.c assumes that this is not called throughout
1405 * migration. Ignore the problem since hot-unplug during migration
1406 * does not work anyway.
1410 block = find_ram_block(addr);
1412 memset(block->idstr, 0, sizeof(block->idstr));
1417 static int memory_try_enable_merging(void *addr, size_t len)
1419 if (!machine_mem_merge(current_machine)) {
1420 /* disabled by the user */
1424 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1427 /* Only legal before guest might have detected the memory size: e.g. on
1428 * incoming migration, or right after reset.
1430 * As the memory core doesn't know how memory is accessed, it is up to
1431 * the resize callback to update device state and/or add assertions to detect
1432 * misuse, if necessary.
1434 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1436 RAMBlock *block = find_ram_block(base);
1440 newsize = TARGET_PAGE_ALIGN(newsize);
1442 if (block->used_length == newsize) {
1446 if (!(block->flags & RAM_RESIZEABLE)) {
1447 error_setg_errno(errp, EINVAL,
1448 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1449 " in != 0x" RAM_ADDR_FMT, block->idstr,
1450 newsize, block->used_length);
1454 if (block->max_length < newsize) {
1455 error_setg_errno(errp, EINVAL,
1456 "Length too large: %s: 0x" RAM_ADDR_FMT
1457 " > 0x" RAM_ADDR_FMT, block->idstr,
1458 newsize, block->max_length);
1462 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1463 block->used_length = newsize;
1464 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1466 memory_region_set_size(block->mr, newsize);
1467 if (block->resized) {
1468 block->resized(block->idstr, newsize, block->host);
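/*
 * Usage sketch (illustrative only, not taken from exec.c; block_base and
 * new_size are hypothetical values):
 *
 *     Error *err = NULL;
 *     qemu_ram_resize(block_base, new_size, &err);
 *     if (err) {
 *         error_report_err(err);
 *     }
 *
 * When the resize succeeds, the "resized" callback registered through
 * qemu_ram_alloc_resizeable() is invoked with the block's idstr, the new
 * size and the host pointer, as in the call above.
 */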
1473 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1476 RAMBlock *last_block = NULL;
1477 ram_addr_t old_ram_size, new_ram_size;
1479 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1481 qemu_mutex_lock_ramlist();
1482 new_block->offset = find_ram_offset(new_block->max_length);
1484 if (!new_block->host) {
1485 if (xen_enabled()) {
1486 xen_ram_alloc(new_block->offset, new_block->max_length,
1489 new_block->host = phys_mem_alloc(new_block->max_length,
1490 &new_block->mr->align);
1491 if (!new_block->host) {
1492 error_setg_errno(errp, errno,
1493 "cannot set up guest memory '%s'",
1494 memory_region_name(new_block->mr));
1495 qemu_mutex_unlock_ramlist();
1498 memory_try_enable_merging(new_block->host, new_block->max_length);
1502 new_ram_size = MAX(old_ram_size,
1503 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1504 if (new_ram_size > old_ram_size) {
1505 migration_bitmap_extend(old_ram_size, new_ram_size);
1507 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1508 * QLIST (which has an RCU-friendly variant) does not have insertion at
1509 * tail, so save the last element in last_block.
1511 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1513 if (block->max_length < new_block->max_length) {
1518 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1519 } else if (last_block) {
1520 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1521 } else { /* list is empty */
1522 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1524 ram_list.mru_block = NULL;
1526 /* Write list before version */
1529 qemu_mutex_unlock_ramlist();
1531 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1533 if (new_ram_size > old_ram_size) {
1536 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1537 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1538 ram_list.dirty_memory[i] =
1539 bitmap_zero_extend(ram_list.dirty_memory[i],
1540 old_ram_size, new_ram_size);
1543 cpu_physical_memory_set_dirty_range(new_block->offset,
1544 new_block->used_length,
1547 if (new_block->host) {
1548 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1549 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1550 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1551 if (kvm_enabled()) {
1552 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1556 return new_block->offset;
1560 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1561 bool share, const char *mem_path,
1564 RAMBlock *new_block;
1566 Error *local_err = NULL;
1568 if (xen_enabled()) {
1569 error_setg(errp, "-mem-path not supported with Xen");
1573 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1575 * file_ram_alloc() needs to allocate just like
1576 * phys_mem_alloc, but we haven't bothered to provide
1580 "-mem-path not supported with this accelerator");
1584 size = TARGET_PAGE_ALIGN(size);
1585 new_block = g_malloc0(sizeof(*new_block));
1587 new_block->used_length = size;
1588 new_block->max_length = size;
1589 new_block->flags = share ? RAM_SHARED : 0;
1590 new_block->flags |= RAM_FILE;
1591 new_block->host = file_ram_alloc(new_block, size,
1593 if (!new_block->host) {
1598 addr = ram_block_add(new_block, &local_err);
1601 error_propagate(errp, local_err);
1609 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1610 void (*resized)(const char*,
1613 void *host, bool resizeable,
1614 MemoryRegion *mr, Error **errp)
1616 RAMBlock *new_block;
1618 Error *local_err = NULL;
1620 size = TARGET_PAGE_ALIGN(size);
1621 max_size = TARGET_PAGE_ALIGN(max_size);
1622 new_block = g_malloc0(sizeof(*new_block));
1624 new_block->resized = resized;
1625 new_block->used_length = size;
1626 new_block->max_length = max_size;
1627 assert(max_size >= size);
1629 new_block->host = host;
1631 new_block->flags |= RAM_PREALLOC;
1634 new_block->flags |= RAM_RESIZEABLE;
1636 addr = ram_block_add(new_block, &local_err);
1639 error_propagate(errp, local_err);
1645 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1646 MemoryRegion *mr, Error **errp)
1648 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1651 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1653 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1656 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1657 void (*resized)(const char*,
1660 MemoryRegion *mr, Error **errp)
1662 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1665 void qemu_ram_free_from_ptr(ram_addr_t addr)
1669 qemu_mutex_lock_ramlist();
1670 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1671 if (addr == block->offset) {
1672 QLIST_REMOVE_RCU(block, next);
1673 ram_list.mru_block = NULL;
1674 /* Write list before version */
1677 g_free_rcu(block, rcu);
1681 qemu_mutex_unlock_ramlist();
1684 static void reclaim_ramblock(RAMBlock *block)
1686 if (block->flags & RAM_PREALLOC) {
1688 } else if (xen_enabled()) {
1689 xen_invalidate_map_cache_entry(block->host);
1691 } else if (block->fd >= 0) {
1692 if (block->flags & RAM_FILE) {
1693 qemu_ram_munmap(block->host, block->max_length);
1695 munmap(block->host, block->max_length);
1700 qemu_anon_ram_free(block->host, block->max_length);
1705 void qemu_ram_free(ram_addr_t addr)
1709 qemu_mutex_lock_ramlist();
1710 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1711 if (addr == block->offset) {
1712 QLIST_REMOVE_RCU(block, next);
1713 ram_list.mru_block = NULL;
1714 /* Write list before version */
1717 call_rcu(block, reclaim_ramblock, rcu);
1721 qemu_mutex_unlock_ramlist();
1725 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1732 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1733 offset = addr - block->offset;
1734 if (offset < block->max_length) {
1735 vaddr = ramblock_ptr(block, offset);
1736 if (block->flags & RAM_PREALLOC) {
1738 } else if (xen_enabled()) {
1742 if (block->fd >= 0) {
1743 flags |= (block->flags & RAM_SHARED ?
1744 MAP_SHARED : MAP_PRIVATE);
1745 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1746 flags, block->fd, offset);
1749 * Remap needs to match alloc. Accelerators that
1750 * set phys_mem_alloc never remap. If they did,
1751 * we'd need a remap hook here.
1753 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1755 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1756 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1759 if (area != vaddr) {
1760 fprintf(stderr, "Could not remap addr: "
1761 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1765 memory_try_enable_merging(vaddr, length);
1766 qemu_ram_setup_dump(vaddr, length);
1771 #endif /* !_WIN32 */
1773 int qemu_get_ram_fd(ram_addr_t addr)
1779 block = qemu_get_ram_block(addr);
1785 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1791 block = qemu_get_ram_block(addr);
1792 ptr = ramblock_ptr(block, 0);
1797 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1798 * This should not be used for general purpose DMA. Use address_space_map
1799 * or address_space_rw instead. For local memory (e.g. video ram) that the
1800 * device owns, use memory_region_get_ram_ptr.
1802 * By the time this function returns, the returned pointer is not protected
1803 * by RCU anymore. If the caller is not within an RCU critical section and
1804 * does not hold the iothread lock, it must have other means of protecting the
1805 * pointer, such as a reference to the region that includes the incoming
1808 void *qemu_get_ram_ptr(ram_addr_t addr)
1814 block = qemu_get_ram_block(addr);
1816 if (xen_enabled() && block->host == NULL) {
1817 /* We need to check if the requested address is in the RAM
1818 * because we don't want to map the entire memory in QEMU.
1819 * In that case just map until the end of the page.
1821 if (block->offset == 0) {
1822 ptr = xen_map_cache(addr, 0, 0);
1826 block->host = xen_map_cache(block->offset, block->max_length, 1);
1828 ptr = ramblock_ptr(block, addr - block->offset);
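/*
 * Usage sketch (illustrative only): a caller with no other reference on
 * the region keeps the whole access inside an RCU critical section:
 *
 *     rcu_read_lock();
 *     ptr = qemu_get_ram_ptr(addr);
 *     memcpy(buf, ptr, len);
 *     rcu_read_unlock();
 *
 * For general-purpose DMA, address_space_rw() or address_space_map()
 * below are the right interfaces, as the comment above notes.
 */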
1835 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1836 * but takes a size argument.
1838 * By the time this function returns, the returned pointer is not protected
1839 * by RCU anymore. If the caller is not within an RCU critical section and
1840 * does not hold the iothread lock, it must have other means of protecting the
1841 * pointer, such as a reference to the region that includes the incoming
1844 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1850 if (xen_enabled()) {
1851 return xen_map_cache(addr, *size, 1);
1855 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1856 if (addr - block->offset < block->max_length) {
1857 if (addr - block->offset + *size > block->max_length)
1858 *size = block->max_length - addr + block->offset;
1859 ptr = ramblock_ptr(block, addr - block->offset);
1865 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1870 /* Some of the softmmu routines need to translate from a host pointer
1871 * (typically a TLB entry) back to a ram offset.
1873 * By the time this function returns, the returned pointer is not protected
1874 * by RCU anymore. If the caller is not within an RCU critical section and
1875 * does not hold the iothread lock, it must have other means of protecting the
1876 * pointer, such as a reference to the region that includes the incoming
1879 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1882 uint8_t *host = ptr;
1885 if (xen_enabled()) {
1887 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1888 mr = qemu_get_ram_block(*ram_addr)->mr;
1894 block = atomic_rcu_read(&ram_list.mru_block);
1895 if (block && block->host && host - block->host < block->max_length) {
1899 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1900 /* This case happens when the block is not mapped. */
1901 if (block->host == NULL) {
1904 if (host - block->host < block->max_length) {
1913 *ram_addr = block->offset + (host - block->host);
1919 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1920 uint64_t val, unsigned size)
1922 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1923 tb_invalidate_phys_page_fast(ram_addr, size);
1927 stb_p(qemu_get_ram_ptr(ram_addr), val);
1930 stw_p(qemu_get_ram_ptr(ram_addr), val);
1933 stl_p(qemu_get_ram_ptr(ram_addr), val);
1938 /* Set both VGA and migration bits for simplicity and to remove
1939 * the notdirty callback faster.
1941 cpu_physical_memory_set_dirty_range(ram_addr, size,
1942 DIRTY_CLIENTS_NOCODE);
1943 /* we remove the notdirty callback only if the code has been
1945 if (!cpu_physical_memory_is_clean(ram_addr)) {
1946 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1950 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1951 unsigned size, bool is_write)
1956 static const MemoryRegionOps notdirty_mem_ops = {
1957 .write = notdirty_mem_write,
1958 .valid.accepts = notdirty_mem_accepts,
1959 .endianness = DEVICE_NATIVE_ENDIAN,
1962 /* Generate a debug exception if a watchpoint has been hit. */
1963 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
1965 CPUState *cpu = current_cpu;
1966 CPUArchState *env = cpu->env_ptr;
1967 target_ulong pc, cs_base;
1972 if (cpu->watchpoint_hit) {
1973 /* We re-entered the check after replacing the TB. Now raise
1974 * the debug interrupt so that it will trigger after the
1975 * current instruction. */
1976 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
1979 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1980 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1981 if (cpu_watchpoint_address_matches(wp, vaddr, len)
1982 && (wp->flags & flags)) {
1983 if (flags == BP_MEM_READ) {
1984 wp->flags |= BP_WATCHPOINT_HIT_READ;
1986 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
1988 wp->hitaddr = vaddr;
1989 wp->hitattrs = attrs;
1990 if (!cpu->watchpoint_hit) {
1991 cpu->watchpoint_hit = wp;
1992 tb_check_watchpoint(cpu);
1993 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1994 cpu->exception_index = EXCP_DEBUG;
1997 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1998 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
1999 cpu_resume_from_signal(cpu, NULL);
2003 wp->flags &= ~BP_WATCHPOINT_HIT;
2008 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2009 so these check for a hit then pass through to the normal out-of-line
2011 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2012 unsigned size, MemTxAttrs attrs)
2017 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2020 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2023 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2026 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2034 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2035 uint64_t val, unsigned size,
2040 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2043 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2046 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2049 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2056 static const MemoryRegionOps watch_mem_ops = {
2057 .read_with_attrs = watch_mem_read,
2058 .write_with_attrs = watch_mem_write,
2059 .endianness = DEVICE_NATIVE_ENDIAN,
2062 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2063 unsigned len, MemTxAttrs attrs)
2065 subpage_t *subpage = opaque;
2069 #if defined(DEBUG_SUBPAGE)
2070 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2071 subpage, len, addr);
2073 res = address_space_read(subpage->as, addr + subpage->base,
2080 *data = ldub_p(buf);
2083 *data = lduw_p(buf);
2096 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2097 uint64_t value, unsigned len, MemTxAttrs attrs)
2099 subpage_t *subpage = opaque;
2102 #if defined(DEBUG_SUBPAGE)
2103 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2104 " value %"PRIx64"\n",
2105 __func__, subpage, len, addr, value);
2123 return address_space_write(subpage->as, addr + subpage->base,
2127 static bool subpage_accepts(void *opaque, hwaddr addr,
2128 unsigned len, bool is_write)
2130 subpage_t *subpage = opaque;
2131 #if defined(DEBUG_SUBPAGE)
2132 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2133 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2136 return address_space_access_valid(subpage->as, addr + subpage->base,
2140 static const MemoryRegionOps subpage_ops = {
2141 .read_with_attrs = subpage_read,
2142 .write_with_attrs = subpage_write,
2143 .impl.min_access_size = 1,
2144 .impl.max_access_size = 8,
2145 .valid.min_access_size = 1,
2146 .valid.max_access_size = 8,
2147 .valid.accepts = subpage_accepts,
2148 .endianness = DEVICE_NATIVE_ENDIAN,
2151 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2156 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2158 idx = SUBPAGE_IDX(start);
2159 eidx = SUBPAGE_IDX(end);
2160 #if defined(DEBUG_SUBPAGE)
2161 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2162 __func__, mmio, start, end, idx, eidx, section);
2164 for (; idx <= eidx; idx++) {
2165 mmio->sub_section[idx] = section;
2171 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2175 mmio = g_malloc0(sizeof(subpage_t));
2179 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2180 NULL, TARGET_PAGE_SIZE);
2181 mmio->iomem.subpage = true;
2182 #if defined(DEBUG_SUBPAGE)
2183 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2184 mmio, base, TARGET_PAGE_SIZE);
2186 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
2191 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2195 MemoryRegionSection section = {
2196 .address_space = as,
2198 .offset_within_address_space = 0,
2199 .offset_within_region = 0,
2200 .size = int128_2_64(),
2203 return phys_section_add(map, &section);
2206 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index)
2208 CPUAddressSpace *cpuas = &cpu->cpu_ases[0];
2209 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2210 MemoryRegionSection *sections = d->map.sections;
2212 return sections[index & ~TARGET_PAGE_MASK].mr;
2215 static void io_mem_init(void)
2217 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2218 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2220 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2222 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2226 static void mem_begin(MemoryListener *listener)
2228 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2229 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2232 n = dummy_section(&d->map, as, &io_mem_unassigned);
2233 assert(n == PHYS_SECTION_UNASSIGNED);
2234 n = dummy_section(&d->map, as, &io_mem_notdirty);
2235 assert(n == PHYS_SECTION_NOTDIRTY);
2236 n = dummy_section(&d->map, as, &io_mem_rom);
2237 assert(n == PHYS_SECTION_ROM);
2238 n = dummy_section(&d->map, as, &io_mem_watch);
2239 assert(n == PHYS_SECTION_WATCH);
2241 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2243 as->next_dispatch = d;
2246 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2248 phys_sections_free(&d->map);
2252 static void mem_commit(MemoryListener *listener)
2254 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2255 AddressSpaceDispatch *cur = as->dispatch;
2256 AddressSpaceDispatch *next = as->next_dispatch;
2258 phys_page_compact_all(next, next->map.nodes_nb);
2260 atomic_rcu_set(&as->dispatch, next);
2262 call_rcu(cur, address_space_dispatch_free, rcu);
2266 static void tcg_commit(MemoryListener *listener)
2268 CPUAddressSpace *cpuas;
2269 AddressSpaceDispatch *d;
2271 /* since each CPU stores ram addresses in its TLB cache, we must
2272 reset the modified entries */
2273 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2274 cpu_reloading_memory_map();
2275 /* The CPU and TLB are protected by the iothread lock.
2276 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2277 * may have split the RCU critical section.
2279 d = atomic_rcu_read(&cpuas->as->dispatch);
2280 cpuas->memory_dispatch = d;
2281 tlb_flush(cpuas->cpu, 1);
2284 void address_space_init_dispatch(AddressSpace *as)
2286 as->dispatch = NULL;
2287 as->dispatch_listener = (MemoryListener) {
2289 .commit = mem_commit,
2290 .region_add = mem_add,
2291 .region_nop = mem_add,
2294 memory_listener_register(&as->dispatch_listener, as);
2297 void address_space_unregister(AddressSpace *as)
2299 memory_listener_unregister(&as->dispatch_listener);
2302 void address_space_destroy_dispatch(AddressSpace *as)
2304 AddressSpaceDispatch *d = as->dispatch;
2306 atomic_rcu_set(&as->dispatch, NULL);
2308 call_rcu(d, address_space_dispatch_free, rcu);
2312 static void memory_map_init(void)
2314 system_memory = g_malloc(sizeof(*system_memory));
2316 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2317 address_space_init(&address_space_memory, system_memory, "memory");
2319 system_io = g_malloc(sizeof(*system_io));
2320 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2322 address_space_init(&address_space_io, system_io, "I/O");
2325 MemoryRegion *get_system_memory(void)
2327 return system_memory;
2330 MemoryRegion *get_system_io(void)
2335 #endif /* !defined(CONFIG_USER_ONLY) */
2337 /* physical memory access (slow version, mainly for debug) */
2338 #if defined(CONFIG_USER_ONLY)
2339 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2340 uint8_t *buf, int len, int is_write)
2347 page = addr & TARGET_PAGE_MASK;
2348 l = (page + TARGET_PAGE_SIZE) - addr;
2351 flags = page_get_flags(page);
2352 if (!(flags & PAGE_VALID))
2355 if (!(flags & PAGE_WRITE))
2357 /* XXX: this code should not depend on lock_user */
2358 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2361 unlock_user(p, addr, l);
2363 if (!(flags & PAGE_READ))
2365 /* XXX: this code should not depend on lock_user */
2366 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2369 unlock_user(p, addr, 0);
2380 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2383 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2384 /* No early return if dirty_log_mask is or becomes 0, because
2385 * cpu_physical_memory_set_dirty_range will still call
2386 * xen_modified_memory.
2388 if (dirty_log_mask) {
2390 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2392 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2393 tb_invalidate_phys_range(addr, addr + length);
2394 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2396 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2399 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2401 unsigned access_size_max = mr->ops->valid.max_access_size;
2403 /* Regions are assumed to support 1-4 byte accesses unless
2404 otherwise specified. */
2405 if (access_size_max == 0) {
2406 access_size_max = 4;
2409 /* Bound the maximum access by the alignment of the address. */
2410 if (!mr->ops->impl.unaligned) {
2411 unsigned align_size_max = addr & -addr;
2412 if (align_size_max != 0 && align_size_max < access_size_max) {
2413 access_size_max = align_size_max;
2417 /* Don't attempt accesses larger than the maximum. */
2418 if (l > access_size_max) {
2419 l = access_size_max;
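/*
 * Worked example of the alignment bound (illustrative): for
 * addr == 0x1006, addr & -addr == 0x2, so even on a region whose
 * valid.max_access_size is 8 the access is limited to 2 bytes; for a
 * naturally aligned addr such as 0x1008, addr & -addr == 8 and the full
 * width is allowed.
 */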
2426 static bool prepare_mmio_access(MemoryRegion *mr)
2428 bool unlocked = !qemu_mutex_iothread_locked();
2429 bool release_lock = false;
2431 if (unlocked && mr->global_locking) {
2432 qemu_mutex_lock_iothread();
2434 release_lock = true;
2436 if (mr->flush_coalesced_mmio) {
2438 qemu_mutex_lock_iothread();
2440 qemu_flush_coalesced_mmio_buffer();
2442 qemu_mutex_unlock_iothread();
2446 return release_lock;
2449 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2450 uint8_t *buf, int len, bool is_write)
2457 MemTxResult result = MEMTX_OK;
2458 bool release_lock = false;
2463 mr = address_space_translate(as, addr, &addr1, &l, is_write);
2466 if (!memory_access_is_direct(mr, is_write)) {
2467 release_lock |= prepare_mmio_access(mr);
2468 l = memory_access_size(mr, l, addr1);
2469 /* XXX: could force current_cpu to NULL to avoid
2473 /* 64 bit write access */
2475 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2479 /* 32 bit write access */
2481 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2485 /* 16 bit write access */
2487 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2491 /* 8 bit write access */
2493 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2500 addr1 += memory_region_get_ram_addr(mr);
2502 ptr = qemu_get_ram_ptr(addr1);
2503 memcpy(ptr, buf, l);
2504 invalidate_and_set_dirty(mr, addr1, l);
2507 if (!memory_access_is_direct(mr, is_write)) {
2509 release_lock |= prepare_mmio_access(mr);
2510 l = memory_access_size(mr, l, addr1);
2513 /* 64 bit read access */
2514 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2519 /* 32 bit read access */
2520 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2525 /* 16 bit read access */
2526 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2531 /* 8 bit read access */
2532 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2541 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2542 memcpy(buf, ptr, l);
2547 qemu_mutex_unlock_iothread();
2548 release_lock = false;
2560 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2561 const uint8_t *buf, int len)
2563 return address_space_rw(as, addr, attrs, (uint8_t *)buf, len, true);
2566 MemTxResult address_space_read(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2567 uint8_t *buf, int len)
2569 return address_space_rw(as, addr, attrs, buf, len, false);
2573 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2574 int len, int is_write)
2576 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2577 buf, len, is_write);
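/*
 * Usage sketch (illustrative only; gpa is a hypothetical guest-physical
 * address): reading four bytes through the convenience wrapper above:
 *
 *     uint8_t buf[4];
 *     cpu_physical_memory_rw(gpa, buf, sizeof(buf), 0);
 *
 * is_write == 0 selects a read; the access goes to address_space_memory
 * with MEMTXATTRS_UNSPECIFIED.
 */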
2580 enum write_rom_type {
2585 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2586 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2596 mr = address_space_translate(as, addr, &addr1, &l, true);
2598 if (!(memory_region_is_ram(mr) ||
2599 memory_region_is_romd(mr))) {
2600 l = memory_access_size(mr, l, addr1);
2602 addr1 += memory_region_get_ram_addr(mr);
2604 ptr = qemu_get_ram_ptr(addr1);
2607 memcpy(ptr, buf, l);
2608 invalidate_and_set_dirty(mr, addr1, l);
2611 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2622 /* used for ROM loading: can write to RAM and ROM */
2623 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2624 const uint8_t *buf, int len)
2626 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
2629 void cpu_flush_icache_range(hwaddr start, int len)
2632 * This function should do the same thing as an icache flush that was
2633 * triggered from within the guest. For TCG we are always cache coherent,
2634 * so there is no need to flush anything. For KVM / Xen we need to flush
2635 * the host's instruction cache at least.
2637 if (tcg_enabled()) {
2641 cpu_physical_memory_write_rom_internal(&address_space_memory,
2642 start, NULL, len, FLUSH_CACHE);
2653 static BounceBuffer bounce;
2655 typedef struct MapClient {
2657 QLIST_ENTRY(MapClient) link;
2660 QemuMutex map_client_list_lock;
2661 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2662 = QLIST_HEAD_INITIALIZER(map_client_list);
2664 static void cpu_unregister_map_client_do(MapClient *client)
2666 QLIST_REMOVE(client, link);
2670 static void cpu_notify_map_clients_locked(void)
2674 while (!QLIST_EMPTY(&map_client_list)) {
2675 client = QLIST_FIRST(&map_client_list);
2676 qemu_bh_schedule(client->bh);
2677 cpu_unregister_map_client_do(client);
2681 void cpu_register_map_client(QEMUBH *bh)
2683 MapClient *client = g_malloc(sizeof(*client));
2685 qemu_mutex_lock(&map_client_list_lock);
2687 QLIST_INSERT_HEAD(&map_client_list, client, link);
2688 if (!atomic_read(&bounce.in_use)) {
2689 cpu_notify_map_clients_locked();
2691 qemu_mutex_unlock(&map_client_list_lock);
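/*
 * Illustrative sketch (not part of the original file): how a DMA user is
 * expected to combine address_space_map() with cpu_register_map_client().
 * When the single bounce buffer is already in use, the map call returns NULL
 * and the caller registers a bottom half so it is woken once a retry is
 * likely to succeed.  "retry_dma_bh" is a hypothetical bottom half the caller
 * created earlier (typically with qemu_bh_new()).
 */
static inline void *example_map_or_defer(AddressSpace *as, hwaddr addr,
                                         hwaddr *plen, bool is_write,
                                         QEMUBH *retry_dma_bh)
{
    void *host = address_space_map(as, addr, plen, is_write);

    if (!host) {
        /* Resources exhausted (bounce buffer busy): retry from the BH. */
        cpu_register_map_client(retry_dma_bh);
    }
    return host;
}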
2694 void cpu_exec_init_all(void)
2696 qemu_mutex_init(&ram_list.mutex);
2699 qemu_mutex_init(&map_client_list_lock);
2702 void cpu_unregister_map_client(QEMUBH *bh)
2706 qemu_mutex_lock(&map_client_list_lock);
2707 QLIST_FOREACH(client, &map_client_list, link) {
2708 if (client->bh == bh) {
2709 cpu_unregister_map_client_do(client);
2713 qemu_mutex_unlock(&map_client_list_lock);
2716 static void cpu_notify_map_clients(void)
2718 qemu_mutex_lock(&map_client_list_lock);
2719 cpu_notify_map_clients_locked();
2720 qemu_mutex_unlock(&map_client_list_lock);
2723 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2731 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2732 if (!memory_access_is_direct(mr, is_write)) {
2733 l = memory_access_size(mr, l, addr);
2734 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2746 /* Map a physical memory region into a host virtual address.
2747 * May map a subset of the requested range, given by and returned in *plen.
2748 * May return NULL if resources needed to perform the mapping are exhausted.
2749 * Use only for reads OR writes - not for read-modify-write operations.
2750 * Use cpu_register_map_client() to know when retrying the map operation is
2751 * likely to succeed.
2753 void *address_space_map(AddressSpace *as,
2760 hwaddr l, xlat, base;
2761 MemoryRegion *mr, *this_mr;
2770 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2772 if (!memory_access_is_direct(mr, is_write)) {
2773 if (atomic_xchg(&bounce.in_use, true)) {
2777 /* Avoid unbounded allocations */
2778 l = MIN(l, TARGET_PAGE_SIZE);
2779 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2783 memory_region_ref(mr);
2786 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2792 return bounce.buffer;
2796 raddr = memory_region_get_ram_addr(mr);
2807 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2808 if (this_mr != mr || xlat != base + done) {
2813 memory_region_ref(mr);
2816 return qemu_ram_ptr_length(raddr + base, plen);
2819 /* Unmaps a memory region previously mapped by address_space_map().
2820 * Will also mark the memory as dirty if is_write == 1. access_len gives
2821 * the amount of memory that was actually read or written by the caller.
2823 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2824 int is_write, hwaddr access_len)
2826 if (buffer != bounce.buffer) {
2830 mr = qemu_ram_addr_from_host(buffer, &addr1);
2833 invalidate_and_set_dirty(mr, addr1, access_len);
2835 if (xen_enabled()) {
2836 xen_invalidate_map_cache_entry(buffer);
2838 memory_region_unref(mr);
2842 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2843 bounce.buffer, access_len);
2845 qemu_vfree(bounce.buffer);
2846 bounce.buffer = NULL;
2847 memory_region_unref(bounce.mr);
2848 atomic_mb_set(&bounce.in_use, false);
2849 cpu_notify_map_clients();
2852 void *cpu_physical_memory_map(hwaddr addr,
2856 return address_space_map(&address_space_memory, addr, plen, is_write);
2859 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2860 int is_write, hwaddr access_len)
2862 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
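/*
 * Illustrative sketch (not part of the original file): the full
 * map -> access -> unmap lifecycle documented above.  Note that the length
 * actually mapped (plen) may be smaller than requested, and that the
 * access_len passed to the unmap call is what gets marked dirty for a write
 * mapping.  The guest address, length and fill pattern are hypothetical.
 */
static inline void example_fill_guest_buffer(hwaddr gpa, hwaddr len, uint8_t pattern)
{
    hwaddr plen = len;
    void *host = cpu_physical_memory_map(gpa, &plen, /*is_write=*/1);

    if (!host) {
        return; /* see cpu_register_map_client() for the retry path */
    }
    memset(host, pattern, plen);
    cpu_physical_memory_unmap(host, plen, /*is_write=*/1, /*access_len=*/plen);
}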
2865 /* warning: addr must be aligned */
2866 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2868 MemTxResult *result,
2869 enum device_endian endian)
2877 bool release_lock = false;
2880 mr = address_space_translate(as, addr, &addr1, &l, false);
2881 if (l < 4 || !memory_access_is_direct(mr, false)) {
2882 release_lock |= prepare_mmio_access(mr);
2885 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
2886 #if defined(TARGET_WORDS_BIGENDIAN)
2887 if (endian == DEVICE_LITTLE_ENDIAN) {
2891 if (endian == DEVICE_BIG_ENDIAN) {
2897 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2901 case DEVICE_LITTLE_ENDIAN:
2902 val = ldl_le_p(ptr);
2904 case DEVICE_BIG_ENDIAN:
2905 val = ldl_be_p(ptr);
2917 qemu_mutex_unlock_iothread();
2923 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
2924 MemTxAttrs attrs, MemTxResult *result)
2926 return address_space_ldl_internal(as, addr, attrs, result,
2927 DEVICE_NATIVE_ENDIAN);
2930 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
2931 MemTxAttrs attrs, MemTxResult *result)
2933 return address_space_ldl_internal(as, addr, attrs, result,
2934 DEVICE_LITTLE_ENDIAN);
2937 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
2938 MemTxAttrs attrs, MemTxResult *result)
2940 return address_space_ldl_internal(as, addr, attrs, result,
2944 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
2946 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2949 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
2951 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
2954 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
2956 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
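/*
 * Illustrative sketch (not part of the original file): reading a 32-bit
 * little-endian field from a hypothetical in-guest descriptor with the
 * helpers above.  The *_phys variants discard the MemTxResult; a device model
 * that cares about bus errors should call address_space_ldl_le() and pass a
 * result pointer instead.
 */
static inline uint32_t example_read_desc_field(hwaddr desc_gpa)
{
    return ldl_le_phys(&address_space_memory, desc_gpa);
}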
2959 /* warning: addr must be aligned */
2960 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
2962 MemTxResult *result,
2963 enum device_endian endian)
2971 bool release_lock = false;
2974 mr = address_space_translate(as, addr, &addr1, &l,
2976 if (l < 8 || !memory_access_is_direct(mr, false)) {
2977 release_lock |= prepare_mmio_access(mr);
2980 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
2981 #if defined(TARGET_WORDS_BIGENDIAN)
2982 if (endian == DEVICE_LITTLE_ENDIAN) {
2986 if (endian == DEVICE_BIG_ENDIAN) {
2992 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
2996 case DEVICE_LITTLE_ENDIAN:
2997 val = ldq_le_p(ptr);
2999 case DEVICE_BIG_ENDIAN:
3000 val = ldq_be_p(ptr);
3012 qemu_mutex_unlock_iothread();
3018 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3019 MemTxAttrs attrs, MemTxResult *result)
3021 return address_space_ldq_internal(as, addr, attrs, result,
3022 DEVICE_NATIVE_ENDIAN);
3025 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3026 MemTxAttrs attrs, MemTxResult *result)
3028 return address_space_ldq_internal(as, addr, attrs, result,
3029 DEVICE_LITTLE_ENDIAN);
3032 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3033 MemTxAttrs attrs, MemTxResult *result)
3035 return address_space_ldq_internal(as, addr, attrs, result,
3039 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3041 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3044 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3046 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3049 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3051 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3055 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3056 MemTxAttrs attrs, MemTxResult *result)
3061 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3068 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3070 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3073 /* warning: addr must be aligned */
3074 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3077 MemTxResult *result,
3078 enum device_endian endian)
3086 bool release_lock = false;
3089 mr = address_space_translate(as, addr, &addr1, &l,
3091 if (l < 2 || !memory_access_is_direct(mr, false)) {
3092 release_lock |= prepare_mmio_access(mr);
3095 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3096 #if defined(TARGET_WORDS_BIGENDIAN)
3097 if (endian == DEVICE_LITTLE_ENDIAN) {
3101 if (endian == DEVICE_BIG_ENDIAN) {
3107 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3111 case DEVICE_LITTLE_ENDIAN:
3112 val = lduw_le_p(ptr);
3114 case DEVICE_BIG_ENDIAN:
3115 val = lduw_be_p(ptr);
3127 qemu_mutex_unlock_iothread();
3133 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3134 MemTxAttrs attrs, MemTxResult *result)
3136 return address_space_lduw_internal(as, addr, attrs, result,
3137 DEVICE_NATIVE_ENDIAN);
3140 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3141 MemTxAttrs attrs, MemTxResult *result)
3143 return address_space_lduw_internal(as, addr, attrs, result,
3144 DEVICE_LITTLE_ENDIAN);
3147 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3148 MemTxAttrs attrs, MemTxResult *result)
3150 return address_space_lduw_internal(as, addr, attrs, result,
3154 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3156 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3159 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3161 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3166 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3169 /* warning: addr must be aligned. The RAM page is not marked as dirty
3170    and the code inside is not invalidated. This is useful if the dirty
3171    bits are used to track modified PTEs. */
3172 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3173 MemTxAttrs attrs, MemTxResult *result)
3180 uint8_t dirty_log_mask;
3181 bool release_lock = false;
3184 mr = address_space_translate(as, addr, &addr1, &l,
3186 if (l < 4 || !memory_access_is_direct(mr, true)) {
3187 release_lock |= prepare_mmio_access(mr);
3189 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3191 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3192 ptr = qemu_get_ram_ptr(addr1);
3195 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3196 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3197 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3204 qemu_mutex_unlock_iothread();
3209 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3211 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
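/*
 * Illustrative sketch (not part of the original file): the intended use of
 * the _notdirty store above.  As the dispatch code shows, the write is still
 * logged in the other dirty bitmaps, but DIRTY_MEMORY_CODE is masked out, so
 * translated code covering the page is not invalidated.  A target MMU helper
 * writing back an accessed/dirty flag into a guest page-table entry is one
 * plausible caller; "pte_gpa", "pte" and the bit value are hypothetical.
 */
static inline void example_set_pte_accessed(AddressSpace *as, hwaddr pte_gpa,
                                            uint32_t pte)
{
    stl_phys_notdirty(as, pte_gpa, pte | 0x20 /* hypothetical accessed bit */);
}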
3214 /* warning: addr must be aligned */
3215 static inline void address_space_stl_internal(AddressSpace *as,
3216 hwaddr addr, uint32_t val,
3218 MemTxResult *result,
3219 enum device_endian endian)
3226 bool release_lock = false;
3229 mr = address_space_translate(as, addr, &addr1, &l,
3231 if (l < 4 || !memory_access_is_direct(mr, true)) {
3232 release_lock |= prepare_mmio_access(mr);
3234 #if defined(TARGET_WORDS_BIGENDIAN)
3235 if (endian == DEVICE_LITTLE_ENDIAN) {
3239 if (endian == DEVICE_BIG_ENDIAN) {
3243 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3246 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3247 ptr = qemu_get_ram_ptr(addr1);
3249 case DEVICE_LITTLE_ENDIAN:
3252 case DEVICE_BIG_ENDIAN:
3259 invalidate_and_set_dirty(mr, addr1, 4);
3266 qemu_mutex_unlock_iothread();
3271 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3272 MemTxAttrs attrs, MemTxResult *result)
3274 address_space_stl_internal(as, addr, val, attrs, result,
3275 DEVICE_NATIVE_ENDIAN);
3278 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3279 MemTxAttrs attrs, MemTxResult *result)
3281 address_space_stl_internal(as, addr, val, attrs, result,
3282 DEVICE_LITTLE_ENDIAN);
3285 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3286 MemTxAttrs attrs, MemTxResult *result)
3288 address_space_stl_internal(as, addr, val, attrs, result,
3292 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3294 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3297 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3299 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3302 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3304 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
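/*
 * Illustrative sketch (not part of the original file): publishing a 32-bit
 * little-endian index into guest memory with the helpers above.  "ring_gpa"
 * and "idx" are hypothetical; a caller that needs to observe MEMTX_* errors
 * should use address_space_stl_le() with a MemTxResult pointer instead of
 * this fire-and-forget wrapper.
 */
static inline void example_publish_index(hwaddr ring_gpa, uint32_t idx)
{
    stl_le_phys(&address_space_memory, ring_gpa, idx);
}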
3308 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3309 MemTxAttrs attrs, MemTxResult *result)
3314 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3320 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3322 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3325 /* warning: addr must be aligned */
3326 static inline void address_space_stw_internal(AddressSpace *as,
3327 hwaddr addr, uint32_t val,
3329 MemTxResult *result,
3330 enum device_endian endian)
3337 bool release_lock = false;
3340 mr = address_space_translate(as, addr, &addr1, &l, true);
3341 if (l < 2 || !memory_access_is_direct(mr, true)) {
3342 release_lock |= prepare_mmio_access(mr);
3344 #if defined(TARGET_WORDS_BIGENDIAN)
3345 if (endian == DEVICE_LITTLE_ENDIAN) {
3349 if (endian == DEVICE_BIG_ENDIAN) {
3353 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3356 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3357 ptr = qemu_get_ram_ptr(addr1);
3359 case DEVICE_LITTLE_ENDIAN:
3362 case DEVICE_BIG_ENDIAN:
3369 invalidate_and_set_dirty(mr, addr1, 2);
3376 qemu_mutex_unlock_iothread();
3381 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3382 MemTxAttrs attrs, MemTxResult *result)
3384 address_space_stw_internal(as, addr, val, attrs, result,
3385 DEVICE_NATIVE_ENDIAN);
3388 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3389 MemTxAttrs attrs, MemTxResult *result)
3391 address_space_stw_internal(as, addr, val, attrs, result,
3392 DEVICE_LITTLE_ENDIAN);
3395 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3396 MemTxAttrs attrs, MemTxResult *result)
3398 address_space_stw_internal(as, addr, val, attrs, result,
3402 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3404 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3407 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3409 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3412 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3414 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3418 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3419 MemTxAttrs attrs, MemTxResult *result)
3423 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3429 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3430 MemTxAttrs attrs, MemTxResult *result)
3433 val = cpu_to_le64(val);
3434 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3439 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3440 MemTxAttrs attrs, MemTxResult *result)
3443 val = cpu_to_be64(val);
3444 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3450 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3452 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3455 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3457 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3460 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3462 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
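/*
 * Illustrative sketch (not part of the original file): a 64-bit store that
 * checks the transaction result rather than using the stq_*_phys wrappers
 * above.  "table_gpa" and "entry" are hypothetical.
 */
static inline bool example_store_table_entry(AddressSpace *as, hwaddr table_gpa,
                                             uint64_t entry)
{
    MemTxResult r;

    address_space_stq_le(as, table_gpa, entry, MEMTXATTRS_UNSPECIFIED, &r);
    return r == MEMTX_OK;
}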
3465 /* virtual memory access for debug (includes writing to ROM) */
3466 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3467 uint8_t *buf, int len, int is_write)
3474 page = addr & TARGET_PAGE_MASK;
3475 phys_addr = cpu_get_phys_page_debug(cpu, page);
3476 /* if no physical page mapped, return an error */
3477 if (phys_addr == -1)
3479 l = (page + TARGET_PAGE_SIZE) - addr;
3482 phys_addr += (addr & ~TARGET_PAGE_MASK);
3484 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3486 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
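/*
 * Illustrative sketch (not part of the original file): how a debugger front
 * end (for instance the gdbstub) would use cpu_memory_rw_debug() to read
 * guest-virtual memory.  A negative return indicates that some page in the
 * range had no physical mapping.
 */
static inline int example_debug_read(CPUState *cpu, target_ulong vaddr,
                                     void *dest, int size)
{
    return cpu_memory_rw_debug(cpu, vaddr, dest, size, /*is_write=*/0);
}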
3498 * A helper function for the _utterly broken_ virtio device model to find out if
3499 * it's running on a big endian machine. Don't do this at home kids!
3501 bool target_words_bigendian(void);
3502 bool target_words_bigendian(void)
3504 #if defined(TARGET_WORDS_BIGENDIAN)
3511 #ifndef CONFIG_USER_ONLY
3512 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3519 mr = address_space_translate(&address_space_memory,
3520 phys_addr, &phys_addr, &l, false);
3522 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3527 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3533 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3534 ret = func(block->idstr, block->host, block->offset,
3535 block->used_length, opaque);
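/*
 * Illustrative sketch (not part of the original file): a RAMBlockIterFunc
 * callback matching the call above, used here to total the registered guest
 * RAM.  By convention a non-zero return value from the callback stops the
 * walk; this one always continues.
 */
static int example_count_ram_cb(const char *block_name, void *host_addr,
                                ram_addr_t offset, ram_addr_t length,
                                void *opaque)
{
    uint64_t *total = opaque;

    *total += length;
    return 0; /* keep iterating */
}

static inline uint64_t example_count_ram(void)
{
    uint64_t total = 0;

    qemu_ram_foreach_block(example_count_ram_cb, &total);
    return total;
}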