4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21 #include <sys/types.h>
25 #include "qemu-common.h"
29 #if !defined(CONFIG_USER_ONLY)
30 #include "hw/boards.h"
33 #include "qemu/osdep.h"
34 #include "sysemu/kvm.h"
35 #include "sysemu/sysemu.h"
36 #include "hw/xen/xen.h"
37 #include "qemu/timer.h"
38 #include "qemu/config-file.h"
39 #include "qemu/error-report.h"
40 #include "exec/memory.h"
41 #include "sysemu/dma.h"
42 #include "exec/address-spaces.h"
43 #if defined(CONFIG_USER_ONLY)
45 #else /* !CONFIG_USER_ONLY */
46 #include "sysemu/xen-mapcache.h"
49 #include "exec/cpu-all.h"
50 #include "qemu/rcu_queue.h"
51 #include "qemu/main-loop.h"
52 #include "translate-all.h"
53 #include "sysemu/replay.h"
55 #include "exec/memory-internal.h"
56 #include "exec/ram_addr.h"
58 #include "qemu/range.h"
60 #include "qemu/mmap-alloc.h"
63 //#define DEBUG_SUBPAGE
65 #if !defined(CONFIG_USER_ONLY)
66 /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes
67 * are protected by the ramlist lock.
69 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
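/* Illustrative sketch of the locking protocol described above: readers walk
 * ram_list.blocks inside an RCU critical section, while writers serialize on
 * the ramlist lock (qemu_mutex_lock_ramlist()/qemu_mutex_unlock_ramlist()).
 * The helper below is hypothetical and only meant to show the pattern.
 */
static inline unsigned ram_list_count_blocks_example(void)
{
    RAMBlock *block;
    unsigned count = 0;

    rcu_read_lock();
    QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
        count++;
    }
    rcu_read_unlock();
    return count;
}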
71 static MemoryRegion *system_memory;
72 static MemoryRegion *system_io;
74 AddressSpace address_space_io;
75 AddressSpace address_space_memory;
77 MemoryRegion io_mem_rom, io_mem_notdirty;
78 static MemoryRegion io_mem_unassigned;
80 /* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */
81 #define RAM_PREALLOC (1 << 0)
83 /* RAM is mmap-ed with MAP_SHARED */
84 #define RAM_SHARED (1 << 1)
86 /* Only a portion of RAM (used_length) is actually used, and migrated.
87 * This used_length size can change across reboots.
89 #define RAM_RESIZEABLE (1 << 2)
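/* These flag bits are combined into a bitmask kept in RAMBlock::flags; later
 * code tests them with expressions such as "block->flags & RAM_PREALLOC" or
 * "block->flags & RAM_SHARED" (see reclaim_ramblock() and qemu_ram_remap()
 * below).
 */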
93 struct CPUTailQ cpus = QTAILQ_HEAD_INITIALIZER(cpus);
94 /* current CPU in the current thread. It is only valid inside
96 __thread CPUState *current_cpu;
97 /* 0 = Do not count executed instructions.
98 1 = Precise instruction counting.
99 2 = Adaptive rate instruction counting. */
102 #if !defined(CONFIG_USER_ONLY)
104 typedef struct PhysPageEntry PhysPageEntry;
106 struct PhysPageEntry {
107 /* How many bits to skip to the next level (in units of P_L2_SIZE). 0 for a leaf. */
109 /* index into phys_sections (!skip) or phys_map_nodes (skip) */
113 #define PHYS_MAP_NODE_NIL (((uint32_t)~0) >> 6)
115 /* Size of the L2 (and L3, etc) page tables. */
116 #define ADDR_SPACE_BITS 64
119 #define P_L2_SIZE (1 << P_L2_BITS)
121 #define P_L2_LEVELS (((ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / P_L2_BITS) + 1)
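/* Worked example (assuming P_L2_BITS is 9 and TARGET_PAGE_BITS is 12, i.e.
 * 4 KiB target pages): each level resolves 9 address bits, so
 * P_L2_LEVELS = ((64 - 12 - 1) / 9) + 1 = 6 levels, enough to cover the
 * remaining 64 - 12 = 52 bits of page index.
 */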
123 typedef PhysPageEntry Node[P_L2_SIZE];
125 typedef struct PhysPageMap {
128 unsigned sections_nb;
129 unsigned sections_nb_alloc;
131 unsigned nodes_nb_alloc;
133 MemoryRegionSection *sections;
136 struct AddressSpaceDispatch {
139 /* This is a multi-level map on the physical address space.
140 * The bottom level has pointers to MemoryRegionSections.
142 PhysPageEntry phys_map;
147 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
148 typedef struct subpage_t {
152 uint16_t sub_section[TARGET_PAGE_SIZE];
155 #define PHYS_SECTION_UNASSIGNED 0
156 #define PHYS_SECTION_NOTDIRTY 1
157 #define PHYS_SECTION_ROM 2
158 #define PHYS_SECTION_WATCH 3
160 static void io_mem_init(void);
161 static void memory_map_init(void);
162 static void tcg_commit(MemoryListener *listener);
164 static MemoryRegion io_mem_watch;
167 * CPUAddressSpace: all the information a CPU needs about an AddressSpace
168 * @cpu: the CPU whose AddressSpace this is
169 * @as: the AddressSpace itself
170 * @memory_dispatch: its dispatch pointer (cached, RCU protected)
171 * @tcg_as_listener: listener for tracking changes to the AddressSpace
173 struct CPUAddressSpace {
176 struct AddressSpaceDispatch *memory_dispatch;
177 MemoryListener tcg_as_listener;
182 #if !defined(CONFIG_USER_ONLY)
184 static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes)
186 if (map->nodes_nb + nodes > map->nodes_nb_alloc) {
187 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc * 2, 16);
188 map->nodes_nb_alloc = MAX(map->nodes_nb_alloc, map->nodes_nb + nodes);
189 map->nodes = g_renew(Node, map->nodes, map->nodes_nb_alloc);
193 static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf)
200 ret = map->nodes_nb++;
202 assert(ret != PHYS_MAP_NODE_NIL);
203 assert(ret != map->nodes_nb_alloc);
205 e.skip = leaf ? 0 : 1;
206 e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL;
207 for (i = 0; i < P_L2_SIZE; ++i) {
208 memcpy(&p[i], &e, sizeof(e));
213 static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp,
214 hwaddr *index, hwaddr *nb, uint16_t leaf,
218 hwaddr step = (hwaddr)1 << (level * P_L2_BITS);
220 if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) {
221 lp->ptr = phys_map_node_alloc(map, level == 0);
223 p = map->nodes[lp->ptr];
224 lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)];
226 while (*nb && lp < &p[P_L2_SIZE]) {
227 if ((*index & (step - 1)) == 0 && *nb >= step) {
233 phys_page_set_level(map, lp, index, nb, leaf, level - 1);
239 static void phys_page_set(AddressSpaceDispatch *d,
240 hwaddr index, hwaddr nb,
243 /* Wildly overreserve - it doesn't matter much. */
244 phys_map_node_reserve(&d->map, 3 * P_L2_LEVELS);
246 phys_page_set_level(&d->map, &d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
249 /* Compact a non-leaf page entry. Simply detect that the entry has a single child,
250 * and update our entry so we can skip it and go directly to the destination.
252 static void phys_page_compact(PhysPageEntry *lp, Node *nodes, unsigned long *compacted)
254 unsigned valid_ptr = P_L2_SIZE;
259 if (lp->ptr == PHYS_MAP_NODE_NIL) {
264 for (i = 0; i < P_L2_SIZE; i++) {
265 if (p[i].ptr == PHYS_MAP_NODE_NIL) {
272 phys_page_compact(&p[i], nodes, compacted);
276 /* We can only compress if there's only one child. */
281 assert(valid_ptr < P_L2_SIZE);
283 /* Don't compress if it won't fit in the # of bits we have. */
284 if (lp->skip + p[valid_ptr].skip >= (1 << 3)) {
288 lp->ptr = p[valid_ptr].ptr;
289 if (!p[valid_ptr].skip) {
290 /* If our only child is a leaf, make this a leaf. */
291 /* By design, we should have made this node a leaf to begin with so we
292 * should never reach here.
293 * But since it's so simple to handle this, let's do it just in case we
298 lp->skip += p[valid_ptr].skip;
302 static void phys_page_compact_all(AddressSpaceDispatch *d, int nodes_nb)
304 DECLARE_BITMAP(compacted, nodes_nb);
306 if (d->phys_map.skip) {
307 phys_page_compact(&d->phys_map, d->map.nodes, compacted);
311 static MemoryRegionSection *phys_page_find(PhysPageEntry lp, hwaddr addr,
312 Node *nodes, MemoryRegionSection *sections)
315 hwaddr index = addr >> TARGET_PAGE_BITS;
318 for (i = P_L2_LEVELS; lp.skip && (i -= lp.skip) >= 0;) {
319 if (lp.ptr == PHYS_MAP_NODE_NIL) {
320 return &sections[PHYS_SECTION_UNASSIGNED];
323 lp = p[(index >> (i * P_L2_BITS)) & (P_L2_SIZE - 1)];
326 if (sections[lp.ptr].size.hi ||
327 range_covers_byte(sections[lp.ptr].offset_within_address_space,
328 sections[lp.ptr].size.lo, addr)) {
329 return &sections[lp.ptr];
331 return &sections[PHYS_SECTION_UNASSIGNED];
335 bool memory_region_is_unassigned(MemoryRegion *mr)
337 return mr != &io_mem_rom && mr != &io_mem_notdirty && !mr->rom_device
338 && mr != &io_mem_watch;
341 /* Called from RCU critical section */
342 static MemoryRegionSection *address_space_lookup_region(AddressSpaceDispatch *d,
344 bool resolve_subpage)
346 MemoryRegionSection *section;
349 section = phys_page_find(d->phys_map, addr, d->map.nodes, d->map.sections);
350 if (resolve_subpage && section->mr->subpage) {
351 subpage = container_of(section->mr, subpage_t, iomem);
352 section = &d->map.sections[subpage->sub_section[SUBPAGE_IDX(addr)]];
357 /* Called from RCU critical section */
358 static MemoryRegionSection *
359 address_space_translate_internal(AddressSpaceDispatch *d, hwaddr addr, hwaddr *xlat,
360 hwaddr *plen, bool resolve_subpage)
362 MemoryRegionSection *section;
366 section = address_space_lookup_region(d, addr, resolve_subpage);
367 /* Compute offset within MemoryRegionSection */
368 addr -= section->offset_within_address_space;
370 /* Compute offset within MemoryRegion */
371 *xlat = addr + section->offset_within_region;
375 /* MMIO registers can be expected to perform full-width accesses based only
376 * on their address, without considering adjacent registers that could
377 * decode to completely different MemoryRegions. When such registers
378 * exist (e.g. I/O ports 0xcf8 and 0xcf9 on most PC chipsets), MMIO
379 * regions overlap wildly. For this reason we cannot clamp the accesses
382 * If the length is small (as is the case for address_space_ldl/stl),
383 * everything works fine. If the incoming length is large, however,
384 * the caller really has to do the clamping through memory_access_size.
386 if (memory_region_is_ram(mr)) {
387 diff = int128_sub(section->size, int128_make64(addr));
388 *plen = int128_get64(int128_min(diff, int128_make64(*plen)));
393 /* Called from RCU critical section */
394 MemoryRegion *address_space_translate(AddressSpace *as, hwaddr addr,
395 hwaddr *xlat, hwaddr *plen,
399 MemoryRegionSection *section;
403 AddressSpaceDispatch *d = atomic_rcu_read(&as->dispatch);
404 section = address_space_translate_internal(d, addr, &addr, plen, true);
407 if (!mr->iommu_ops) {
411 iotlb = mr->iommu_ops->translate(mr, addr, is_write);
412 addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
413 | (addr & iotlb.addr_mask));
414 *plen = MIN(*plen, (addr | iotlb.addr_mask) - addr + 1);
415 if (!(iotlb.perm & (1 << is_write))) {
416 mr = &io_mem_unassigned;
420 as = iotlb.target_as;
423 if (xen_enabled() && memory_access_is_direct(mr, is_write)) {
424 hwaddr page = ((addr & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE) - addr;
425 *plen = MIN(page, *plen);
432 /* Called from RCU critical section */
433 MemoryRegionSection *
434 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
435 hwaddr *xlat, hwaddr *plen)
437 MemoryRegionSection *section;
438 AddressSpaceDispatch *d = cpu->cpu_ases[asidx].memory_dispatch;
440 section = address_space_translate_internal(d, addr, xlat, plen, false);
442 assert(!section->mr->iommu_ops);
447 #if !defined(CONFIG_USER_ONLY)
449 static int cpu_common_post_load(void *opaque, int version_id)
451 CPUState *cpu = opaque;
453 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
454 version_id is increased. */
455 cpu->interrupt_request &= ~0x01;
461 static int cpu_common_pre_load(void *opaque)
463 CPUState *cpu = opaque;
465 cpu->exception_index = -1;
470 static bool cpu_common_exception_index_needed(void *opaque)
472 CPUState *cpu = opaque;
474 return tcg_enabled() && cpu->exception_index != -1;
477 static const VMStateDescription vmstate_cpu_common_exception_index = {
478 .name = "cpu_common/exception_index",
480 .minimum_version_id = 1,
481 .needed = cpu_common_exception_index_needed,
482 .fields = (VMStateField[]) {
483 VMSTATE_INT32(exception_index, CPUState),
484 VMSTATE_END_OF_LIST()
488 static bool cpu_common_crash_occurred_needed(void *opaque)
490 CPUState *cpu = opaque;
492 return cpu->crash_occurred;
495 static const VMStateDescription vmstate_cpu_common_crash_occurred = {
496 .name = "cpu_common/crash_occurred",
498 .minimum_version_id = 1,
499 .needed = cpu_common_crash_occurred_needed,
500 .fields = (VMStateField[]) {
501 VMSTATE_BOOL(crash_occurred, CPUState),
502 VMSTATE_END_OF_LIST()
506 const VMStateDescription vmstate_cpu_common = {
507 .name = "cpu_common",
509 .minimum_version_id = 1,
510 .pre_load = cpu_common_pre_load,
511 .post_load = cpu_common_post_load,
512 .fields = (VMStateField[]) {
513 VMSTATE_UINT32(halted, CPUState),
514 VMSTATE_UINT32(interrupt_request, CPUState),
515 VMSTATE_END_OF_LIST()
517 .subsections = (const VMStateDescription*[]) {
518 &vmstate_cpu_common_exception_index,
519 &vmstate_cpu_common_crash_occurred,
526 CPUState *qemu_get_cpu(int index)
531 if (cpu->cpu_index == index) {
539 #if !defined(CONFIG_USER_ONLY)
540 void cpu_address_space_init(CPUState *cpu, AddressSpace *as, int asidx)
542 CPUAddressSpace *newas;
544 /* Target code should have set num_ases before calling us */
545 assert(asidx < cpu->num_ases);
548 /* address space 0 gets the convenience alias */
552 /* KVM cannot currently support multiple address spaces. */
553 assert(asidx == 0 || !kvm_enabled());
555 if (!cpu->cpu_ases) {
556 cpu->cpu_ases = g_new0(CPUAddressSpace, cpu->num_ases);
559 newas = &cpu->cpu_ases[asidx];
563 newas->tcg_as_listener.commit = tcg_commit;
564 memory_listener_register(&newas->tcg_as_listener, as);
568 AddressSpace *cpu_get_address_space(CPUState *cpu, int asidx)
570 /* Return the AddressSpace corresponding to the specified index */
571 return cpu->cpu_ases[asidx].as;
575 #ifndef CONFIG_USER_ONLY
576 static DECLARE_BITMAP(cpu_index_map, MAX_CPUMASK_BITS);
578 static int cpu_get_free_index(Error **errp)
580 int cpu = find_first_zero_bit(cpu_index_map, MAX_CPUMASK_BITS);
582 if (cpu >= MAX_CPUMASK_BITS) {
583 error_setg(errp, "Trying to use more CPUs than max of %d",
588 bitmap_set(cpu_index_map, cpu, 1);
592 void cpu_exec_exit(CPUState *cpu)
594 if (cpu->cpu_index == -1) {
595 /* cpu_index was never allocated by this @cpu or was already freed. */
599 bitmap_clear(cpu_index_map, cpu->cpu_index, 1);
604 static int cpu_get_free_index(Error **errp)
609 CPU_FOREACH(some_cpu) {
615 void cpu_exec_exit(CPUState *cpu)
620 void cpu_exec_init(CPUState *cpu, Error **errp)
622 CPUClass *cc = CPU_GET_CLASS(cpu);
624 Error *local_err = NULL;
629 #ifndef CONFIG_USER_ONLY
630 cpu->thread_id = qemu_get_thread_id();
633 #if defined(CONFIG_USER_ONLY)
636 cpu_index = cpu->cpu_index = cpu_get_free_index(&local_err);
638 error_propagate(errp, local_err);
639 #if defined(CONFIG_USER_ONLY)
644 QTAILQ_INSERT_TAIL(&cpus, cpu, node);
645 #if defined(CONFIG_USER_ONLY)
648 if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
649 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
651 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
652 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
653 cpu_save, cpu_load, cpu->env_ptr);
654 assert(cc->vmsd == NULL);
655 assert(qdev_get_vmsd(DEVICE(cpu)) == NULL);
657 if (cc->vmsd != NULL) {
658 vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
662 #if defined(CONFIG_USER_ONLY)
663 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
665 tb_invalidate_phys_page_range(pc, pc + 1, 0);
668 static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
670 hwaddr phys = cpu_get_phys_page_debug(cpu, pc);
672 tb_invalidate_phys_addr(cpu->as,
673 phys | (pc & ~TARGET_PAGE_MASK));
678 #if defined(CONFIG_USER_ONLY)
679 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
684 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
690 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
694 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
695 int flags, CPUWatchpoint **watchpoint)
700 /* Add a watchpoint. */
701 int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
702 int flags, CPUWatchpoint **watchpoint)
706 /* forbid ranges which are empty or run off the end of the address space */
707 if (len == 0 || (addr + len - 1) < addr) {
708 error_report("tried to set invalid watchpoint at %"
709 VADDR_PRIx ", len=%" VADDR_PRIu, addr, len);
712 wp = g_malloc(sizeof(*wp));
718 /* keep all GDB-injected watchpoints in front */
719 if (flags & BP_GDB) {
720 QTAILQ_INSERT_HEAD(&cpu->watchpoints, wp, entry);
722 QTAILQ_INSERT_TAIL(&cpu->watchpoints, wp, entry);
725 tlb_flush_page(cpu, addr);
732 /* Remove a specific watchpoint. */
733 int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
738 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
739 if (addr == wp->vaddr && len == wp->len
740 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
741 cpu_watchpoint_remove_by_ref(cpu, wp);
748 /* Remove a specific watchpoint by reference. */
749 void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
751 QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
753 tlb_flush_page(cpu, watchpoint->vaddr);
758 /* Remove all matching watchpoints. */
759 void cpu_watchpoint_remove_all(CPUState *cpu, int mask)
761 CPUWatchpoint *wp, *next;
763 QTAILQ_FOREACH_SAFE(wp, &cpu->watchpoints, entry, next) {
764 if (wp->flags & mask) {
765 cpu_watchpoint_remove_by_ref(cpu, wp);
770 /* Return true if this watchpoint address matches the specified
771 * access (i.e. the address range covered by the watchpoint overlaps
772 * partially or completely with the address range covered by the
775 static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
779 /* We know the lengths are non-zero, but a little caution is
780 * required to avoid errors in the case where the range ends
781 * exactly at the top of the address space and so addr + len
782 * wraps round to zero.
784 vaddr wpend = wp->vaddr + wp->len - 1;
785 vaddr addrend = addr + len - 1;
787 return !(addr > wpend || wp->vaddr > addrend);
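/* Worked example of the wrap-around case noted above: with 64-bit vaddr, a
 * watchpoint at wp->vaddr = 0xffffffffffffff00 with wp->len = 0x100 has
 * wp->vaddr + wp->len wrap to 0, but wpend = 0xffffffffffffffff stays
 * representable, so the overlap test remains correct for ranges ending at the
 * very top of the address space.
 */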
792 /* Add a breakpoint. */
793 int cpu_breakpoint_insert(CPUState *cpu, vaddr pc, int flags,
794 CPUBreakpoint **breakpoint)
798 bp = g_malloc(sizeof(*bp));
803 /* keep all GDB-injected breakpoints in front */
804 if (flags & BP_GDB) {
805 QTAILQ_INSERT_HEAD(&cpu->breakpoints, bp, entry);
807 QTAILQ_INSERT_TAIL(&cpu->breakpoints, bp, entry);
810 breakpoint_invalidate(cpu, pc);
818 /* Remove a specific breakpoint. */
819 int cpu_breakpoint_remove(CPUState *cpu, vaddr pc, int flags)
823 QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
824 if (bp->pc == pc && bp->flags == flags) {
825 cpu_breakpoint_remove_by_ref(cpu, bp);
832 /* Remove a specific breakpoint by reference. */
833 void cpu_breakpoint_remove_by_ref(CPUState *cpu, CPUBreakpoint *breakpoint)
835 QTAILQ_REMOVE(&cpu->breakpoints, breakpoint, entry);
837 breakpoint_invalidate(cpu, breakpoint->pc);
842 /* Remove all matching breakpoints. */
843 void cpu_breakpoint_remove_all(CPUState *cpu, int mask)
845 CPUBreakpoint *bp, *next;
847 QTAILQ_FOREACH_SAFE(bp, &cpu->breakpoints, entry, next) {
848 if (bp->flags & mask) {
849 cpu_breakpoint_remove_by_ref(cpu, bp);
854 /* enable or disable single step mode. EXCP_DEBUG is returned by the
855 CPU loop after each instruction */
856 void cpu_single_step(CPUState *cpu, int enabled)
858 if (cpu->singlestep_enabled != enabled) {
859 cpu->singlestep_enabled = enabled;
861 kvm_update_guest_debug(cpu, 0);
863 /* must flush all the translated code to avoid inconsistencies */
864 /* XXX: only flush what is necessary */
870 void cpu_abort(CPUState *cpu, const char *fmt, ...)
877 fprintf(stderr, "qemu: fatal: ");
878 vfprintf(stderr, fmt, ap);
879 fprintf(stderr, "\n");
880 cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
881 if (qemu_log_separate()) {
882 qemu_log("qemu: fatal: ");
883 qemu_log_vprintf(fmt, ap2);
885 log_cpu_state(cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
892 #if defined(CONFIG_USER_ONLY)
894 struct sigaction act;
895 sigfillset(&act.sa_mask);
896 act.sa_handler = SIG_DFL;
897 sigaction(SIGABRT, &act, NULL);
903 #if !defined(CONFIG_USER_ONLY)
904 /* Called from RCU critical section */
905 static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
909 block = atomic_rcu_read(&ram_list.mru_block);
910 if (block && addr - block->offset < block->max_length) {
913 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
914 if (addr - block->offset < block->max_length) {
919 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
923 /* It is safe to write mru_block outside the iothread lock. This
928 * xxx removed from list
932 * call_rcu(reclaim_ramblock, xxx);
935 * atomic_rcu_set is not needed here. The block was already published
936 * when it was placed into the list. Here we're just making an extra
937 * copy of the pointer.
939 ram_list.mru_block = block;
943 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
950 end = TARGET_PAGE_ALIGN(start + length);
951 start &= TARGET_PAGE_MASK;
954 block = qemu_get_ram_block(start);
955 assert(block == qemu_get_ram_block(end - 1));
956 start1 = (uintptr_t)ramblock_ptr(block, start - block->offset);
958 tlb_reset_dirty(cpu, start1, length);
963 /* Note: start and end must be within the same ram block. */
964 bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
968 unsigned long end, page;
975 end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
976 page = start >> TARGET_PAGE_BITS;
977 dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client],
980 if (dirty && tcg_enabled()) {
981 tlb_reset_dirty_range_all(start, length);
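/* Worked example for the page arithmetic above (assuming TARGET_PAGE_BITS is
 * 12, i.e. 4 KiB pages): start = 0x1800, length = 0x1000 gives
 * page = 0x1800 >> 12 = 1 and end = TARGET_PAGE_ALIGN(0x2800) >> 12 = 3, so
 * dirty bits are tested and cleared for pages 1 and 2, i.e. every page the
 * byte range touches.
 */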
987 /* Called from RCU critical section */
988 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
989 MemoryRegionSection *section,
991 hwaddr paddr, hwaddr xlat,
993 target_ulong *address)
998 if (memory_region_is_ram(section->mr)) {
1000 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1002 if (!section->readonly) {
1003 iotlb |= PHYS_SECTION_NOTDIRTY;
1005 iotlb |= PHYS_SECTION_ROM;
1008 AddressSpaceDispatch *d;
1010 d = atomic_rcu_read(&section->address_space->dispatch);
1011 iotlb = section - d->map.sections;
1015 /* Make accesses to pages with watchpoints go via the
1016 watchpoint trap routines. */
1017 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
1018 if (cpu_watchpoint_address_matches(wp, vaddr, TARGET_PAGE_SIZE)) {
1019 /* Avoid trapping reads of pages with a write breakpoint. */
1020 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1021 iotlb = PHYS_SECTION_WATCH + paddr;
1022 *address |= TLB_MMIO;
1030 #endif /* defined(CONFIG_USER_ONLY) */
1032 #if !defined(CONFIG_USER_ONLY)
1034 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1036 static subpage_t *subpage_init(AddressSpace *as, hwaddr base);
1038 static void *(*phys_mem_alloc)(size_t size, uint64_t *align) =
1039 qemu_anon_ram_alloc;
1042 * Set a custom physical guest memory allocator.
1043 * Accelerators with unusual needs may need this. Hopefully, we can
1044 * get rid of it eventually.
1046 void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align))
1048 phys_mem_alloc = alloc;
1051 static uint16_t phys_section_add(PhysPageMap *map,
1052 MemoryRegionSection *section)
1054 /* The physical section number is ORed with a page-aligned
1055 * pointer to produce the iotlb entries. Thus it should
1056 * never overflow into the page-aligned value.
1058 assert(map->sections_nb < TARGET_PAGE_SIZE);
1060 if (map->sections_nb == map->sections_nb_alloc) {
1061 map->sections_nb_alloc = MAX(map->sections_nb_alloc * 2, 16);
1062 map->sections = g_renew(MemoryRegionSection, map->sections,
1063 map->sections_nb_alloc);
1065 map->sections[map->sections_nb] = *section;
1066 memory_region_ref(section->mr);
1067 return map->sections_nb++;
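/* Worked example of the assertion above (assuming 4 KiB target pages): with
 * TARGET_PAGE_SIZE = 4096 the section number always fits in the low 12 bits,
 * so an iotlb entry formed as "page-aligned address | section index" can be
 * decoded later with "index & ~TARGET_PAGE_MASK", as iotlb_to_region() does.
 */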
1070 static void phys_section_destroy(MemoryRegion *mr)
1072 bool have_sub_page = mr->subpage;
1074 memory_region_unref(mr);
1076 if (have_sub_page) {
1077 subpage_t *subpage = container_of(mr, subpage_t, iomem);
1078 object_unref(OBJECT(&subpage->iomem));
1083 static void phys_sections_free(PhysPageMap *map)
1085 while (map->sections_nb > 0) {
1086 MemoryRegionSection *section = &map->sections[--map->sections_nb];
1087 phys_section_destroy(section->mr);
1089 g_free(map->sections);
1093 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
1096 hwaddr base = section->offset_within_address_space
1098 MemoryRegionSection *existing = phys_page_find(d->phys_map, base,
1099 d->map.nodes, d->map.sections);
1100 MemoryRegionSection subsection = {
1101 .offset_within_address_space = base,
1102 .size = int128_make64(TARGET_PAGE_SIZE),
1106 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
1108 if (!(existing->mr->subpage)) {
1109 subpage = subpage_init(d->as, base);
1110 subsection.address_space = d->as;
1111 subsection.mr = &subpage->iomem;
1112 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
1113 phys_section_add(&d->map, &subsection));
1115 subpage = container_of(existing->mr, subpage_t, iomem);
1117 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
1118 end = start + int128_get64(section->size) - 1;
1119 subpage_register(subpage, start, end,
1120 phys_section_add(&d->map, section));
1124 static void register_multipage(AddressSpaceDispatch *d,
1125 MemoryRegionSection *section)
1127 hwaddr start_addr = section->offset_within_address_space;
1128 uint16_t section_index = phys_section_add(&d->map, section);
1129 uint64_t num_pages = int128_get64(int128_rshift(section->size,
1133 phys_page_set(d, start_addr >> TARGET_PAGE_BITS, num_pages, section_index);
1136 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
1138 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
1139 AddressSpaceDispatch *d = as->next_dispatch;
1140 MemoryRegionSection now = *section, remain = *section;
1141 Int128 page_size = int128_make64(TARGET_PAGE_SIZE);
1143 if (now.offset_within_address_space & ~TARGET_PAGE_MASK) {
1144 uint64_t left = TARGET_PAGE_ALIGN(now.offset_within_address_space)
1145 - now.offset_within_address_space;
1147 now.size = int128_min(int128_make64(left), now.size);
1148 register_subpage(d, &now);
1150 now.size = int128_zero();
1152 while (int128_ne(remain.size, now.size)) {
1153 remain.size = int128_sub(remain.size, now.size);
1154 remain.offset_within_address_space += int128_get64(now.size);
1155 remain.offset_within_region += int128_get64(now.size);
1157 if (int128_lt(remain.size, page_size)) {
1158 register_subpage(d, &now);
1159 } else if (remain.offset_within_address_space & ~TARGET_PAGE_MASK) {
1160 now.size = page_size;
1161 register_subpage(d, &now);
1163 now.size = int128_and(now.size, int128_neg(page_size));
1164 register_multipage(d, &now);
1169 void qemu_flush_coalesced_mmio_buffer(void)
1172 kvm_flush_coalesced_mmio_buffer();
1175 void qemu_mutex_lock_ramlist(void)
1177 qemu_mutex_lock(&ram_list.mutex);
1180 void qemu_mutex_unlock_ramlist(void)
1182 qemu_mutex_unlock(&ram_list.mutex);
1187 #include <sys/vfs.h>
1189 #define HUGETLBFS_MAGIC 0x958458f6
1191 static long gethugepagesize(const char *path, Error **errp)
1197 ret = statfs(path, &fs);
1198 } while (ret != 0 && errno == EINTR);
1201 error_setg_errno(errp, errno, "failed to get page size of file %s",
1209 static void *file_ram_alloc(RAMBlock *block,
1216 char *sanitized_name;
1221 Error *local_err = NULL;
1223 hpagesize = gethugepagesize(path, &local_err);
1225 error_propagate(errp, local_err);
1228 block->mr->align = hpagesize;
1230 if (memory < hpagesize) {
1231 error_setg(errp, "memory size 0x" RAM_ADDR_FMT " must be equal to "
1232 "or larger than huge page size 0x%" PRIx64,
1237 if (kvm_enabled() && !kvm_has_sync_mmu()) {
1239 "host lacks kvm mmu notifiers, -mem-path unsupported");
1243 if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
1244 /* Make name safe to use with mkstemp by replacing '/' with '_'. */
1245 sanitized_name = g_strdup(memory_region_name(block->mr));
1246 for (c = sanitized_name; *c != '\0'; c++) {
1252 filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
1254 g_free(sanitized_name);
1256 fd = mkstemp(filename);
1262 fd = open(path, O_RDWR | O_CREAT, 0644);
1266 error_setg_errno(errp, errno,
1267 "unable to create backing store for hugepages");
1271 memory = ROUND_UP(memory, hpagesize);
1274 * ftruncate is not supported by hugetlbfs in older
1275 * hosts, so don't bother bailing out on errors.
1276 * If anything goes wrong with it under other filesystems,
1279 if (ftruncate(fd, memory)) {
1280 perror("ftruncate");
1283 area = qemu_ram_mmap(fd, memory, hpagesize, block->flags & RAM_SHARED);
1284 if (area == MAP_FAILED) {
1285 error_setg_errno(errp, errno,
1286 "unable to map backing store for hugepages");
1292 os_mem_prealloc(fd, area, memory);
1303 /* Called with the ramlist lock held. */
1304 static ram_addr_t find_ram_offset(ram_addr_t size)
1306 RAMBlock *block, *next_block;
1307 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
1309 assert(size != 0); /* it would hand out same offset multiple times */
1311 if (QLIST_EMPTY_RCU(&ram_list.blocks)) {
1315 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1316 ram_addr_t end, next = RAM_ADDR_MAX;
1318 end = block->offset + block->max_length;
1320 QLIST_FOREACH_RCU(next_block, &ram_list.blocks, next) {
1321 if (next_block->offset >= end) {
1322 next = MIN(next, next_block->offset);
1325 if (next - end >= size && next - end < mingap) {
1327 mingap = next - end;
1331 if (offset == RAM_ADDR_MAX) {
1332 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
1340 ram_addr_t last_ram_offset(void)
1343 ram_addr_t last = 0;
1346 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1347 last = MAX(last, block->offset + block->max_length);
1353 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
1357 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
1358 if (!machine_dump_guest_core(current_machine)) {
1359 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
1361 perror("qemu_madvise");
1362 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
1363 "but dump_guest_core=off specified\n");
1368 /* Called within an RCU critical section, or while the ramlist lock
1371 static RAMBlock *find_ram_block(ram_addr_t addr)
1375 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1376 if (block->offset == addr) {
1384 const char *qemu_ram_get_idstr(RAMBlock *rb)
1389 /* Called with iothread lock held. */
1390 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
1392 RAMBlock *new_block, *block;
1395 new_block = find_ram_block(addr);
1397 assert(!new_block->idstr[0]);
1400 char *id = qdev_get_dev_path(dev);
1402 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1406 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1408 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1409 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1410 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1418 /* Called with iothread lock held. */
1419 void qemu_ram_unset_idstr(ram_addr_t addr)
1423 /* FIXME: arch_init.c assumes that this is not called throughout
1424 * migration. Ignore the problem since hot-unplug during migration
1425 * does not work anyway.
1429 block = find_ram_block(addr);
1431 memset(block->idstr, 0, sizeof(block->idstr));
1436 static int memory_try_enable_merging(void *addr, size_t len)
1438 if (!machine_mem_merge(current_machine)) {
1439 /* disabled by the user */
1443 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1446 /* Only legal before the guest might have detected the memory size: e.g. on
1447 * incoming migration, or right after reset.
1449 * As the memory core doesn't know how memory is accessed, it is up to the
1450 * resize callback to update device state and/or add assertions to detect
1451 * misuse, if necessary.
1453 int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp)
1455 RAMBlock *block = find_ram_block(base);
1459 newsize = HOST_PAGE_ALIGN(newsize);
1461 if (block->used_length == newsize) {
1465 if (!(block->flags & RAM_RESIZEABLE)) {
1466 error_setg_errno(errp, EINVAL,
1467 "Length mismatch: %s: 0x" RAM_ADDR_FMT
1468 " in != 0x" RAM_ADDR_FMT, block->idstr,
1469 newsize, block->used_length);
1473 if (block->max_length < newsize) {
1474 error_setg_errno(errp, EINVAL,
1475 "Length too large: %s: 0x" RAM_ADDR_FMT
1476 " > 0x" RAM_ADDR_FMT, block->idstr,
1477 newsize, block->max_length);
1481 cpu_physical_memory_clear_dirty_range(block->offset, block->used_length);
1482 block->used_length = newsize;
1483 cpu_physical_memory_set_dirty_range(block->offset, block->used_length,
1485 memory_region_set_size(block->mr, newsize);
1486 if (block->resized) {
1487 block->resized(block->idstr, newsize, block->host);
1492 static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp)
1495 RAMBlock *last_block = NULL;
1496 ram_addr_t old_ram_size, new_ram_size;
1498 old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1500 qemu_mutex_lock_ramlist();
1501 new_block->offset = find_ram_offset(new_block->max_length);
1503 if (!new_block->host) {
1504 if (xen_enabled()) {
1505 xen_ram_alloc(new_block->offset, new_block->max_length,
1508 new_block->host = phys_mem_alloc(new_block->max_length,
1509 &new_block->mr->align);
1510 if (!new_block->host) {
1511 error_setg_errno(errp, errno,
1512 "cannot set up guest memory '%s'",
1513 memory_region_name(new_block->mr));
1514 qemu_mutex_unlock_ramlist();
1517 memory_try_enable_merging(new_block->host, new_block->max_length);
1521 new_ram_size = MAX(old_ram_size,
1522 (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
1523 if (new_ram_size > old_ram_size) {
1524 migration_bitmap_extend(old_ram_size, new_ram_size);
1526 /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
1527 * QLIST (which has an RCU-friendly variant) does not have insertion at
1528 * tail, so save the last element in last_block.
1530 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1532 if (block->max_length < new_block->max_length) {
1537 QLIST_INSERT_BEFORE_RCU(block, new_block, next);
1538 } else if (last_block) {
1539 QLIST_INSERT_AFTER_RCU(last_block, new_block, next);
1540 } else { /* list is empty */
1541 QLIST_INSERT_HEAD_RCU(&ram_list.blocks, new_block, next);
1543 ram_list.mru_block = NULL;
1545 /* Write list before version */
1548 qemu_mutex_unlock_ramlist();
1550 new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
1552 if (new_ram_size > old_ram_size) {
1555 /* ram_list.dirty_memory[] is protected by the iothread lock. */
1556 for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
1557 ram_list.dirty_memory[i] =
1558 bitmap_zero_extend(ram_list.dirty_memory[i],
1559 old_ram_size, new_ram_size);
1562 cpu_physical_memory_set_dirty_range(new_block->offset,
1563 new_block->used_length,
1566 if (new_block->host) {
1567 qemu_ram_setup_dump(new_block->host, new_block->max_length);
1568 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_HUGEPAGE);
1569 qemu_madvise(new_block->host, new_block->max_length, QEMU_MADV_DONTFORK);
1570 if (kvm_enabled()) {
1571 kvm_setup_guest_memory(new_block->host, new_block->max_length);
1575 return new_block->offset;
1579 ram_addr_t qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
1580 bool share, const char *mem_path,
1583 RAMBlock *new_block;
1585 Error *local_err = NULL;
1587 if (xen_enabled()) {
1588 error_setg(errp, "-mem-path not supported with Xen");
1592 if (phys_mem_alloc != qemu_anon_ram_alloc) {
1594 * file_ram_alloc() needs to allocate just like
1595 * phys_mem_alloc, but we haven't bothered to provide
1599 "-mem-path not supported with this accelerator");
1603 size = HOST_PAGE_ALIGN(size);
1604 new_block = g_malloc0(sizeof(*new_block));
1606 new_block->used_length = size;
1607 new_block->max_length = size;
1608 new_block->flags = share ? RAM_SHARED : 0;
1609 new_block->host = file_ram_alloc(new_block, size,
1611 if (!new_block->host) {
1616 addr = ram_block_add(new_block, &local_err);
1619 error_propagate(errp, local_err);
1627 ram_addr_t qemu_ram_alloc_internal(ram_addr_t size, ram_addr_t max_size,
1628 void (*resized)(const char*,
1631 void *host, bool resizeable,
1632 MemoryRegion *mr, Error **errp)
1634 RAMBlock *new_block;
1636 Error *local_err = NULL;
1638 size = HOST_PAGE_ALIGN(size);
1639 max_size = HOST_PAGE_ALIGN(max_size);
1640 new_block = g_malloc0(sizeof(*new_block));
1642 new_block->resized = resized;
1643 new_block->used_length = size;
1644 new_block->max_length = max_size;
1645 assert(max_size >= size);
1647 new_block->host = host;
1649 new_block->flags |= RAM_PREALLOC;
1652 new_block->flags |= RAM_RESIZEABLE;
1654 addr = ram_block_add(new_block, &local_err);
1657 error_propagate(errp, local_err);
1663 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1664 MemoryRegion *mr, Error **errp)
1666 return qemu_ram_alloc_internal(size, size, NULL, host, false, mr, errp);
1669 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr, Error **errp)
1671 return qemu_ram_alloc_internal(size, size, NULL, NULL, false, mr, errp);
1674 ram_addr_t qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t maxsz,
1675 void (*resized)(const char*,
1678 MemoryRegion *mr, Error **errp)
1680 return qemu_ram_alloc_internal(size, maxsz, resized, NULL, true, mr, errp);
1683 static void reclaim_ramblock(RAMBlock *block)
1685 if (block->flags & RAM_PREALLOC) {
1687 } else if (xen_enabled()) {
1688 xen_invalidate_map_cache_entry(block->host);
1690 } else if (block->fd >= 0) {
1691 qemu_ram_munmap(block->host, block->max_length);
1695 qemu_anon_ram_free(block->host, block->max_length);
1700 void qemu_ram_free(ram_addr_t addr)
1704 qemu_mutex_lock_ramlist();
1705 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1706 if (addr == block->offset) {
1707 QLIST_REMOVE_RCU(block, next);
1708 ram_list.mru_block = NULL;
1709 /* Write list before version */
1712 call_rcu(block, reclaim_ramblock, rcu);
1716 qemu_mutex_unlock_ramlist();
1720 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1727 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1728 offset = addr - block->offset;
1729 if (offset < block->max_length) {
1730 vaddr = ramblock_ptr(block, offset);
1731 if (block->flags & RAM_PREALLOC) {
1733 } else if (xen_enabled()) {
1737 if (block->fd >= 0) {
1738 flags |= (block->flags & RAM_SHARED ?
1739 MAP_SHARED : MAP_PRIVATE);
1740 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1741 flags, block->fd, offset);
1744 * Remap needs to match alloc. Accelerators that
1745 * set phys_mem_alloc never remap. If they did,
1746 * we'd need a remap hook here.
1748 assert(phys_mem_alloc == qemu_anon_ram_alloc);
1750 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1751 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1754 if (area != vaddr) {
1755 fprintf(stderr, "Could not remap addr: "
1756 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1760 memory_try_enable_merging(vaddr, length);
1761 qemu_ram_setup_dump(vaddr, length);
1766 #endif /* !_WIN32 */
1768 int qemu_get_ram_fd(ram_addr_t addr)
1774 block = qemu_get_ram_block(addr);
1780 void qemu_set_ram_fd(ram_addr_t addr, int fd)
1785 block = qemu_get_ram_block(addr);
1790 void *qemu_get_ram_block_host_ptr(ram_addr_t addr)
1796 block = qemu_get_ram_block(addr);
1797 ptr = ramblock_ptr(block, 0);
1802 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1803 * This should not be used for general purpose DMA. Use address_space_map
1804 * or address_space_rw instead. For local memory (e.g. video ram) that the
1805 * device owns, use memory_region_get_ram_ptr.
1807 * Called within RCU critical section.
1809 void *qemu_get_ram_ptr(ram_addr_t addr)
1811 RAMBlock *block = qemu_get_ram_block(addr);
1813 if (xen_enabled() && block->host == NULL) {
1814 /* We need to check if the requested address is in the RAM
1815 * because we don't want to map the entire memory in QEMU.
1816 * In that case just map until the end of the page.
1818 if (block->offset == 0) {
1819 return xen_map_cache(addr, 0, 0);
1822 block->host = xen_map_cache(block->offset, block->max_length, 1);
1824 return ramblock_ptr(block, addr - block->offset);
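/* Illustrative usage of the preferred DMA path mentioned above (hypothetical
 * call site): rather than dereferencing a raw RAM pointer, general-purpose
 * accesses go through the address-space API, e.g.
 *
 *     uint8_t buf[4];
 *     address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
 *                      buf, sizeof(buf), false);
 *
 * which also handles MMIO dispatch and dirty tracking.
 */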
1827 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1828 * but takes a size argument.
1830 * Called within RCU critical section.
1832 static void *qemu_ram_ptr_length(ram_addr_t addr, hwaddr *size)
1835 ram_addr_t offset_inside_block;
1840 block = qemu_get_ram_block(addr);
1841 offset_inside_block = addr - block->offset;
1842 *size = MIN(*size, block->max_length - offset_inside_block);
1844 if (xen_enabled() && block->host == NULL) {
1845 /* We need to check if the requested address is in the RAM
1846 * because we don't want to map the entire memory in QEMU.
1847 * In that case just map the requested area.
1849 if (block->offset == 0) {
1850 return xen_map_cache(addr, *size, 1);
1853 block->host = xen_map_cache(block->offset, block->max_length, 1);
1856 return ramblock_ptr(block, offset_inside_block);
1860 * Translates a host ptr back to a RAMBlock, a ram_addr and an offset
1863 * ptr: Host pointer to look up
1864 * round_offset: If true round the result offset down to a page boundary
1865 * *ram_addr: set to result ram_addr
1866 * *offset: set to result offset within the RAMBlock
1868 * Returns: RAMBlock (or NULL if not found)
1870 * By the time this function returns, the returned pointer is not protected
1871 * by RCU anymore. If the caller is not within an RCU critical section and
1872 * does not hold the iothread lock, it must have other means of protecting the
1873 * pointer, such as a reference to the region that includes the incoming
1876 RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
1877 ram_addr_t *ram_addr,
1881 uint8_t *host = ptr;
1883 if (xen_enabled()) {
1885 *ram_addr = xen_ram_addr_from_mapcache(ptr);
1886 block = qemu_get_ram_block(*ram_addr);
1888 *offset = (host - block->host);
1895 block = atomic_rcu_read(&ram_list.mru_block);
1896 if (block && block->host && host - block->host < block->max_length) {
1900 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1901 /* This case appears when the block is not mapped. */
1902 if (block->host == NULL) {
1905 if (host - block->host < block->max_length) {
1914 *offset = (host - block->host);
1916 *offset &= TARGET_PAGE_MASK;
1918 *ram_addr = block->offset + *offset;
1924 * Finds the named RAMBlock
1926 * name: The name of RAMBlock to find
1928 * Returns: RAMBlock (or NULL if not found)
1930 RAMBlock *qemu_ram_block_by_name(const char *name)
1934 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
1935 if (!strcmp(name, block->idstr)) {
1943 /* Some of the softmmu routines need to translate from a host pointer
1944 (typically a TLB entry) back to a ram offset. */
1945 MemoryRegion *qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1948 ram_addr_t offset; /* Not used */
1950 block = qemu_ram_block_from_host(ptr, false, ram_addr, &offset);
1959 /* Called within RCU critical section. */
1960 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1961 uint64_t val, unsigned size)
1963 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1964 tb_invalidate_phys_page_fast(ram_addr, size);
1968 stb_p(qemu_get_ram_ptr(ram_addr), val);
1971 stw_p(qemu_get_ram_ptr(ram_addr), val);
1974 stl_p(qemu_get_ram_ptr(ram_addr), val);
1979 /* Set both VGA and migration bits for simplicity and to remove
1980 * the notdirty callback faster.
1982 cpu_physical_memory_set_dirty_range(ram_addr, size,
1983 DIRTY_CLIENTS_NOCODE);
1984 /* we remove the notdirty callback only if the code has been
1986 if (!cpu_physical_memory_is_clean(ram_addr)) {
1987 tlb_set_dirty(current_cpu, current_cpu->mem_io_vaddr);
1991 static bool notdirty_mem_accepts(void *opaque, hwaddr addr,
1992 unsigned size, bool is_write)
1997 static const MemoryRegionOps notdirty_mem_ops = {
1998 .write = notdirty_mem_write,
1999 .valid.accepts = notdirty_mem_accepts,
2000 .endianness = DEVICE_NATIVE_ENDIAN,
2003 /* Generate a debug exception if a watchpoint has been hit. */
2004 static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
2006 CPUState *cpu = current_cpu;
2007 CPUArchState *env = cpu->env_ptr;
2008 target_ulong pc, cs_base;
2013 if (cpu->watchpoint_hit) {
2014 /* We re-entered the check after replacing the TB. Now raise
2015 * the debug interrupt so that it will trigger after the
2016 * current instruction. */
2017 cpu_interrupt(cpu, CPU_INTERRUPT_DEBUG);
2020 vaddr = (cpu->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2021 QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
2022 if (cpu_watchpoint_address_matches(wp, vaddr, len)
2023 && (wp->flags & flags)) {
2024 if (flags == BP_MEM_READ) {
2025 wp->flags |= BP_WATCHPOINT_HIT_READ;
2027 wp->flags |= BP_WATCHPOINT_HIT_WRITE;
2029 wp->hitaddr = vaddr;
2030 wp->hitattrs = attrs;
2031 if (!cpu->watchpoint_hit) {
2032 cpu->watchpoint_hit = wp;
2033 tb_check_watchpoint(cpu);
2034 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2035 cpu->exception_index = EXCP_DEBUG;
2038 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2039 tb_gen_code(cpu, pc, cs_base, cpu_flags, 1);
2040 cpu_resume_from_signal(cpu, NULL);
2044 wp->flags &= ~BP_WATCHPOINT_HIT;
2049 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2050 so these check for a hit then pass through to the normal out-of-line
2052 static MemTxResult watch_mem_read(void *opaque, hwaddr addr, uint64_t *pdata,
2053 unsigned size, MemTxAttrs attrs)
2058 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_READ);
2061 data = address_space_ldub(&address_space_memory, addr, attrs, &res);
2064 data = address_space_lduw(&address_space_memory, addr, attrs, &res);
2067 data = address_space_ldl(&address_space_memory, addr, attrs, &res);
2075 static MemTxResult watch_mem_write(void *opaque, hwaddr addr,
2076 uint64_t val, unsigned size,
2081 check_watchpoint(addr & ~TARGET_PAGE_MASK, size, attrs, BP_MEM_WRITE);
2084 address_space_stb(&address_space_memory, addr, val, attrs, &res);
2087 address_space_stw(&address_space_memory, addr, val, attrs, &res);
2090 address_space_stl(&address_space_memory, addr, val, attrs, &res);
2097 static const MemoryRegionOps watch_mem_ops = {
2098 .read_with_attrs = watch_mem_read,
2099 .write_with_attrs = watch_mem_write,
2100 .endianness = DEVICE_NATIVE_ENDIAN,
2103 static MemTxResult subpage_read(void *opaque, hwaddr addr, uint64_t *data,
2104 unsigned len, MemTxAttrs attrs)
2106 subpage_t *subpage = opaque;
2110 #if defined(DEBUG_SUBPAGE)
2111 printf("%s: subpage %p len %u addr " TARGET_FMT_plx "\n", __func__,
2112 subpage, len, addr);
2114 res = address_space_read(subpage->as, addr + subpage->base,
2121 *data = ldub_p(buf);
2124 *data = lduw_p(buf);
2137 static MemTxResult subpage_write(void *opaque, hwaddr addr,
2138 uint64_t value, unsigned len, MemTxAttrs attrs)
2140 subpage_t *subpage = opaque;
2143 #if defined(DEBUG_SUBPAGE)
2144 printf("%s: subpage %p len %u addr " TARGET_FMT_plx
2145 " value %"PRIx64"\n",
2146 __func__, subpage, len, addr, value);
2164 return address_space_write(subpage->as, addr + subpage->base,
2168 static bool subpage_accepts(void *opaque, hwaddr addr,
2169 unsigned len, bool is_write)
2171 subpage_t *subpage = opaque;
2172 #if defined(DEBUG_SUBPAGE)
2173 printf("%s: subpage %p %c len %u addr " TARGET_FMT_plx "\n",
2174 __func__, subpage, is_write ? 'w' : 'r', len, addr);
2177 return address_space_access_valid(subpage->as, addr + subpage->base,
2181 static const MemoryRegionOps subpage_ops = {
2182 .read_with_attrs = subpage_read,
2183 .write_with_attrs = subpage_write,
2184 .impl.min_access_size = 1,
2185 .impl.max_access_size = 8,
2186 .valid.min_access_size = 1,
2187 .valid.max_access_size = 8,
2188 .valid.accepts = subpage_accepts,
2189 .endianness = DEVICE_NATIVE_ENDIAN,
2192 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2197 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
2199 idx = SUBPAGE_IDX(start);
2200 eidx = SUBPAGE_IDX(end);
2201 #if defined(DEBUG_SUBPAGE)
2202 printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
2203 __func__, mmio, start, end, idx, eidx, section);
2205 for (; idx <= eidx; idx++) {
2206 mmio->sub_section[idx] = section;
2212 static subpage_t *subpage_init(AddressSpace *as, hwaddr base)
2216 mmio = g_malloc0(sizeof(subpage_t));
2220 memory_region_init_io(&mmio->iomem, NULL, &subpage_ops, mmio,
2221 NULL, TARGET_PAGE_SIZE);
2222 mmio->iomem.subpage = true;
2223 #if defined(DEBUG_SUBPAGE)
2224 printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
2225 mmio, base, TARGET_PAGE_SIZE);
2227 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, PHYS_SECTION_UNASSIGNED);
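/* Worked example of the sub_section mapping (assuming 4 KiB target pages):
 * registering a section over bytes 0x100..0x1ff of a page stores that
 * section's index in sub_section[0x100] through sub_section[0x1ff], and
 * address_space_lookup_region() later recovers it via SUBPAGE_IDX(addr) when
 * it resolves an access that lands in the subpage.
 */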
2232 static uint16_t dummy_section(PhysPageMap *map, AddressSpace *as,
2236 MemoryRegionSection section = {
2237 .address_space = as,
2239 .offset_within_address_space = 0,
2240 .offset_within_region = 0,
2241 .size = int128_2_64(),
2244 return phys_section_add(map, &section);
2247 MemoryRegion *iotlb_to_region(CPUState *cpu, hwaddr index, MemTxAttrs attrs)
2249 int asidx = cpu_asidx_from_attrs(cpu, attrs);
2250 CPUAddressSpace *cpuas = &cpu->cpu_ases[asidx];
2251 AddressSpaceDispatch *d = atomic_rcu_read(&cpuas->memory_dispatch);
2252 MemoryRegionSection *sections = d->map.sections;
2254 return sections[index & ~TARGET_PAGE_MASK].mr;
2257 static void io_mem_init(void)
2259 memory_region_init_io(&io_mem_rom, NULL, &unassigned_mem_ops, NULL, NULL, UINT64_MAX);
2260 memory_region_init_io(&io_mem_unassigned, NULL, &unassigned_mem_ops, NULL,
2262 memory_region_init_io(&io_mem_notdirty, NULL, &notdirty_mem_ops, NULL,
2264 memory_region_init_io(&io_mem_watch, NULL, &watch_mem_ops, NULL,
2268 static void mem_begin(MemoryListener *listener)
2270 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2271 AddressSpaceDispatch *d = g_new0(AddressSpaceDispatch, 1);
2274 n = dummy_section(&d->map, as, &io_mem_unassigned);
2275 assert(n == PHYS_SECTION_UNASSIGNED);
2276 n = dummy_section(&d->map, as, &io_mem_notdirty);
2277 assert(n == PHYS_SECTION_NOTDIRTY);
2278 n = dummy_section(&d->map, as, &io_mem_rom);
2279 assert(n == PHYS_SECTION_ROM);
2280 n = dummy_section(&d->map, as, &io_mem_watch);
2281 assert(n == PHYS_SECTION_WATCH);
2283 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .skip = 1 };
2285 as->next_dispatch = d;
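/* The four dummy sections above are registered in the same order as the
 * PHYS_SECTION_* constants, so their indices in d->map.sections come out as
 * PHYS_SECTION_UNASSIGNED..PHYS_SECTION_WATCH; the asserts pin down that
 * dependency because iotlb values encode these fixed indices directly (see
 * memory_region_section_get_iotlb() above).
 */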
2288 static void address_space_dispatch_free(AddressSpaceDispatch *d)
2290 phys_sections_free(&d->map);
2294 static void mem_commit(MemoryListener *listener)
2296 AddressSpace *as = container_of(listener, AddressSpace, dispatch_listener);
2297 AddressSpaceDispatch *cur = as->dispatch;
2298 AddressSpaceDispatch *next = as->next_dispatch;
2300 phys_page_compact_all(next, next->map.nodes_nb);
2302 atomic_rcu_set(&as->dispatch, next);
2304 call_rcu(cur, address_space_dispatch_free, rcu);
2308 static void tcg_commit(MemoryListener *listener)
2310 CPUAddressSpace *cpuas;
2311 AddressSpaceDispatch *d;
2313 /* since each CPU stores ram addresses in its TLB cache, we must
2314 reset the modified entries */
2315 cpuas = container_of(listener, CPUAddressSpace, tcg_as_listener);
2316 cpu_reloading_memory_map();
2317 /* The CPU and TLB are protected by the iothread lock.
2318 * We reload the dispatch pointer now because cpu_reloading_memory_map()
2319 * may have split the RCU critical section.
2321 d = atomic_rcu_read(&cpuas->as->dispatch);
2322 cpuas->memory_dispatch = d;
2323 tlb_flush(cpuas->cpu, 1);
2326 void address_space_init_dispatch(AddressSpace *as)
2328 as->dispatch = NULL;
2329 as->dispatch_listener = (MemoryListener) {
2331 .commit = mem_commit,
2332 .region_add = mem_add,
2333 .region_nop = mem_add,
2336 memory_listener_register(&as->dispatch_listener, as);
2339 void address_space_unregister(AddressSpace *as)
2341 memory_listener_unregister(&as->dispatch_listener);
2344 void address_space_destroy_dispatch(AddressSpace *as)
2346 AddressSpaceDispatch *d = as->dispatch;
2348 atomic_rcu_set(&as->dispatch, NULL);
2350 call_rcu(d, address_space_dispatch_free, rcu);
2354 static void memory_map_init(void)
2356 system_memory = g_malloc(sizeof(*system_memory));
2358 memory_region_init(system_memory, NULL, "system", UINT64_MAX);
2359 address_space_init(&address_space_memory, system_memory, "memory");
2361 system_io = g_malloc(sizeof(*system_io));
2362 memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io",
2364 address_space_init(&address_space_io, system_io, "I/O");
2367 MemoryRegion *get_system_memory(void)
2369 return system_memory;
2372 MemoryRegion *get_system_io(void)
2377 #endif /* !defined(CONFIG_USER_ONLY) */
2379 /* physical memory access (slow version, mainly for debug) */
2380 #if defined(CONFIG_USER_ONLY)
2381 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
2382 uint8_t *buf, int len, int is_write)
2389 page = addr & TARGET_PAGE_MASK;
2390 l = (page + TARGET_PAGE_SIZE) - addr;
2393 flags = page_get_flags(page);
2394 if (!(flags & PAGE_VALID))
2397 if (!(flags & PAGE_WRITE))
2399 /* XXX: this code should not depend on lock_user */
2400 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
2403 unlock_user(p, addr, l);
2405 if (!(flags & PAGE_READ))
2407 /* XXX: this code should not depend on lock_user */
2408 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
2411 unlock_user(p, addr, 0);
2422 static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
2425 uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr);
2426 /* No early return if dirty_log_mask is or becomes 0, because
2427 * cpu_physical_memory_set_dirty_range will still call
2428 * xen_modified_memory.
2430 if (dirty_log_mask) {
2432 cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask);
2434 if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
2435 tb_invalidate_phys_range(addr, addr + length);
2436 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
2438 cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
2441 static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr)
2443 unsigned access_size_max = mr->ops->valid.max_access_size;
2445 /* Regions are assumed to support 1-4 byte accesses unless
2446 otherwise specified. */
2447 if (access_size_max == 0) {
2448 access_size_max = 4;
2451 /* Bound the maximum access by the alignment of the address. */
2452 if (!mr->ops->impl.unaligned) {
2453 unsigned align_size_max = addr & -addr;
2454 if (align_size_max != 0 && align_size_max < access_size_max) {
2455 access_size_max = align_size_max;
2459 /* Don't attempt accesses larger than the maximum. */
2460 if (l > access_size_max) {
2461 l = access_size_max;
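/* Worked example for the alignment clamp above: addr & -addr isolates the
 * lowest set bit of addr, i.e. its largest power-of-two alignment. For
 * addr = 0x1006 this yields 0x2, so an access at that address is limited to
 * 2 bytes even if the region would accept wider accesses.
 */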
2468 static bool prepare_mmio_access(MemoryRegion *mr)
2470 bool unlocked = !qemu_mutex_iothread_locked();
2471 bool release_lock = false;
2473 if (unlocked && mr->global_locking) {
2474 qemu_mutex_lock_iothread();
2476 release_lock = true;
2478 if (mr->flush_coalesced_mmio) {
2480 qemu_mutex_lock_iothread();
2482 qemu_flush_coalesced_mmio_buffer();
2484 qemu_mutex_unlock_iothread();
2488 return release_lock;
2491 /* Called within RCU critical section. */
2492 static MemTxResult address_space_write_continue(AddressSpace *as, hwaddr addr,
2495 int len, hwaddr addr1,
2496 hwaddr l, MemoryRegion *mr)
2500 MemTxResult result = MEMTX_OK;
2501 bool release_lock = false;
2504 if (!memory_access_is_direct(mr, true)) {
2505 release_lock |= prepare_mmio_access(mr);
2506 l = memory_access_size(mr, l, addr1);
2507 /* XXX: could force current_cpu to NULL to avoid
2511 /* 64 bit write access */
2513 result |= memory_region_dispatch_write(mr, addr1, val, 8,
2517 /* 32 bit write access */
2519 result |= memory_region_dispatch_write(mr, addr1, val, 4,
2523 /* 16 bit write access */
2525 result |= memory_region_dispatch_write(mr, addr1, val, 2,
2529 /* 8 bit write access */
2531 result |= memory_region_dispatch_write(mr, addr1, val, 1,
2538 addr1 += memory_region_get_ram_addr(mr);
2540 ptr = qemu_get_ram_ptr(addr1);
2541 memcpy(ptr, buf, l);
2542 invalidate_and_set_dirty(mr, addr1, l);
2546 qemu_mutex_unlock_iothread();
2547 release_lock = false;
2559 mr = address_space_translate(as, addr, &addr1, &l, true);
2565 MemTxResult address_space_write(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2566 const uint8_t *buf, int len)
2571 MemTxResult result = MEMTX_OK;
2576 mr = address_space_translate(as, addr, &addr1, &l, true);
2577 result = address_space_write_continue(as, addr, attrs, buf, len,
2585 /* Called within RCU critical section. */
2586 MemTxResult address_space_read_continue(AddressSpace *as, hwaddr addr,
2587 MemTxAttrs attrs, uint8_t *buf,
2588 int len, hwaddr addr1, hwaddr l,
2593 MemTxResult result = MEMTX_OK;
2594 bool release_lock = false;
2597 if (!memory_access_is_direct(mr, false)) {
2599 release_lock |= prepare_mmio_access(mr);
2600 l = memory_access_size(mr, l, addr1);
2603 /* 64 bit read access */
2604 result |= memory_region_dispatch_read(mr, addr1, &val, 8,
2609 /* 32 bit read access */
2610 result |= memory_region_dispatch_read(mr, addr1, &val, 4,
2615 /* 16 bit read access */
2616 result |= memory_region_dispatch_read(mr, addr1, &val, 2,
2621 /* 8 bit read access */
2622 result |= memory_region_dispatch_read(mr, addr1, &val, 1,
2631 ptr = qemu_get_ram_ptr(mr->ram_addr + addr1);
2632 memcpy(buf, ptr, l);
2636 qemu_mutex_unlock_iothread();
2637 release_lock = false;
2649 mr = address_space_translate(as, addr, &addr1, &l, false);
2655 MemTxResult address_space_read_full(AddressSpace *as, hwaddr addr,
2656 MemTxAttrs attrs, uint8_t *buf, int len)
2661 MemTxResult result = MEMTX_OK;
2666 mr = address_space_translate(as, addr, &addr1, &l, false);
2667 result = address_space_read_continue(as, addr, attrs, buf, len,
2675 MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs,
2676 uint8_t *buf, int len, bool is_write)
2679 return address_space_write(as, addr, attrs, (uint8_t *)buf, len);
2681 return address_space_read(as, addr, attrs, (uint8_t *)buf, len);
2685 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
2686 int len, int is_write)
2688 address_space_rw(&address_space_memory, addr, MEMTXATTRS_UNSPECIFIED,
2689 buf, len, is_write);
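/*
 * Illustrative sketch (hypothetical, not from the surrounding code): a
 * device model doing a DMA read of a guest buffer through the
 * address_space_* API above and checking the transaction result rather
 * than ignoring it.  'sg_addr' and 'buf' are assumed to come from the
 * device's scatter-gather state.
 */
static bool __attribute__((unused))
example_dma_read(AddressSpace *as, hwaddr sg_addr, uint8_t *buf, int len)
{
    MemTxResult r = address_space_rw(as, sg_addr, MEMTXATTRS_UNSPECIFIED,
                                     buf, len, false);

    return r == MEMTX_OK;
}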
2692 enum write_rom_type {
2697 static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as,
2698 hwaddr addr, const uint8_t *buf, int len, enum write_rom_type type)
2708 mr = address_space_translate(as, addr, &addr1, &l, true);
2710 if (!(memory_region_is_ram(mr) ||
2711 memory_region_is_romd(mr))) {
2712 l = memory_access_size(mr, l, addr1);
2714 addr1 += memory_region_get_ram_addr(mr);
2716 ptr = qemu_get_ram_ptr(addr1);
2719 memcpy(ptr, buf, l);
2720 invalidate_and_set_dirty(mr, addr1, l);
2723 flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l);
2734 /* used for ROM loading: can write to RAM and ROM */
2735 void cpu_physical_memory_write_rom(AddressSpace *as, hwaddr addr,
2736 const uint8_t *buf, int len)
2738 cpu_physical_memory_write_rom_internal(as, addr, buf, len, WRITE_DATA);
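/*
 * Illustrative sketch (hypothetical, not from the surrounding code):
 * loading a firmware blob at a fixed guest-physical address.  Unlike
 * address_space_write(), this path also succeeds when the destination is a
 * ROM or ROM-device region.  'rom_addr' and 'blob' are made up for the
 * example.
 */
static void __attribute__((unused))
example_load_firmware(const uint8_t *blob, int size)
{
    const hwaddr rom_addr = 0xfffc0000;     /* hypothetical reset vector */

    cpu_physical_memory_write_rom(&address_space_memory, rom_addr,
                                  blob, size);
}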
2741 void cpu_flush_icache_range(hwaddr start, int len)
2744 * This function should do the same thing as an icache flush that was
2745 * triggered from within the guest. For TCG we are always cache coherent,
2746 * so there is no need to flush anything. For KVM / Xen we need to flush
2747 * the host's instruction cache at least.
2749 if (tcg_enabled()) {
2753 cpu_physical_memory_write_rom_internal(&address_space_memory,
2754 start, NULL, len, FLUSH_CACHE);
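/*
 * Illustrative sketch (hypothetical, not from the surrounding code): a
 * code-patching helper.  After rewriting guest instructions it flushes the
 * host instruction cache for the patched range, which matters when running
 * under KVM or Xen; under TCG the flush returns early, as explained above.
 */
static void __attribute__((unused))
example_patch_guest_code(hwaddr addr, const uint8_t *insn, int len)
{
    cpu_physical_memory_write_rom(&address_space_memory, addr, insn, len);
    cpu_flush_icache_range(addr, len);
}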
2765 static BounceBuffer bounce;
2767 typedef struct MapClient {
2769 QLIST_ENTRY(MapClient) link;
2772 QemuMutex map_client_list_lock;
2773 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2774 = QLIST_HEAD_INITIALIZER(map_client_list);
2776 static void cpu_unregister_map_client_do(MapClient *client)
2778 QLIST_REMOVE(client, link);
2782 static void cpu_notify_map_clients_locked(void)
2786 while (!QLIST_EMPTY(&map_client_list)) {
2787 client = QLIST_FIRST(&map_client_list);
2788 qemu_bh_schedule(client->bh);
2789 cpu_unregister_map_client_do(client);
2793 void cpu_register_map_client(QEMUBH *bh)
2795 MapClient *client = g_malloc(sizeof(*client));
2797 qemu_mutex_lock(&map_client_list_lock);
2799 QLIST_INSERT_HEAD(&map_client_list, client, link);
2800 if (!atomic_read(&bounce.in_use)) {
2801 cpu_notify_map_clients_locked();
2803 qemu_mutex_unlock(&map_client_list_lock);
2806 void cpu_exec_init_all(void)
2808 qemu_mutex_init(&ram_list.mutex);
2811 qemu_mutex_init(&map_client_list_lock);
2814 void cpu_unregister_map_client(QEMUBH *bh)
2818 qemu_mutex_lock(&map_client_list_lock);
2819 QLIST_FOREACH(client, &map_client_list, link) {
2820 if (client->bh == bh) {
2821 cpu_unregister_map_client_do(client);
2825 qemu_mutex_unlock(&map_client_list_lock);
2828 static void cpu_notify_map_clients(void)
2830 qemu_mutex_lock(&map_client_list_lock);
2831 cpu_notify_map_clients_locked();
2832 qemu_mutex_unlock(&map_client_list_lock);
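/*
 * Illustrative sketch (hypothetical, not from the surrounding code): how a
 * caller cooperates with the map-client machinery above.  ExampleMapState
 * and example_retry_map are made up; s->bh is assumed to have been created
 * with qemu_bh_new(example_retry_map, s).  When the single bounce buffer is
 * busy, the map call returns NULL and the caller registers its bottom half
 * so that cpu_notify_map_clients() reschedules it once the buffer is freed.
 */
typedef struct ExampleMapState {
    QEMUBH *bh;
    hwaddr addr;
    hwaddr len;
} ExampleMapState;

static void __attribute__((unused)) example_retry_map(void *opaque)
{
    ExampleMapState *s = opaque;
    hwaddr plen = s->len;
    void *p = cpu_physical_memory_map(s->addr, &plen, true);

    if (!p) {
        cpu_register_map_client(s->bh);     /* bounce buffer still in use */
        return;
    }
    /* ... fill p[0..plen) with device data ... */
    cpu_physical_memory_unmap(p, plen, true, plen);
}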
2835 bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write)
2843 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2844 if (!memory_access_is_direct(mr, is_write)) {
2845 l = memory_access_size(mr, l, addr);
2846 if (!memory_region_access_valid(mr, xlat, l, is_write)) {
2858 /* Map a physical memory region into a host virtual address.
2859 * May map a subset of the requested range, given by and returned in *plen.
2860 * May return NULL if resources needed to perform the mapping are exhausted.
2861 * Use only for reads OR writes - not for read-modify-write operations.
2862 * Use cpu_register_map_client() to know when retrying the map operation is
2863 * likely to succeed.
2865 void *address_space_map(AddressSpace *as,
2872 hwaddr l, xlat, base;
2873 MemoryRegion *mr, *this_mr;
2883 mr = address_space_translate(as, addr, &xlat, &l, is_write);
2885 if (!memory_access_is_direct(mr, is_write)) {
2886 if (atomic_xchg(&bounce.in_use, true)) {
2890 /* Avoid unbounded allocations */
2891 l = MIN(l, TARGET_PAGE_SIZE);
2892 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, l);
2896 memory_region_ref(mr);
2899 address_space_read(as, addr, MEMTXATTRS_UNSPECIFIED,
2905 return bounce.buffer;
2909 raddr = memory_region_get_ram_addr(mr);
2920 this_mr = address_space_translate(as, addr, &xlat, &l, is_write);
2921 if (this_mr != mr || xlat != base + done) {
2926 memory_region_ref(mr);
2928 ptr = qemu_ram_ptr_length(raddr + base, plen);
2934 /* Unmaps a memory region previously mapped by address_space_map().
2935 * Will also mark the memory as dirty if is_write == 1. access_len gives
2936 * the amount of memory that was actually read or written by the caller.
2938 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2939 int is_write, hwaddr access_len)
2941 if (buffer != bounce.buffer) {
2945 mr = qemu_ram_addr_from_host(buffer, &addr1);
2948 invalidate_and_set_dirty(mr, addr1, access_len);
2950 if (xen_enabled()) {
2951 xen_invalidate_map_cache_entry(buffer);
2953 memory_region_unref(mr);
2957 address_space_write(as, bounce.addr, MEMTXATTRS_UNSPECIFIED,
2958 bounce.buffer, access_len);
2960 qemu_vfree(bounce.buffer);
2961 bounce.buffer = NULL;
2962 memory_region_unref(bounce.mr);
2963 atomic_mb_set(&bounce.in_use, false);
2964 cpu_notify_map_clients();
2967 void *cpu_physical_memory_map(hwaddr addr,
2971 return address_space_map(&address_space_memory, addr, plen, is_write);
2974 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2975 int is_write, hwaddr access_len)
2977 address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
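/*
 * Illustrative sketch (hypothetical, not from the surrounding code):
 * consuming a guest buffer zero-copy with address_space_map().  Because the
 * mapping may cover only a prefix of the request (*plen can shrink, e.g.
 * when the range spans several memory regions or falls back to the bounce
 * buffer), the caller loops until the whole range has been handled.
 * 'example_consume' is a made-up callback.
 */
static void __attribute__((unused))
example_map_loop(AddressSpace *as, hwaddr addr, hwaddr len,
                 void (*example_consume)(const void *p, hwaddr n))
{
    while (len > 0) {
        hwaddr plen = len;
        void *p = address_space_map(as, addr, &plen, false);

        if (!p) {
            /* Out of resources; a real caller would register a map
             * client and retry from a bottom half (see above). */
            break;
        }
        example_consume(p, plen);
        address_space_unmap(as, p, plen, false, plen);
        addr += plen;
        len -= plen;
    }
}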
2980 /* warning: addr must be aligned */
2981 static inline uint32_t address_space_ldl_internal(AddressSpace *as, hwaddr addr,
2983 MemTxResult *result,
2984 enum device_endian endian)
2992 bool release_lock = false;
2995 mr = address_space_translate(as, addr, &addr1, &l, false);
2996 if (l < 4 || !memory_access_is_direct(mr, false)) {
2997 release_lock |= prepare_mmio_access(mr);
3000 r = memory_region_dispatch_read(mr, addr1, &val, 4, attrs);
3001 #if defined(TARGET_WORDS_BIGENDIAN)
3002 if (endian == DEVICE_LITTLE_ENDIAN) {
3006 if (endian == DEVICE_BIG_ENDIAN) {
3012 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3016 case DEVICE_LITTLE_ENDIAN:
3017 val = ldl_le_p(ptr);
3019 case DEVICE_BIG_ENDIAN:
3020 val = ldl_be_p(ptr);
3032 qemu_mutex_unlock_iothread();
3038 uint32_t address_space_ldl(AddressSpace *as, hwaddr addr,
3039 MemTxAttrs attrs, MemTxResult *result)
3041 return address_space_ldl_internal(as, addr, attrs, result,
3042 DEVICE_NATIVE_ENDIAN);
3045 uint32_t address_space_ldl_le(AddressSpace *as, hwaddr addr,
3046 MemTxAttrs attrs, MemTxResult *result)
3048 return address_space_ldl_internal(as, addr, attrs, result,
3049 DEVICE_LITTLE_ENDIAN);
3052 uint32_t address_space_ldl_be(AddressSpace *as, hwaddr addr,
3053 MemTxAttrs attrs, MemTxResult *result)
3055 return address_space_ldl_internal(as, addr, attrs, result,
3059 uint32_t ldl_phys(AddressSpace *as, hwaddr addr)
3061 return address_space_ldl(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3064 uint32_t ldl_le_phys(AddressSpace *as, hwaddr addr)
3066 return address_space_ldl_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3069 uint32_t ldl_be_phys(AddressSpace *as, hwaddr addr)
3071 return address_space_ldl_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
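/*
 * Illustrative sketch (hypothetical, not from the surrounding code):
 * reading a 32-bit little-endian field of an in-RAM descriptor.  Using the
 * explicit-endian accessor keeps the result correct regardless of the
 * target's endianness, unlike the native-endian ldl_phys().
 */
static uint32_t __attribute__((unused))
example_read_le_field(hwaddr desc_addr)
{
    return ldl_le_phys(&address_space_memory, desc_addr);
}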
3074 /* warning: addr must be aligned */
3075 static inline uint64_t address_space_ldq_internal(AddressSpace *as, hwaddr addr,
3077 MemTxResult *result,
3078 enum device_endian endian)
3086 bool release_lock = false;
3089 mr = address_space_translate(as, addr, &addr1, &l,
3091 if (l < 8 || !memory_access_is_direct(mr, false)) {
3092 release_lock |= prepare_mmio_access(mr);
3095 r = memory_region_dispatch_read(mr, addr1, &val, 8, attrs);
3096 #if defined(TARGET_WORDS_BIGENDIAN)
3097 if (endian == DEVICE_LITTLE_ENDIAN) {
3101 if (endian == DEVICE_BIG_ENDIAN) {
3107 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3111 case DEVICE_LITTLE_ENDIAN:
3112 val = ldq_le_p(ptr);
3114 case DEVICE_BIG_ENDIAN:
3115 val = ldq_be_p(ptr);
3127 qemu_mutex_unlock_iothread();
3133 uint64_t address_space_ldq(AddressSpace *as, hwaddr addr,
3134 MemTxAttrs attrs, MemTxResult *result)
3136 return address_space_ldq_internal(as, addr, attrs, result,
3137 DEVICE_NATIVE_ENDIAN);
3140 uint64_t address_space_ldq_le(AddressSpace *as, hwaddr addr,
3141 MemTxAttrs attrs, MemTxResult *result)
3143 return address_space_ldq_internal(as, addr, attrs, result,
3144 DEVICE_LITTLE_ENDIAN);
3147 uint64_t address_space_ldq_be(AddressSpace *as, hwaddr addr,
3148 MemTxAttrs attrs, MemTxResult *result)
3150 return address_space_ldq_internal(as, addr, attrs, result,
3154 uint64_t ldq_phys(AddressSpace *as, hwaddr addr)
3156 return address_space_ldq(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3159 uint64_t ldq_le_phys(AddressSpace *as, hwaddr addr)
3161 return address_space_ldq_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3164 uint64_t ldq_be_phys(AddressSpace *as, hwaddr addr)
3166 return address_space_ldq_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3170 uint32_t address_space_ldub(AddressSpace *as, hwaddr addr,
3171 MemTxAttrs attrs, MemTxResult *result)
3176 r = address_space_rw(as, addr, attrs, &val, 1, 0);
3183 uint32_t ldub_phys(AddressSpace *as, hwaddr addr)
3185 return address_space_ldub(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3188 /* warning: addr must be aligned */
3189 static inline uint32_t address_space_lduw_internal(AddressSpace *as,
3192 MemTxResult *result,
3193 enum device_endian endian)
3201 bool release_lock = false;
3204 mr = address_space_translate(as, addr, &addr1, &l,
3206 if (l < 2 || !memory_access_is_direct(mr, false)) {
3207 release_lock |= prepare_mmio_access(mr);
3210 r = memory_region_dispatch_read(mr, addr1, &val, 2, attrs);
3211 #if defined(TARGET_WORDS_BIGENDIAN)
3212 if (endian == DEVICE_LITTLE_ENDIAN) {
3216 if (endian == DEVICE_BIG_ENDIAN) {
3222 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(mr)
3226 case DEVICE_LITTLE_ENDIAN:
3227 val = lduw_le_p(ptr);
3229 case DEVICE_BIG_ENDIAN:
3230 val = lduw_be_p(ptr);
3242 qemu_mutex_unlock_iothread();
3248 uint32_t address_space_lduw(AddressSpace *as, hwaddr addr,
3249 MemTxAttrs attrs, MemTxResult *result)
3251 return address_space_lduw_internal(as, addr, attrs, result,
3252 DEVICE_NATIVE_ENDIAN);
3255 uint32_t address_space_lduw_le(AddressSpace *as, hwaddr addr,
3256 MemTxAttrs attrs, MemTxResult *result)
3258 return address_space_lduw_internal(as, addr, attrs, result,
3259 DEVICE_LITTLE_ENDIAN);
3262 uint32_t address_space_lduw_be(AddressSpace *as, hwaddr addr,
3263 MemTxAttrs attrs, MemTxResult *result)
3265 return address_space_lduw_internal(as, addr, attrs, result,
3269 uint32_t lduw_phys(AddressSpace *as, hwaddr addr)
3271 return address_space_lduw(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3274 uint32_t lduw_le_phys(AddressSpace *as, hwaddr addr)
3276 return address_space_lduw_le(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3279 uint32_t lduw_be_phys(AddressSpace *as, hwaddr addr)
3281 return address_space_lduw_be(as, addr, MEMTXATTRS_UNSPECIFIED, NULL);
3284 /* warning: addr must be aligned. The ram page is not marked as dirty
3285 and the code inside is not invalidated. It is useful if the dirty
3286 bits are used to track modified PTEs */
3287 void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val,
3288 MemTxAttrs attrs, MemTxResult *result)
3295 uint8_t dirty_log_mask;
3296 bool release_lock = false;
3299 mr = address_space_translate(as, addr, &addr1, &l,
3301 if (l < 4 || !memory_access_is_direct(mr, true)) {
3302 release_lock |= prepare_mmio_access(mr);
3304 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3306 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3307 ptr = qemu_get_ram_ptr(addr1);
3310 dirty_log_mask = memory_region_get_dirty_log_mask(mr);
3311 dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
3312 cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask);
3319 qemu_mutex_unlock_iothread();
3324 void stl_phys_notdirty(AddressSpace *as, hwaddr addr, uint32_t val)
3326 address_space_stl_notdirty(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
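/*
 * Illustrative sketch (hypothetical, not from the surrounding code): how a
 * target MMU helper might set the Accessed bit of a 32-bit guest PTE.
 * stl_phys_notdirty() is used so that this bookkeeping store does not
 * invalidate any translated code that happens to live on the same page.
 * The bit value is made up for the example.
 */
#define EXAMPLE_PTE_ACCESSED 0x20   /* hypothetical Accessed bit */

static void __attribute__((unused))
example_set_pte_accessed(AddressSpace *as, hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(as, pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        stl_phys_notdirty(as, pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}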
3329 /* warning: addr must be aligned */
3330 static inline void address_space_stl_internal(AddressSpace *as,
3331 hwaddr addr, uint32_t val,
3333 MemTxResult *result,
3334 enum device_endian endian)
3341 bool release_lock = false;
3344 mr = address_space_translate(as, addr, &addr1, &l,
3346 if (l < 4 || !memory_access_is_direct(mr, true)) {
3347 release_lock |= prepare_mmio_access(mr);
3349 #if defined(TARGET_WORDS_BIGENDIAN)
3350 if (endian == DEVICE_LITTLE_ENDIAN) {
3354 if (endian == DEVICE_BIG_ENDIAN) {
3358 r = memory_region_dispatch_write(mr, addr1, val, 4, attrs);
3361 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3362 ptr = qemu_get_ram_ptr(addr1);
3364 case DEVICE_LITTLE_ENDIAN:
3367 case DEVICE_BIG_ENDIAN:
3374 invalidate_and_set_dirty(mr, addr1, 4);
3381 qemu_mutex_unlock_iothread();
3386 void address_space_stl(AddressSpace *as, hwaddr addr, uint32_t val,
3387 MemTxAttrs attrs, MemTxResult *result)
3389 address_space_stl_internal(as, addr, val, attrs, result,
3390 DEVICE_NATIVE_ENDIAN);
3393 void address_space_stl_le(AddressSpace *as, hwaddr addr, uint32_t val,
3394 MemTxAttrs attrs, MemTxResult *result)
3396 address_space_stl_internal(as, addr, val, attrs, result,
3397 DEVICE_LITTLE_ENDIAN);
3400 void address_space_stl_be(AddressSpace *as, hwaddr addr, uint32_t val,
3401 MemTxAttrs attrs, MemTxResult *result)
3403 address_space_stl_internal(as, addr, val, attrs, result,
3407 void stl_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3409 address_space_stl(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3412 void stl_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3414 address_space_stl_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3417 void stl_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3419 address_space_stl_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3423 void address_space_stb(AddressSpace *as, hwaddr addr, uint32_t val,
3424 MemTxAttrs attrs, MemTxResult *result)
3429 r = address_space_rw(as, addr, attrs, &v, 1, 1);
3435 void stb_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3437 address_space_stb(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3440 /* warning: addr must be aligned */
3441 static inline void address_space_stw_internal(AddressSpace *as,
3442 hwaddr addr, uint32_t val,
3444 MemTxResult *result,
3445 enum device_endian endian)
3452 bool release_lock = false;
3455 mr = address_space_translate(as, addr, &addr1, &l, true);
3456 if (l < 2 || !memory_access_is_direct(mr, true)) {
3457 release_lock |= prepare_mmio_access(mr);
3459 #if defined(TARGET_WORDS_BIGENDIAN)
3460 if (endian == DEVICE_LITTLE_ENDIAN) {
3464 if (endian == DEVICE_BIG_ENDIAN) {
3468 r = memory_region_dispatch_write(mr, addr1, val, 2, attrs);
3471 addr1 += memory_region_get_ram_addr(mr) & TARGET_PAGE_MASK;
3472 ptr = qemu_get_ram_ptr(addr1);
3474 case DEVICE_LITTLE_ENDIAN:
3477 case DEVICE_BIG_ENDIAN:
3484 invalidate_and_set_dirty(mr, addr1, 2);
3491 qemu_mutex_unlock_iothread();
3496 void address_space_stw(AddressSpace *as, hwaddr addr, uint32_t val,
3497 MemTxAttrs attrs, MemTxResult *result)
3499 address_space_stw_internal(as, addr, val, attrs, result,
3500 DEVICE_NATIVE_ENDIAN);
3503 void address_space_stw_le(AddressSpace *as, hwaddr addr, uint32_t val,
3504 MemTxAttrs attrs, MemTxResult *result)
3506 address_space_stw_internal(as, addr, val, attrs, result,
3507 DEVICE_LITTLE_ENDIAN);
3510 void address_space_stw_be(AddressSpace *as, hwaddr addr, uint32_t val,
3511 MemTxAttrs attrs, MemTxResult *result)
3513 address_space_stw_internal(as, addr, val, attrs, result,
3517 void stw_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3519 address_space_stw(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3522 void stw_le_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3524 address_space_stw_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3527 void stw_be_phys(AddressSpace *as, hwaddr addr, uint32_t val)
3529 address_space_stw_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3533 void address_space_stq(AddressSpace *as, hwaddr addr, uint64_t val,
3534 MemTxAttrs attrs, MemTxResult *result)
3538 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3544 void address_space_stq_le(AddressSpace *as, hwaddr addr, uint64_t val,
3545 MemTxAttrs attrs, MemTxResult *result)
3548 val = cpu_to_le64(val);
3549 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3554 void address_space_stq_be(AddressSpace *as, hwaddr addr, uint64_t val,
3555 MemTxAttrs attrs, MemTxResult *result)
3558 val = cpu_to_be64(val);
3559 r = address_space_rw(as, addr, attrs, (void *) &val, 8, 1);
3565 void stq_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3567 address_space_stq(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3570 void stq_le_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3572 address_space_stq_le(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3575 void stq_be_phys(AddressSpace *as, hwaddr addr, uint64_t val)
3577 address_space_stq_be(as, addr, val, MEMTXATTRS_UNSPECIFIED, NULL);
3580 /* virtual memory access for debug (includes writing to ROM) */
3581 int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr,
3582 uint8_t *buf, int len, int is_write)
3589 page = addr & TARGET_PAGE_MASK;
3590 phys_addr = cpu_get_phys_page_debug(cpu, page);
3591 /* if no physical page mapped, return an error */
3592 if (phys_addr == -1)
3594 l = (page + TARGET_PAGE_SIZE) - addr;
3597 phys_addr += (addr & ~TARGET_PAGE_MASK);
3599 cpu_physical_memory_write_rom(cpu->as, phys_addr, buf, l);
3601 address_space_rw(cpu->as, phys_addr, MEMTXATTRS_UNSPECIFIED,
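/*
 * Illustrative sketch (hypothetical, not from the surrounding code):
 * reading a guest virtual address the way a debugger front end (such as
 * the gdbstub) would, i.e. going through the CPU's page tables rather than
 * a physical address.  'vaddr' is assumed to come from the debugger.
 */
static bool __attribute__((unused))
example_debug_peek(CPUState *cpu, target_ulong vaddr, uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(cpu, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;               /* no physical page mapped there */
    }
    *out = ldl_p(buf);              /* interpret the bytes in target order */
    return true;
}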
3612 * Allows code that needs to deal with migration bitmaps etc. to still be built
3613 * target-independent.
3615 size_t qemu_target_page_bits(void)
3617 return TARGET_PAGE_BITS;
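/*
 * Illustrative sketch (hypothetical, not from the surrounding code):
 * target-independent code (migration, for instance) deriving the page size
 * from the helper above without pulling in target-specific headers.
 */
static size_t __attribute__((unused)) example_target_page_size(void)
{
    return (size_t)1 << qemu_target_page_bits();
}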
3623 * A helper function for the _utterly broken_ virtio device model to find out if
3624 * it's running on a big-endian machine. Don't do this at home, kids!
3626 bool target_words_bigendian(void);
3627 bool target_words_bigendian(void)
3629 #if defined(TARGET_WORDS_BIGENDIAN)
3636 #ifndef CONFIG_USER_ONLY
3637 bool cpu_physical_memory_is_io(hwaddr phys_addr)
3644 mr = address_space_translate(&address_space_memory,
3645 phys_addr, &phys_addr, &l, false);
3647 res = !(memory_region_is_ram(mr) || memory_region_is_romd(mr));
3652 int qemu_ram_foreach_block(RAMBlockIterFunc func, void *opaque)
3658 QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
3659 ret = func(block->idstr, block->host, block->offset,
3660 block->used_length, opaque);