2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
35 #include "qemu-timer.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
47 #include <machine/profile.h>
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
62 #include "memory-internal.h"
64 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
95 #if !defined(CONFIG_USER_ONLY)
97 static int in_migration;
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self modifying code, we count the number
125 of lookups we do to a given page to use a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
145 /* Size of the L2 (and L3, etc) page tables. */
147 #define L2_SIZE (1 << L2_BITS)
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
159 #define V_L1_BITS V_L1_BITS_REM
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
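/* Illustrative sketch, assuming a user-mode build with TARGET_PAGE_BITS of 12,
 * L2_BITS of 10 and a 32-bit guest virtual address space (so
 * L1_MAP_ADDR_SPACE_BITS is 32): a page index then has 20 significant bits,
 * V_L1_BITS_REM is (32 - 12) % 10 == 0, hence V_L1_BITS == 10 and
 * V_L1_SHIFT == 10.  A lookup splits the address as
 *
 *     index = addr >> TARGET_PAGE_BITS;
 *     l1    = (index >> V_L1_SHIFT) & (V_L1_SIZE - 1);   /- top 10 bits -/
 *     l2    = index & (L2_SIZE - 1);                      /- low 10 bits -/
 *
 * i.e. one 1024-entry l1_map table whose slots point at 1024-entry arrays
 * of PageDesc. */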
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
174 #if !defined(CONFIG_USER_ONLY)
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
189 static void io_mem_init(void);
190 static void memory_map_init(void);
191 static void *qemu_safe_ram_ptr(ram_addr_t addr);
193 static MemoryRegion io_mem_watch;
195 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
196 tb_page_addr_t phys_page2);
199 static int tb_flush_count;
200 static int tb_phys_invalidate_count;
203 static inline void map_exec(void *addr, long size)
206 VirtualProtect(addr, size,
207 PAGE_EXECUTE_READWRITE, &old_protect);
211 static inline void map_exec(void *addr, long size)
213 unsigned long start, end, page_size;
215 page_size = getpagesize();
216 start = (unsigned long)addr;
217 start &= ~(page_size - 1);
219 end = (unsigned long)addr + size;
220 end += page_size - 1;
221 end &= ~(page_size - 1);
223 mprotect((void *)start, end - start,
224 PROT_READ | PROT_WRITE | PROT_EXEC);
228 static void page_init(void)
230 /* NOTE: we can always suppose that qemu_host_page_size >=
234 SYSTEM_INFO system_info;
236 GetSystemInfo(&system_info);
237 qemu_real_host_page_size = system_info.dwPageSize;
240 qemu_real_host_page_size = getpagesize();
242 if (qemu_host_page_size == 0)
243 qemu_host_page_size = qemu_real_host_page_size;
244 if (qemu_host_page_size < TARGET_PAGE_SIZE)
245 qemu_host_page_size = TARGET_PAGE_SIZE;
246 qemu_host_page_mask = ~(qemu_host_page_size - 1);
248 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
250 #ifdef HAVE_KINFO_GETVMMAP
251 struct kinfo_vmentry *freep;
254 freep = kinfo_getvmmap(getpid(), &cnt);
257 for (i = 0; i < cnt; i++) {
258 unsigned long startaddr, endaddr;
260 startaddr = freep[i].kve_start;
261 endaddr = freep[i].kve_end;
262 if (h2g_valid(startaddr)) {
263 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
265 if (h2g_valid(endaddr)) {
266 endaddr = h2g(endaddr);
267 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
269 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
271 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
282 last_brk = (unsigned long)sbrk(0);
284 f = fopen("/compat/linux/proc/self/maps", "r");
289 unsigned long startaddr, endaddr;
292 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
294 if (n == 2 && h2g_valid(startaddr)) {
295 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
297 if (h2g_valid(endaddr)) {
298 endaddr = h2g(endaddr);
302 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
314 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
320 #if defined(CONFIG_USER_ONLY)
321 /* We can't use g_malloc because it may recurse into a locked mutex. */
322 # define ALLOC(P, SIZE) \
324 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
325 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
328 # define ALLOC(P, SIZE) \
329 do { P = g_malloc0(SIZE); } while (0)
332 /* Level 1. Always allocated. */
333 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
336 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
343 ALLOC(p, sizeof(void *) * L2_SIZE);
347 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
355 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
361 return pd + (index & (L2_SIZE - 1));
364 static inline PageDesc *page_find(tb_page_addr_t index)
366 return page_find_alloc(index, 0);
369 #if !defined(CONFIG_USER_ONLY)
371 static void phys_map_node_reserve(unsigned nodes)
373 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
374 typedef PhysPageEntry Node[L2_SIZE];
375 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
376 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
377 phys_map_nodes_nb + nodes);
378 phys_map_nodes = g_renew(Node, phys_map_nodes,
379 phys_map_nodes_nb_alloc);
383 static uint16_t phys_map_node_alloc(void)
388 ret = phys_map_nodes_nb++;
389 assert(ret != PHYS_MAP_NODE_NIL);
390 assert(ret != phys_map_nodes_nb_alloc);
391 for (i = 0; i < L2_SIZE; ++i) {
392 phys_map_nodes[ret][i].is_leaf = 0;
393 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
398 static void phys_map_nodes_reset(void)
400 phys_map_nodes_nb = 0;
404 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
405 hwaddr *nb, uint16_t leaf,
410 hwaddr step = (hwaddr)1 << (level * L2_BITS);
412 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
413 lp->ptr = phys_map_node_alloc();
414 p = phys_map_nodes[lp->ptr];
416 for (i = 0; i < L2_SIZE; i++) {
418 p[i].ptr = phys_section_unassigned;
422 p = phys_map_nodes[lp->ptr];
424 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
426 while (*nb && lp < &p[L2_SIZE]) {
427 if ((*index & (step - 1)) == 0 && *nb >= step) {
433 phys_page_set_level(lp, index, nb, leaf, level - 1);
439 static void phys_page_set(AddressSpaceDispatch *d,
440 hwaddr index, hwaddr nb,
443 /* Wildly overreserve - it doesn't matter much. */
444 phys_map_node_reserve(3 * P_L2_LEVELS);
446 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
449 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
451 PhysPageEntry lp = d->phys_map;
454 uint16_t s_index = phys_section_unassigned;
456 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
457 if (lp.ptr == PHYS_MAP_NODE_NIL) {
460 p = phys_map_nodes[lp.ptr];
461 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
466 return &phys_sections[s_index];
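/* Usage sketch (the variables here are hypothetical): registering a one-page
 * section and looking it up again, where 'd' is an initialized
 * AddressSpaceDispatch and 'sec' a MemoryRegionSection:
 *
 *     uint16_t leaf = phys_section_add(&sec);
 *     phys_page_set(d, paddr >> TARGET_PAGE_BITS, 1, leaf);
 *     MemoryRegionSection *s = phys_page_find(d, paddr >> TARGET_PAGE_BITS);
 *
 * Any page that was never set this way resolves to phys_section_unassigned. */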
469 bool memory_region_is_unassigned(MemoryRegion *mr)
471 return mr != &io_mem_ram && mr != &io_mem_rom
472 && mr != &io_mem_notdirty && !mr->rom_device
473 && mr != &io_mem_watch;
476 #define mmap_lock() do { } while(0)
477 #define mmap_unlock() do { } while(0)
480 #if defined(CONFIG_USER_ONLY)
481 /* Currently it is not recommended to allocate big chunks of data in
482 user mode. This will change once a dedicated libc is used. */
483 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
484 region in which the guest needs to run. Revisit this. */
485 #define USE_STATIC_CODE_GEN_BUFFER
488 /* ??? Should configure for this, not list operating systems here. */
489 #if (defined(__linux__) \
490 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
491 || defined(__DragonFly__) || defined(__OpenBSD__) \
492 || defined(__NetBSD__))
496 /* Minimum size of the code gen buffer. This number is randomly chosen,
497 but not so small that we can't have a fair number of TB's live. */
498 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
500 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
501 indicated, this is constrained by the range of direct branches on the
502 host cpu, as used by the TCG implementation of goto_tb. */
503 #if defined(__x86_64__)
504 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
505 #elif defined(__sparc__)
506 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
507 #elif defined(__arm__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
509 #elif defined(__s390x__)
510 /* We have a +- 4GB range on the branches; leave some slop. */
511 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
513 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
516 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
518 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
519 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
520 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
522 static inline size_t size_code_gen_buffer(size_t tb_size)
524 /* Size the buffer. */
526 #ifdef USE_STATIC_CODE_GEN_BUFFER
527 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
529 /* ??? Needs adjustments. */
530 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
531 static buffer, we could size this on RESERVED_VA, on the text
532 segment size of the executable, or continue to use the default. */
533 tb_size = (unsigned long)(ram_size / 4);
536 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
539 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
540 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
542 code_gen_buffer_size = tb_size;
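/* Worked example: in system mode with 1 GB of guest RAM and no explicit
 * tb_size, ram_size / 4 gives 256 MB, which already lies between
 * MIN_CODE_GEN_BUFFER_SIZE (1 MB) and the x86-64 maximum, so the translation
 * cache ends up at 256 MB.  On an ARM host the same request would be clamped
 * down to MAX_CODE_GEN_BUFFER_SIZE (16 MB). */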
546 #ifdef USE_STATIC_CODE_GEN_BUFFER
547 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
548 __attribute__((aligned(CODE_GEN_ALIGN)));
550 static inline void *alloc_code_gen_buffer(void)
552 map_exec(static_code_gen_buffer, code_gen_buffer_size);
553 return static_code_gen_buffer;
555 #elif defined(USE_MMAP)
556 static inline void *alloc_code_gen_buffer(void)
558 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
562 /* Constrain the position of the buffer based on the host cpu.
563 Note that these addresses are chosen in concert with the
564 addresses assigned in the relevant linker script file. */
565 # if defined(__PIE__) || defined(__PIC__)
566 /* Don't bother setting a preferred location if we're building
567 a position-independent executable. We're more likely to get
568 an address near the main executable if we let the kernel
569 choose the address. */
570 # elif defined(__x86_64__) && defined(MAP_32BIT)
571 /* Force the memory down into low memory with the executable.
572 Leave the choice of exact location with the kernel. */
574 /* Cannot expect to map more than 800MB in low memory. */
575 if (code_gen_buffer_size > 800u * 1024 * 1024) {
576 code_gen_buffer_size = 800u * 1024 * 1024;
578 # elif defined(__sparc__)
579 start = 0x40000000ul;
580 # elif defined(__s390x__)
581 start = 0x90000000ul;
584 buf = mmap((void *)start, code_gen_buffer_size,
585 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
586 return buf == MAP_FAILED ? NULL : buf;
589 static inline void *alloc_code_gen_buffer(void)
591 void *buf = g_malloc(code_gen_buffer_size);
593 map_exec(buf, code_gen_buffer_size);
597 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
599 static inline void code_gen_alloc(size_t tb_size)
601 code_gen_buffer_size = size_code_gen_buffer(tb_size);
602 code_gen_buffer = alloc_code_gen_buffer();
603 if (code_gen_buffer == NULL) {
604 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
608 /* Steal room for the prologue at the end of the buffer. This ensures
609 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
610 from TB's to the prologue are going to be in range. It also means
611 that we don't need to mark (additional) portions of the data segment
613 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
614 code_gen_buffer_size -= 1024;
616 code_gen_buffer_max_size = code_gen_buffer_size -
617 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
618 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
619 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
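/* Resulting layout of the translation cache (sketch):
 *
 *   code_gen_buffer                              code_gen_buffer + size
 *   |<-------------- generated TBs ------------->|<-- 1 KB prologue -->|
 *              code_gen_ptr grows to the right
 *
 * code_gen_buffer_max_size additionally keeps roughly one worst-case TB of
 * slack (TCG_MAX_OP_SIZE * OPC_BUF_SIZE) so a translation in progress cannot
 * run off the end of the buffer. */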
622 /* Must be called before using the QEMU cpus. 'tb_size' is the size
623 (in bytes) allocated to the translation buffer. Zero means default
625 void tcg_exec_init(unsigned long tb_size)
628 code_gen_alloc(tb_size);
629 code_gen_ptr = code_gen_buffer;
630 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
632 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
633 /* There's no guest base to take into account, so go ahead and
634 initialize the prologue now. */
635 tcg_prologue_init(&tcg_ctx);
639 bool tcg_enabled(void)
641 return code_gen_buffer != NULL;
644 void cpu_exec_init_all(void)
646 #if !defined(CONFIG_USER_ONLY)
652 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
654 static int cpu_common_post_load(void *opaque, int version_id)
656 CPUArchState *env = opaque;
658 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
659 version_id is increased. */
660 env->interrupt_request &= ~0x01;
666 static const VMStateDescription vmstate_cpu_common = {
667 .name = "cpu_common",
669 .minimum_version_id = 1,
670 .minimum_version_id_old = 1,
671 .post_load = cpu_common_post_load,
672 .fields = (VMStateField []) {
673 VMSTATE_UINT32(halted, CPUArchState),
674 VMSTATE_UINT32(interrupt_request, CPUArchState),
675 VMSTATE_END_OF_LIST()
680 CPUArchState *qemu_get_cpu(int cpu)
682 CPUArchState *env = first_cpu;
685 if (env->cpu_index == cpu)
693 void cpu_exec_init(CPUArchState *env)
698 #if defined(CONFIG_USER_ONLY)
701 env->next_cpu = NULL;
704 while (*penv != NULL) {
705 penv = &(*penv)->next_cpu;
708 env->cpu_index = cpu_index;
710 QTAILQ_INIT(&env->breakpoints);
711 QTAILQ_INIT(&env->watchpoints);
712 #ifndef CONFIG_USER_ONLY
713 env->thread_id = qemu_get_thread_id();
716 #if defined(CONFIG_USER_ONLY)
719 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
720 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
721 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
722 cpu_save, cpu_load, env);
726 /* Allocate a new translation block. Flush the translation buffer if
727 too many translation blocks or too much generated code. */
728 static TranslationBlock *tb_alloc(target_ulong pc)
730 TranslationBlock *tb;
732 if (nb_tbs >= code_gen_max_blocks ||
733 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
741 void tb_free(TranslationBlock *tb)
743 /* In practice this is mostly used for single-use temporary TBs.
744 Ignore the hard cases and just back up if this TB happens to
745 be the last one generated. */
746 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
747 code_gen_ptr = tb->tc_ptr;
752 static inline void invalidate_page_bitmap(PageDesc *p)
754 if (p->code_bitmap) {
755 g_free(p->code_bitmap);
756 p->code_bitmap = NULL;
758 p->code_write_count = 0;
761 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
763 static void page_flush_tb_1 (int level, void **lp)
772 for (i = 0; i < L2_SIZE; ++i) {
773 pd[i].first_tb = NULL;
774 invalidate_page_bitmap(pd + i);
778 for (i = 0; i < L2_SIZE; ++i) {
779 page_flush_tb_1 (level - 1, pp + i);
784 static void page_flush_tb(void)
787 for (i = 0; i < V_L1_SIZE; i++) {
788 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
792 /* flush all the translation blocks */
793 /* XXX: tb_flush is currently not thread safe */
794 void tb_flush(CPUArchState *env1)
797 #if defined(DEBUG_FLUSH)
798 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
799 (unsigned long)(code_gen_ptr - code_gen_buffer),
801 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
803 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
804 cpu_abort(env1, "Internal error: code buffer overflow\n");
808 for(env = first_cpu; env != NULL; env = env->next_cpu) {
809 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
812 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
815 code_gen_ptr = code_gen_buffer;
816 /* XXX: flush processor icache at this point if cache flush is
821 #ifdef DEBUG_TB_CHECK
823 static void tb_invalidate_check(target_ulong address)
825 TranslationBlock *tb;
827 address &= TARGET_PAGE_MASK;
828 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
829 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
830 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
831 address >= tb->pc + tb->size)) {
832 printf("ERROR invalidate: address=" TARGET_FMT_lx
833 " PC=%08lx size=%04x\n",
834 address, (long)tb->pc, tb->size);
840 /* verify that all the pages have correct rights for code */
841 static void tb_page_check(void)
843 TranslationBlock *tb;
844 int i, flags1, flags2;
846 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
847 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
848 flags1 = page_get_flags(tb->pc);
849 flags2 = page_get_flags(tb->pc + tb->size - 1);
850 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
851 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
852 (long)tb->pc, tb->size, flags1, flags2);
860 /* invalidate one TB */
861 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
864 TranslationBlock *tb1;
868 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
871 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
875 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 TranslationBlock *tb1;
882 n1 = (uintptr_t)tb1 & 3;
883 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 *ptb = tb1->page_next[n1];
888 ptb = &tb1->page_next[n1];
892 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
894 TranslationBlock *tb1, **ptb;
897 ptb = &tb->jmp_next[n];
900 /* find tb(n) in circular list */
903 n1 = (uintptr_t)tb1 & 3;
904 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
905 if (n1 == n && tb1 == tb)
908 ptb = &tb1->jmp_first;
910 ptb = &tb1->jmp_next[n1];
913 /* now we can remove tb(n) from the list */
914 *ptb = tb->jmp_next[n];
916 tb->jmp_next[n] = NULL;
920 /* reset the jump entry 'n' of a TB so that it is not chained to
922 static inline void tb_reset_jump(TranslationBlock *tb, int n)
924 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
927 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
932 tb_page_addr_t phys_pc;
933 TranslationBlock *tb1, *tb2;
935 /* remove the TB from the hash list */
936 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
937 h = tb_phys_hash_func(phys_pc);
938 tb_remove(&tb_phys_hash[h], tb,
939 offsetof(TranslationBlock, phys_hash_next));
941 /* remove the TB from the page list */
942 if (tb->page_addr[0] != page_addr) {
943 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
944 tb_page_remove(&p->first_tb, tb);
945 invalidate_page_bitmap(p);
947 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
948 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
953 tb_invalidated_flag = 1;
955 /* remove the TB from the per-CPU jump caches */
956 h = tb_jmp_cache_hash_func(tb->pc);
957 for(env = first_cpu; env != NULL; env = env->next_cpu) {
958 if (env->tb_jmp_cache[h] == tb)
959 env->tb_jmp_cache[h] = NULL;
962 /* remove this TB from the two jump lists */
963 tb_jmp_remove(tb, 0);
964 tb_jmp_remove(tb, 1);
966 /* suppress any remaining jumps to this TB */
969 n1 = (uintptr_t)tb1 & 3;
972 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
973 tb2 = tb1->jmp_next[n1];
974 tb_reset_jump(tb1, n1);
975 tb1->jmp_next[n1] = NULL;
978 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
980 tb_phys_invalidate_count++;
983 static inline void set_bits(uint8_t *tab, int start, int len)
989 mask = 0xff << (start & 7);
990 if ((start & ~7) == (end & ~7)) {
992 mask &= ~(0xff << (end & 7));
997 start = (start + 8) & ~7;
999 while (start < end1) {
1004 mask = ~(0xff << (end & 7));
1010 static void build_page_bitmap(PageDesc *p)
1012 int n, tb_start, tb_end;
1013 TranslationBlock *tb;
1015 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1018 while (tb != NULL) {
1019 n = (uintptr_t)tb & 3;
1020 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1021 /* NOTE: this is subtle as a TB may span two physical pages */
1023 /* NOTE: tb_end may be after the end of the page, but
1024 it is not a problem */
1025 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1026 tb_end = tb_start + tb->size;
1027 if (tb_end > TARGET_PAGE_SIZE)
1028 tb_end = TARGET_PAGE_SIZE;
1031 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1034 tb = tb->page_next[n];
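/* Example, assuming 4 KB target pages: a TB translated from guest PC
 * 0x40001ffc with size 8 spans two pages.  On the first page tb_start is
 * 0xffc and tb_end is clamped to TARGET_PAGE_SIZE, so bits 0xffc..0xfff are
 * set; the second page (the n == 1 case) gets bits 0x000..0x003.  A later
 * write to any of those byte offsets is then known to hit translated code. */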
1038 TranslationBlock *tb_gen_code(CPUArchState *env,
1039 target_ulong pc, target_ulong cs_base,
1040 int flags, int cflags)
1042 TranslationBlock *tb;
1044 tb_page_addr_t phys_pc, phys_page2;
1045 target_ulong virt_page2;
1048 phys_pc = get_page_addr_code(env, pc);
1051 /* flush must be done */
1053 /* cannot fail at this point */
1055 /* Don't forget to invalidate previous TB info. */
1056 tb_invalidated_flag = 1;
1058 tc_ptr = code_gen_ptr;
1059 tb->tc_ptr = tc_ptr;
1060 tb->cs_base = cs_base;
1062 tb->cflags = cflags;
1063 cpu_gen_code(env, tb, &code_gen_size);
1064 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1065 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1067 /* check next page if needed */
1068 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1070 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1071 phys_page2 = get_page_addr_code(env, virt_page2);
1073 tb_link_page(tb, phys_pc, phys_page2);
1078 * Invalidate all TBs which intersect with the target physical address range
1079 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1080 * 'is_cpu_write_access' should be true if called from a real cpu write
1081 * access: the virtual CPU will exit the current TB if code is modified inside
1084 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1085 int is_cpu_write_access)
1087 while (start < end) {
1088 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1089 start &= TARGET_PAGE_MASK;
1090 start += TARGET_PAGE_SIZE;
1095 * Invalidate all TBs which intersect with the target physical address range
1096 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1097 * 'is_cpu_write_access' should be true if called from a real cpu write
1098 * access: the virtual CPU will exit the current TB if code is modified inside
1101 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1102 int is_cpu_write_access)
1104 TranslationBlock *tb, *tb_next, *saved_tb;
1105 CPUArchState *env = cpu_single_env;
1106 tb_page_addr_t tb_start, tb_end;
1109 #ifdef TARGET_HAS_PRECISE_SMC
1110 int current_tb_not_found = is_cpu_write_access;
1111 TranslationBlock *current_tb = NULL;
1112 int current_tb_modified = 0;
1113 target_ulong current_pc = 0;
1114 target_ulong current_cs_base = 0;
1115 int current_flags = 0;
1116 #endif /* TARGET_HAS_PRECISE_SMC */
1118 p = page_find(start >> TARGET_PAGE_BITS);
1121 if (!p->code_bitmap &&
1122 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1123 is_cpu_write_access) {
1124 /* build code bitmap */
1125 build_page_bitmap(p);
1128 /* we remove all the TBs in the range [start, end[ */
1129 /* XXX: see if in some cases it could be faster to invalidate all the code */
1131 while (tb != NULL) {
1132 n = (uintptr_t)tb & 3;
1133 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1134 tb_next = tb->page_next[n];
1135 /* NOTE: this is subtle as a TB may span two physical pages */
1137 /* NOTE: tb_end may be after the end of the page, but
1138 it is not a problem */
1139 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1140 tb_end = tb_start + tb->size;
1142 tb_start = tb->page_addr[1];
1143 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1145 if (!(tb_end <= start || tb_start >= end)) {
1146 #ifdef TARGET_HAS_PRECISE_SMC
1147 if (current_tb_not_found) {
1148 current_tb_not_found = 0;
1150 if (env->mem_io_pc) {
1151 /* now we have a real cpu fault */
1152 current_tb = tb_find_pc(env->mem_io_pc);
1155 if (current_tb == tb &&
1156 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1157 /* If we are modifying the current TB, we must stop
1158 its execution. We could be more precise by checking
1159 that the modification is after the current PC, but it
1160 would require a specialized function to partially
1161 restore the CPU state */
1163 current_tb_modified = 1;
1164 cpu_restore_state(current_tb, env, env->mem_io_pc);
1165 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1168 #endif /* TARGET_HAS_PRECISE_SMC */
1169 /* we need to do that to handle the case where a signal
1170 occurs while doing tb_phys_invalidate() */
1173 saved_tb = env->current_tb;
1174 env->current_tb = NULL;
1176 tb_phys_invalidate(tb, -1);
1178 env->current_tb = saved_tb;
1179 if (env->interrupt_request && env->current_tb)
1180 cpu_interrupt(env, env->interrupt_request);
1185 #if !defined(CONFIG_USER_ONLY)
1186 /* if no code remaining, no need to continue to use slow writes */
1188 invalidate_page_bitmap(p);
1189 if (is_cpu_write_access) {
1190 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1194 #ifdef TARGET_HAS_PRECISE_SMC
1195 if (current_tb_modified) {
1196 /* we generate a block containing just the instruction
1197 modifying the memory. It will ensure that it cannot modify
1199 env->current_tb = NULL;
1200 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1201 cpu_resume_from_signal(env, NULL);
1206 /* len must be <= 8 and start must be a multiple of len */
1207 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1213 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1214 cpu_single_env->mem_io_vaddr, len,
1215 cpu_single_env->eip,
1216 cpu_single_env->eip +
1217 (intptr_t)cpu_single_env->segs[R_CS].base);
1220 p = page_find(start >> TARGET_PAGE_BITS);
1223 if (p->code_bitmap) {
1224 offset = start & ~TARGET_PAGE_MASK;
1225 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1226 if (b & ((1 << len) - 1))
1230 tb_invalidate_phys_page_range(start, start + len, 1);
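/* Example of the fast path above: a 4-byte guest store at page offset 0x20
 * picks the bitmap byte at index 0x20 >> 3, shifts it right by 0x20 & 7 and
 * tests the low 4 bits.  If none are set, no TB was translated from those
 * bytes and the expensive tb_invalidate_phys_page_range() call is skipped. */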
1234 #if !defined(CONFIG_SOFTMMU)
1235 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1236 uintptr_t pc, void *puc)
1238 TranslationBlock *tb;
1241 #ifdef TARGET_HAS_PRECISE_SMC
1242 TranslationBlock *current_tb = NULL;
1243 CPUArchState *env = cpu_single_env;
1244 int current_tb_modified = 0;
1245 target_ulong current_pc = 0;
1246 target_ulong current_cs_base = 0;
1247 int current_flags = 0;
1250 addr &= TARGET_PAGE_MASK;
1251 p = page_find(addr >> TARGET_PAGE_BITS);
1255 #ifdef TARGET_HAS_PRECISE_SMC
1256 if (tb && pc != 0) {
1257 current_tb = tb_find_pc(pc);
1260 while (tb != NULL) {
1261 n = (uintptr_t)tb & 3;
1262 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1263 #ifdef TARGET_HAS_PRECISE_SMC
1264 if (current_tb == tb &&
1265 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1266 /* If we are modifying the current TB, we must stop
1267 its execution. We could be more precise by checking
1268 that the modification is after the current PC, but it
1269 would require a specialized function to partially
1270 restore the CPU state */
1272 current_tb_modified = 1;
1273 cpu_restore_state(current_tb, env, pc);
1274 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1277 #endif /* TARGET_HAS_PRECISE_SMC */
1278 tb_phys_invalidate(tb, addr);
1279 tb = tb->page_next[n];
1282 #ifdef TARGET_HAS_PRECISE_SMC
1283 if (current_tb_modified) {
1284 /* we generate a block containing just the instruction
1285 modifying the memory. It will ensure that it cannot modify
1287 env->current_tb = NULL;
1288 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1289 cpu_resume_from_signal(env, puc);
1295 /* add the tb to the target page and protect it if necessary */
1296 static inline void tb_alloc_page(TranslationBlock *tb,
1297 unsigned int n, tb_page_addr_t page_addr)
1300 #ifndef CONFIG_USER_ONLY
1301 bool page_already_protected;
1304 tb->page_addr[n] = page_addr;
1305 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1306 tb->page_next[n] = p->first_tb;
1307 #ifndef CONFIG_USER_ONLY
1308 page_already_protected = p->first_tb != NULL;
1310 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1311 invalidate_page_bitmap(p);
1313 #if defined(TARGET_HAS_SMC) || 1
1315 #if defined(CONFIG_USER_ONLY)
1316 if (p->flags & PAGE_WRITE) {
1321 /* force the host page as non writable (writes will have a
1322 page fault + mprotect overhead) */
1323 page_addr &= qemu_host_page_mask;
1325 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1326 addr += TARGET_PAGE_SIZE) {
1328 p2 = page_find (addr >> TARGET_PAGE_BITS);
1332 p2->flags &= ~PAGE_WRITE;
1334 mprotect(g2h(page_addr), qemu_host_page_size,
1335 (prot & PAGE_BITS) & ~PAGE_WRITE);
1336 #ifdef DEBUG_TB_INVALIDATE
1337 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1342 /* if some code is already present, then the pages are already
1343 protected. So we handle the case where only the first TB is
1344 allocated in a physical page */
1345 if (!page_already_protected) {
1346 tlb_protect_code(page_addr);
1350 #endif /* TARGET_HAS_SMC */
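/* Illustrative flow for user mode: once a page contains translated code its
 * host page is mprotect()ed read-only above.  A later guest write to that
 * page raises SIGSEGV; the signal handler calls page_unprotect() (further
 * down in this file), which restores PAGE_WRITE and invalidates the TBs on
 * the page via tb_invalidate_phys_page() before the write is retried. */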
1353 /* add a new TB and link it to the physical page tables. phys_page2 is
1354 (-1) to indicate that only one page contains the TB. */
1355 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1356 tb_page_addr_t phys_page2)
1359 TranslationBlock **ptb;
1361 /* Grab the mmap lock to stop another thread invalidating this TB
1362 before we are done. */
1364 /* add in the physical hash table */
1365 h = tb_phys_hash_func(phys_pc);
1366 ptb = &tb_phys_hash[h];
1367 tb->phys_hash_next = *ptb;
1370 /* add in the page list */
1371 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1372 if (phys_page2 != -1)
1373 tb_alloc_page(tb, 1, phys_page2);
1375 tb->page_addr[1] = -1;
1377 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1378 tb->jmp_next[0] = NULL;
1379 tb->jmp_next[1] = NULL;
1381 /* init original jump addresses */
1382 if (tb->tb_next_offset[0] != 0xffff)
1383 tb_reset_jump(tb, 0);
1384 if (tb->tb_next_offset[1] != 0xffff)
1385 tb_reset_jump(tb, 1);
1387 #ifdef DEBUG_TB_CHECK
1393 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1394 tb[1].tc_ptr. Return NULL if not found */
1395 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1397 int m_min, m_max, m;
1399 TranslationBlock *tb;
1403 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1404 tc_ptr >= (uintptr_t)code_gen_ptr) {
1407 /* binary search (cf Knuth) */
1410 while (m_min <= m_max) {
1411 m = (m_min + m_max) >> 1;
1413 v = (uintptr_t)tb->tc_ptr;
1416 else if (tc_ptr < v) {
1425 static void tb_reset_jump_recursive(TranslationBlock *tb);
1427 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1429 TranslationBlock *tb1, *tb_next, **ptb;
1432 tb1 = tb->jmp_next[n];
1434 /* find head of list */
1436 n1 = (uintptr_t)tb1 & 3;
1437 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1440 tb1 = tb1->jmp_next[n1];
1442 /* we are now sure that tb jumps to tb1 */
1445 /* remove tb from the jmp_first list */
1446 ptb = &tb_next->jmp_first;
1449 n1 = (uintptr_t)tb1 & 3;
1450 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1451 if (n1 == n && tb1 == tb)
1453 ptb = &tb1->jmp_next[n1];
1455 *ptb = tb->jmp_next[n];
1456 tb->jmp_next[n] = NULL;
1458 /* suppress the jump to next tb in generated code */
1459 tb_reset_jump(tb, n);
1461 /* recursively reset the jumps in the tb we could have jumped to */
1462 tb_reset_jump_recursive(tb_next);
1466 static void tb_reset_jump_recursive(TranslationBlock *tb)
1468 tb_reset_jump_recursive2(tb, 0);
1469 tb_reset_jump_recursive2(tb, 1);
1472 #if defined(TARGET_HAS_ICE)
1473 #if defined(CONFIG_USER_ONLY)
1474 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1476 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1479 void tb_invalidate_phys_addr(hwaddr addr)
1481 ram_addr_t ram_addr;
1482 MemoryRegionSection *section;
1484 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1485 if (!(memory_region_is_ram(section->mr)
1486 || (section->mr->rom_device && section->mr->readable))) {
1489 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1490 + memory_region_section_addr(section, addr);
1491 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1494 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1496 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1497 (pc & ~TARGET_PAGE_MASK));
1500 #endif /* TARGET_HAS_ICE */
1502 #if defined(CONFIG_USER_ONLY)
1503 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1508 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1509 int flags, CPUWatchpoint **watchpoint)
1514 /* Add a watchpoint. */
1515 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1516 int flags, CPUWatchpoint **watchpoint)
1518 target_ulong len_mask = ~(len - 1);
1521 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1522 if ((len & (len - 1)) || (addr & ~len_mask) ||
1523 len == 0 || len > TARGET_PAGE_SIZE) {
1524 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1525 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1528 wp = g_malloc(sizeof(*wp));
1531 wp->len_mask = len_mask;
1534 /* keep all GDB-injected watchpoints in front */
1536 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1538 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1540 tlb_flush_page(env, addr);
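/* Example: len == 4 gives len_mask == ~3, so a watchpoint at address 0x1002
 * fails the (addr & ~len_mask) check above (0x1002 & 3 != 0) and is rejected,
 * while 0x1004 is accepted.  tlb_flush_page() then makes sure subsequent
 * accesses to that page are looked at again and can be matched against the
 * new watchpoint. */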
1547 /* Remove a specific watchpoint. */
1548 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1551 target_ulong len_mask = ~(len - 1);
1554 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1555 if (addr == wp->vaddr && len_mask == wp->len_mask
1556 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1557 cpu_watchpoint_remove_by_ref(env, wp);
1564 /* Remove a specific watchpoint by reference. */
1565 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1567 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1569 tlb_flush_page(env, watchpoint->vaddr);
1574 /* Remove all matching watchpoints. */
1575 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1577 CPUWatchpoint *wp, *next;
1579 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1580 if (wp->flags & mask)
1581 cpu_watchpoint_remove_by_ref(env, wp);
1586 /* Add a breakpoint. */
1587 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1588 CPUBreakpoint **breakpoint)
1590 #if defined(TARGET_HAS_ICE)
1593 bp = g_malloc(sizeof(*bp));
1598 /* keep all GDB-injected breakpoints in front */
1600 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1602 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1604 breakpoint_invalidate(env, pc);
1614 /* Remove a specific breakpoint. */
1615 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1617 #if defined(TARGET_HAS_ICE)
1620 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1621 if (bp->pc == pc && bp->flags == flags) {
1622 cpu_breakpoint_remove_by_ref(env, bp);
1632 /* Remove a specific breakpoint by reference. */
1633 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1635 #if defined(TARGET_HAS_ICE)
1636 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1638 breakpoint_invalidate(env, breakpoint->pc);
1644 /* Remove all matching breakpoints. */
1645 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1647 #if defined(TARGET_HAS_ICE)
1648 CPUBreakpoint *bp, *next;
1650 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1651 if (bp->flags & mask)
1652 cpu_breakpoint_remove_by_ref(env, bp);
1657 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1658 CPU loop after each instruction */
1659 void cpu_single_step(CPUArchState *env, int enabled)
1661 #if defined(TARGET_HAS_ICE)
1662 if (env->singlestep_enabled != enabled) {
1663 env->singlestep_enabled = enabled;
1665 kvm_update_guest_debug(env, 0);
1667 /* must flush all the translated code to avoid inconsistencies */
1668 /* XXX: only flush what is necessary */
1675 static void cpu_unlink_tb(CPUArchState *env)
1677 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1678 problem and hope the cpu will stop of its own accord. For userspace
1679 emulation this often isn't actually as bad as it sounds. Often
1680 signals are used primarily to interrupt blocking syscalls. */
1681 TranslationBlock *tb;
1682 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1684 spin_lock(&interrupt_lock);
1685 tb = env->current_tb;
1686 /* if the cpu is currently executing code, we must unlink it and
1687 all the potentially executing TBs */
1689 env->current_tb = NULL;
1690 tb_reset_jump_recursive(tb);
1692 spin_unlock(&interrupt_lock);
1695 #ifndef CONFIG_USER_ONLY
1696 /* mask must never be zero, except for A20 change call */
1697 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1701 old_mask = env->interrupt_request;
1702 env->interrupt_request |= mask;
1705 * If called from iothread context, wake the target cpu in
1708 if (!qemu_cpu_is_self(env)) {
1714 env->icount_decr.u16.high = 0xffff;
1716 && (mask & ~old_mask) != 0) {
1717 cpu_abort(env, "Raised interrupt while not in I/O function");
1724 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1726 #else /* CONFIG_USER_ONLY */
1728 void cpu_interrupt(CPUArchState *env, int mask)
1730 env->interrupt_request |= mask;
1733 #endif /* CONFIG_USER_ONLY */
1735 void cpu_reset_interrupt(CPUArchState *env, int mask)
1737 env->interrupt_request &= ~mask;
1740 void cpu_exit(CPUArchState *env)
1742 env->exit_request = 1;
1746 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1753 fprintf(stderr, "qemu: fatal: ");
1754 vfprintf(stderr, fmt, ap);
1755 fprintf(stderr, "\n");
1756 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1757 if (qemu_log_enabled()) {
1758 qemu_log("qemu: fatal: ");
1759 qemu_log_vprintf(fmt, ap2);
1761 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1767 #if defined(CONFIG_USER_ONLY)
1769 struct sigaction act;
1770 sigfillset(&act.sa_mask);
1771 act.sa_handler = SIG_DFL;
1772 sigaction(SIGABRT, &act, NULL);
1778 CPUArchState *cpu_copy(CPUArchState *env)
1780 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1781 CPUArchState *next_cpu = new_env->next_cpu;
1782 int cpu_index = new_env->cpu_index;
1783 #if defined(TARGET_HAS_ICE)
1788 memcpy(new_env, env, sizeof(CPUArchState));
1790 /* Preserve chaining and index. */
1791 new_env->next_cpu = next_cpu;
1792 new_env->cpu_index = cpu_index;
1794 /* Clone all break/watchpoints.
1795 Note: Once we support ptrace with hw-debug register access, make sure
1796 BP_CPU break/watchpoints are handled correctly on clone. */
1797 QTAILQ_INIT(&new_env->breakpoints);
1798 QTAILQ_INIT(&new_env->watchpoints);
1799 #if defined(TARGET_HAS_ICE)
1800 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1801 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1803 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1804 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1812 #if !defined(CONFIG_USER_ONLY)
1813 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1817 /* Discard jump cache entries for any tb which might potentially
1818 overlap the flushed page. */
1819 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1820 memset (&env->tb_jmp_cache[i], 0,
1821 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1823 i = tb_jmp_cache_hash_page(addr);
1824 memset (&env->tb_jmp_cache[i], 0,
1825 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1828 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1833 /* we modify the TLB cache so that the dirty bit will be set again
1834 when accessing the range */
1835 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1836 /* Check that we don't span multiple blocks - this breaks the
1837 address comparisons below. */
1838 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1839 != (end - 1) - start) {
1842 cpu_tlb_reset_dirty_all(start1, length);
1846 /* Note: start and end must be within the same ram block. */
1847 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1852 start &= TARGET_PAGE_MASK;
1853 end = TARGET_PAGE_ALIGN(end);
1855 length = end - start;
1858 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1860 if (tcg_enabled()) {
1861 tlb_reset_dirty_range_all(start, end, length);
1865 static int cpu_physical_memory_set_dirty_tracking(int enable)
1868 in_migration = enable;
1872 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1873 MemoryRegionSection *section,
1877 target_ulong *address)
1882 if (memory_region_is_ram(section->mr)) {
1884 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1885 + memory_region_section_addr(section, paddr);
1886 if (!section->readonly) {
1887 iotlb |= phys_section_notdirty;
1889 iotlb |= phys_section_rom;
1892 /* IO handlers are currently passed a physical address.
1893 It would be nice to pass an offset from the base address
1894 of that region. This would avoid having to special case RAM,
1895 and avoid full address decoding in every device.
1896 We can't use the high bits of pd for this because
1897 IO_MEM_ROMD uses these as a ram address. */
1898 iotlb = section - phys_sections;
1899 iotlb += memory_region_section_addr(section, paddr);
1902 /* Make accesses to pages with watchpoints go via the
1903 watchpoint trap routines. */
1904 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1905 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1906 /* Avoid trapping reads of pages with a write breakpoint. */
1907 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1908 iotlb = phys_section_watch + paddr;
1909 *address |= TLB_MMIO;
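/* Summary of the iotlb encodings produced above (sketch):
 *   RAM, writable:   ram_addr | phys_section_notdirty
 *   RAM, read-only:  ram_addr | phys_section_rom
 *   MMIO:            index of the section in phys_sections + offset within it
 *   watched page:    phys_section_watch + paddr, with TLB_MMIO set so the
 *                    access takes the slow path and hits the watchpoint. */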
1920 * Walks guest process memory "regions" one by one
1921 * and calls callback function 'fn' for each region.
1924 struct walk_memory_regions_data
1926 walk_memory_regions_fn fn;
1932 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1933 abi_ulong end, int new_prot)
1935 if (data->start != -1ul) {
1936 int rc = data->fn(data->priv, data->start, end, data->prot);
1942 data->start = (new_prot ? end : -1ul);
1943 data->prot = new_prot;
1948 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1949 abi_ulong base, int level, void **lp)
1955 return walk_memory_regions_end(data, base, 0);
1960 for (i = 0; i < L2_SIZE; ++i) {
1961 int prot = pd[i].flags;
1963 pa = base | (i << TARGET_PAGE_BITS);
1964 if (prot != data->prot) {
1965 rc = walk_memory_regions_end(data, pa, prot);
1973 for (i = 0; i < L2_SIZE; ++i) {
1974 pa = base | ((abi_ulong)i <<
1975 (TARGET_PAGE_BITS + L2_BITS * level));
1976 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1986 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1988 struct walk_memory_regions_data data;
1996 for (i = 0; i < V_L1_SIZE; i++) {
1997 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1998 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2004 return walk_memory_regions_end(&data, 0, 0);
2007 static int dump_region(void *priv, abi_ulong start,
2008 abi_ulong end, unsigned long prot)
2010 FILE *f = (FILE *)priv;
2012 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2013 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2014 start, end, end - start,
2015 ((prot & PAGE_READ) ? 'r' : '-'),
2016 ((prot & PAGE_WRITE) ? 'w' : '-'),
2017 ((prot & PAGE_EXEC) ? 'x' : '-'));
2022 /* dump memory mappings */
2023 void page_dump(FILE *f)
2025 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2026 "start", "end", "size", "prot");
2027 walk_memory_regions(f, dump_region);
2030 int page_get_flags(target_ulong address)
2034 p = page_find(address >> TARGET_PAGE_BITS);
2040 /* Modify the flags of a page and invalidate the code if necessary.
2041 The flag PAGE_WRITE_ORG is positioned automatically depending
2042 on PAGE_WRITE. The mmap_lock should already be held. */
2043 void page_set_flags(target_ulong start, target_ulong end, int flags)
2045 target_ulong addr, len;
2047 /* This function should never be called with addresses outside the
2048 guest address space. If this assert fires, it probably indicates
2049 a missing call to h2g_valid. */
2050 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2051 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2053 assert(start < end);
2055 start = start & TARGET_PAGE_MASK;
2056 end = TARGET_PAGE_ALIGN(end);
2058 if (flags & PAGE_WRITE) {
2059 flags |= PAGE_WRITE_ORG;
2062 for (addr = start, len = end - start;
2064 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2065 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2067 /* If the write protection bit is set, then we invalidate
2069 if (!(p->flags & PAGE_WRITE) &&
2070 (flags & PAGE_WRITE) &&
2072 tb_invalidate_phys_page(addr, 0, NULL);
2078 int page_check_range(target_ulong start, target_ulong len, int flags)
2084 /* This function should never be called with addresses outside the
2085 guest address space. If this assert fires, it probably indicates
2086 a missing call to h2g_valid. */
2087 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2088 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2094 if (start + len - 1 < start) {
2095 /* We've wrapped around. */
2099 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2100 start = start & TARGET_PAGE_MASK;
2102 for (addr = start, len = end - start;
2104 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2105 p = page_find(addr >> TARGET_PAGE_BITS);
2108 if (!(p->flags & PAGE_VALID))
2111 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2113 if (flags & PAGE_WRITE) {
2114 if (!(p->flags & PAGE_WRITE_ORG))
2116 /* unprotect the page if it was put read-only because it
2117 contains translated code */
2118 if (!(p->flags & PAGE_WRITE)) {
2119 if (!page_unprotect(addr, 0, NULL))
2128 /* called from signal handler: invalidate the code and unprotect the
2129 page. Return TRUE if the fault was successfully handled. */
2130 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2134 target_ulong host_start, host_end, addr;
2136 /* Technically this isn't safe inside a signal handler. However we
2137 know this only ever happens in a synchronous SEGV handler, so in
2138 practice it seems to be ok. */
2141 p = page_find(address >> TARGET_PAGE_BITS);
2147 /* if the page was really writable, then we change its
2148 protection back to writable */
2149 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2150 host_start = address & qemu_host_page_mask;
2151 host_end = host_start + qemu_host_page_size;
2154 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2155 p = page_find(addr >> TARGET_PAGE_BITS);
2156 p->flags |= PAGE_WRITE;
2159 /* and since the content will be modified, we must invalidate
2160 the corresponding translated code. */
2161 tb_invalidate_phys_page(addr, pc, puc);
2162 #ifdef DEBUG_TB_CHECK
2163 tb_invalidate_check(addr);
2166 mprotect((void *)g2h(host_start), qemu_host_page_size,
2175 #endif /* defined(CONFIG_USER_ONLY) */
2177 #if !defined(CONFIG_USER_ONLY)
2179 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2180 typedef struct subpage_t {
2183 uint16_t sub_section[TARGET_PAGE_SIZE];
2186 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2188 static subpage_t *subpage_init(hwaddr base);
2189 static void destroy_page_desc(uint16_t section_index)
2191 MemoryRegionSection *section = &phys_sections[section_index];
2192 MemoryRegion *mr = section->mr;
2195 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2196 memory_region_destroy(&subpage->iomem);
2201 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2206 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2210 p = phys_map_nodes[lp->ptr];
2211 for (i = 0; i < L2_SIZE; ++i) {
2212 if (!p[i].is_leaf) {
2213 destroy_l2_mapping(&p[i], level - 1);
2215 destroy_page_desc(p[i].ptr);
2219 lp->ptr = PHYS_MAP_NODE_NIL;
2222 static void destroy_all_mappings(AddressSpaceDispatch *d)
2224 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2225 phys_map_nodes_reset();
2228 static uint16_t phys_section_add(MemoryRegionSection *section)
2230 if (phys_sections_nb == phys_sections_nb_alloc) {
2231 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2232 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2233 phys_sections_nb_alloc);
2235 phys_sections[phys_sections_nb] = *section;
2236 return phys_sections_nb++;
2239 static void phys_sections_clear(void)
2241 phys_sections_nb = 0;
2244 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2247 hwaddr base = section->offset_within_address_space
2249 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2250 MemoryRegionSection subsection = {
2251 .offset_within_address_space = base,
2252 .size = TARGET_PAGE_SIZE,
2256 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2258 if (!(existing->mr->subpage)) {
2259 subpage = subpage_init(base);
2260 subsection.mr = &subpage->iomem;
2261 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2262 phys_section_add(&subsection));
2264 subpage = container_of(existing->mr, subpage_t, iomem);
2266 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2267 end = start + section->size - 1;
2268 subpage_register(subpage, start, end, phys_section_add(section));
2272 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2274 hwaddr start_addr = section->offset_within_address_space;
2275 ram_addr_t size = section->size;
2277 uint16_t section_index = phys_section_add(section);
2282 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2286 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2288 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2289 MemoryRegionSection now = *section, remain = *section;
2291 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2292 || (now.size < TARGET_PAGE_SIZE)) {
2293 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2294 - now.offset_within_address_space,
2296 register_subpage(d, &now);
2297 remain.size -= now.size;
2298 remain.offset_within_address_space += now.size;
2299 remain.offset_within_region += now.size;
2301 while (remain.size >= TARGET_PAGE_SIZE) {
2303 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2304 now.size = TARGET_PAGE_SIZE;
2305 register_subpage(d, &now);
2307 now.size &= TARGET_PAGE_MASK;
2308 register_multipage(d, &now);
2310 remain.size -= now.size;
2311 remain.offset_within_address_space += now.size;
2312 remain.offset_within_region += now.size;
2316 register_subpage(d, &now);
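/* Worked example, assuming 4 KB target pages and a page-aligned
 * offset_within_region: a section covering 0x1800..0x3bff is split by
 * mem_add() into a subpage head 0x1800..0x1fff, one full page 0x2000..0x2fff
 * registered via register_multipage(), and a subpage tail 0x3000..0x3bff. */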
2320 void qemu_flush_coalesced_mmio_buffer(void)
2323 kvm_flush_coalesced_mmio_buffer();
2326 #if defined(__linux__) && !defined(TARGET_S390X)
2328 #include <sys/vfs.h>
2330 #define HUGETLBFS_MAGIC 0x958458f6
2332 static long gethugepagesize(const char *path)
2338 ret = statfs(path, &fs);
2339 } while (ret != 0 && errno == EINTR);
2346 if (fs.f_type != HUGETLBFS_MAGIC)
2347 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2352 static void *file_ram_alloc(RAMBlock *block,
2362 unsigned long hpagesize;
2364 hpagesize = gethugepagesize(path);
2369 if (memory < hpagesize) {
2373 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2374 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2378 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2382 fd = mkstemp(filename);
2384 perror("unable to create backing store for hugepages");
2391 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2394 * ftruncate is not supported by hugetlbfs in older
2395 * hosts, so don't bother bailing out on errors.
2396 * If anything goes wrong with it under other filesystems,
2399 if (ftruncate(fd, memory))
2400 perror("ftruncate");
2403 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2404 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2405 * to sidestep this quirk.
2407 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2408 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2410 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2412 if (area == MAP_FAILED) {
2413 perror("file_ram_alloc: can't mmap RAM pages");
2422 static ram_addr_t find_ram_offset(ram_addr_t size)
2424 RAMBlock *block, *next_block;
2425 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2427 if (QLIST_EMPTY(&ram_list.blocks))
2430 QLIST_FOREACH(block, &ram_list.blocks, next) {
2431 ram_addr_t end, next = RAM_ADDR_MAX;
2433 end = block->offset + block->length;
2435 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2436 if (next_block->offset >= end) {
2437 next = MIN(next, next_block->offset);
2440 if (next - end >= size && next - end < mingap) {
2442 mingap = next - end;
2446 if (offset == RAM_ADDR_MAX) {
2447 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2455 ram_addr_t last_ram_offset(void)
2458 ram_addr_t last = 0;
2460 QLIST_FOREACH(block, &ram_list.blocks, next)
2461 last = MAX(last, block->offset + block->length);
2466 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2469 QemuOpts *machine_opts;
2471 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2472 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2474 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2475 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2477 perror("qemu_madvise");
2478 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2479 "but dump_guest_core=off specified\n");
2484 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2486 RAMBlock *new_block, *block;
2489 QLIST_FOREACH(block, &ram_list.blocks, next) {
2490 if (block->offset == addr) {
2496 assert(!new_block->idstr[0]);
2499 char *id = qdev_get_dev_path(dev);
2501 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2505 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2507 QLIST_FOREACH(block, &ram_list.blocks, next) {
2508 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2509 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2516 static int memory_try_enable_merging(void *addr, size_t len)
2520 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2521 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2522 /* disabled by the user */
2526 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
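/*
 * Usage note (illustration only): KSM merging of guest RAM pages is enabled
 * by default and can be disabled with the machine option checked above, e.g.
 * -machine pc,mem-merge=off (machine type "pc" is again just an example).
 */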
2529 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2532 RAMBlock *new_block;
2534 size = TARGET_PAGE_ALIGN(size);
2535 new_block = g_malloc0(sizeof(*new_block));
2538 new_block->offset = find_ram_offset(size);
2540 new_block->host = host;
2541 new_block->flags |= RAM_PREALLOC_MASK;
2544 #if defined (__linux__) && !defined(TARGET_S390X)
2545 new_block->host = file_ram_alloc(new_block, size, mem_path);
2546 if (!new_block->host) {
2547 new_block->host = qemu_vmalloc(size);
2548 memory_try_enable_merging(new_block->host, size);
2551 fprintf(stderr, "-mem-path option unsupported\n");
2555 if (xen_enabled()) {
2556 xen_ram_alloc(new_block->offset, size, mr);
2557 } else if (kvm_enabled()) {
2558 /* some s390/kvm configurations have special constraints */
2559 new_block->host = kvm_vmalloc(size);
2561 new_block->host = qemu_vmalloc(size);
2563 memory_try_enable_merging(new_block->host, size);
2566 new_block->length = size;
2568 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2570 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2571 last_ram_offset() >> TARGET_PAGE_BITS);
2572 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2573 0, size >> TARGET_PAGE_BITS);
2574 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2576 qemu_ram_setup_dump(new_block->host, size);
2577 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2580 kvm_setup_guest_memory(new_block->host, size);
2582 return new_block->offset;
2585 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2587 return qemu_ram_alloc_from_ptr(size, NULL, mr);
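/*
 * Illustrative sketch (hypothetical caller, not part of this file): board or
 * device code normally reaches qemu_ram_alloc()/qemu_ram_alloc_from_ptr()
 * when backing a MemoryRegion with RAM.  MY_VRAM_SIZE and the function name
 * are made up for the example.
 */
#if 0
static ram_addr_t my_vram_alloc(MemoryRegion *mr)
{
    /* mr is assumed to be the MemoryRegion being set up as RAM, in the way
     * memory_region_init_ram() does before calling in here. */
    return qemu_ram_alloc(MY_VRAM_SIZE, mr);
}
#endif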
2590 void qemu_ram_free_from_ptr(ram_addr_t addr)
2594 QLIST_FOREACH(block, &ram_list.blocks, next) {
2595 if (addr == block->offset) {
2596 QLIST_REMOVE(block, next);
2603 void qemu_ram_free(ram_addr_t addr)
2607 QLIST_FOREACH(block, &ram_list.blocks, next) {
2608 if (addr == block->offset) {
2609 QLIST_REMOVE(block, next);
2610 if (block->flags & RAM_PREALLOC_MASK) {
2612 } else if (mem_path) {
2613 #if defined (__linux__) && !defined(TARGET_S390X)
2615 munmap(block->host, block->length);
2618 qemu_vfree(block->host);
2624 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2625 munmap(block->host, block->length);
2627 if (xen_enabled()) {
2628 xen_invalidate_map_cache_entry(block->host);
2630 qemu_vfree(block->host);
2642 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2649 QLIST_FOREACH(block, &ram_list.blocks, next) {
2650 offset = addr - block->offset;
2651 if (offset < block->length) {
2652 vaddr = block->host + offset;
2653 if (block->flags & RAM_PREALLOC_MASK) {
2657 munmap(vaddr, length);
2659 #if defined(__linux__) && !defined(TARGET_S390X)
2662 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2665 flags |= MAP_PRIVATE;
2667 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2668 flags, block->fd, offset);
2670 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2671 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2678 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2679 flags |= MAP_SHARED | MAP_ANONYMOUS;
2680 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2683 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2684 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2688 if (area != vaddr) {
2689 fprintf(stderr, "Could not remap addr: "
2690 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2694 memory_try_enable_merging(vaddr, length);
2695 qemu_ram_setup_dump(vaddr, length);
2701 #endif /* !_WIN32 */
2703 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2704 With the exception of the softmmu code in this file, this should
2705 only be used for local memory (e.g. video ram) that the device owns,
2706 and knows it isn't going to access beyond the end of the block.
2708 It should not be used for general purpose DMA.
2709 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2711 void *qemu_get_ram_ptr(ram_addr_t addr)
2715 QLIST_FOREACH(block, &ram_list.blocks, next) {
2716 if (addr - block->offset < block->length) {
2717             /* Move this entry to the start of the list.  */
2718 if (block != QLIST_FIRST(&ram_list.blocks)) {
2719 QLIST_REMOVE(block, next);
2720 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2722 if (xen_enabled()) {
2723 /* We need to check if the requested address is in the RAM
2724 * because we don't want to map the entire memory in QEMU.
2725 * In that case just map until the end of the page.
2727 if (block->offset == 0) {
2728 return xen_map_cache(addr, 0, 0);
2729 } else if (block->host == NULL) {
2731 xen_map_cache(block->offset, block->length, 1);
2734 return block->host + (addr - block->offset);
2738 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
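/*
 * Illustrative sketch (hypothetical device code): per the comment above, the
 * returned pointer is only meant for block-local accesses by the owning
 * device, not for general DMA.  vram_offset and MY_VRAM_SIZE are made up.
 */
#if 0
static void my_vram_clear(ram_addr_t vram_offset)
{
    uint8_t *p = qemu_get_ram_ptr(vram_offset);  /* host view of the block */

    memset(p, 0, MY_VRAM_SIZE);                  /* access stays inside the block */
    qemu_put_ram_ptr(p);                         /* tracing counterpart of the get */
}
#endif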
2744 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2745 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2747 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2751 QLIST_FOREACH(block, &ram_list.blocks, next) {
2752 if (addr - block->offset < block->length) {
2753 if (xen_enabled()) {
2754 /* We need to check if the requested address is in the RAM
2755 * because we don't want to map the entire memory in QEMU.
2756 * In that case just map until the end of the page.
2758 if (block->offset == 0) {
2759 return xen_map_cache(addr, 0, 0);
2760 } else if (block->host == NULL) {
2762 xen_map_cache(block->offset, block->length, 1);
2765 return block->host + (addr - block->offset);
2769 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2775 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2776 * but takes a size argument */
2777 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2782 if (xen_enabled()) {
2783 return xen_map_cache(addr, *size, 1);
2787 QLIST_FOREACH(block, &ram_list.blocks, next) {
2788 if (addr - block->offset < block->length) {
2789 if (addr - block->offset + *size > block->length)
2790 *size = block->length - addr + block->offset;
2791 return block->host + (addr - block->offset);
2795 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2800 void qemu_put_ram_ptr(void *addr)
2802 trace_qemu_put_ram_ptr(addr);
2805 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2808 uint8_t *host = ptr;
2810 if (xen_enabled()) {
2811 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2815 QLIST_FOREACH(block, &ram_list.blocks, next) {
2816         /* This case happens when the block is not mapped. */
2817 if (block->host == NULL) {
2820 if (host - block->host < block->length) {
2821 *ram_addr = block->offset + (host - block->host);
2829 /* Some of the softmmu routines need to translate from a host pointer
2830 (typically a TLB entry) back to a ram offset. */
2831 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2833 ram_addr_t ram_addr;
2835 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2836 fprintf(stderr, "Bad ram pointer %p\n", ptr);
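/*
 * Illustrative sketch (hypothetical caller): translating a host pointer, for
 * instance one previously returned by qemu_get_ram_ptr(), back to its
 * ram_addr_t.  A zero return from qemu_ram_addr_from_host() means success.
 */
#if 0
static ram_addr_t my_host_to_ram_addr(void *host_ptr)
{
    ram_addr_t ra = 0;

    if (qemu_ram_addr_from_host(host_ptr, &ra) != 0) {
        /* host_ptr does not point into any registered RAM block */
    }
    return ra;
}
#endif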
2842 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2845 #ifdef DEBUG_UNASSIGNED
2846 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2848 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2849 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2854 static void unassigned_mem_write(void *opaque, hwaddr addr,
2855 uint64_t val, unsigned size)
2857 #ifdef DEBUG_UNASSIGNED
2858 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2860 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2861 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2865 static const MemoryRegionOps unassigned_mem_ops = {
2866 .read = unassigned_mem_read,
2867 .write = unassigned_mem_write,
2868 .endianness = DEVICE_NATIVE_ENDIAN,
2871 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2877 static void error_mem_write(void *opaque, hwaddr addr,
2878 uint64_t value, unsigned size)
2883 static const MemoryRegionOps error_mem_ops = {
2884 .read = error_mem_read,
2885 .write = error_mem_write,
2886 .endianness = DEVICE_NATIVE_ENDIAN,
2889 static const MemoryRegionOps rom_mem_ops = {
2890 .read = error_mem_read,
2891 .write = unassigned_mem_write,
2892 .endianness = DEVICE_NATIVE_ENDIAN,
2895 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2896 uint64_t val, unsigned size)
2899 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2900 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2901 #if !defined(CONFIG_USER_ONLY)
2902 tb_invalidate_phys_page_fast(ram_addr, size);
2903 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2908 stb_p(qemu_get_ram_ptr(ram_addr), val);
2911 stw_p(qemu_get_ram_ptr(ram_addr), val);
2914 stl_p(qemu_get_ram_ptr(ram_addr), val);
2919 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2920 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2921     /* we remove the notdirty callback only if the code has been flushed */
2923 if (dirty_flags == 0xff)
2924 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2927 static const MemoryRegionOps notdirty_mem_ops = {
2928 .read = error_mem_read,
2929 .write = notdirty_mem_write,
2930 .endianness = DEVICE_NATIVE_ENDIAN,
2933 /* Generate a debug exception if a watchpoint has been hit. */
2934 static void check_watchpoint(int offset, int len_mask, int flags)
2936 CPUArchState *env = cpu_single_env;
2937 target_ulong pc, cs_base;
2938 TranslationBlock *tb;
2943 if (env->watchpoint_hit) {
2944 /* We re-entered the check after replacing the TB. Now raise
2945          * the debug interrupt so that it will trigger after the
2946 * current instruction. */
2947 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2950 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2951 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2952 if ((vaddr == (wp->vaddr & len_mask) ||
2953 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2954 wp->flags |= BP_WATCHPOINT_HIT;
2955 if (!env->watchpoint_hit) {
2956 env->watchpoint_hit = wp;
2957 tb = tb_find_pc(env->mem_io_pc);
2959 cpu_abort(env, "check_watchpoint: could not find TB for "
2960 "pc=%p", (void *)env->mem_io_pc);
2962 cpu_restore_state(tb, env, env->mem_io_pc);
2963 tb_phys_invalidate(tb, -1);
2964 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2965 env->exception_index = EXCP_DEBUG;
2968 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2969 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2970 cpu_resume_from_signal(env, NULL);
2974 wp->flags &= ~BP_WATCHPOINT_HIT;
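/*
 * Context note (illustration only): the watchpoints scanned above are
 * installed elsewhere, e.g. by the gdb stub, through cpu_watchpoint_insert().
 * Its exact signature lives outside this excerpt, so treat the call below as
 * an assumption rather than a reference:
 *
 *     cpu_watchpoint_insert(env, addr, len, BP_MEM_WRITE | BP_GDB, NULL);
 */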
2979 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2980    so these check for a hit then pass through to the normal out-of-line
   load/store functions.  */
2982 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2985 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2987 case 1: return ldub_phys(addr);
2988 case 2: return lduw_phys(addr);
2989 case 4: return ldl_phys(addr);
2994 static void watch_mem_write(void *opaque, hwaddr addr,
2995 uint64_t val, unsigned size)
2997 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3000 stb_phys(addr, val);
3003 stw_phys(addr, val);
3006 stl_phys(addr, val);
3012 static const MemoryRegionOps watch_mem_ops = {
3013 .read = watch_mem_read,
3014 .write = watch_mem_write,
3015 .endianness = DEVICE_NATIVE_ENDIAN,
3018 static uint64_t subpage_read(void *opaque, hwaddr addr,
3021 subpage_t *mmio = opaque;
3022 unsigned int idx = SUBPAGE_IDX(addr);
3023 MemoryRegionSection *section;
3024 #if defined(DEBUG_SUBPAGE)
3025 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3026 mmio, len, addr, idx);
3029 section = &phys_sections[mmio->sub_section[idx]];
3031 addr -= section->offset_within_address_space;
3032 addr += section->offset_within_region;
3033 return io_mem_read(section->mr, addr, len);
3036 static void subpage_write(void *opaque, hwaddr addr,
3037 uint64_t value, unsigned len)
3039 subpage_t *mmio = opaque;
3040 unsigned int idx = SUBPAGE_IDX(addr);
3041 MemoryRegionSection *section;
3042 #if defined(DEBUG_SUBPAGE)
3043 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3044 " idx %d value %"PRIx64"\n",
3045 __func__, mmio, len, addr, idx, value);
3048 section = &phys_sections[mmio->sub_section[idx]];
3050 addr -= section->offset_within_address_space;
3051 addr += section->offset_within_region;
3052 io_mem_write(section->mr, addr, value, len);
3055 static const MemoryRegionOps subpage_ops = {
3056 .read = subpage_read,
3057 .write = subpage_write,
3058 .endianness = DEVICE_NATIVE_ENDIAN,
3061 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3064 ram_addr_t raddr = addr;
3065 void *ptr = qemu_get_ram_ptr(raddr);
3067 case 1: return ldub_p(ptr);
3068 case 2: return lduw_p(ptr);
3069 case 4: return ldl_p(ptr);
3074 static void subpage_ram_write(void *opaque, hwaddr addr,
3075 uint64_t value, unsigned size)
3077 ram_addr_t raddr = addr;
3078 void *ptr = qemu_get_ram_ptr(raddr);
3080 case 1: return stb_p(ptr, value);
3081 case 2: return stw_p(ptr, value);
3082 case 4: return stl_p(ptr, value);
3087 static const MemoryRegionOps subpage_ram_ops = {
3088 .read = subpage_ram_read,
3089 .write = subpage_ram_write,
3090 .endianness = DEVICE_NATIVE_ENDIAN,
3093 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3098 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3100 idx = SUBPAGE_IDX(start);
3101 eidx = SUBPAGE_IDX(end);
3102 #if defined(DEBUG_SUBPAGE)
3103 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3104 mmio, start, end, idx, eidx, memory);
3106 if (memory_region_is_ram(phys_sections[section].mr)) {
3107 MemoryRegionSection new_section = phys_sections[section];
3108 new_section.mr = &io_mem_subpage_ram;
3109 section = phys_section_add(&new_section);
3111 for (; idx <= eidx; idx++) {
3112 mmio->sub_section[idx] = section;
3118 static subpage_t *subpage_init(hwaddr base)
3122 mmio = g_malloc0(sizeof(subpage_t));
3125 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3126 "subpage", TARGET_PAGE_SIZE);
3127 mmio->iomem.subpage = true;
3128 #if defined(DEBUG_SUBPAGE)
3129 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3130 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3132 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3137 static uint16_t dummy_section(MemoryRegion *mr)
3139 MemoryRegionSection section = {
3141 .offset_within_address_space = 0,
3142 .offset_within_region = 0,
3146     return phys_section_add(&section);
3149 MemoryRegion *iotlb_to_region(hwaddr index)
3151 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3154 static void io_mem_init(void)
3156 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3157 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3158 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3159 "unassigned", UINT64_MAX);
3160     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3161 "notdirty", UINT64_MAX);
3162 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3163 "subpage-ram", UINT64_MAX);
3164 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3165 "watch", UINT64_MAX);
3168 static void mem_begin(MemoryListener *listener)
3170 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3172 destroy_all_mappings(d);
3173 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3176 static void core_begin(MemoryListener *listener)
3178 phys_sections_clear();
3179 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3180 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3181 phys_section_rom = dummy_section(&io_mem_rom);
3182 phys_section_watch = dummy_section(&io_mem_watch);
3185 static void tcg_commit(MemoryListener *listener)
3189 /* since each CPU stores ram addresses in its TLB cache, we must
3190 reset the modified entries */
3192 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3197 static void core_log_global_start(MemoryListener *listener)
3199 cpu_physical_memory_set_dirty_tracking(1);
3202 static void core_log_global_stop(MemoryListener *listener)
3204 cpu_physical_memory_set_dirty_tracking(0);
3207 static void io_region_add(MemoryListener *listener,
3208 MemoryRegionSection *section)
3210 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3212 mrio->mr = section->mr;
3213 mrio->offset = section->offset_within_region;
3214 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3215 section->offset_within_address_space, section->size);
3216 ioport_register(&mrio->iorange);
3219 static void io_region_del(MemoryListener *listener,
3220 MemoryRegionSection *section)
3222 isa_unassign_ioport(section->offset_within_address_space, section->size);
3225 static MemoryListener core_memory_listener = {
3226 .begin = core_begin,
3227 .log_global_start = core_log_global_start,
3228 .log_global_stop = core_log_global_stop,
3232 static MemoryListener io_memory_listener = {
3233 .region_add = io_region_add,
3234 .region_del = io_region_del,
3238 static MemoryListener tcg_memory_listener = {
3239 .commit = tcg_commit,
3242 void address_space_init_dispatch(AddressSpace *as)
3244 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3246 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3247 d->listener = (MemoryListener) {
3249 .region_add = mem_add,
3250 .region_nop = mem_add,
3254 memory_listener_register(&d->listener, as);
3257 void address_space_destroy_dispatch(AddressSpace *as)
3259 AddressSpaceDispatch *d = as->dispatch;
3261 memory_listener_unregister(&d->listener);
3262 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3264 as->dispatch = NULL;
3267 static void memory_map_init(void)
3269 system_memory = g_malloc(sizeof(*system_memory));
3270 memory_region_init(system_memory, "system", INT64_MAX);
3271 address_space_init(&address_space_memory, system_memory);
3272 address_space_memory.name = "memory";
3274 system_io = g_malloc(sizeof(*system_io));
3275 memory_region_init(system_io, "io", 65536);
3276 address_space_init(&address_space_io, system_io);
3277 address_space_io.name = "I/O";
3279 memory_listener_register(&core_memory_listener, &address_space_memory);
3280 memory_listener_register(&io_memory_listener, &address_space_io);
3281 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3284 MemoryRegion *get_system_memory(void)
3286 return system_memory;
3289 MemoryRegion *get_system_io(void)
3294 #endif /* !defined(CONFIG_USER_ONLY) */
3296 /* physical memory access (slow version, mainly for debug) */
3297 #if defined(CONFIG_USER_ONLY)
3298 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3299 uint8_t *buf, int len, int is_write)
3306 page = addr & TARGET_PAGE_MASK;
3307 l = (page + TARGET_PAGE_SIZE) - addr;
3310 flags = page_get_flags(page);
3311 if (!(flags & PAGE_VALID))
3314 if (!(flags & PAGE_WRITE))
3316 /* XXX: this code should not depend on lock_user */
3317 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3320 unlock_user(p, addr, l);
3322 if (!(flags & PAGE_READ))
3324 /* XXX: this code should not depend on lock_user */
3325 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3328 unlock_user(p, addr, 0);
3339 static void invalidate_and_set_dirty(hwaddr addr,
3342 if (!cpu_physical_memory_is_dirty(addr)) {
3343 /* invalidate code */
3344 tb_invalidate_phys_page_range(addr, addr + length, 0);
3346 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3348 xen_modified_memory(addr, length);
3351 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3352 int len, bool is_write)
3354 AddressSpaceDispatch *d = as->dispatch;
3359 MemoryRegionSection *section;
3362 page = addr & TARGET_PAGE_MASK;
3363 l = (page + TARGET_PAGE_SIZE) - addr;
3366 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3369 if (!memory_region_is_ram(section->mr)) {
3371 addr1 = memory_region_section_addr(section, addr);
3372 /* XXX: could force cpu_single_env to NULL to avoid
3374 if (l >= 4 && ((addr1 & 3) == 0)) {
3375 /* 32 bit write access */
3377 io_mem_write(section->mr, addr1, val, 4);
3379 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3380 /* 16 bit write access */
3382 io_mem_write(section->mr, addr1, val, 2);
3385 /* 8 bit write access */
3387 io_mem_write(section->mr, addr1, val, 1);
3390 } else if (!section->readonly) {
3392 addr1 = memory_region_get_ram_addr(section->mr)
3393 + memory_region_section_addr(section, addr);
3395 ptr = qemu_get_ram_ptr(addr1);
3396 memcpy(ptr, buf, l);
3397 invalidate_and_set_dirty(addr1, l);
3398 qemu_put_ram_ptr(ptr);
3401 if (!(memory_region_is_ram(section->mr) ||
3402 memory_region_is_romd(section->mr))) {
3405 addr1 = memory_region_section_addr(section, addr);
3406 if (l >= 4 && ((addr1 & 3) == 0)) {
3407 /* 32 bit read access */
3408 val = io_mem_read(section->mr, addr1, 4);
3411 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3412 /* 16 bit read access */
3413 val = io_mem_read(section->mr, addr1, 2);
3417 /* 8 bit read access */
3418 val = io_mem_read(section->mr, addr1, 1);
3424 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3425 + memory_region_section_addr(section,
3427 memcpy(buf, ptr, l);
3428 qemu_put_ram_ptr(ptr);
3437 void address_space_write(AddressSpace *as, hwaddr addr,
3438 const uint8_t *buf, int len)
3440 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3444 * address_space_read: read from an address space.
3446 * @as: #AddressSpace to be accessed
3447 * @addr: address within that address space
3448 * @buf: buffer with the data transferred
3450 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3452 address_space_rw(as, addr, buf, len, false);
3456 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3457 int len, int is_write)
3459 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
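/*
 * Illustrative sketch (hypothetical caller): reading a small structure out of
 * guest-physical memory through the memory address space.  MyDesc and
 * desc_pa are made up for the example.
 */
#if 0
static void my_read_desc(hwaddr desc_pa, MyDesc *desc)
{
    address_space_read(&address_space_memory, desc_pa,
                       (uint8_t *)desc, sizeof(*desc));
}
#endif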
3462 /* used for ROM loading: can write in RAM and ROM */
3463 void cpu_physical_memory_write_rom(hwaddr addr,
3464 const uint8_t *buf, int len)
3466 AddressSpaceDispatch *d = address_space_memory.dispatch;
3470 MemoryRegionSection *section;
3473 page = addr & TARGET_PAGE_MASK;
3474 l = (page + TARGET_PAGE_SIZE) - addr;
3477 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3479 if (!(memory_region_is_ram(section->mr) ||
3480 memory_region_is_romd(section->mr))) {
3483 unsigned long addr1;
3484 addr1 = memory_region_get_ram_addr(section->mr)
3485 + memory_region_section_addr(section, addr);
3487 ptr = qemu_get_ram_ptr(addr1);
3488 memcpy(ptr, buf, l);
3489 invalidate_and_set_dirty(addr1, l);
3490 qemu_put_ram_ptr(ptr);
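/*
 * Illustrative sketch (hypothetical loader): firmware images are copied in
 * through this helper because ordinary writes to ROM-backed regions do not
 * land in the backing RAM.  bios_addr/bios_data/bios_size are made up:
 *
 *     cpu_physical_memory_write_rom(bios_addr, bios_data, bios_size);
 */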
3504 static BounceBuffer bounce;
3506 typedef struct MapClient {
3508 void (*callback)(void *opaque);
3509 QLIST_ENTRY(MapClient) link;
3512 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3513 = QLIST_HEAD_INITIALIZER(map_client_list);
3515 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3517 MapClient *client = g_malloc(sizeof(*client));
3519 client->opaque = opaque;
3520 client->callback = callback;
3521 QLIST_INSERT_HEAD(&map_client_list, client, link);
3525 static void cpu_unregister_map_client(void *_client)
3527 MapClient *client = (MapClient *)_client;
3529 QLIST_REMOVE(client, link);
3533 static void cpu_notify_map_clients(void)
3537 while (!QLIST_EMPTY(&map_client_list)) {
3538 client = QLIST_FIRST(&map_client_list);
3539 client->callback(client->opaque);
3540 cpu_unregister_map_client(client);
3544 /* Map a physical memory region into a host virtual address.
3545 * May map a subset of the requested range, given by and returned in *plen.
3546 * May return NULL if resources needed to perform the mapping are exhausted.
3547 * Use only for reads OR writes - not for read-modify-write operations.
3548 * Use cpu_register_map_client() to know when retrying the map operation is
3549 * likely to succeed.
3551 void *address_space_map(AddressSpace *as,
3556 AddressSpaceDispatch *d = as->dispatch;
3561 MemoryRegionSection *section;
3562 ram_addr_t raddr = RAM_ADDR_MAX;
3567 page = addr & TARGET_PAGE_MASK;
3568 l = (page + TARGET_PAGE_SIZE) - addr;
3571 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3573 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3574 if (todo || bounce.buffer) {
3577 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3581 address_space_read(as, addr, bounce.buffer, l);
3585 return bounce.buffer;
3588 raddr = memory_region_get_ram_addr(section->mr)
3589 + memory_region_section_addr(section, addr);
3597 ret = qemu_ram_ptr_length(raddr, &rlen);
3602 /* Unmaps a memory region previously mapped by address_space_map().
3603 * Will also mark the memory as dirty if is_write == 1. access_len gives
3604 * the amount of memory that was actually read or written by the caller.
3606 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3607 int is_write, hwaddr access_len)
3609 if (buffer != bounce.buffer) {
3611 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3612 while (access_len) {
3614 l = TARGET_PAGE_SIZE;
3617 invalidate_and_set_dirty(addr1, l);
3622 if (xen_enabled()) {
3623 xen_invalidate_map_cache_entry(buffer);
3628 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3630 qemu_vfree(bounce.buffer);
3631 bounce.buffer = NULL;
3632 cpu_notify_map_clients();
3635 void *cpu_physical_memory_map(hwaddr addr,
3639 return address_space_map(&address_space_memory, addr, plen, is_write);
3642 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3643 int is_write, hwaddr access_len)
3645 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
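/*
 * Illustrative sketch (hypothetical caller): the intended map/unmap pattern
 * for zero-copy DMA, as described above.  dma_addr and dma_len are made up;
 * a real caller would handle the NULL case by falling back to
 * cpu_physical_memory_rw() or by registering a map client and retrying.
 */
#if 0
static void my_dma_zero(hwaddr dma_addr, hwaddr dma_len)
{
    hwaddr plen = dma_len;
    void *host = cpu_physical_memory_map(dma_addr, &plen, 1 /* is_write */);

    if (!host) {
        /* resources exhausted (e.g. bounce buffer busy): retry later */
        return;
    }
    memset(host, 0, plen);                    /* plen may be less than dma_len */
    cpu_physical_memory_unmap(host, plen, 1, plen);
}
#endif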
3648 /* warning: addr must be aligned */
3649 static inline uint32_t ldl_phys_internal(hwaddr addr,
3650 enum device_endian endian)
3654 MemoryRegionSection *section;
3656 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3658 if (!(memory_region_is_ram(section->mr) ||
3659 memory_region_is_romd(section->mr))) {
3661 addr = memory_region_section_addr(section, addr);
3662 val = io_mem_read(section->mr, addr, 4);
3663 #if defined(TARGET_WORDS_BIGENDIAN)
3664 if (endian == DEVICE_LITTLE_ENDIAN) {
3668 if (endian == DEVICE_BIG_ENDIAN) {
3674 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3676 + memory_region_section_addr(section, addr));
3678 case DEVICE_LITTLE_ENDIAN:
3679 val = ldl_le_p(ptr);
3681 case DEVICE_BIG_ENDIAN:
3682 val = ldl_be_p(ptr);
3692 uint32_t ldl_phys(hwaddr addr)
3694 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3697 uint32_t ldl_le_phys(hwaddr addr)
3699 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3702 uint32_t ldl_be_phys(hwaddr addr)
3704 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3707 /* warning: addr must be aligned */
3708 static inline uint64_t ldq_phys_internal(hwaddr addr,
3709 enum device_endian endian)
3713 MemoryRegionSection *section;
3715 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3717 if (!(memory_region_is_ram(section->mr) ||
3718 memory_region_is_romd(section->mr))) {
3720 addr = memory_region_section_addr(section, addr);
3722 /* XXX This is broken when device endian != cpu endian.
3723 Fix and add "endian" variable check */
3724 #ifdef TARGET_WORDS_BIGENDIAN
3725 val = io_mem_read(section->mr, addr, 4) << 32;
3726 val |= io_mem_read(section->mr, addr + 4, 4);
3728 val = io_mem_read(section->mr, addr, 4);
3729 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3733 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3735 + memory_region_section_addr(section, addr));
3737 case DEVICE_LITTLE_ENDIAN:
3738 val = ldq_le_p(ptr);
3740 case DEVICE_BIG_ENDIAN:
3741 val = ldq_be_p(ptr);
3751 uint64_t ldq_phys(hwaddr addr)
3753 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3756 uint64_t ldq_le_phys(hwaddr addr)
3758 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3761 uint64_t ldq_be_phys(hwaddr addr)
3763 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3767 uint32_t ldub_phys(hwaddr addr)
3770 cpu_physical_memory_read(addr, &val, 1);
3774 /* warning: addr must be aligned */
3775 static inline uint32_t lduw_phys_internal(hwaddr addr,
3776 enum device_endian endian)
3780 MemoryRegionSection *section;
3782 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3784 if (!(memory_region_is_ram(section->mr) ||
3785 memory_region_is_romd(section->mr))) {
3787 addr = memory_region_section_addr(section, addr);
3788 val = io_mem_read(section->mr, addr, 2);
3789 #if defined(TARGET_WORDS_BIGENDIAN)
3790 if (endian == DEVICE_LITTLE_ENDIAN) {
3794 if (endian == DEVICE_BIG_ENDIAN) {
3800 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3802 + memory_region_section_addr(section, addr));
3804 case DEVICE_LITTLE_ENDIAN:
3805 val = lduw_le_p(ptr);
3807 case DEVICE_BIG_ENDIAN:
3808 val = lduw_be_p(ptr);
3818 uint32_t lduw_phys(hwaddr addr)
3820 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3823 uint32_t lduw_le_phys(hwaddr addr)
3825 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3828 uint32_t lduw_be_phys(hwaddr addr)
3830 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3833 /* warning: addr must be aligned. The ram page is not masked as dirty
3834 and the code inside is not invalidated. It is useful if the dirty
3835 bits are used to track modified PTEs */
3836 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3839 MemoryRegionSection *section;
3841 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3843 if (!memory_region_is_ram(section->mr) || section->readonly) {
3844 addr = memory_region_section_addr(section, addr);
3845 if (memory_region_is_ram(section->mr)) {
3846 section = &phys_sections[phys_section_rom];
3848 io_mem_write(section->mr, addr, val, 4);
3850 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3852 + memory_region_section_addr(section, addr);
3853 ptr = qemu_get_ram_ptr(addr1);
3856 if (unlikely(in_migration)) {
3857 if (!cpu_physical_memory_is_dirty(addr1)) {
3858 /* invalidate code */
3859 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3861 cpu_physical_memory_set_dirty_flags(
3862 addr1, (0xff & ~CODE_DIRTY_FLAG));
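/*
 * Illustrative sketch (hypothetical target MMU helper): as the comment above
 * notes, updating accessed/dirty bits in a guest PTE can use the _notdirty
 * variant so that the bookkeeping write itself does not dirty the page.
 * pte_pa and MY_PTE_ACCESSED are made up.
 */
#if 0
static void my_pte_set_accessed(hwaddr pte_pa)
{
    uint32_t pte = ldl_phys(pte_pa);

    stl_phys_notdirty(pte_pa, pte | MY_PTE_ACCESSED);
}
#endif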
3868 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3871 MemoryRegionSection *section;
3873 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3875 if (!memory_region_is_ram(section->mr) || section->readonly) {
3876 addr = memory_region_section_addr(section, addr);
3877 if (memory_region_is_ram(section->mr)) {
3878 section = &phys_sections[phys_section_rom];
3880 #ifdef TARGET_WORDS_BIGENDIAN
3881 io_mem_write(section->mr, addr, val >> 32, 4);
3882 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3884 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3885 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3888 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3890 + memory_region_section_addr(section, addr));
3895 /* warning: addr must be aligned */
3896 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3897 enum device_endian endian)
3900 MemoryRegionSection *section;
3902 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3904 if (!memory_region_is_ram(section->mr) || section->readonly) {
3905 addr = memory_region_section_addr(section, addr);
3906 if (memory_region_is_ram(section->mr)) {
3907 section = &phys_sections[phys_section_rom];
3909 #if defined(TARGET_WORDS_BIGENDIAN)
3910 if (endian == DEVICE_LITTLE_ENDIAN) {
3914 if (endian == DEVICE_BIG_ENDIAN) {
3918 io_mem_write(section->mr, addr, val, 4);
3920 unsigned long addr1;
3921 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3922 + memory_region_section_addr(section, addr);
3924 ptr = qemu_get_ram_ptr(addr1);
3926 case DEVICE_LITTLE_ENDIAN:
3929 case DEVICE_BIG_ENDIAN:
3936 invalidate_and_set_dirty(addr1, 4);
3940 void stl_phys(hwaddr addr, uint32_t val)
3942 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3945 void stl_le_phys(hwaddr addr, uint32_t val)
3947 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3950 void stl_be_phys(hwaddr addr, uint32_t val)
3952 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
3956 void stb_phys(hwaddr addr, uint32_t val)
3959 cpu_physical_memory_write(addr, &v, 1);
3962 /* warning: addr must be aligned */
3963 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3964 enum device_endian endian)
3967 MemoryRegionSection *section;
3969 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3971 if (!memory_region_is_ram(section->mr) || section->readonly) {
3972 addr = memory_region_section_addr(section, addr);
3973 if (memory_region_is_ram(section->mr)) {
3974 section = &phys_sections[phys_section_rom];
3976 #if defined(TARGET_WORDS_BIGENDIAN)
3977 if (endian == DEVICE_LITTLE_ENDIAN) {
3981 if (endian == DEVICE_BIG_ENDIAN) {
3985 io_mem_write(section->mr, addr, val, 2);
3987 unsigned long addr1;
3988 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3989 + memory_region_section_addr(section, addr);
3991 ptr = qemu_get_ram_ptr(addr1);
3993 case DEVICE_LITTLE_ENDIAN:
3996 case DEVICE_BIG_ENDIAN:
4003 invalidate_and_set_dirty(addr1, 2);
4007 void stw_phys(hwaddr addr, uint32_t val)
4009 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4012 void stw_le_phys(hwaddr addr, uint32_t val)
4014 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4017 void stw_be_phys(hwaddr addr, uint32_t val)
4019 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4023 void stq_phys(hwaddr addr, uint64_t val)
4026 cpu_physical_memory_write(addr, &val, 8);
4029 void stq_le_phys(hwaddr addr, uint64_t val)
4031 val = cpu_to_le64(val);
4032 cpu_physical_memory_write(addr, &val, 8);
4035 void stq_be_phys(hwaddr addr, uint64_t val)
4037 val = cpu_to_be64(val);
4038 cpu_physical_memory_write(addr, &val, 8);
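/*
 * Illustrative sketch (hypothetical device code): device models use the
 * fixed-endian helpers above to access in-memory descriptors with a defined
 * byte order regardless of the target's native endianness.  ring_pa and the
 * field layout are made up.
 */
#if 0
static void my_ring_complete(hwaddr ring_pa)
{
    uint32_t head = ldl_le_phys(ring_pa);        /* 32-bit little-endian load */

    stw_le_phys(ring_pa + 4, (uint16_t)head);    /* 16-bit little-endian store */
}
#endif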
4041 /* virtual memory access for debug (includes writing to ROM) */
4042 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4043 uint8_t *buf, int len, int is_write)
4050 page = addr & TARGET_PAGE_MASK;
4051 phys_addr = cpu_get_phys_page_debug(env, page);
4052 /* if no physical page mapped, return an error */
4053 if (phys_addr == -1)
4055 l = (page + TARGET_PAGE_SIZE) - addr;
4058 phys_addr += (addr & ~TARGET_PAGE_MASK);
4060 cpu_physical_memory_write_rom(phys_addr, buf, l);
4062 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
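/*
 * Illustrative sketch (hypothetical caller): this is the routine a debugger
 * front end such as the gdb stub goes through to read guest virtual memory.
 * guest_pc is made up; a zero return indicates success.
 */
#if 0
static void my_dump_insn(CPUArchState *env, target_ulong guest_pc)
{
    uint8_t insn[4];

    if (cpu_memory_rw_debug(env, guest_pc, insn, sizeof(insn), 0) == 0) {
        /* insn now holds the bytes at guest virtual address guest_pc */
    }
}
#endif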
4071 /* in deterministic execution mode, instructions doing device I/Os
4072 must be at the end of the TB */
4073 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4075 TranslationBlock *tb;
4077 target_ulong pc, cs_base;
4080 tb = tb_find_pc(retaddr);
4082 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4085 n = env->icount_decr.u16.low + tb->icount;
4086 cpu_restore_state(tb, env, retaddr);
4087     /* Calculate how many instructions had been executed before the fault
       occurred.  */
4089 n = n - env->icount_decr.u16.low;
4090 /* Generate a new TB ending on the I/O insn. */
4092 /* On MIPS and SH, delay slot instructions can only be restarted if
4093 they were already the first instruction in the TB. If this is not
4094        the first instruction in a TB then re-execute the preceding
       branch.  */
4096 #if defined(TARGET_MIPS)
4097 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4098 env->active_tc.PC -= 4;
4099 env->icount_decr.u16.low++;
4100 env->hflags &= ~MIPS_HFLAG_BMASK;
4102 #elif defined(TARGET_SH4)
4103 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4106 env->icount_decr.u16.low++;
4107 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4110 /* This should never happen. */
4111 if (n > CF_COUNT_MASK)
4112 cpu_abort(env, "TB too big during recompile");
4114 cflags = n | CF_LAST_IO;
4116 cs_base = tb->cs_base;
4118 tb_phys_invalidate(tb, -1);
4119 /* FIXME: In theory this could raise an exception. In practice
4120 we have already translated the block once so it's probably ok. */
4121 tb_gen_code(env, pc, cs_base, flags, cflags);
4122 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4123 the first in the TB) then we end up generating a whole new TB and
4124 repeating the fault, which is horribly inefficient.
4125        Better would be to execute just this insn uncached, or generate a
       second new TB.  */
4127 cpu_resume_from_signal(env, NULL);
4130 #if !defined(CONFIG_USER_ONLY)
4132 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4134 int i, target_code_size, max_target_code_size;
4135 int direct_jmp_count, direct_jmp2_count, cross_page;
4136 TranslationBlock *tb;
4138 target_code_size = 0;
4139 max_target_code_size = 0;
4141 direct_jmp_count = 0;
4142 direct_jmp2_count = 0;
4143 for(i = 0; i < nb_tbs; i++) {
4145 target_code_size += tb->size;
4146 if (tb->size > max_target_code_size)
4147 max_target_code_size = tb->size;
4148 if (tb->page_addr[1] != -1)
4150 if (tb->tb_next_offset[0] != 0xffff) {
4152 if (tb->tb_next_offset[1] != 0xffff) {
4153 direct_jmp2_count++;
4157     /* XXX: avoid using doubles? */
4158 cpu_fprintf(f, "Translation buffer state:\n");
4159 cpu_fprintf(f, "gen code size %td/%zd\n",
4160 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4161 cpu_fprintf(f, "TB count %d/%d\n",
4162 nb_tbs, code_gen_max_blocks);
4163 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4164 nb_tbs ? target_code_size / nb_tbs : 0,
4165 max_target_code_size);
4166 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4167 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4168 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4169 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4171 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4172 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4174 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4176 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4177 cpu_fprintf(f, "\nStatistics:\n");
4178 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4179 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4180 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4181 tcg_dump_info(f, cpu_fprintf);
4185 * A helper function for the _utterly broken_ virtio device model to find out if
4186 * it's running on a big endian machine. Don't do this at home kids!
4188 bool virtio_is_big_endian(void);
4189 bool virtio_is_big_endian(void)
4191 #if defined(TARGET_WORDS_BIGENDIAN)
4200 #ifndef CONFIG_USER_ONLY
4201 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4203 MemoryRegionSection *section;
4205 section = phys_page_find(address_space_memory.dispatch,
4206 phys_addr >> TARGET_PAGE_BITS);
4208 return !(memory_region_is_ram(section->mr) ||
4209 memory_region_is_romd(section->mr));