2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
35 #include "qemu-timer.h"
38 #include "exec-memory.h"
39 #if defined(CONFIG_USER_ONLY)
41 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
42 #include <sys/param.h>
43 #if __FreeBSD_version >= 700104
44 #define HAVE_KINFO_GETVMMAP
45 #define sigqueue sigqueue_freebsd /* avoid redefinition */
48 #include <machine/profile.h>
56 #else /* !CONFIG_USER_ONLY */
57 #include "xen-mapcache.h"
63 #include "memory-internal.h"
65 //#define DEBUG_TB_INVALIDATE
67 //#define DEBUG_UNASSIGNED
69 /* make various TB consistency checks */
70 //#define DEBUG_TB_CHECK
72 //#define DEBUG_IOPORT
73 //#define DEBUG_SUBPAGE
75 #if !defined(CONFIG_USER_ONLY)
76 /* TB consistency checks only implemented for usermode emulation. */
80 #define SMC_BITMAP_USE_THRESHOLD 10
82 /* Code generation and translation blocks */
83 static TranslationBlock *tbs;
84 static int code_gen_max_blocks;
85 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
87 /* any access to the tbs or the page table must use this lock */
88 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
90 uint8_t *code_gen_prologue;
91 static uint8_t *code_gen_buffer;
92 static size_t code_gen_buffer_size;
93 /* threshold to flush the translated code buffer */
94 static size_t code_gen_buffer_max_size;
95 static uint8_t *code_gen_ptr;
97 #if !defined(CONFIG_USER_ONLY)
99 static int in_migration;
101 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
103 static MemoryRegion *system_memory;
104 static MemoryRegion *system_io;
106 AddressSpace address_space_io;
107 AddressSpace address_space_memory;
108 DMAContext dma_context_memory;
110 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
111 static MemoryRegion io_mem_subpage_ram;
115 CPUArchState *first_cpu;
116 /* current CPU in the current thread. It is only valid inside
118 DEFINE_TLS(CPUArchState *,cpu_single_env);
119 /* 0 = Do not count executed instructions.
120 1 = Precise instruction counting.
121 2 = Adaptive rate instruction counting. */
124 typedef struct PageDesc {
125 /* list of TBs intersecting this ram page */
126 TranslationBlock *first_tb;
127 /* in order to optimize self modifying code, we count the number
128 of lookups we do to a given page to use a bitmap */
129 unsigned int code_write_count;
130 uint8_t *code_bitmap;
131 #if defined(CONFIG_USER_ONLY)
136 /* In system mode we want L1_MAP to be based on ram offsets,
137 while in user mode we want it to be based on virtual addresses. */
138 #if !defined(CONFIG_USER_ONLY)
139 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
140 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
145 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
148 /* Size of the L2 (and L3, etc) page tables. */
150 #define L2_SIZE (1 << L2_BITS)
152 #define P_L2_LEVELS \
153 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
155 /* The bits remaining after N lower levels of page tables. */
156 #define V_L1_BITS_REM \
157 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
159 #if V_L1_BITS_REM < 4
160 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
162 #define V_L1_BITS V_L1_BITS_REM
165 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
167 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
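/*
 * Worked example of the macros above (illustrative, assuming a build with
 * L1_MAP_ADDR_SPACE_BITS == 64, TARGET_PAGE_BITS == 12 and L2_BITS == 10):
 * V_L1_BITS_REM == (64 - 12) % 10 == 2, which is < 4, so V_L1_BITS == 12,
 * V_L1_SIZE == 4096 and V_L1_SHIFT == 40.  The sketch below shows how the
 * top-level l1_map index is derived from a page index; the remaining
 * V_L1_SHIFT bits are consumed L2_BITS at a time by the lower levels
 * (compare page_find_alloc() further down).
 */
static inline size_t v_l1_index_sketch(tb_page_addr_t index)
{
    return (index >> V_L1_SHIFT) & (V_L1_SIZE - 1);
}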
169 uintptr_t qemu_real_host_page_size;
170 uintptr_t qemu_host_page_size;
171 uintptr_t qemu_host_page_mask;
173 /* This is a multi-level map on the virtual address space.
174 The bottom level has pointers to PageDesc. */
175 static void *l1_map[V_L1_SIZE];
177 #if !defined(CONFIG_USER_ONLY)
179 static MemoryRegionSection *phys_sections;
180 static unsigned phys_sections_nb, phys_sections_nb_alloc;
181 static uint16_t phys_section_unassigned;
182 static uint16_t phys_section_notdirty;
183 static uint16_t phys_section_rom;
184 static uint16_t phys_section_watch;
186 /* Simple allocator for PhysPageEntry nodes */
187 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
188 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
190 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
192 static void io_mem_init(void);
193 static void memory_map_init(void);
194 static void *qemu_safe_ram_ptr(ram_addr_t addr);
196 static MemoryRegion io_mem_watch;
198 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
199 tb_page_addr_t phys_page2);
202 static int tb_flush_count;
203 static int tb_phys_invalidate_count;
206 static inline void map_exec(void *addr, long size)
209 VirtualProtect(addr, size,
210 PAGE_EXECUTE_READWRITE, &old_protect);
213 static inline void map_exec(void *addr, long size)
215 unsigned long start, end, page_size;
217 page_size = getpagesize();
218 start = (unsigned long)addr;
219 start &= ~(page_size - 1);
221 end = (unsigned long)addr + size;
222 end += page_size - 1;
223 end &= ~(page_size - 1);
225 mprotect((void *)start, end - start,
226 PROT_READ | PROT_WRITE | PROT_EXEC);
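/* Example of the rounding above (illustrative): with a 4096-byte host page,
   addr == 0x12345 and size == 0x100 give start == 0x12000 and end == 0x13000,
   so the whole page containing the range is remapped
   PROT_READ | PROT_WRITE | PROT_EXEC. */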
230 static void page_init(void)
232 /* NOTE: we can always suppose that qemu_host_page_size >=
236 SYSTEM_INFO system_info;
238 GetSystemInfo(&system_info);
239 qemu_real_host_page_size = system_info.dwPageSize;
242 qemu_real_host_page_size = getpagesize();
244 if (qemu_host_page_size == 0) {
245 qemu_host_page_size = qemu_real_host_page_size;
247 if (qemu_host_page_size < TARGET_PAGE_SIZE) {
248 qemu_host_page_size = TARGET_PAGE_SIZE;
250 qemu_host_page_mask = ~(qemu_host_page_size - 1);
252 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
254 #ifdef HAVE_KINFO_GETVMMAP
255 struct kinfo_vmentry *freep;
258 freep = kinfo_getvmmap(getpid(), &cnt);
261 for (i = 0; i < cnt; i++) {
262 unsigned long startaddr, endaddr;
264 startaddr = freep[i].kve_start;
265 endaddr = freep[i].kve_end;
266 if (h2g_valid(startaddr)) {
267 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
269 if (h2g_valid(endaddr)) {
270 endaddr = h2g(endaddr);
271 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
273 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
275 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
286 last_brk = (unsigned long)sbrk(0);
288 f = fopen("/compat/linux/proc/self/maps", "r");
293 unsigned long startaddr, endaddr;
296 n = fscanf(f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
298 if (n == 2 && h2g_valid(startaddr)) {
299 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
301 if (h2g_valid(endaddr)) {
302 endaddr = h2g(endaddr);
306 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
318 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
324 #if defined(CONFIG_USER_ONLY)
325 /* We can't use g_malloc because it may recurse into a locked mutex. */
326 # define ALLOC(P, SIZE) \
328 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
329 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
332 # define ALLOC(P, SIZE) \
333 do { P = g_malloc0(SIZE); } while (0)
336 /* Level 1. Always allocated. */
337 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
340 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
347 ALLOC(p, sizeof(void *) * L2_SIZE);
351 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
359 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
365 return pd + (index & (L2_SIZE - 1));
368 static inline PageDesc *page_find(tb_page_addr_t index)
370 return page_find_alloc(index, 0);
373 #if !defined(CONFIG_USER_ONLY)
375 static void phys_map_node_reserve(unsigned nodes)
377 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
378 typedef PhysPageEntry Node[L2_SIZE];
379 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
380 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
381 phys_map_nodes_nb + nodes);
382 phys_map_nodes = g_renew(Node, phys_map_nodes,
383 phys_map_nodes_nb_alloc);
387 static uint16_t phys_map_node_alloc(void)
392 ret = phys_map_nodes_nb++;
393 assert(ret != PHYS_MAP_NODE_NIL);
394 assert(ret != phys_map_nodes_nb_alloc);
395 for (i = 0; i < L2_SIZE; ++i) {
396 phys_map_nodes[ret][i].is_leaf = 0;
397 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
402 static void phys_map_nodes_reset(void)
404 phys_map_nodes_nb = 0;
408 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
409 hwaddr *nb, uint16_t leaf,
414 hwaddr step = (hwaddr)1 << (level * L2_BITS);
416 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
417 lp->ptr = phys_map_node_alloc();
418 p = phys_map_nodes[lp->ptr];
420 for (i = 0; i < L2_SIZE; i++) {
422 p[i].ptr = phys_section_unassigned;
426 p = phys_map_nodes[lp->ptr];
428 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
430 while (*nb && lp < &p[L2_SIZE]) {
431 if ((*index & (step - 1)) == 0 && *nb >= step) {
437 phys_page_set_level(lp, index, nb, leaf, level - 1);
443 static void phys_page_set(AddressSpaceDispatch *d,
444 hwaddr index, hwaddr nb,
447 /* Wildly overreserve - it doesn't matter much. */
448 phys_map_node_reserve(3 * P_L2_LEVELS);
450 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
453 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
455 PhysPageEntry lp = d->phys_map;
458 uint16_t s_index = phys_section_unassigned;
460 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
461 if (lp.ptr == PHYS_MAP_NODE_NIL) {
464 p = phys_map_nodes[lp.ptr];
465 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
470 return &phys_sections[s_index];
473 bool memory_region_is_unassigned(MemoryRegion *mr)
475 return mr != &io_mem_ram && mr != &io_mem_rom
476 && mr != &io_mem_notdirty && !mr->rom_device
477 && mr != &io_mem_watch;
480 #define mmap_lock() do { } while(0)
481 #define mmap_unlock() do { } while(0)
484 #if defined(CONFIG_USER_ONLY)
485 /* Currently it is not recommended to allocate big chunks of data in
486 user mode. This will change when a dedicated libc is used. */
487 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
488 region in which the guest needs to run. Revisit this. */
489 #define USE_STATIC_CODE_GEN_BUFFER
492 /* ??? Should configure for this, not list operating systems here. */
493 #if (defined(__linux__) \
494 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
495 || defined(__DragonFly__) || defined(__OpenBSD__) \
496 || defined(__NetBSD__))
500 /* Minimum size of the code gen buffer. This number is randomly chosen,
501 but not so small that we can't have a fair number of TB's live. */
502 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
504 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
505 indicated, this is constrained by the range of direct branches on the
506 host cpu, as used by the TCG implementation of goto_tb. */
507 #if defined(__x86_64__)
508 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
509 #elif defined(__sparc__)
510 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
511 #elif defined(__arm__)
512 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
513 #elif defined(__s390x__)
514 /* We have a +- 4GB range on the branches; leave some slop. */
515 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
517 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
520 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
522 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
523 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
524 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
526 static inline size_t size_code_gen_buffer(size_t tb_size)
528 /* Size the buffer. */
530 #ifdef USE_STATIC_CODE_GEN_BUFFER
531 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
533 /* ??? Needs adjustments. */
534 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
535 static buffer, we could size this on RESERVED_VA, on the text
536 segment size of the executable, or continue to use the default. */
537 tb_size = (unsigned long)(ram_size / 4);
540 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
541 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
543 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
544 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
546 code_gen_buffer_size = tb_size;
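/* Illustrative examples of the sizing above: with USE_STATIC_CODE_GEN_BUFFER
   a tb_size of 0 falls back to DEFAULT_CODE_GEN_BUFFER_SIZE (32MB here),
   otherwise it defaults to ram_size / 4; either way the result is clamped to
   [MIN_CODE_GEN_BUFFER_SIZE, MAX_CODE_GEN_BUFFER_SIZE], so a 256KB request
   becomes 1MB and an 8GB request on x86-64 becomes 2GB. */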
550 #ifdef USE_STATIC_CODE_GEN_BUFFER
551 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
552 __attribute__((aligned(CODE_GEN_ALIGN)));
554 static inline void *alloc_code_gen_buffer(void)
556 map_exec(static_code_gen_buffer, code_gen_buffer_size);
557 return static_code_gen_buffer;
559 #elif defined(USE_MMAP)
560 static inline void *alloc_code_gen_buffer(void)
562 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
566 /* Constrain the position of the buffer based on the host cpu.
567 Note that these addresses are chosen in concert with the
568 addresses assigned in the relevant linker script file. */
569 # if defined(__PIE__) || defined(__PIC__)
570 /* Don't bother setting a preferred location if we're building
571 a position-independent executable. We're more likely to get
572 an address near the main executable if we let the kernel
573 choose the address. */
574 # elif defined(__x86_64__) && defined(MAP_32BIT)
575 /* Force the memory down into low memory with the executable.
576 Leave the choice of exact location with the kernel. */
578 /* Cannot expect to map more than 800MB in low memory. */
579 if (code_gen_buffer_size > 800u * 1024 * 1024) {
580 code_gen_buffer_size = 800u * 1024 * 1024;
582 # elif defined(__sparc__)
583 start = 0x40000000ul;
584 # elif defined(__s390x__)
585 start = 0x90000000ul;
588 buf = mmap((void *)start, code_gen_buffer_size,
589 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
590 return buf == MAP_FAILED ? NULL : buf;
593 static inline void *alloc_code_gen_buffer(void)
595 void *buf = g_malloc(code_gen_buffer_size);
598 map_exec(buf, code_gen_buffer_size);
602 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
604 static inline void code_gen_alloc(size_t tb_size)
606 code_gen_buffer_size = size_code_gen_buffer(tb_size);
607 code_gen_buffer = alloc_code_gen_buffer();
608 if (code_gen_buffer == NULL) {
609 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
613 qemu_madvise(code_gen_buffer, code_gen_buffer_size, QEMU_MADV_HUGEPAGE);
615 /* Steal room for the prologue at the end of the buffer. This ensures
616 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
617 from TBs to the prologue are going to be in range. It also means
618 that we don't need to mark (additional) portions of the data segment
620 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
621 code_gen_buffer_size -= 1024;
623 code_gen_buffer_max_size = code_gen_buffer_size -
624 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
625 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
626 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
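/* Example of the layout above (illustrative, 32MB buffer): code_gen_prologue
   points at the final 1KB of the mapping, code_gen_buffer_size shrinks to
   32MB - 1KB, and code_gen_buffer_max_size leaves a further headroom of
   TCG_MAX_OP_SIZE * OPC_BUF_SIZE below that, so a TB whose generation starts
   under the flush threshold should not overrun the buffer. */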
629 /* Must be called before using the QEMU cpus. 'tb_size' is the size
630 (in bytes) allocated to the translation buffer. Zero means default
632 void tcg_exec_init(unsigned long tb_size)
635 code_gen_alloc(tb_size);
636 code_gen_ptr = code_gen_buffer;
637 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
639 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
640 /* There's no guest base to take into account, so go ahead and
641 initialize the prologue now. */
642 tcg_prologue_init(&tcg_ctx);
646 bool tcg_enabled(void)
648 return code_gen_buffer != NULL;
651 void cpu_exec_init_all(void)
653 #if !defined(CONFIG_USER_ONLY)
659 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
661 static int cpu_common_post_load(void *opaque, int version_id)
663 CPUArchState *env = opaque;
665 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
666 version_id is increased. */
667 env->interrupt_request &= ~0x01;
673 static const VMStateDescription vmstate_cpu_common = {
674 .name = "cpu_common",
676 .minimum_version_id = 1,
677 .minimum_version_id_old = 1,
678 .post_load = cpu_common_post_load,
679 .fields = (VMStateField []) {
680 VMSTATE_UINT32(halted, CPUArchState),
681 VMSTATE_UINT32(interrupt_request, CPUArchState),
682 VMSTATE_END_OF_LIST()
687 CPUArchState *qemu_get_cpu(int cpu)
689 CPUArchState *env = first_cpu;
692 if (env->cpu_index == cpu)
700 void cpu_exec_init(CPUArchState *env)
702 #ifndef CONFIG_USER_ONLY
703 CPUState *cpu = ENV_GET_CPU(env);
708 #if defined(CONFIG_USER_ONLY)
711 env->next_cpu = NULL;
714 while (*penv != NULL) {
715 penv = &(*penv)->next_cpu;
718 env->cpu_index = cpu_index;
720 QTAILQ_INIT(&env->breakpoints);
721 QTAILQ_INIT(&env->watchpoints);
722 #ifndef CONFIG_USER_ONLY
723 cpu->thread_id = qemu_get_thread_id();
726 #if defined(CONFIG_USER_ONLY)
729 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
730 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
731 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
732 cpu_save, cpu_load, env);
736 /* Allocate a new translation block. Flush the translation buffer if
737 too many translation blocks or too much generated code. */
738 static TranslationBlock *tb_alloc(target_ulong pc)
740 TranslationBlock *tb;
742 if (nb_tbs >= code_gen_max_blocks ||
743 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size) {
752 void tb_free(TranslationBlock *tb)
754 /* In practice this is mostly used for single-use temporary TBs.
755 Ignore the hard cases and just back up if this TB happens to
756 be the last one generated. */
757 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
758 code_gen_ptr = tb->tc_ptr;
763 static inline void invalidate_page_bitmap(PageDesc *p)
765 if (p->code_bitmap) {
766 g_free(p->code_bitmap);
767 p->code_bitmap = NULL;
769 p->code_write_count = 0;
772 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
773 static void page_flush_tb_1(int level, void **lp)
783 for (i = 0; i < L2_SIZE; ++i) {
784 pd[i].first_tb = NULL;
785 invalidate_page_bitmap(pd + i);
790 for (i = 0; i < L2_SIZE; ++i) {
791 page_flush_tb_1(level - 1, pp + i);
796 static void page_flush_tb(void)
800 for (i = 0; i < V_L1_SIZE; i++) {
801 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
805 /* flush all the translation blocks */
806 /* XXX: tb_flush is currently not thread safe */
807 void tb_flush(CPUArchState *env1)
811 #if defined(DEBUG_FLUSH)
812 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
813 (unsigned long)(code_gen_ptr - code_gen_buffer),
815 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
817 if ((unsigned long)(code_gen_ptr - code_gen_buffer)
818 > code_gen_buffer_size) {
819 cpu_abort(env1, "Internal error: code buffer overflow\n");
823 for (env = first_cpu; env != NULL; env = env->next_cpu) {
824 memset(env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
827 memset(tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof(void *));
830 code_gen_ptr = code_gen_buffer;
831 /* XXX: flush processor icache at this point if cache flush is
836 #ifdef DEBUG_TB_CHECK
838 static void tb_invalidate_check(target_ulong address)
840 TranslationBlock *tb;
843 address &= TARGET_PAGE_MASK;
844 for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
845 for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
846 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
847 address >= tb->pc + tb->size)) {
848 printf("ERROR invalidate: address=" TARGET_FMT_lx
849 " PC=%08lx size=%04x\n",
850 address, (long)tb->pc, tb->size);
856 /* verify that all the pages have correct rights for code */
857 static void tb_page_check(void)
859 TranslationBlock *tb;
860 int i, flags1, flags2;
862 for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
863 for (tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
864 flags1 = page_get_flags(tb->pc);
865 flags2 = page_get_flags(tb->pc + tb->size - 1);
866 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
867 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
868 (long)tb->pc, tb->size, flags1, flags2);
877 /* invalidate one TB */
878 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
881 TranslationBlock *tb1;
886 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
889 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
893 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
895 TranslationBlock *tb1;
900 n1 = (uintptr_t)tb1 & 3;
901 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
903 *ptb = tb1->page_next[n1];
906 ptb = &tb1->page_next[n1];
910 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
912 TranslationBlock *tb1, **ptb;
915 ptb = &tb->jmp_next[n];
918 /* find tb(n) in circular list */
921 n1 = (uintptr_t)tb1 & 3;
922 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
923 if (n1 == n && tb1 == tb) {
927 ptb = &tb1->jmp_first;
929 ptb = &tb1->jmp_next[n1];
932 /* now we can suppress tb(n) from the list */
933 *ptb = tb->jmp_next[n];
935 tb->jmp_next[n] = NULL;
939 /* reset the jump entry 'n' of a TB so that it is not chained to
941 static inline void tb_reset_jump(TranslationBlock *tb, int n)
943 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
946 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
951 tb_page_addr_t phys_pc;
952 TranslationBlock *tb1, *tb2;
954 /* remove the TB from the hash list */
955 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
956 h = tb_phys_hash_func(phys_pc);
957 tb_remove(&tb_phys_hash[h], tb,
958 offsetof(TranslationBlock, phys_hash_next));
960 /* remove the TB from the page list */
961 if (tb->page_addr[0] != page_addr) {
962 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
963 tb_page_remove(&p->first_tb, tb);
964 invalidate_page_bitmap(p);
966 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
967 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
968 tb_page_remove(&p->first_tb, tb);
969 invalidate_page_bitmap(p);
972 tb_invalidated_flag = 1;
974 /* remove the TB from the hash list */
975 h = tb_jmp_cache_hash_func(tb->pc);
976 for (env = first_cpu; env != NULL; env = env->next_cpu) {
977 if (env->tb_jmp_cache[h] == tb) {
978 env->tb_jmp_cache[h] = NULL;
982 /* suppress this TB from the two jump lists */
983 tb_jmp_remove(tb, 0);
984 tb_jmp_remove(tb, 1);
986 /* suppress any remaining jumps to this TB */
989 n1 = (uintptr_t)tb1 & 3;
993 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
994 tb2 = tb1->jmp_next[n1];
995 tb_reset_jump(tb1, n1);
996 tb1->jmp_next[n1] = NULL;
999 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
1001 tb_phys_invalidate_count++;
1004 static inline void set_bits(uint8_t *tab, int start, int len)
1006 int end, mask, end1;
1010 mask = 0xff << (start & 7);
1011 if ((start & ~7) == (end & ~7)) {
1013 mask &= ~(0xff << (end & 7));
1018 start = (start + 8) & ~7;
1020 while (start < end1) {
1025 mask = ~(0xff << (end & 7));
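/* Worked example for set_bits() (illustrative): start == 5, len == 10 marks
   bits 5..14, i.e. tab[0] |= 0xe0 (bits 5-7) and tab[1] |= 0x7f (bits 8-14);
   bits are numbered LSB-first within each byte. */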
1031 static void build_page_bitmap(PageDesc *p)
1033 int n, tb_start, tb_end;
1034 TranslationBlock *tb;
1036 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1039 while (tb != NULL) {
1040 n = (uintptr_t)tb & 3;
1041 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1042 /* NOTE: this is subtle as a TB may span two physical pages */
1044 /* NOTE: tb_end may be after the end of the page, but
1045 it is not a problem */
1046 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1047 tb_end = tb_start + tb->size;
1048 if (tb_end > TARGET_PAGE_SIZE) {
1049 tb_end = TARGET_PAGE_SIZE;
1053 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1055 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1056 tb = tb->page_next[n];
1060 TranslationBlock *tb_gen_code(CPUArchState *env,
1061 target_ulong pc, target_ulong cs_base,
1062 int flags, int cflags)
1064 TranslationBlock *tb;
1066 tb_page_addr_t phys_pc, phys_page2;
1067 target_ulong virt_page2;
1070 phys_pc = get_page_addr_code(env, pc);
1073 /* flush must be done */
1075 /* cannot fail at this point */
1077 /* Don't forget to invalidate previous TB info. */
1078 tb_invalidated_flag = 1;
1080 tc_ptr = code_gen_ptr;
1081 tb->tc_ptr = tc_ptr;
1082 tb->cs_base = cs_base;
1084 tb->cflags = cflags;
1085 cpu_gen_code(env, tb, &code_gen_size);
1086 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1087 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1089 /* check next page if needed */
1090 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1092 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1093 phys_page2 = get_page_addr_code(env, virt_page2);
1095 tb_link_page(tb, phys_pc, phys_page2);
1100 * Invalidate all TBs which intersect with the target physical address range
1101 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1102 * 'is_cpu_write_access' should be true if called from a real cpu write
1103 * access: the virtual CPU will exit the current TB if code is modified inside
1106 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1107 int is_cpu_write_access)
1109 while (start < end) {
1110 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1111 start &= TARGET_PAGE_MASK;
1112 start += TARGET_PAGE_SIZE;
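/* Example (illustrative, 4KB target pages): for [0x12f0, 0x2104) the loop
   above calls tb_invalidate_phys_page_range() twice, once with start inside
   page 0x1000 and once with start inside page 0x2000, i.e. one call per
   target page the range touches. */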
1117 * Invalidate all TBs which intersect with the target physical address range
1118 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1119 * 'is_cpu_write_access' should be true if called from a real cpu write
1120 * access: the virtual CPU will exit the current TB if code is modified inside
1123 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1124 int is_cpu_write_access)
1126 TranslationBlock *tb, *tb_next, *saved_tb;
1127 CPUArchState *env = cpu_single_env;
1128 tb_page_addr_t tb_start, tb_end;
1131 #ifdef TARGET_HAS_PRECISE_SMC
1132 int current_tb_not_found = is_cpu_write_access;
1133 TranslationBlock *current_tb = NULL;
1134 int current_tb_modified = 0;
1135 target_ulong current_pc = 0;
1136 target_ulong current_cs_base = 0;
1137 int current_flags = 0;
1138 #endif /* TARGET_HAS_PRECISE_SMC */
1140 p = page_find(start >> TARGET_PAGE_BITS);
1144 if (!p->code_bitmap &&
1145 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1146 is_cpu_write_access) {
1147 /* build code bitmap */
1148 build_page_bitmap(p);
1151 /* we remove all the TBs in the range [start, end[ */
1152 /* XXX: see if in some cases it could be faster to invalidate all
1155 while (tb != NULL) {
1156 n = (uintptr_t)tb & 3;
1157 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1158 tb_next = tb->page_next[n];
1159 /* NOTE: this is subtle as a TB may span two physical pages */
1161 /* NOTE: tb_end may be after the end of the page, but
1162 it is not a problem */
1163 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1164 tb_end = tb_start + tb->size;
1166 tb_start = tb->page_addr[1];
1167 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1169 if (!(tb_end <= start || tb_start >= end)) {
1170 #ifdef TARGET_HAS_PRECISE_SMC
1171 if (current_tb_not_found) {
1172 current_tb_not_found = 0;
1174 if (env->mem_io_pc) {
1175 /* now we have a real cpu fault */
1176 current_tb = tb_find_pc(env->mem_io_pc);
1179 if (current_tb == tb &&
1180 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1181 /* If we are modifying the current TB, we must stop
1182 its execution. We could be more precise by checking
1183 that the modification is after the current PC, but it
1184 would require a specialized function to partially
1185 restore the CPU state */
1187 current_tb_modified = 1;
1188 cpu_restore_state(current_tb, env, env->mem_io_pc);
1189 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1192 #endif /* TARGET_HAS_PRECISE_SMC */
1193 /* we need to do that to handle the case where a signal
1194 occurs while doing tb_phys_invalidate() */
1197 saved_tb = env->current_tb;
1198 env->current_tb = NULL;
1200 tb_phys_invalidate(tb, -1);
1202 env->current_tb = saved_tb;
1203 if (env->interrupt_request && env->current_tb) {
1204 cpu_interrupt(env, env->interrupt_request);
1210 #if !defined(CONFIG_USER_ONLY)
1211 /* if no code remaining, no need to continue to use slow writes */
1213 invalidate_page_bitmap(p);
1214 if (is_cpu_write_access) {
1215 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1219 #ifdef TARGET_HAS_PRECISE_SMC
1220 if (current_tb_modified) {
1221 /* we generate a block containing just the instruction
1222 modifying the memory. It will ensure that it cannot modify
1224 env->current_tb = NULL;
1225 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1226 cpu_resume_from_signal(env, NULL);
1231 /* len must be <= 8 and start must be a multiple of len */
1232 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1239 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1240 cpu_single_env->mem_io_vaddr, len,
1241 cpu_single_env->eip,
1242 cpu_single_env->eip +
1243 (intptr_t)cpu_single_env->segs[R_CS].base);
1246 p = page_find(start >> TARGET_PAGE_BITS);
1250 if (p->code_bitmap) {
1251 offset = start & ~TARGET_PAGE_MASK;
1252 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1253 if (b & ((1 << len) - 1)) {
1258 tb_invalidate_phys_page_range(start, start + len, 1);
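/* Example of the bitmap test above (illustrative): a 4-byte write at page
   offset 0x44 reads code_bitmap[0x44 >> 3] == code_bitmap[8], shifts it right
   by 0x44 & 7 == 4 and masks with (1 << 4) - 1 == 0xf, i.e. it tests bitmap
   bits 0x44..0x47; only if one of them is set does the slow
   tb_invalidate_phys_page_range() path run. */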
1262 #if !defined(CONFIG_SOFTMMU)
1263 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1264 uintptr_t pc, void *puc)
1266 TranslationBlock *tb;
1269 #ifdef TARGET_HAS_PRECISE_SMC
1270 TranslationBlock *current_tb = NULL;
1271 CPUArchState *env = cpu_single_env;
1272 int current_tb_modified = 0;
1273 target_ulong current_pc = 0;
1274 target_ulong current_cs_base = 0;
1275 int current_flags = 0;
1278 addr &= TARGET_PAGE_MASK;
1279 p = page_find(addr >> TARGET_PAGE_BITS);
1284 #ifdef TARGET_HAS_PRECISE_SMC
1285 if (tb && pc != 0) {
1286 current_tb = tb_find_pc(pc);
1289 while (tb != NULL) {
1290 n = (uintptr_t)tb & 3;
1291 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1292 #ifdef TARGET_HAS_PRECISE_SMC
1293 if (current_tb == tb &&
1294 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1295 /* If we are modifying the current TB, we must stop
1296 its execution. We could be more precise by checking
1297 that the modification is after the current PC, but it
1298 would require a specialized function to partially
1299 restore the CPU state */
1301 current_tb_modified = 1;
1302 cpu_restore_state(current_tb, env, pc);
1303 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1306 #endif /* TARGET_HAS_PRECISE_SMC */
1307 tb_phys_invalidate(tb, addr);
1308 tb = tb->page_next[n];
1311 #ifdef TARGET_HAS_PRECISE_SMC
1312 if (current_tb_modified) {
1313 /* we generate a block containing just the instruction
1314 modifying the memory. It will ensure that it cannot modify
1316 env->current_tb = NULL;
1317 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1318 cpu_resume_from_signal(env, puc);
1324 /* add the tb in the target page and protect it if necessary */
1325 static inline void tb_alloc_page(TranslationBlock *tb,
1326 unsigned int n, tb_page_addr_t page_addr)
1329 #ifndef CONFIG_USER_ONLY
1330 bool page_already_protected;
1333 tb->page_addr[n] = page_addr;
1334 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1335 tb->page_next[n] = p->first_tb;
1336 #ifndef CONFIG_USER_ONLY
1337 page_already_protected = p->first_tb != NULL;
1339 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1340 invalidate_page_bitmap(p);
1342 #if defined(TARGET_HAS_SMC) || 1
1344 #if defined(CONFIG_USER_ONLY)
1345 if (p->flags & PAGE_WRITE) {
1350 /* force the host page as non writable (writes will have a
1351 page fault + mprotect overhead) */
1352 page_addr &= qemu_host_page_mask;
1354 for (addr = page_addr; addr < page_addr + qemu_host_page_size;
1355 addr += TARGET_PAGE_SIZE) {
1357 p2 = page_find(addr >> TARGET_PAGE_BITS);
1362 p2->flags &= ~PAGE_WRITE;
1364 mprotect(g2h(page_addr), qemu_host_page_size,
1365 (prot & PAGE_BITS) & ~PAGE_WRITE);
1366 #ifdef DEBUG_TB_INVALIDATE
1367 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1372 /* if some code is already present, then the pages are already
1373 protected. So we handle the case where only the first TB is
1374 allocated in a physical page */
1375 if (!page_already_protected) {
1376 tlb_protect_code(page_addr);
1380 #endif /* TARGET_HAS_SMC */
1383 /* add a new TB and link it to the physical page tables. phys_page2 is
1384 (-1) to indicate that only one page contains the TB. */
1385 static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
1386 tb_page_addr_t phys_page2)
1389 TranslationBlock **ptb;
1391 /* Grab the mmap lock to stop another thread invalidating this TB
1392 before we are done. */
1394 /* add in the physical hash table */
1395 h = tb_phys_hash_func(phys_pc);
1396 ptb = &tb_phys_hash[h];
1397 tb->phys_hash_next = *ptb;
1400 /* add in the page list */
1401 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1402 if (phys_page2 != -1) {
1403 tb_alloc_page(tb, 1, phys_page2);
1405 tb->page_addr[1] = -1;
1408 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1409 tb->jmp_next[0] = NULL;
1410 tb->jmp_next[1] = NULL;
1412 /* init original jump addresses */
1413 if (tb->tb_next_offset[0] != 0xffff) {
1414 tb_reset_jump(tb, 0);
1416 if (tb->tb_next_offset[1] != 0xffff) {
1417 tb_reset_jump(tb, 1);
1420 #ifdef DEBUG_TB_CHECK
1426 #if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
1427 /* check whether the given addr is in TCG generated code buffer or not */
1428 bool is_tcg_gen_code(uintptr_t tc_ptr)
1430 /* This can be called during code generation; code_gen_buffer_max_size
1431 is used instead of code_gen_ptr for upper boundary checking */
1432 return (tc_ptr >= (uintptr_t)code_gen_buffer &&
1433 tc_ptr < (uintptr_t)(code_gen_buffer + code_gen_buffer_max_size));
1437 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1438 tb[1].tc_ptr. Return NULL if not found */
1439 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1441 int m_min, m_max, m;
1443 TranslationBlock *tb;
1448 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1449 tc_ptr >= (uintptr_t)code_gen_ptr) {
1452 /* binary search (cf Knuth) */
1455 while (m_min <= m_max) {
1456 m = (m_min + m_max) >> 1;
1458 v = (uintptr_t)tb->tc_ptr;
1461 } else if (tc_ptr < v) {
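/*
 * Minimal sketch of the search above (illustrative): tbs[] is filled in
 * allocation order out of code_gen_buffer, so tc_ptr values are sorted and
 * the classic binary search for the greatest tc_ptr <= the probe address
 * identifies the enclosing TB.
 */
static inline int tb_index_for_tc_ptr_sketch(const uintptr_t *tc_ptrs,
                                             int nb, uintptr_t probe)
{
    int lo = 0, hi = nb - 1, found = -1;

    while (lo <= hi) {
        int mid = (lo + hi) >> 1;

        if (tc_ptrs[mid] <= probe) {
            found = mid;        /* candidate; keep looking further right */
            lo = mid + 1;
        } else {
            hi = mid - 1;
        }
    }
    return found;
}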
1470 static void tb_reset_jump_recursive(TranslationBlock *tb);
1472 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1474 TranslationBlock *tb1, *tb_next, **ptb;
1477 tb1 = tb->jmp_next[n];
1479 /* find head of list */
1481 n1 = (uintptr_t)tb1 & 3;
1482 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1486 tb1 = tb1->jmp_next[n1];
1488 /* we are now sure that tb jumps to tb1 */
1491 /* remove tb from the jmp_first list */
1492 ptb = &tb_next->jmp_first;
1495 n1 = (uintptr_t)tb1 & 3;
1496 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1497 if (n1 == n && tb1 == tb) {
1500 ptb = &tb1->jmp_next[n1];
1502 *ptb = tb->jmp_next[n];
1503 tb->jmp_next[n] = NULL;
1505 /* suppress the jump to next tb in generated code */
1506 tb_reset_jump(tb, n);
1508 /* suppress jumps in the tb on which we could have jumped */
1509 tb_reset_jump_recursive(tb_next);
1513 static void tb_reset_jump_recursive(TranslationBlock *tb)
1515 tb_reset_jump_recursive2(tb, 0);
1516 tb_reset_jump_recursive2(tb, 1);
1519 #if defined(TARGET_HAS_ICE)
1520 #if defined(CONFIG_USER_ONLY)
1521 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1523 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1526 void tb_invalidate_phys_addr(hwaddr addr)
1528 ram_addr_t ram_addr;
1529 MemoryRegionSection *section;
1531 section = phys_page_find(address_space_memory.dispatch,
1532 addr >> TARGET_PAGE_BITS);
1533 if (!(memory_region_is_ram(section->mr)
1534 || (section->mr->rom_device && section->mr->readable))) {
1537 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1538 + memory_region_section_addr(section, addr);
1539 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1542 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1544 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1545 (pc & ~TARGET_PAGE_MASK));
1548 #endif /* TARGET_HAS_ICE */
1550 #if defined(CONFIG_USER_ONLY)
1551 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1556 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1557 int flags, CPUWatchpoint **watchpoint)
1562 /* Add a watchpoint. */
1563 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1564 int flags, CPUWatchpoint **watchpoint)
1566 target_ulong len_mask = ~(len - 1);
1569 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1570 if ((len & (len - 1)) || (addr & ~len_mask) ||
1571 len == 0 || len > TARGET_PAGE_SIZE) {
1572 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1573 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
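/* Example of the sanity check above (illustrative): len == 4 gives
   len_mask == ~3, so addr == 0x1004 is accepted while addr == 0x1003 is
   rejected (addr & ~len_mask != 0); len == 6 is rejected because 6 & 5 != 0,
   i.e. it is not a power of two. */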
1576 wp = g_malloc(sizeof(*wp));
1579 wp->len_mask = len_mask;
1582 /* keep all GDB-injected watchpoints in front */
1584 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1586 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1588 tlb_flush_page(env, addr);
1595 /* Remove a specific watchpoint. */
1596 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1599 target_ulong len_mask = ~(len - 1);
1602 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1603 if (addr == wp->vaddr && len_mask == wp->len_mask
1604 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1605 cpu_watchpoint_remove_by_ref(env, wp);
1612 /* Remove a specific watchpoint by reference. */
1613 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1615 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1617 tlb_flush_page(env, watchpoint->vaddr);
1622 /* Remove all matching watchpoints. */
1623 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1625 CPUWatchpoint *wp, *next;
1627 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1628 if (wp->flags & mask)
1629 cpu_watchpoint_remove_by_ref(env, wp);
1634 /* Add a breakpoint. */
1635 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1636 CPUBreakpoint **breakpoint)
1638 #if defined(TARGET_HAS_ICE)
1641 bp = g_malloc(sizeof(*bp));
1646 /* keep all GDB-injected breakpoints in front */
1648 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1650 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1652 breakpoint_invalidate(env, pc);
1662 /* Remove a specific breakpoint. */
1663 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1665 #if defined(TARGET_HAS_ICE)
1668 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1669 if (bp->pc == pc && bp->flags == flags) {
1670 cpu_breakpoint_remove_by_ref(env, bp);
1680 /* Remove a specific breakpoint by reference. */
1681 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1683 #if defined(TARGET_HAS_ICE)
1684 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1686 breakpoint_invalidate(env, breakpoint->pc);
1692 /* Remove all matching breakpoints. */
1693 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1695 #if defined(TARGET_HAS_ICE)
1696 CPUBreakpoint *bp, *next;
1698 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1699 if (bp->flags & mask)
1700 cpu_breakpoint_remove_by_ref(env, bp);
1705 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1706 CPU loop after each instruction */
1707 void cpu_single_step(CPUArchState *env, int enabled)
1709 #if defined(TARGET_HAS_ICE)
1710 if (env->singlestep_enabled != enabled) {
1711 env->singlestep_enabled = enabled;
1713 kvm_update_guest_debug(env, 0);
1715 /* must flush all the translated code to avoid inconsistencies */
1716 /* XXX: only flush what is necessary */
1723 static void cpu_unlink_tb(CPUArchState *env)
1725 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1726 problem and hope the cpu will stop of its own accord. For userspace
1727 emulation this often isn't actually as bad as it sounds. Often
1728 signals are used primarily to interrupt blocking syscalls. */
1729 TranslationBlock *tb;
1730 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1732 spin_lock(&interrupt_lock);
1733 tb = env->current_tb;
1734 /* if the cpu is currently executing code, we must unlink it and
1735 all the potentially executing TBs */
1737 env->current_tb = NULL;
1738 tb_reset_jump_recursive(tb);
1740 spin_unlock(&interrupt_lock);
1743 #ifndef CONFIG_USER_ONLY
1744 /* mask must never be zero, except for A20 change call */
1745 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1747 CPUState *cpu = ENV_GET_CPU(env);
1750 old_mask = env->interrupt_request;
1751 env->interrupt_request |= mask;
1754 * If called from iothread context, wake the target cpu in
1757 if (!qemu_cpu_is_self(cpu)) {
1763 env->icount_decr.u16.high = 0xffff;
1765 && (mask & ~old_mask) != 0) {
1766 cpu_abort(env, "Raised interrupt while not in I/O function");
1773 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1775 #else /* CONFIG_USER_ONLY */
1777 void cpu_interrupt(CPUArchState *env, int mask)
1779 env->interrupt_request |= mask;
1782 #endif /* CONFIG_USER_ONLY */
1784 void cpu_reset_interrupt(CPUArchState *env, int mask)
1786 env->interrupt_request &= ~mask;
1789 void cpu_exit(CPUArchState *env)
1791 env->exit_request = 1;
1795 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1802 fprintf(stderr, "qemu: fatal: ");
1803 vfprintf(stderr, fmt, ap);
1804 fprintf(stderr, "\n");
1805 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1806 if (qemu_log_enabled()) {
1807 qemu_log("qemu: fatal: ");
1808 qemu_log_vprintf(fmt, ap2);
1810 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1816 #if defined(CONFIG_USER_ONLY)
1818 struct sigaction act;
1819 sigfillset(&act.sa_mask);
1820 act.sa_handler = SIG_DFL;
1821 sigaction(SIGABRT, &act, NULL);
1827 CPUArchState *cpu_copy(CPUArchState *env)
1829 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1830 CPUArchState *next_cpu = new_env->next_cpu;
1831 int cpu_index = new_env->cpu_index;
1832 #if defined(TARGET_HAS_ICE)
1837 memcpy(new_env, env, sizeof(CPUArchState));
1839 /* Preserve chaining and index. */
1840 new_env->next_cpu = next_cpu;
1841 new_env->cpu_index = cpu_index;
1843 /* Clone all break/watchpoints.
1844 Note: Once we support ptrace with hw-debug register access, make sure
1845 BP_CPU break/watchpoints are handled correctly on clone. */
1846 QTAILQ_INIT(&env->breakpoints);
1847 QTAILQ_INIT(&env->watchpoints);
1848 #if defined(TARGET_HAS_ICE)
1849 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1850 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1852 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1853 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1861 #if !defined(CONFIG_USER_ONLY)
1862 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1866 /* Discard jump cache entries for any tb which might potentially
1867 overlap the flushed page. */
1868 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1869 memset (&env->tb_jmp_cache[i], 0,
1870 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1872 i = tb_jmp_cache_hash_page(addr);
1873 memset (&env->tb_jmp_cache[i], 0,
1874 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1877 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1882 /* we modify the TLB cache so that the dirty bit will be set again
1883 when accessing the range */
1884 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1885 /* Check that we don't span multiple blocks - this breaks the
1886 address comparisons below. */
1887 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1888 != (end - 1) - start) {
1891 cpu_tlb_reset_dirty_all(start1, length);
1895 /* Note: start and end must be within the same ram block. */
1896 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1901 start &= TARGET_PAGE_MASK;
1902 end = TARGET_PAGE_ALIGN(end);
1904 length = end - start;
1907 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1909 if (tcg_enabled()) {
1910 tlb_reset_dirty_range_all(start, end, length);
1914 static int cpu_physical_memory_set_dirty_tracking(int enable)
1917 in_migration = enable;
1921 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1922 MemoryRegionSection *section,
1926 target_ulong *address)
1931 if (memory_region_is_ram(section->mr)) {
1933 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1934 + memory_region_section_addr(section, paddr);
1935 if (!section->readonly) {
1936 iotlb |= phys_section_notdirty;
1938 iotlb |= phys_section_rom;
1941 /* IO handlers are currently passed a physical address.
1942 It would be nice to pass an offset from the base address
1943 of that region. This would avoid having to special case RAM,
1944 and avoid full address decoding in every device.
1945 We can't use the high bits of pd for this because
1946 IO_MEM_ROMD uses these as a ram address. */
1947 iotlb = section - phys_sections;
1948 iotlb += memory_region_section_addr(section, paddr);
1951 /* Make accesses to pages with watchpoints go via the
1952 watchpoint trap routines. */
1953 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1954 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1955 /* Avoid trapping reads of pages with a write breakpoint. */
1956 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1957 iotlb = phys_section_watch + paddr;
1958 *address |= TLB_MMIO;
1969 * Walks guest process memory "regions" one by one
1970 * and calls callback function 'fn' for each region.
1972 struct walk_memory_regions_data {
1973 walk_memory_regions_fn fn;
1979 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1980 abi_ulong end, int new_prot)
1982 if (data->start != -1ul) {
1983 int rc = data->fn(data->priv, data->start, end, data->prot);
1989 data->start = (new_prot ? end : -1ul);
1990 data->prot = new_prot;
1995 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1996 abi_ulong base, int level, void **lp)
2002 return walk_memory_regions_end(data, base, 0);
2008 for (i = 0; i < L2_SIZE; ++i) {
2009 int prot = pd[i].flags;
2011 pa = base | (i << TARGET_PAGE_BITS);
2012 if (prot != data->prot) {
2013 rc = walk_memory_regions_end(data, pa, prot);
2022 for (i = 0; i < L2_SIZE; ++i) {
2023 pa = base | ((abi_ulong)i <<
2024 (TARGET_PAGE_BITS + L2_BITS * level));
2025 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
2035 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
2037 struct walk_memory_regions_data data;
2045 for (i = 0; i < V_L1_SIZE; i++) {
2046 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
2047 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2054 return walk_memory_regions_end(&data, 0, 0);
2057 static int dump_region(void *priv, abi_ulong start,
2058 abi_ulong end, unsigned long prot)
2060 FILE *f = (FILE *)priv;
2062 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2063 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2064 start, end, end - start,
2065 ((prot & PAGE_READ) ? 'r' : '-'),
2066 ((prot & PAGE_WRITE) ? 'w' : '-'),
2067 ((prot & PAGE_EXEC) ? 'x' : '-'));
2072 /* dump memory mappings */
2073 void page_dump(FILE *f)
2075 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2076 "start", "end", "size", "prot");
2077 walk_memory_regions(f, dump_region);
2080 int page_get_flags(target_ulong address)
2084 p = page_find(address >> TARGET_PAGE_BITS);
2091 /* Modify the flags of a page and invalidate the code if necessary.
2092 The flag PAGE_WRITE_ORG is positioned automatically depending
2093 on PAGE_WRITE. The mmap_lock should already be held. */
2094 void page_set_flags(target_ulong start, target_ulong end, int flags)
2096 target_ulong addr, len;
2098 /* This function should never be called with addresses outside the
2099 guest address space. If this assert fires, it probably indicates
2100 a missing call to h2g_valid. */
2101 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2102 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2104 assert(start < end);
2106 start = start & TARGET_PAGE_MASK;
2107 end = TARGET_PAGE_ALIGN(end);
2109 if (flags & PAGE_WRITE) {
2110 flags |= PAGE_WRITE_ORG;
2113 for (addr = start, len = end - start;
2115 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2116 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2118 /* If the write protection bit is set, then we invalidate
2120 if (!(p->flags & PAGE_WRITE) &&
2121 (flags & PAGE_WRITE) &&
2123 tb_invalidate_phys_page(addr, 0, NULL);
2129 int page_check_range(target_ulong start, target_ulong len, int flags)
2135 /* This function should never be called with addresses outside the
2136 guest address space. If this assert fires, it probably indicates
2137 a missing call to h2g_valid. */
2138 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2139 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2145 if (start + len - 1 < start) {
2146 /* We've wrapped around. */
2150 /* must do this before we lose bits in the next step */
2151 end = TARGET_PAGE_ALIGN(start + len);
2152 start = start & TARGET_PAGE_MASK;
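/* Example (illustrative, 4KB target pages): start == 0x12f80, len == 0x100
   gives end == 0x14000 and start == 0x12000, so both pages touched by the
   range are checked; masking start first would yield end == 0x13000 and the
   second page would be missed. */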
2154 for (addr = start, len = end - start;
2156 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2157 p = page_find(addr >> TARGET_PAGE_BITS);
2161 if (!(p->flags & PAGE_VALID)) {
2165 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
2168 if (flags & PAGE_WRITE) {
2169 if (!(p->flags & PAGE_WRITE_ORG)) {
2172 /* unprotect the page if it was put read-only because it
2173 contains translated code */
2174 if (!(p->flags & PAGE_WRITE)) {
2175 if (!page_unprotect(addr, 0, NULL)) {
2185 /* called from signal handler: invalidate the code and unprotect the
2186 page. Return TRUE if the fault was successfully handled. */
2187 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2191 target_ulong host_start, host_end, addr;
2193 /* Technically this isn't safe inside a signal handler. However we
2194 know this only ever happens in a synchronous SEGV handler, so in
2195 practice it seems to be ok. */
2198 p = page_find(address >> TARGET_PAGE_BITS);
2204 /* if the page was really writable, then we change its
2205 protection back to writable */
2206 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2207 host_start = address & qemu_host_page_mask;
2208 host_end = host_start + qemu_host_page_size;
2211 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2212 p = page_find(addr >> TARGET_PAGE_BITS);
2213 p->flags |= PAGE_WRITE;
2216 /* and since the content will be modified, we must invalidate
2217 the corresponding translated code. */
2218 tb_invalidate_phys_page(addr, pc, puc);
2219 #ifdef DEBUG_TB_CHECK
2220 tb_invalidate_check(addr);
2223 mprotect((void *)g2h(host_start), qemu_host_page_size,
2232 #endif /* defined(CONFIG_USER_ONLY) */
2234 #if !defined(CONFIG_USER_ONLY)
2236 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2237 typedef struct subpage_t {
2240 uint16_t sub_section[TARGET_PAGE_SIZE];
2243 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2245 static subpage_t *subpage_init(hwaddr base);
2246 static void destroy_page_desc(uint16_t section_index)
2248 MemoryRegionSection *section = &phys_sections[section_index];
2249 MemoryRegion *mr = section->mr;
2252 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2253 memory_region_destroy(&subpage->iomem);
2258 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2263 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2267 p = phys_map_nodes[lp->ptr];
2268 for (i = 0; i < L2_SIZE; ++i) {
2269 if (!p[i].is_leaf) {
2270 destroy_l2_mapping(&p[i], level - 1);
2272 destroy_page_desc(p[i].ptr);
2276 lp->ptr = PHYS_MAP_NODE_NIL;
2279 static void destroy_all_mappings(AddressSpaceDispatch *d)
2281 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2282 phys_map_nodes_reset();
2285 static uint16_t phys_section_add(MemoryRegionSection *section)
2287 if (phys_sections_nb == phys_sections_nb_alloc) {
2288 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2289 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2290 phys_sections_nb_alloc);
2292 phys_sections[phys_sections_nb] = *section;
2293 return phys_sections_nb++;
2296 static void phys_sections_clear(void)
2298 phys_sections_nb = 0;
2301 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2304 hwaddr base = section->offset_within_address_space
2306 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2307 MemoryRegionSection subsection = {
2308 .offset_within_address_space = base,
2309 .size = TARGET_PAGE_SIZE,
2313 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2315 if (!(existing->mr->subpage)) {
2316 subpage = subpage_init(base);
2317 subsection.mr = &subpage->iomem;
2318 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2319 phys_section_add(&subsection));
2321 subpage = container_of(existing->mr, subpage_t, iomem);
2323 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2324 end = start + section->size - 1;
2325 subpage_register(subpage, start, end, phys_section_add(section));
2329 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2331 hwaddr start_addr = section->offset_within_address_space;
2332 ram_addr_t size = section->size;
2334 uint16_t section_index = phys_section_add(section);
2339 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2343 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2345 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2346 MemoryRegionSection now = *section, remain = *section;
2348 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2349 || (now.size < TARGET_PAGE_SIZE)) {
2350 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2351 - now.offset_within_address_space,
2353 register_subpage(d, &now);
2354 remain.size -= now.size;
2355 remain.offset_within_address_space += now.size;
2356 remain.offset_within_region += now.size;
2358 while (remain.size >= TARGET_PAGE_SIZE) {
2360 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2361 now.size = TARGET_PAGE_SIZE;
2362 register_subpage(d, &now);
2364 now.size &= TARGET_PAGE_MASK;
2365 register_multipage(d, &now);
2367 remain.size -= now.size;
2368 remain.offset_within_address_space += now.size;
2369 remain.offset_within_region += now.size;
2373 register_subpage(d, &now);
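/* Example of the split above (illustrative, 4KB target pages, page-aligned
   offsets): a section covering [0x2000, 0x5400) in the address space is
   registered as full pages [0x2000, 0x5000) via register_multipage() plus a
   subpage tail [0x5000, 0x5400); an unaligned start would add a subpage head
   in the first branch above. */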
2377 void qemu_flush_coalesced_mmio_buffer(void)
2380 kvm_flush_coalesced_mmio_buffer();
2383 #if defined(__linux__) && !defined(TARGET_S390X)
2385 #include <sys/vfs.h>
2387 #define HUGETLBFS_MAGIC 0x958458f6
2389 static long gethugepagesize(const char *path)
2395 ret = statfs(path, &fs);
2396 } while (ret != 0 && errno == EINTR);
2403 if (fs.f_type != HUGETLBFS_MAGIC)
2404 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2409 static void *file_ram_alloc(RAMBlock *block,
2419 unsigned long hpagesize;
2421 hpagesize = gethugepagesize(path);
2426 if (memory < hpagesize) {
2430 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2431 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2435 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2439 fd = mkstemp(filename);
2441 perror("unable to create backing store for hugepages");
2448 memory = (memory+hpagesize-1) & ~(hpagesize-1);
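/* Example of the rounding above (illustrative): with 2MB huge pages a 3MB
   request becomes 4MB, so the backing file always holds a whole number of
   huge pages before it is ftruncate()d and mmap()ed below. */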
2451 * ftruncate is not supported by hugetlbfs in older
2452 * hosts, so don't bother bailing out on errors.
2453 * If anything goes wrong with it under other filesystems,
2456 if (ftruncate(fd, memory))
2457 perror("ftruncate");
2460 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2461 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2462 * to sidestep this quirk.
2464 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2465 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2467 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2469 if (area == MAP_FAILED) {
2470 perror("file_ram_alloc: can't mmap RAM pages");
2479 static ram_addr_t find_ram_offset(ram_addr_t size)
2481 RAMBlock *block, *next_block;
2482 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2484 if (QLIST_EMPTY(&ram_list.blocks))
2487 QLIST_FOREACH(block, &ram_list.blocks, next) {
2488 ram_addr_t end, next = RAM_ADDR_MAX;
2490 end = block->offset + block->length;
2492 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2493 if (next_block->offset >= end) {
2494 next = MIN(next, next_block->offset);
2497 if (next - end >= size && next - end < mingap) {
2499 mingap = next - end;
2503 if (offset == RAM_ADDR_MAX) {
2504 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2512 ram_addr_t last_ram_offset(void)
2515 ram_addr_t last = 0;
2517 QLIST_FOREACH(block, &ram_list.blocks, next)
2518 last = MAX(last, block->offset + block->length);
2523 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2526 QemuOpts *machine_opts;
2528     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2529 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2531 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2532 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2534 perror("qemu_madvise");
2535 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2536 "but dump_guest_core=off specified\n");
2541 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2543 RAMBlock *new_block, *block;
2546 QLIST_FOREACH(block, &ram_list.blocks, next) {
2547 if (block->offset == addr) {
2553 assert(!new_block->idstr[0]);
2556 char *id = qdev_get_dev_path(dev);
2558 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2562 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2564 QLIST_FOREACH(block, &ram_list.blocks, next) {
2565 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2566 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2573 static int memory_try_enable_merging(void *addr, size_t len)
2577 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2578 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2579 /* disabled by the user */
2583 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2586 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2589 RAMBlock *new_block;
2591 size = TARGET_PAGE_ALIGN(size);
2592 new_block = g_malloc0(sizeof(*new_block));
2595 new_block->offset = find_ram_offset(size);
2597 new_block->host = host;
2598 new_block->flags |= RAM_PREALLOC_MASK;
2601 #if defined (__linux__) && !defined(TARGET_S390X)
2602 new_block->host = file_ram_alloc(new_block, size, mem_path);
2603 if (!new_block->host) {
2604 new_block->host = qemu_vmalloc(size);
2605 memory_try_enable_merging(new_block->host, size);
2608 fprintf(stderr, "-mem-path option unsupported\n");
2612 if (xen_enabled()) {
2613 xen_ram_alloc(new_block->offset, size, mr);
2614 } else if (kvm_enabled()) {
2615 /* some s390/kvm configurations have special constraints */
2616 new_block->host = kvm_vmalloc(size);
2618 new_block->host = qemu_vmalloc(size);
2620 memory_try_enable_merging(new_block->host, size);
2623 new_block->length = size;
2625 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2627 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2628 last_ram_offset() >> TARGET_PAGE_BITS);
2629 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2630 0, size >> TARGET_PAGE_BITS);
2631 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2633 qemu_ram_setup_dump(new_block->host, size);
2634 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2637 kvm_setup_guest_memory(new_block->host, size);
2639 return new_block->offset;
2642 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2644 return qemu_ram_alloc_from_ptr(size, NULL, mr);
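/* Illustrative sketch (not from the original file): how a caller might
 * allocate a block of guest RAM and give it a stable id string for
 * migration.  "mr", "dev", the size and the "vram" name are hypothetical;
 * most devices reach qemu_ram_alloc() indirectly via memory_region_init_ram(). */
static ram_addr_t example_alloc_named_ram(MemoryRegion *mr, DeviceState *dev)
{
    ram_addr_t offset = qemu_ram_alloc(16 * 1024 * 1024, mr);

    /* Becomes "<device-path>/vram", so migration can match blocks up. */
    qemu_ram_set_idstr(offset, "vram", dev);
    return offset;
}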
2647 void qemu_ram_free_from_ptr(ram_addr_t addr)
2651 QLIST_FOREACH(block, &ram_list.blocks, next) {
2652 if (addr == block->offset) {
2653 QLIST_REMOVE(block, next);
2660 void qemu_ram_free(ram_addr_t addr)
2664 QLIST_FOREACH(block, &ram_list.blocks, next) {
2665 if (addr == block->offset) {
2666 QLIST_REMOVE(block, next);
2667 if (block->flags & RAM_PREALLOC_MASK) {
2669 } else if (mem_path) {
2670 #if defined (__linux__) && !defined(TARGET_S390X)
2672 munmap(block->host, block->length);
2675 qemu_vfree(block->host);
2681 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2682 munmap(block->host, block->length);
2684 if (xen_enabled()) {
2685 xen_invalidate_map_cache_entry(block->host);
2687 qemu_vfree(block->host);
2699 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2706 QLIST_FOREACH(block, &ram_list.blocks, next) {
2707 offset = addr - block->offset;
2708 if (offset < block->length) {
2709 vaddr = block->host + offset;
2710 if (block->flags & RAM_PREALLOC_MASK) {
2714 munmap(vaddr, length);
2716 #if defined(__linux__) && !defined(TARGET_S390X)
2719 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2722 flags |= MAP_PRIVATE;
2724 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2725 flags, block->fd, offset);
2727 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2728 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2735 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2736 flags |= MAP_SHARED | MAP_ANONYMOUS;
2737 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2740 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2741 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2745 if (area != vaddr) {
2746 fprintf(stderr, "Could not remap addr: "
2747 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2751 memory_try_enable_merging(vaddr, length);
2752 qemu_ram_setup_dump(vaddr, length);
2758 #endif /* !_WIN32 */
2760 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2761 With the exception of the softmmu code in this file, this should
2762 only be used for local memory (e.g. video ram) that the device owns,
2763 and knows it isn't going to access beyond the end of the block.
2765 It should not be used for general purpose DMA.
2766 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2768 void *qemu_get_ram_ptr(ram_addr_t addr)
2772 QLIST_FOREACH(block, &ram_list.blocks, next) {
2773 if (addr - block->offset < block->length) {
2774             /* Move this entry to the start of the list. */
2775 if (block != QLIST_FIRST(&ram_list.blocks)) {
2776 QLIST_REMOVE(block, next);
2777 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2779 if (xen_enabled()) {
2780 /* We need to check if the requested address is in the RAM
2781 * because we don't want to map the entire memory in QEMU.
2782 * In that case just map until the end of the page.
2784 if (block->offset == 0) {
2785 return xen_map_cache(addr, 0, 0);
2786 } else if (block->host == NULL) {
2788 xen_map_cache(block->offset, block->length, 1);
2791 return block->host + (addr - block->offset);
2795 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2801 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2802 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2804 static void *qemu_safe_ram_ptr(ram_addr_t addr)
2808 QLIST_FOREACH(block, &ram_list.blocks, next) {
2809 if (addr - block->offset < block->length) {
2810 if (xen_enabled()) {
2811 /* We need to check if the requested address is in the RAM
2812 * because we don't want to map the entire memory in QEMU.
2813 * In that case just map until the end of the page.
2815 if (block->offset == 0) {
2816 return xen_map_cache(addr, 0, 0);
2817 } else if (block->host == NULL) {
2819 xen_map_cache(block->offset, block->length, 1);
2822 return block->host + (addr - block->offset);
2826 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2832 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2833 * but takes a size argument */
2834 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2839 if (xen_enabled()) {
2840 return xen_map_cache(addr, *size, 1);
2844 QLIST_FOREACH(block, &ram_list.blocks, next) {
2845 if (addr - block->offset < block->length) {
2846 if (addr - block->offset + *size > block->length)
2847 *size = block->length - addr + block->offset;
2848 return block->host + (addr - block->offset);
2852 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2857 void qemu_put_ram_ptr(void *addr)
2859 trace_qemu_put_ram_ptr(addr);
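/* Illustrative sketch (not from the original file): the intended use of
 * qemu_get_ram_ptr() per the comment above it -- a device touching RAM it
 * owns (here, clearing hypothetical video RAM it allocated itself), never
 * guest-controlled DMA addresses. */
static void example_clear_vram(ram_addr_t vram_offset, ram_addr_t vram_size)
{
    void *p = qemu_get_ram_ptr(vram_offset);

    /* Safe only because the device knows its block is at least vram_size
       bytes long. */
    memset(p, 0, vram_size);
    qemu_put_ram_ptr(p);
}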
2862 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2865 uint8_t *host = ptr;
2867 if (xen_enabled()) {
2868 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2872 QLIST_FOREACH(block, &ram_list.blocks, next) {
2873         /* This case occurs when the block is not mapped. */
2874 if (block->host == NULL) {
2877 if (host - block->host < block->length) {
2878 *ram_addr = block->offset + (host - block->host);
2886 /* Some of the softmmu routines need to translate from a host pointer
2887 (typically a TLB entry) back to a ram offset. */
2888 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2890 ram_addr_t ram_addr;
2892 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2893 fprintf(stderr, "Bad ram pointer %p\n", ptr);
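/* Illustrative sketch (not from the original file): the relationship the
 * softmmu code relies on -- a host pointer obtained for a ram offset can be
 * translated back to the same offset.  The assertion exists only to make
 * that invariant explicit; the helper name is hypothetical. */
static void example_ram_addr_round_trip(ram_addr_t addr)
{
    void *host = qemu_get_ram_ptr(addr);
    ram_addr_t back = qemu_ram_addr_from_host_nofail(host);

    assert(back == addr);
}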
2899 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2902 #ifdef DEBUG_UNASSIGNED
2903 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2905 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2906 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2911 static void unassigned_mem_write(void *opaque, hwaddr addr,
2912 uint64_t val, unsigned size)
2914 #ifdef DEBUG_UNASSIGNED
2915 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2917 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2918 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2922 static const MemoryRegionOps unassigned_mem_ops = {
2923 .read = unassigned_mem_read,
2924 .write = unassigned_mem_write,
2925 .endianness = DEVICE_NATIVE_ENDIAN,
2928 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2934 static void error_mem_write(void *opaque, hwaddr addr,
2935 uint64_t value, unsigned size)
2940 static const MemoryRegionOps error_mem_ops = {
2941 .read = error_mem_read,
2942 .write = error_mem_write,
2943 .endianness = DEVICE_NATIVE_ENDIAN,
2946 static const MemoryRegionOps rom_mem_ops = {
2947 .read = error_mem_read,
2948 .write = unassigned_mem_write,
2949 .endianness = DEVICE_NATIVE_ENDIAN,
2952 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2953 uint64_t val, unsigned size)
2956 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2957 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2958 #if !defined(CONFIG_USER_ONLY)
2959 tb_invalidate_phys_page_fast(ram_addr, size);
2960 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2965 stb_p(qemu_get_ram_ptr(ram_addr), val);
2968 stw_p(qemu_get_ram_ptr(ram_addr), val);
2971 stl_p(qemu_get_ram_ptr(ram_addr), val);
2976 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2977 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2978     /* we remove the notdirty callback only if the code has been flushed */
2980 if (dirty_flags == 0xff)
2981 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2984 static const MemoryRegionOps notdirty_mem_ops = {
2985 .read = error_mem_read,
2986 .write = notdirty_mem_write,
2987 .endianness = DEVICE_NATIVE_ENDIAN,
2990 /* Generate a debug exception if a watchpoint has been hit. */
2991 static void check_watchpoint(int offset, int len_mask, int flags)
2993 CPUArchState *env = cpu_single_env;
2994 target_ulong pc, cs_base;
2995 TranslationBlock *tb;
3000 if (env->watchpoint_hit) {
3001 /* We re-entered the check after replacing the TB. Now raise
3002          * the debug interrupt so that it will trigger after the
3003 * current instruction. */
3004 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
3007 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
3008 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
3009 if ((vaddr == (wp->vaddr & len_mask) ||
3010 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
3011 wp->flags |= BP_WATCHPOINT_HIT;
3012 if (!env->watchpoint_hit) {
3013 env->watchpoint_hit = wp;
3014 tb = tb_find_pc(env->mem_io_pc);
3016 cpu_abort(env, "check_watchpoint: could not find TB for "
3017 "pc=%p", (void *)env->mem_io_pc);
3019 cpu_restore_state(tb, env, env->mem_io_pc);
3020 tb_phys_invalidate(tb, -1);
3021 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
3022 env->exception_index = EXCP_DEBUG;
3025 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
3026 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
3027 cpu_resume_from_signal(env, NULL);
3031 wp->flags &= ~BP_WATCHPOINT_HIT;
3036 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
3037    so these check for a hit then pass through to the normal out-of-line load/store functions. */
3039 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
3042 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
3044 case 1: return ldub_phys(addr);
3045 case 2: return lduw_phys(addr);
3046 case 4: return ldl_phys(addr);
3051 static void watch_mem_write(void *opaque, hwaddr addr,
3052 uint64_t val, unsigned size)
3054 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3057 stb_phys(addr, val);
3060 stw_phys(addr, val);
3063 stl_phys(addr, val);
3069 static const MemoryRegionOps watch_mem_ops = {
3070 .read = watch_mem_read,
3071 .write = watch_mem_write,
3072 .endianness = DEVICE_NATIVE_ENDIAN,
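/* Illustrative sketch (not from the original file): how a debug front end
 * might arm a 4-byte write watchpoint so that the watch_mem_* handlers above
 * are reached via the TLB trick described in the comment.  It assumes the
 * existing cpu_watchpoint_insert() helper; the names here are hypothetical. */
static int example_arm_write_watchpoint(CPUArchState *env, target_ulong vaddr)
{
    CPUWatchpoint *wp;

    return cpu_watchpoint_insert(env, vaddr, 4, BP_MEM_WRITE, &wp);
}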
3075 static uint64_t subpage_read(void *opaque, hwaddr addr,
3078 subpage_t *mmio = opaque;
3079 unsigned int idx = SUBPAGE_IDX(addr);
3080 MemoryRegionSection *section;
3081 #if defined(DEBUG_SUBPAGE)
3082 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3083 mmio, len, addr, idx);
3086 section = &phys_sections[mmio->sub_section[idx]];
3088 addr -= section->offset_within_address_space;
3089 addr += section->offset_within_region;
3090 return io_mem_read(section->mr, addr, len);
3093 static void subpage_write(void *opaque, hwaddr addr,
3094 uint64_t value, unsigned len)
3096 subpage_t *mmio = opaque;
3097 unsigned int idx = SUBPAGE_IDX(addr);
3098 MemoryRegionSection *section;
3099 #if defined(DEBUG_SUBPAGE)
3100 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3101 " idx %d value %"PRIx64"\n",
3102 __func__, mmio, len, addr, idx, value);
3105 section = &phys_sections[mmio->sub_section[idx]];
3107 addr -= section->offset_within_address_space;
3108 addr += section->offset_within_region;
3109 io_mem_write(section->mr, addr, value, len);
3112 static const MemoryRegionOps subpage_ops = {
3113 .read = subpage_read,
3114 .write = subpage_write,
3115 .endianness = DEVICE_NATIVE_ENDIAN,
3118 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3121 ram_addr_t raddr = addr;
3122 void *ptr = qemu_get_ram_ptr(raddr);
3124 case 1: return ldub_p(ptr);
3125 case 2: return lduw_p(ptr);
3126 case 4: return ldl_p(ptr);
3131 static void subpage_ram_write(void *opaque, hwaddr addr,
3132 uint64_t value, unsigned size)
3134 ram_addr_t raddr = addr;
3135 void *ptr = qemu_get_ram_ptr(raddr);
3137 case 1: return stb_p(ptr, value);
3138 case 2: return stw_p(ptr, value);
3139 case 4: return stl_p(ptr, value);
3144 static const MemoryRegionOps subpage_ram_ops = {
3145 .read = subpage_ram_read,
3146 .write = subpage_ram_write,
3147 .endianness = DEVICE_NATIVE_ENDIAN,
3150 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3155 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3157 idx = SUBPAGE_IDX(start);
3158 eidx = SUBPAGE_IDX(end);
3159 #if defined(DEBUG_SUBPAGE)
3160 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3161 mmio, start, end, idx, eidx, memory);
3163 if (memory_region_is_ram(phys_sections[section].mr)) {
3164 MemoryRegionSection new_section = phys_sections[section];
3165 new_section.mr = &io_mem_subpage_ram;
3166 section = phys_section_add(&new_section);
3168 for (; idx <= eidx; idx++) {
3169 mmio->sub_section[idx] = section;
3175 static subpage_t *subpage_init(hwaddr base)
3179 mmio = g_malloc0(sizeof(subpage_t));
3182 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3183 "subpage", TARGET_PAGE_SIZE);
3184 mmio->iomem.subpage = true;
3185 #if defined(DEBUG_SUBPAGE)
3186 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3187 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3189 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
3194 static uint16_t dummy_section(MemoryRegion *mr)
3196 MemoryRegionSection section = {
3198 .offset_within_address_space = 0,
3199 .offset_within_region = 0,
3203     return phys_section_add(&section);
3206 MemoryRegion *iotlb_to_region(hwaddr index)
3208 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3211 static void io_mem_init(void)
3213 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3214 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3215 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3216 "unassigned", UINT64_MAX);
3217     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3218 "notdirty", UINT64_MAX);
3219 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3220 "subpage-ram", UINT64_MAX);
3221 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3222 "watch", UINT64_MAX);
3225 static void mem_begin(MemoryListener *listener)
3227 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3229 destroy_all_mappings(d);
3230 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3233 static void core_begin(MemoryListener *listener)
3235 phys_sections_clear();
3236 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3237 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3238 phys_section_rom = dummy_section(&io_mem_rom);
3239 phys_section_watch = dummy_section(&io_mem_watch);
3242 static void tcg_commit(MemoryListener *listener)
3246 /* since each CPU stores ram addresses in its TLB cache, we must
3247 reset the modified entries */
3249 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3254 static void core_log_global_start(MemoryListener *listener)
3256 cpu_physical_memory_set_dirty_tracking(1);
3259 static void core_log_global_stop(MemoryListener *listener)
3261 cpu_physical_memory_set_dirty_tracking(0);
3264 static void io_region_add(MemoryListener *listener,
3265 MemoryRegionSection *section)
3267 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3269 mrio->mr = section->mr;
3270 mrio->offset = section->offset_within_region;
3271 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3272 section->offset_within_address_space, section->size);
3273 ioport_register(&mrio->iorange);
3276 static void io_region_del(MemoryListener *listener,
3277 MemoryRegionSection *section)
3279 isa_unassign_ioport(section->offset_within_address_space, section->size);
3282 static MemoryListener core_memory_listener = {
3283 .begin = core_begin,
3284 .log_global_start = core_log_global_start,
3285 .log_global_stop = core_log_global_stop,
3289 static MemoryListener io_memory_listener = {
3290 .region_add = io_region_add,
3291 .region_del = io_region_del,
3295 static MemoryListener tcg_memory_listener = {
3296 .commit = tcg_commit,
3299 void address_space_init_dispatch(AddressSpace *as)
3301 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3303 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3304 d->listener = (MemoryListener) {
3306 .region_add = mem_add,
3307 .region_nop = mem_add,
3311 memory_listener_register(&d->listener, as);
3314 void address_space_destroy_dispatch(AddressSpace *as)
3316 AddressSpaceDispatch *d = as->dispatch;
3318 memory_listener_unregister(&d->listener);
3319 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3321 as->dispatch = NULL;
3324 static void memory_map_init(void)
3326 system_memory = g_malloc(sizeof(*system_memory));
3327 memory_region_init(system_memory, "system", INT64_MAX);
3328 address_space_init(&address_space_memory, system_memory);
3329 address_space_memory.name = "memory";
3331 system_io = g_malloc(sizeof(*system_io));
3332 memory_region_init(system_io, "io", 65536);
3333 address_space_init(&address_space_io, system_io);
3334 address_space_io.name = "I/O";
3336 memory_listener_register(&core_memory_listener, &address_space_memory);
3337 memory_listener_register(&io_memory_listener, &address_space_io);
3338 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3340 dma_context_init(&dma_context_memory, &address_space_memory,
3344 MemoryRegion *get_system_memory(void)
3346 return system_memory;
3349 MemoryRegion *get_system_io(void)
3354 #endif /* !defined(CONFIG_USER_ONLY) */
3356 /* physical memory access (slow version, mainly for debug) */
3357 #if defined(CONFIG_USER_ONLY)
3358 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3359 uint8_t *buf, int len, int is_write)
3366 page = addr & TARGET_PAGE_MASK;
3367 l = (page + TARGET_PAGE_SIZE) - addr;
3370 flags = page_get_flags(page);
3371 if (!(flags & PAGE_VALID))
3374 if (!(flags & PAGE_WRITE))
3376 /* XXX: this code should not depend on lock_user */
3377 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3380 unlock_user(p, addr, l);
3382 if (!(flags & PAGE_READ))
3384 /* XXX: this code should not depend on lock_user */
3385 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3388 unlock_user(p, addr, 0);
3399 static void invalidate_and_set_dirty(hwaddr addr,
3402 if (!cpu_physical_memory_is_dirty(addr)) {
3403 /* invalidate code */
3404 tb_invalidate_phys_page_range(addr, addr + length, 0);
3406 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3408 xen_modified_memory(addr, length);
3411 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3412 int len, bool is_write)
3414 AddressSpaceDispatch *d = as->dispatch;
3419 MemoryRegionSection *section;
3422 page = addr & TARGET_PAGE_MASK;
3423 l = (page + TARGET_PAGE_SIZE) - addr;
3426 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3429 if (!memory_region_is_ram(section->mr)) {
3431 addr1 = memory_region_section_addr(section, addr);
3432 /* XXX: could force cpu_single_env to NULL to avoid
3434 if (l >= 4 && ((addr1 & 3) == 0)) {
3435 /* 32 bit write access */
3437 io_mem_write(section->mr, addr1, val, 4);
3439 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3440 /* 16 bit write access */
3442 io_mem_write(section->mr, addr1, val, 2);
3445 /* 8 bit write access */
3447 io_mem_write(section->mr, addr1, val, 1);
3450 } else if (!section->readonly) {
3452 addr1 = memory_region_get_ram_addr(section->mr)
3453 + memory_region_section_addr(section, addr);
3455 ptr = qemu_get_ram_ptr(addr1);
3456 memcpy(ptr, buf, l);
3457 invalidate_and_set_dirty(addr1, l);
3458 qemu_put_ram_ptr(ptr);
3461 if (!(memory_region_is_ram(section->mr) ||
3462 memory_region_is_romd(section->mr))) {
3465 addr1 = memory_region_section_addr(section, addr);
3466 if (l >= 4 && ((addr1 & 3) == 0)) {
3467 /* 32 bit read access */
3468 val = io_mem_read(section->mr, addr1, 4);
3471 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3472 /* 16 bit read access */
3473 val = io_mem_read(section->mr, addr1, 2);
3477 /* 8 bit read access */
3478 val = io_mem_read(section->mr, addr1, 1);
3484 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3485 + memory_region_section_addr(section,
3487 memcpy(buf, ptr, l);
3488 qemu_put_ram_ptr(ptr);
3497 void address_space_write(AddressSpace *as, hwaddr addr,
3498 const uint8_t *buf, int len)
3500 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3504 * address_space_read: read from an address space.
3506 * @as: #AddressSpace to be accessed
3507 * @addr: address within that address space
3508 * @buf: buffer with the data transferred
3510 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3512 address_space_rw(as, addr, buf, len, false);
3516 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3517 int len, int is_write)
3519 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
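/* Illustrative sketch (not from the original file): a device model using the
 * generic slow path above to read a DMA descriptor out of guest memory and
 * write back a status word.  The 16-byte descriptor layout is hypothetical;
 * a real device would usually prefer the explicit-endian helpers further
 * below (e.g. stl_le_phys()) for the status write. */
static void example_process_descriptor(hwaddr desc_addr)
{
    uint8_t desc[16];
    uint32_t status = 1;

    cpu_physical_memory_read(desc_addr, desc, sizeof(desc));
    /* ... interpret desc[] ... */
    cpu_physical_memory_write(desc_addr + 12, &status, sizeof(status));
}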
3522 /* used for ROM loading : can write in RAM and ROM */
3523 void cpu_physical_memory_write_rom(hwaddr addr,
3524 const uint8_t *buf, int len)
3526 AddressSpaceDispatch *d = address_space_memory.dispatch;
3530 MemoryRegionSection *section;
3533 page = addr & TARGET_PAGE_MASK;
3534 l = (page + TARGET_PAGE_SIZE) - addr;
3537 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3539 if (!(memory_region_is_ram(section->mr) ||
3540 memory_region_is_romd(section->mr))) {
3543 unsigned long addr1;
3544 addr1 = memory_region_get_ram_addr(section->mr)
3545 + memory_region_section_addr(section, addr);
3547 ptr = qemu_get_ram_ptr(addr1);
3548 memcpy(ptr, buf, l);
3549 invalidate_and_set_dirty(addr1, l);
3550 qemu_put_ram_ptr(ptr);
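/* Illustrative sketch (not from the original file): a firmware loader using
 * the helper above to copy a blob into a ROM-backed region at a fixed base
 * address.  Going through cpu_physical_memory_write_rom() rather than
 * cpu_physical_memory_write() is what lets the bytes land in read-only
 * memory.  The parameters are hypothetical. */
static void example_install_firmware(hwaddr rom_base,
                                     const uint8_t *blob, int blob_len)
{
    cpu_physical_memory_write_rom(rom_base, blob, blob_len);
}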
3564 static BounceBuffer bounce;
3566 typedef struct MapClient {
3568 void (*callback)(void *opaque);
3569 QLIST_ENTRY(MapClient) link;
3572 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3573 = QLIST_HEAD_INITIALIZER(map_client_list);
3575 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3577 MapClient *client = g_malloc(sizeof(*client));
3579 client->opaque = opaque;
3580 client->callback = callback;
3581 QLIST_INSERT_HEAD(&map_client_list, client, link);
3585 static void cpu_unregister_map_client(void *_client)
3587 MapClient *client = (MapClient *)_client;
3589 QLIST_REMOVE(client, link);
3593 static void cpu_notify_map_clients(void)
3597 while (!QLIST_EMPTY(&map_client_list)) {
3598 client = QLIST_FIRST(&map_client_list);
3599 client->callback(client->opaque);
3600 cpu_unregister_map_client(client);
3604 /* Map a physical memory region into a host virtual address.
3605 * May map a subset of the requested range, given by and returned in *plen.
3606 * May return NULL if resources needed to perform the mapping are exhausted.
3607 * Use only for reads OR writes - not for read-modify-write operations.
3608 * Use cpu_register_map_client() to know when retrying the map operation is
3609 * likely to succeed.
3611 void *address_space_map(AddressSpace *as,
3616 AddressSpaceDispatch *d = as->dispatch;
3621 MemoryRegionSection *section;
3622 ram_addr_t raddr = RAM_ADDR_MAX;
3627 page = addr & TARGET_PAGE_MASK;
3628 l = (page + TARGET_PAGE_SIZE) - addr;
3631 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3633 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3634 if (todo || bounce.buffer) {
3637 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3641 address_space_read(as, addr, bounce.buffer, l);
3645 return bounce.buffer;
3648 raddr = memory_region_get_ram_addr(section->mr)
3649 + memory_region_section_addr(section, addr);
3657 ret = qemu_ram_ptr_length(raddr, &rlen);
3662 /* Unmaps a memory region previously mapped by address_space_map().
3663 * Will also mark the memory as dirty if is_write == 1. access_len gives
3664 * the amount of memory that was actually read or written by the caller.
3666 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3667 int is_write, hwaddr access_len)
3669 if (buffer != bounce.buffer) {
3671 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3672 while (access_len) {
3674 l = TARGET_PAGE_SIZE;
3677 invalidate_and_set_dirty(addr1, l);
3682 if (xen_enabled()) {
3683 xen_invalidate_map_cache_entry(buffer);
3688 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3690 qemu_vfree(bounce.buffer);
3691 bounce.buffer = NULL;
3692 cpu_notify_map_clients();
3695 void *cpu_physical_memory_map(hwaddr addr,
3699 return address_space_map(&address_space_memory, addr, plen, is_write);
3702 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3703 int is_write, hwaddr access_len)
3705 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
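/* Illustrative sketch (not from the original file): the zero-copy pattern the
 * wrappers above are meant for.  A device maps a guest buffer, fills it in
 * place, and unmaps it; if mapping fails (for instance the single bounce
 * buffer is already in use), it registers a map client so it can retry later.
 * The function and "retry_cb" are hypothetical. */
static void example_fill_guest_buffer(hwaddr addr, hwaddr len,
                                      void (*retry_cb)(void *opaque),
                                      void *opaque)
{
    hwaddr plen = len;
    void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (!host) {
        cpu_register_map_client(opaque, retry_cb);
        return;
    }
    memset(host, 0, plen);                    /* only plen bytes were mapped */
    cpu_physical_memory_unmap(host, plen, 1, plen);
}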
3708 /* warning: addr must be aligned */
3709 static inline uint32_t ldl_phys_internal(hwaddr addr,
3710 enum device_endian endian)
3714 MemoryRegionSection *section;
3716 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3718 if (!(memory_region_is_ram(section->mr) ||
3719 memory_region_is_romd(section->mr))) {
3721 addr = memory_region_section_addr(section, addr);
3722 val = io_mem_read(section->mr, addr, 4);
3723 #if defined(TARGET_WORDS_BIGENDIAN)
3724 if (endian == DEVICE_LITTLE_ENDIAN) {
3728 if (endian == DEVICE_BIG_ENDIAN) {
3734 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3736 + memory_region_section_addr(section, addr));
3738 case DEVICE_LITTLE_ENDIAN:
3739 val = ldl_le_p(ptr);
3741 case DEVICE_BIG_ENDIAN:
3742 val = ldl_be_p(ptr);
3752 uint32_t ldl_phys(hwaddr addr)
3754 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3757 uint32_t ldl_le_phys(hwaddr addr)
3759 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3762 uint32_t ldl_be_phys(hwaddr addr)
3764 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3767 /* warning: addr must be aligned */
3768 static inline uint64_t ldq_phys_internal(hwaddr addr,
3769 enum device_endian endian)
3773 MemoryRegionSection *section;
3775 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3777 if (!(memory_region_is_ram(section->mr) ||
3778 memory_region_is_romd(section->mr))) {
3780 addr = memory_region_section_addr(section, addr);
3782 /* XXX This is broken when device endian != cpu endian.
3783 Fix and add "endian" variable check */
3784 #ifdef TARGET_WORDS_BIGENDIAN
3785 val = io_mem_read(section->mr, addr, 4) << 32;
3786 val |= io_mem_read(section->mr, addr + 4, 4);
3788 val = io_mem_read(section->mr, addr, 4);
3789 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3793 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3795 + memory_region_section_addr(section, addr));
3797 case DEVICE_LITTLE_ENDIAN:
3798 val = ldq_le_p(ptr);
3800 case DEVICE_BIG_ENDIAN:
3801 val = ldq_be_p(ptr);
3811 uint64_t ldq_phys(hwaddr addr)
3813 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3816 uint64_t ldq_le_phys(hwaddr addr)
3818 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3821 uint64_t ldq_be_phys(hwaddr addr)
3823 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3827 uint32_t ldub_phys(hwaddr addr)
3830 cpu_physical_memory_read(addr, &val, 1);
3834 /* warning: addr must be aligned */
3835 static inline uint32_t lduw_phys_internal(hwaddr addr,
3836 enum device_endian endian)
3840 MemoryRegionSection *section;
3842 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3844 if (!(memory_region_is_ram(section->mr) ||
3845 memory_region_is_romd(section->mr))) {
3847 addr = memory_region_section_addr(section, addr);
3848 val = io_mem_read(section->mr, addr, 2);
3849 #if defined(TARGET_WORDS_BIGENDIAN)
3850 if (endian == DEVICE_LITTLE_ENDIAN) {
3854 if (endian == DEVICE_BIG_ENDIAN) {
3860 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3862 + memory_region_section_addr(section, addr));
3864 case DEVICE_LITTLE_ENDIAN:
3865 val = lduw_le_p(ptr);
3867 case DEVICE_BIG_ENDIAN:
3868 val = lduw_be_p(ptr);
3878 uint32_t lduw_phys(hwaddr addr)
3880 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3883 uint32_t lduw_le_phys(hwaddr addr)
3885 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3888 uint32_t lduw_be_phys(hwaddr addr)
3890 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3893 /* warning: addr must be aligned. The ram page is not marked as dirty
3894 and the code inside is not invalidated. It is useful if the dirty
3895 bits are used to track modified PTEs */
3896 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3899 MemoryRegionSection *section;
3901 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3903 if (!memory_region_is_ram(section->mr) || section->readonly) {
3904 addr = memory_region_section_addr(section, addr);
3905 if (memory_region_is_ram(section->mr)) {
3906 section = &phys_sections[phys_section_rom];
3908 io_mem_write(section->mr, addr, val, 4);
3910 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3912 + memory_region_section_addr(section, addr);
3913 ptr = qemu_get_ram_ptr(addr1);
3916 if (unlikely(in_migration)) {
3917 if (!cpu_physical_memory_is_dirty(addr1)) {
3918 /* invalidate code */
3919 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3921 cpu_physical_memory_set_dirty_flags(
3922 addr1, (0xff & ~CODE_DIRTY_FLAG));
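/* Illustrative sketch (not from the original file): the kind of caller the
 * comment above has in mind -- a target MMU helper setting the "accessed"
 * bit of a guest page table entry.  Using the _notdirty variant keeps the
 * page-table page from being reported as modified by dirty tracking.
 * EXAMPLE_PTE_ACCESSED and the 32-bit PTE layout are hypothetical. */
#define EXAMPLE_PTE_ACCESSED 0x20

static void example_mark_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & EXAMPLE_PTE_ACCESSED)) {
        stl_phys_notdirty(pte_addr, pte | EXAMPLE_PTE_ACCESSED);
    }
}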
3928 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3931 MemoryRegionSection *section;
3933 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3935 if (!memory_region_is_ram(section->mr) || section->readonly) {
3936 addr = memory_region_section_addr(section, addr);
3937 if (memory_region_is_ram(section->mr)) {
3938 section = &phys_sections[phys_section_rom];
3940 #ifdef TARGET_WORDS_BIGENDIAN
3941 io_mem_write(section->mr, addr, val >> 32, 4);
3942 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3944 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3945 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3948 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3950 + memory_region_section_addr(section, addr));
3955 /* warning: addr must be aligned */
3956 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3957 enum device_endian endian)
3960 MemoryRegionSection *section;
3962 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3964 if (!memory_region_is_ram(section->mr) || section->readonly) {
3965 addr = memory_region_section_addr(section, addr);
3966 if (memory_region_is_ram(section->mr)) {
3967 section = &phys_sections[phys_section_rom];
3969 #if defined(TARGET_WORDS_BIGENDIAN)
3970 if (endian == DEVICE_LITTLE_ENDIAN) {
3974 if (endian == DEVICE_BIG_ENDIAN) {
3978 io_mem_write(section->mr, addr, val, 4);
3980 unsigned long addr1;
3981 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3982 + memory_region_section_addr(section, addr);
3984 ptr = qemu_get_ram_ptr(addr1);
3986 case DEVICE_LITTLE_ENDIAN:
3989 case DEVICE_BIG_ENDIAN:
3996 invalidate_and_set_dirty(addr1, 4);
4000 void stl_phys(hwaddr addr, uint32_t val)
4002 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4005 void stl_le_phys(hwaddr addr, uint32_t val)
4007 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4010 void stl_be_phys(hwaddr addr, uint32_t val)
4012 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4016 void stb_phys(hwaddr addr, uint32_t val)
4019 cpu_physical_memory_write(addr, &v, 1);
4022 /* warning: addr must be aligned */
4023 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
4024 enum device_endian endian)
4027 MemoryRegionSection *section;
4029 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
4031 if (!memory_region_is_ram(section->mr) || section->readonly) {
4032 addr = memory_region_section_addr(section, addr);
4033 if (memory_region_is_ram(section->mr)) {
4034 section = &phys_sections[phys_section_rom];
4036 #if defined(TARGET_WORDS_BIGENDIAN)
4037 if (endian == DEVICE_LITTLE_ENDIAN) {
4041 if (endian == DEVICE_BIG_ENDIAN) {
4045 io_mem_write(section->mr, addr, val, 2);
4047 unsigned long addr1;
4048 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
4049 + memory_region_section_addr(section, addr);
4051 ptr = qemu_get_ram_ptr(addr1);
4053 case DEVICE_LITTLE_ENDIAN:
4056 case DEVICE_BIG_ENDIAN:
4063 invalidate_and_set_dirty(addr1, 2);
4067 void stw_phys(hwaddr addr, uint32_t val)
4069 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4072 void stw_le_phys(hwaddr addr, uint32_t val)
4074 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4077 void stw_be_phys(hwaddr addr, uint32_t val)
4079 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4083 void stq_phys(hwaddr addr, uint64_t val)
4086 cpu_physical_memory_write(addr, &val, 8);
4089 void stq_le_phys(hwaddr addr, uint64_t val)
4091 val = cpu_to_le64(val);
4092 cpu_physical_memory_write(addr, &val, 8);
4095 void stq_be_phys(hwaddr addr, uint64_t val)
4097 val = cpu_to_be64(val);
4098 cpu_physical_memory_write(addr, &val, 8);
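/* Illustrative sketch (not from the original file): why the _le/_be variants
 * above exist.  A device whose registers are defined as little-endian stores
 * a 64-bit ring base with stq_le_phys(), so the bytes land in guest memory in
 * the device's byte order regardless of TARGET_WORDS_BIGENDIAN, and the
 * matching load reads it back.  The address and value are hypothetical. */
static void example_store_ring_base(hwaddr slot_addr, uint64_t ring_base)
{
    stq_le_phys(slot_addr, ring_base);
    assert(ldq_le_phys(slot_addr) == ring_base);
}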
4101 /* virtual memory access for debug (includes writing to ROM) */
4102 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4103 uint8_t *buf, int len, int is_write)
4110 page = addr & TARGET_PAGE_MASK;
4111 phys_addr = cpu_get_phys_page_debug(env, page);
4112 /* if no physical page mapped, return an error */
4113 if (phys_addr == -1)
4115 l = (page + TARGET_PAGE_SIZE) - addr;
4118 phys_addr += (addr & ~TARGET_PAGE_MASK);
4120 cpu_physical_memory_write_rom(phys_addr, buf, l);
4122 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
4131 /* in deterministic execution mode, instructions doing device I/Os
4132 must be at the end of the TB */
4133 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4135 TranslationBlock *tb;
4137 target_ulong pc, cs_base;
4140 tb = tb_find_pc(retaddr);
4142 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4145 n = env->icount_decr.u16.low + tb->icount;
4146 cpu_restore_state(tb, env, retaddr);
4147     /* Calculate how many instructions had been executed before the fault occurred. */
4149 n = n - env->icount_decr.u16.low;
4150 /* Generate a new TB ending on the I/O insn. */
4152 /* On MIPS and SH, delay slot instructions can only be restarted if
4153 they were already the first instruction in the TB. If this is not
4154        the first instruction in a TB then re-execute the preceding branch. */
4156 #if defined(TARGET_MIPS)
4157 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4158 env->active_tc.PC -= 4;
4159 env->icount_decr.u16.low++;
4160 env->hflags &= ~MIPS_HFLAG_BMASK;
4162 #elif defined(TARGET_SH4)
4163 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4166 env->icount_decr.u16.low++;
4167 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4170 /* This should never happen. */
4171 if (n > CF_COUNT_MASK) {
4172 cpu_abort(env, "TB too big during recompile");
4175 cflags = n | CF_LAST_IO;
4177 cs_base = tb->cs_base;
4179 tb_phys_invalidate(tb, -1);
4180 /* FIXME: In theory this could raise an exception. In practice
4181 we have already translated the block once so it's probably ok. */
4182 tb_gen_code(env, pc, cs_base, flags, cflags);
4183 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4184 the first in the TB) then we end up generating a whole new TB and
4185 repeating the fault, which is horribly inefficient.
4186        Better would be to execute just this insn uncached, or generate a second new TB. */
4188 cpu_resume_from_signal(env, NULL);
4191 #if !defined(CONFIG_USER_ONLY)
4193 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4195 int i, target_code_size, max_target_code_size;
4196 int direct_jmp_count, direct_jmp2_count, cross_page;
4197 TranslationBlock *tb;
4199 target_code_size = 0;
4200 max_target_code_size = 0;
4202 direct_jmp_count = 0;
4203 direct_jmp2_count = 0;
4204 for (i = 0; i < nb_tbs; i++) {
4206 target_code_size += tb->size;
4207 if (tb->size > max_target_code_size) {
4208 max_target_code_size = tb->size;
4210 if (tb->page_addr[1] != -1) {
4213 if (tb->tb_next_offset[0] != 0xffff) {
4215 if (tb->tb_next_offset[1] != 0xffff) {
4216 direct_jmp2_count++;
4220 /* XXX: avoid using doubles ? */
4221 cpu_fprintf(f, "Translation buffer state:\n");
4222 cpu_fprintf(f, "gen code size %td/%zd\n",
4223 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4224 cpu_fprintf(f, "TB count %d/%d\n",
4225 nb_tbs, code_gen_max_blocks);
4226 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4227 nb_tbs ? target_code_size / nb_tbs : 0,
4228 max_target_code_size);
4229 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4230 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4231 target_code_size ? (double) (code_gen_ptr - code_gen_buffer)
4232 / target_code_size : 0);
4233 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4235 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4236 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4238 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4240 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4241 cpu_fprintf(f, "\nStatistics:\n");
4242 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4243 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4244 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4245 tcg_dump_info(f, cpu_fprintf);
4249 * A helper function for the _utterly broken_ virtio device model to find out if
4250 * it's running on a big endian machine. Don't do this at home kids!
4252 bool virtio_is_big_endian(void);
4253 bool virtio_is_big_endian(void)
4255 #if defined(TARGET_WORDS_BIGENDIAN)
4264 #ifndef CONFIG_USER_ONLY
4265 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4267 MemoryRegionSection *section;
4269 section = phys_page_find(address_space_memory.dispatch,
4270 phys_addr >> TARGET_PAGE_BITS);
4272 return !(memory_region_is_ram(section->mr) ||
4273 memory_region_is_romd(section->mr));