2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
35 #include "qemu-timer.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
47 #include <machine/profile.h>
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
62 #include "memory-internal.h"
64 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
95 #if !defined(CONFIG_USER_ONLY)
97 static int in_migration;
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self-modifying code, we count the number of
125 code write invalidations on this page and switch to a bitmap once it exceeds SMC_BITMAP_USE_THRESHOLD */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
145 /* Size of the L2 (and L3, etc) page tables. */
147 #define L2_SIZE (1 << L2_BITS)
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
159 #define V_L1_BITS V_L1_BITS_REM
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
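/* The map is walked from l1_map downwards: the top V_L1_BITS of a page
   index select an l1_map slot, and each further level consumes L2_BITS.
   page_find_alloc() below uses
       l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1))
   for the first level and
       p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1))
   for the intermediate levels.  Only the first level is statically
   allocated; the lower levels are allocated on demand. */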
174 #if !defined(CONFIG_USER_ONLY)
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
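/* Each node of the physical page map is an array of L2_SIZE PhysPageEntry
   and is referenced by its 16-bit index into phys_map_nodes[] rather than
   by pointer, which keeps the entries small; PHYS_MAP_NODE_NIL marks an
   absent child.  Nodes are only ever appended, and the whole pool is
   discarded by phys_map_nodes_reset() when the map is rebuilt. */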
189 static void io_mem_init(void);
190 static void memory_map_init(void);
192 static MemoryRegion io_mem_watch;
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
200 static inline void map_exec(void *addr, long size)
203 VirtualProtect(addr, size,
204 PAGE_EXECUTE_READWRITE, &old_protect);
208 static inline void map_exec(void *addr, long size)
210 unsigned long start, end, page_size;
212 page_size = getpagesize();
213 start = (unsigned long)addr;
214 start &= ~(page_size - 1);
216 end = (unsigned long)addr + size;
217 end += page_size - 1;
218 end &= ~(page_size - 1);
220 mprotect((void *)start, end - start,
221 PROT_READ | PROT_WRITE | PROT_EXEC);
225 static void page_init(void)
227 /* NOTE: we can always suppose that qemu_host_page_size >=
231 SYSTEM_INFO system_info;
233 GetSystemInfo(&system_info);
234 qemu_real_host_page_size = system_info.dwPageSize;
237 qemu_real_host_page_size = getpagesize();
239 if (qemu_host_page_size == 0)
240 qemu_host_page_size = qemu_real_host_page_size;
241 if (qemu_host_page_size < TARGET_PAGE_SIZE)
242 qemu_host_page_size = TARGET_PAGE_SIZE;
243 qemu_host_page_mask = ~(qemu_host_page_size - 1);
245 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
247 #ifdef HAVE_KINFO_GETVMMAP
248 struct kinfo_vmentry *freep;
251 freep = kinfo_getvmmap(getpid(), &cnt);
254 for (i = 0; i < cnt; i++) {
255 unsigned long startaddr, endaddr;
257 startaddr = freep[i].kve_start;
258 endaddr = freep[i].kve_end;
259 if (h2g_valid(startaddr)) {
260 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
262 if (h2g_valid(endaddr)) {
263 endaddr = h2g(endaddr);
264 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
266 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
268 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
279 last_brk = (unsigned long)sbrk(0);
281 f = fopen("/compat/linux/proc/self/maps", "r");
286 unsigned long startaddr, endaddr;
289 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
291 if (n == 2 && h2g_valid(startaddr)) {
292 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
294 if (h2g_valid(endaddr)) {
295 endaddr = h2g(endaddr);
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
340 ALLOC(p, sizeof(void *) * L2_SIZE);
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
358 return pd + (index & (L2_SIZE - 1));
361 static inline PageDesc *page_find(tb_page_addr_t index)
363 return page_find_alloc(index, 0);
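/* Unlike page_find_alloc(..., 1), page_find() never allocates intermediate
   levels, so it returns NULL for pages that have never been touched and
   callers must check for that. */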
366 #if !defined(CONFIG_USER_ONLY)
368 static void phys_map_node_reserve(unsigned nodes)
370 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371 typedef PhysPageEntry Node[L2_SIZE];
372 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374 phys_map_nodes_nb + nodes);
375 phys_map_nodes = g_renew(Node, phys_map_nodes,
376 phys_map_nodes_nb_alloc);
380 static uint16_t phys_map_node_alloc(void)
385 ret = phys_map_nodes_nb++;
386 assert(ret != PHYS_MAP_NODE_NIL);
387 assert(ret != phys_map_nodes_nb_alloc);
388 for (i = 0; i < L2_SIZE; ++i) {
389 phys_map_nodes[ret][i].is_leaf = 0;
390 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
395 static void phys_map_nodes_reset(void)
397 phys_map_nodes_nb = 0;
401 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402 hwaddr *nb, uint16_t leaf,
407 hwaddr step = (hwaddr)1 << (level * L2_BITS);
409 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410 lp->ptr = phys_map_node_alloc();
411 p = phys_map_nodes[lp->ptr];
413 for (i = 0; i < L2_SIZE; i++) {
415 p[i].ptr = phys_section_unassigned;
419 p = phys_map_nodes[lp->ptr];
421 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
423 while (*nb && lp < &p[L2_SIZE]) {
424 if ((*index & (step - 1)) == 0 && *nb >= step) {
430 phys_page_set_level(lp, index, nb, leaf, level - 1);
436 static void phys_page_set(AddressSpaceDispatch *d,
437 hwaddr index, hwaddr nb,
440 /* Wildly overreserve - it doesn't matter much. */
441 phys_map_node_reserve(3 * P_L2_LEVELS);
443 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
446 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
448 PhysPageEntry lp = d->phys_map;
451 uint16_t s_index = phys_section_unassigned;
453 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454 if (lp.ptr == PHYS_MAP_NODE_NIL) {
457 p = phys_map_nodes[lp.ptr];
458 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
463 return &phys_sections[s_index];
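/* Note that phys_page_find() never returns NULL: a lookup that falls off
   the map keeps the initial phys_section_unassigned index, so callers
   always get a valid MemoryRegionSection, possibly the unassigned one. */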
466 bool memory_region_is_unassigned(MemoryRegion *mr)
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. This will change when a dedicated libc is used. */
480 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
493 /* Minimum size of the code gen buffer. This number is chosen somewhat arbitrarily,
494 but not so small that we can't have a fair number of TB's live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
519 static inline size_t size_code_gen_buffer(size_t tb_size)
521 /* Size the buffer. */
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
539 code_gen_buffer_size = tb_size;
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
547 static inline void *alloc_code_gen_buffer(void)
549 map_exec(static_code_gen_buffer, code_gen_buffer_size);
550 return static_code_gen_buffer;
552 #elif defined(USE_MMAP)
553 static inline void *alloc_code_gen_buffer(void)
555 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
559 /* Constrain the position of the buffer based on the host cpu.
560 Note that these addresses are chosen in concert with the
561 addresses assigned in the relevant linker script file. */
562 # if defined(__PIE__) || defined(__PIC__)
563 /* Don't bother setting a preferred location if we're building
564 a position-independent executable. We're more likely to get
565 an address near the main executable if we let the kernel
566 choose the address. */
567 # elif defined(__x86_64__) && defined(MAP_32BIT)
568 /* Force the memory down into low memory with the executable.
569 Leave the choice of exact location with the kernel. */
571 /* Cannot expect to map more than 800MB in low memory. */
572 if (code_gen_buffer_size > 800u * 1024 * 1024) {
573 code_gen_buffer_size = 800u * 1024 * 1024;
575 # elif defined(__sparc__)
576 start = 0x40000000ul;
577 # elif defined(__s390x__)
578 start = 0x90000000ul;
581 buf = mmap((void *)start, code_gen_buffer_size,
582 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583 return buf == MAP_FAILED ? NULL : buf;
586 static inline void *alloc_code_gen_buffer(void)
588 void *buf = g_malloc(code_gen_buffer_size);
590 map_exec(buf, code_gen_buffer_size);
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
596 static inline void code_gen_alloc(size_t tb_size)
598 code_gen_buffer_size = size_code_gen_buffer(tb_size);
599 code_gen_buffer = alloc_code_gen_buffer();
600 if (code_gen_buffer == NULL) {
601 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
605 /* Steal room for the prologue at the end of the buffer. This ensures
606 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607 from TB's to the prologue are going to be in range. It also means
608 that we don't need to mark (additional) portions of the data segment
610 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611 code_gen_buffer_size -= 1024;
613 code_gen_buffer_max_size = code_gen_buffer_size -
614 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
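/* The resulting layout: TB code grows upwards from code_gen_buffer, the
   last 1024 bytes hold the TCG prologue, and code_gen_buffer_max_size
   leaves enough slack for one worst-case translation block so that the TB
   currently being generated cannot overrun the buffer; tb_alloc() forces a
   flush once code_gen_ptr crosses that threshold. */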
619 /* Must be called before using the QEMU cpus. 'tb_size' is the size
620 (in bytes) allocated to the translation buffer. Zero means default
622 void tcg_exec_init(unsigned long tb_size)
625 code_gen_alloc(tb_size);
626 code_gen_ptr = code_gen_buffer;
627 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
629 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630 /* There's no guest base to take into account, so go ahead and
631 initialize the prologue now. */
632 tcg_prologue_init(&tcg_ctx);
636 bool tcg_enabled(void)
638 return code_gen_buffer != NULL;
641 void cpu_exec_init_all(void)
643 #if !defined(CONFIG_USER_ONLY)
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
651 static int cpu_common_post_load(void *opaque, int version_id)
653 CPUArchState *env = opaque;
655 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656 version_id is increased. */
657 env->interrupt_request &= ~0x01;
663 static const VMStateDescription vmstate_cpu_common = {
664 .name = "cpu_common",
666 .minimum_version_id = 1,
667 .minimum_version_id_old = 1,
668 .post_load = cpu_common_post_load,
669 .fields = (VMStateField []) {
670 VMSTATE_UINT32(halted, CPUArchState),
671 VMSTATE_UINT32(interrupt_request, CPUArchState),
672 VMSTATE_END_OF_LIST()
677 CPUArchState *qemu_get_cpu(int cpu)
679 CPUArchState *env = first_cpu;
682 if (env->cpu_index == cpu)
690 void cpu_exec_init(CPUArchState *env)
692 #ifndef CONFIG_USER_ONLY
693 CPUState *cpu = ENV_GET_CPU(env);
698 #if defined(CONFIG_USER_ONLY)
701 env->next_cpu = NULL;
704 while (*penv != NULL) {
705 penv = &(*penv)->next_cpu;
708 env->cpu_index = cpu_index;
710 QTAILQ_INIT(&env->breakpoints);
711 QTAILQ_INIT(&env->watchpoints);
712 #ifndef CONFIG_USER_ONLY
713 cpu->thread_id = qemu_get_thread_id();
716 #if defined(CONFIG_USER_ONLY)
719 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
720 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
721 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
722 cpu_save, cpu_load, env);
726 /* Allocate a new translation block. Flush the translation buffer if
727 too many translation blocks or too much generated code. */
728 static TranslationBlock *tb_alloc(target_ulong pc)
730 TranslationBlock *tb;
732 if (nb_tbs >= code_gen_max_blocks ||
733 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
741 void tb_free(TranslationBlock *tb)
743 /* In practice this is mostly used for single use temporary TB
744 Ignore the hard cases and just back up if this TB happens to
745 be the last one generated. */
746 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
747 code_gen_ptr = tb->tc_ptr;
752 static inline void invalidate_page_bitmap(PageDesc *p)
754 if (p->code_bitmap) {
755 g_free(p->code_bitmap);
756 p->code_bitmap = NULL;
758 p->code_write_count = 0;
761 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
763 static void page_flush_tb_1 (int level, void **lp)
772 for (i = 0; i < L2_SIZE; ++i) {
773 pd[i].first_tb = NULL;
774 invalidate_page_bitmap(pd + i);
778 for (i = 0; i < L2_SIZE; ++i) {
779 page_flush_tb_1 (level - 1, pp + i);
784 static void page_flush_tb(void)
787 for (i = 0; i < V_L1_SIZE; i++) {
788 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
792 /* flush all the translation blocks */
793 /* XXX: tb_flush is currently not thread safe */
794 void tb_flush(CPUArchState *env1)
797 #if defined(DEBUG_FLUSH)
798 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
799 (unsigned long)(code_gen_ptr - code_gen_buffer),
801 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
803 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
804 cpu_abort(env1, "Internal error: code buffer overflow\n");
808 for(env = first_cpu; env != NULL; env = env->next_cpu) {
809 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
812 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
815 code_gen_ptr = code_gen_buffer;
816 /* XXX: flush processor icache at this point if cache flush is
821 #ifdef DEBUG_TB_CHECK
823 static void tb_invalidate_check(target_ulong address)
825 TranslationBlock *tb;
827 address &= TARGET_PAGE_MASK;
828 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
829 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
830 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
831 address >= tb->pc + tb->size)) {
832 printf("ERROR invalidate: address=" TARGET_FMT_lx
833 " PC=%08lx size=%04x\n",
834 address, (long)tb->pc, tb->size);
840 /* verify that all the pages have correct rights for code */
841 static void tb_page_check(void)
843 TranslationBlock *tb;
844 int i, flags1, flags2;
846 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
847 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
848 flags1 = page_get_flags(tb->pc);
849 flags2 = page_get_flags(tb->pc + tb->size - 1);
850 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
851 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
852 (long)tb->pc, tb->size, flags1, flags2);
860 /* invalidate one TB */
861 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
864 TranslationBlock *tb1;
868 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
871 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
875 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
877 TranslationBlock *tb1;
882 n1 = (uintptr_t)tb1 & 3;
883 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
885 *ptb = tb1->page_next[n1];
888 ptb = &tb1->page_next[n1];
892 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
894 TranslationBlock *tb1, **ptb;
897 ptb = &tb->jmp_next[n];
900 /* find tb(n) in circular list */
903 n1 = (uintptr_t)tb1 & 3;
904 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
905 if (n1 == n && tb1 == tb)
908 ptb = &tb1->jmp_first;
910 ptb = &tb1->jmp_next[n1];
913 /* now we can suppress tb(n) from the list */
914 *ptb = tb->jmp_next[n];
916 tb->jmp_next[n] = NULL;
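/* The TB lists above encode a tag in the low two bits of each pointer
   (TranslationBlock is sufficiently aligned): a value of 0 or 1 says
   through which of the TB's two slots (page_next[] / jmp_next[]) the list
   continues, and 2 marks the jmp_first head of the circular jump list,
   cf. the "(uintptr_t)tb | 2" assignments below. */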
920 /* reset the jump entry 'n' of a TB so that it is not chained to
922 static inline void tb_reset_jump(TranslationBlock *tb, int n)
924 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
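/* Resetting a jump points it back at tb->tc_ptr + tb->tb_next_offset[n],
   i.e. the code that originally followed the jump inside this TB, so the
   block falls through to its own exit path instead of chaining directly
   into another TB. */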
927 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
932 tb_page_addr_t phys_pc;
933 TranslationBlock *tb1, *tb2;
935 /* remove the TB from the hash list */
936 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
937 h = tb_phys_hash_func(phys_pc);
938 tb_remove(&tb_phys_hash[h], tb,
939 offsetof(TranslationBlock, phys_hash_next));
941 /* remove the TB from the page list */
942 if (tb->page_addr[0] != page_addr) {
943 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
944 tb_page_remove(&p->first_tb, tb);
945 invalidate_page_bitmap(p);
947 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
948 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
949 tb_page_remove(&p->first_tb, tb);
950 invalidate_page_bitmap(p);
953 tb_invalidated_flag = 1;
955 /* remove the TB from the hash list */
956 h = tb_jmp_cache_hash_func(tb->pc);
957 for(env = first_cpu; env != NULL; env = env->next_cpu) {
958 if (env->tb_jmp_cache[h] == tb)
959 env->tb_jmp_cache[h] = NULL;
962 /* suppress this TB from the two jump lists */
963 tb_jmp_remove(tb, 0);
964 tb_jmp_remove(tb, 1);
966 /* suppress any remaining jumps to this TB */
969 n1 = (uintptr_t)tb1 & 3;
972 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
973 tb2 = tb1->jmp_next[n1];
974 tb_reset_jump(tb1, n1);
975 tb1->jmp_next[n1] = NULL;
978 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
980 tb_phys_invalidate_count++;
983 static inline void set_bits(uint8_t *tab, int start, int len)
989 mask = 0xff << (start & 7);
990 if ((start & ~7) == (end & ~7)) {
992 mask &= ~(0xff << (end & 7));
997 start = (start + 8) & ~7;
999 while (start < end1) {
1004 mask = ~(0xff << (end & 7));
1010 static void build_page_bitmap(PageDesc *p)
1012 int n, tb_start, tb_end;
1013 TranslationBlock *tb;
1015 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1018 while (tb != NULL) {
1019 n = (uintptr_t)tb & 3;
1020 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1021 /* NOTE: this is subtle as a TB may span two physical pages */
1023 /* NOTE: tb_end may be after the end of the page, but
1024 it is not a problem */
1025 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1026 tb_end = tb_start + tb->size;
1027 if (tb_end > TARGET_PAGE_SIZE)
1028 tb_end = TARGET_PAGE_SIZE;
1031 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1033 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1034 tb = tb->page_next[n];
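/* The bitmap has one bit per byte of the guest page, set wherever some
   TB's code lives.  tb_invalidate_phys_page_fast() below consults it so
   that writes which do not touch translated code can skip the expensive
   tb_invalidate_phys_page_range() path. */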
1038 TranslationBlock *tb_gen_code(CPUArchState *env,
1039 target_ulong pc, target_ulong cs_base,
1040 int flags, int cflags)
1042 TranslationBlock *tb;
1044 tb_page_addr_t phys_pc, phys_page2;
1045 target_ulong virt_page2;
1048 phys_pc = get_page_addr_code(env, pc);
1051 /* flush must be done */
1053 /* cannot fail at this point */
1055 /* Don't forget to invalidate previous TB info. */
1056 tb_invalidated_flag = 1;
1058 tc_ptr = code_gen_ptr;
1059 tb->tc_ptr = tc_ptr;
1060 tb->cs_base = cs_base;
1062 tb->cflags = cflags;
1063 cpu_gen_code(env, tb, &code_gen_size);
1064 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1065 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1067 /* check next page if needed */
1068 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1070 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1071 phys_page2 = get_page_addr_code(env, virt_page2);
1073 tb_link_page(tb, phys_pc, phys_page2);
1078 * Invalidate all TBs which intersect with the target physical address range
1079 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1080 * 'is_cpu_write_access' should be true if called from a real cpu write
1081 * access: the virtual CPU will exit the current TB if code is modified inside
1084 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1085 int is_cpu_write_access)
1087 while (start < end) {
1088 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1089 start &= TARGET_PAGE_MASK;
1090 start += TARGET_PAGE_SIZE;
1095 * Invalidate all TBs which intersect with the target physical address range
1096 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1097 * 'is_cpu_write_access' should be true if called from a real cpu write
1098 * access: the virtual CPU will exit the current TB if code is modified inside
1101 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1102 int is_cpu_write_access)
1104 TranslationBlock *tb, *tb_next, *saved_tb;
1105 CPUArchState *env = cpu_single_env;
1106 tb_page_addr_t tb_start, tb_end;
1109 #ifdef TARGET_HAS_PRECISE_SMC
1110 int current_tb_not_found = is_cpu_write_access;
1111 TranslationBlock *current_tb = NULL;
1112 int current_tb_modified = 0;
1113 target_ulong current_pc = 0;
1114 target_ulong current_cs_base = 0;
1115 int current_flags = 0;
1116 #endif /* TARGET_HAS_PRECISE_SMC */
1118 p = page_find(start >> TARGET_PAGE_BITS);
1121 if (!p->code_bitmap &&
1122 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1123 is_cpu_write_access) {
1124 /* build code bitmap */
1125 build_page_bitmap(p);
1128 /* we remove all the TBs in the range [start, end[ */
1129 /* XXX: see if in some cases it could be faster to invalidate all the code */
1131 while (tb != NULL) {
1132 n = (uintptr_t)tb & 3;
1133 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1134 tb_next = tb->page_next[n];
1135 /* NOTE: this is subtle as a TB may span two physical pages */
1137 /* NOTE: tb_end may be after the end of the page, but
1138 it is not a problem */
1139 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1140 tb_end = tb_start + tb->size;
1142 tb_start = tb->page_addr[1];
1143 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1145 if (!(tb_end <= start || tb_start >= end)) {
1146 #ifdef TARGET_HAS_PRECISE_SMC
1147 if (current_tb_not_found) {
1148 current_tb_not_found = 0;
1150 if (env->mem_io_pc) {
1151 /* now we have a real cpu fault */
1152 current_tb = tb_find_pc(env->mem_io_pc);
1155 if (current_tb == tb &&
1156 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1157 /* If we are modifying the current TB, we must stop
1158 its execution. We could be more precise by checking
1159 that the modification is after the current PC, but it
1160 would require a specialized function to partially
1161 restore the CPU state */
1163 current_tb_modified = 1;
1164 cpu_restore_state(current_tb, env, env->mem_io_pc);
1165 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1168 #endif /* TARGET_HAS_PRECISE_SMC */
1169 /* we need to do that to handle the case where a signal
1170 occurs while doing tb_phys_invalidate() */
1173 saved_tb = env->current_tb;
1174 env->current_tb = NULL;
1176 tb_phys_invalidate(tb, -1);
1178 env->current_tb = saved_tb;
1179 if (env->interrupt_request && env->current_tb)
1180 cpu_interrupt(env, env->interrupt_request);
1185 #if !defined(CONFIG_USER_ONLY)
1186 /* if no code remaining, no need to continue to use slow writes */
1188 invalidate_page_bitmap(p);
1189 if (is_cpu_write_access) {
1190 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1194 #ifdef TARGET_HAS_PRECISE_SMC
1195 if (current_tb_modified) {
1196 /* we generate a block containing just the instruction
1197 modifying the memory. It will ensure that it cannot modify
1199 env->current_tb = NULL;
1200 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1201 cpu_resume_from_signal(env, NULL);
1206 /* len must be <= 8 and start must be a multiple of len */
1207 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1213 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1214 cpu_single_env->mem_io_vaddr, len,
1215 cpu_single_env->eip,
1216 cpu_single_env->eip +
1217 (intptr_t)cpu_single_env->segs[R_CS].base);
1220 p = page_find(start >> TARGET_PAGE_BITS);
1223 if (p->code_bitmap) {
1224 offset = start & ~TARGET_PAGE_MASK;
1225 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1226 if (b & ((1 << len) - 1))
1230 tb_invalidate_phys_page_range(start, start + len, 1);
1234 #if !defined(CONFIG_SOFTMMU)
1235 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1236 uintptr_t pc, void *puc)
1238 TranslationBlock *tb;
1241 #ifdef TARGET_HAS_PRECISE_SMC
1242 TranslationBlock *current_tb = NULL;
1243 CPUArchState *env = cpu_single_env;
1244 int current_tb_modified = 0;
1245 target_ulong current_pc = 0;
1246 target_ulong current_cs_base = 0;
1247 int current_flags = 0;
1250 addr &= TARGET_PAGE_MASK;
1251 p = page_find(addr >> TARGET_PAGE_BITS);
1255 #ifdef TARGET_HAS_PRECISE_SMC
1256 if (tb && pc != 0) {
1257 current_tb = tb_find_pc(pc);
1260 while (tb != NULL) {
1261 n = (uintptr_t)tb & 3;
1262 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1263 #ifdef TARGET_HAS_PRECISE_SMC
1264 if (current_tb == tb &&
1265 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1266 /* If we are modifying the current TB, we must stop
1267 its execution. We could be more precise by checking
1268 that the modification is after the current PC, but it
1269 would require a specialized function to partially
1270 restore the CPU state */
1272 current_tb_modified = 1;
1273 cpu_restore_state(current_tb, env, pc);
1274 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1277 #endif /* TARGET_HAS_PRECISE_SMC */
1278 tb_phys_invalidate(tb, addr);
1279 tb = tb->page_next[n];
1282 #ifdef TARGET_HAS_PRECISE_SMC
1283 if (current_tb_modified) {
1284 /* we generate a block containing just the instruction
1285 modifying the memory. It will ensure that it cannot modify
1287 env->current_tb = NULL;
1288 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1289 cpu_resume_from_signal(env, puc);
1295 /* add the TB to the target page and protect it if necessary */
1296 static inline void tb_alloc_page(TranslationBlock *tb,
1297 unsigned int n, tb_page_addr_t page_addr)
1300 #ifndef CONFIG_USER_ONLY
1301 bool page_already_protected;
1304 tb->page_addr[n] = page_addr;
1305 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1306 tb->page_next[n] = p->first_tb;
1307 #ifndef CONFIG_USER_ONLY
1308 page_already_protected = p->first_tb != NULL;
1310 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1311 invalidate_page_bitmap(p);
1313 #if defined(TARGET_HAS_SMC) || 1
1315 #if defined(CONFIG_USER_ONLY)
1316 if (p->flags & PAGE_WRITE) {
1321 /* force the host page as non writable (writes will have a
1322 page fault + mprotect overhead) */
1323 page_addr &= qemu_host_page_mask;
1325 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1326 addr += TARGET_PAGE_SIZE) {
1328 p2 = page_find (addr >> TARGET_PAGE_BITS);
1332 p2->flags &= ~PAGE_WRITE;
1334 mprotect(g2h(page_addr), qemu_host_page_size,
1335 (prot & PAGE_BITS) & ~PAGE_WRITE);
1336 #ifdef DEBUG_TB_INVALIDATE
1337 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1342 /* if some code is already present, then the pages are already
1343 protected. So we handle the case where only the first TB is
1344 allocated in a physical page */
1345 if (!page_already_protected) {
1346 tlb_protect_code(page_addr);
1350 #endif /* TARGET_HAS_SMC */
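/* Two different write-protection schemes are used against self-modifying
   code: in user mode the host page is mprotect()ed read-only above, so a
   guest write faults and is handled by page_unprotect() from the signal
   handler; in system mode tlb_protect_code() instead arranges for writes
   to the page to be trapped through the softmmu dirty tracking. */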
1353 /* add a new TB and link it to the physical page tables. phys_page2 is
1354 (-1) to indicate that only one page contains the TB. */
1355 void tb_link_page(TranslationBlock *tb,
1356 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1359 TranslationBlock **ptb;
1361 /* Grab the mmap lock to stop another thread invalidating this TB
1362 before we are done. */
1364 /* add in the physical hash table */
1365 h = tb_phys_hash_func(phys_pc);
1366 ptb = &tb_phys_hash[h];
1367 tb->phys_hash_next = *ptb;
1370 /* add in the page list */
1371 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1372 if (phys_page2 != -1)
1373 tb_alloc_page(tb, 1, phys_page2);
1375 tb->page_addr[1] = -1;
1377 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1378 tb->jmp_next[0] = NULL;
1379 tb->jmp_next[1] = NULL;
1381 /* init original jump addresses */
1382 if (tb->tb_next_offset[0] != 0xffff)
1383 tb_reset_jump(tb, 0);
1384 if (tb->tb_next_offset[1] != 0xffff)
1385 tb_reset_jump(tb, 1);
1387 #ifdef DEBUG_TB_CHECK
1393 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1394 tb[1].tc_ptr. Return NULL if not found */
1395 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1397 int m_min, m_max, m;
1399 TranslationBlock *tb;
1403 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1404 tc_ptr >= (uintptr_t)code_gen_ptr) {
1407 /* binary search (cf Knuth) */
1410 while (m_min <= m_max) {
1411 m = (m_min + m_max) >> 1;
1413 v = (uintptr_t)tb->tc_ptr;
1416 else if (tc_ptr < v) {
1425 static void tb_reset_jump_recursive(TranslationBlock *tb);
1427 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1429 TranslationBlock *tb1, *tb_next, **ptb;
1432 tb1 = tb->jmp_next[n];
1434 /* find head of list */
1436 n1 = (uintptr_t)tb1 & 3;
1437 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1440 tb1 = tb1->jmp_next[n1];
1442 /* we are now sure that tb jumps to tb1 */
1445 /* remove tb from the jmp_first list */
1446 ptb = &tb_next->jmp_first;
1449 n1 = (uintptr_t)tb1 & 3;
1450 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1451 if (n1 == n && tb1 == tb)
1453 ptb = &tb1->jmp_next[n1];
1455 *ptb = tb->jmp_next[n];
1456 tb->jmp_next[n] = NULL;
1458 /* suppress the jump to next tb in generated code */
1459 tb_reset_jump(tb, n);
1461 /* suppress jumps in the tb on which we could have jumped */
1462 tb_reset_jump_recursive(tb_next);
1466 static void tb_reset_jump_recursive(TranslationBlock *tb)
1468 tb_reset_jump_recursive2(tb, 0);
1469 tb_reset_jump_recursive2(tb, 1);
1472 #if defined(TARGET_HAS_ICE)
1473 #if defined(CONFIG_USER_ONLY)
1474 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1476 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1479 void tb_invalidate_phys_addr(hwaddr addr)
1481 ram_addr_t ram_addr;
1482 MemoryRegionSection *section;
1484 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1485 if (!(memory_region_is_ram(section->mr)
1486 || (section->mr->rom_device && section->mr->readable))) {
1489 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1490 + memory_region_section_addr(section, addr);
1491 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1494 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1496 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1497 (pc & ~TARGET_PAGE_MASK));
1500 #endif /* TARGET_HAS_ICE */
1502 #if defined(CONFIG_USER_ONLY)
1503 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1508 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1509 int flags, CPUWatchpoint **watchpoint)
1514 /* Add a watchpoint. */
1515 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1516 int flags, CPUWatchpoint **watchpoint)
1518 target_ulong len_mask = ~(len - 1);
1521 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1522 if ((len & (len - 1)) || (addr & ~len_mask) ||
1523 len == 0 || len > TARGET_PAGE_SIZE) {
1524 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1525 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1528 wp = g_malloc(sizeof(*wp));
1531 wp->len_mask = len_mask;
1534 /* keep all GDB-injected watchpoints in front */
1536 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1538 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1540 tlb_flush_page(env, addr);
1547 /* Remove a specific watchpoint. */
1548 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1551 target_ulong len_mask = ~(len - 1);
1554 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1555 if (addr == wp->vaddr && len_mask == wp->len_mask
1556 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1557 cpu_watchpoint_remove_by_ref(env, wp);
1564 /* Remove a specific watchpoint by reference. */
1565 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1567 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1569 tlb_flush_page(env, watchpoint->vaddr);
1574 /* Remove all matching watchpoints. */
1575 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1577 CPUWatchpoint *wp, *next;
1579 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1580 if (wp->flags & mask)
1581 cpu_watchpoint_remove_by_ref(env, wp);
1586 /* Add a breakpoint. */
1587 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1588 CPUBreakpoint **breakpoint)
1590 #if defined(TARGET_HAS_ICE)
1593 bp = g_malloc(sizeof(*bp));
1598 /* keep all GDB-injected breakpoints in front */
1600 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1602 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1604 breakpoint_invalidate(env, pc);
1614 /* Remove a specific breakpoint. */
1615 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1617 #if defined(TARGET_HAS_ICE)
1620 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1621 if (bp->pc == pc && bp->flags == flags) {
1622 cpu_breakpoint_remove_by_ref(env, bp);
1632 /* Remove a specific breakpoint by reference. */
1633 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1635 #if defined(TARGET_HAS_ICE)
1636 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1638 breakpoint_invalidate(env, breakpoint->pc);
1644 /* Remove all matching breakpoints. */
1645 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1647 #if defined(TARGET_HAS_ICE)
1648 CPUBreakpoint *bp, *next;
1650 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1651 if (bp->flags & mask)
1652 cpu_breakpoint_remove_by_ref(env, bp);
1657 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1658 CPU loop after each instruction */
1659 void cpu_single_step(CPUArchState *env, int enabled)
1661 #if defined(TARGET_HAS_ICE)
1662 if (env->singlestep_enabled != enabled) {
1663 env->singlestep_enabled = enabled;
1665 kvm_update_guest_debug(env, 0);
1667 /* must flush all the translated code to avoid inconsistencies */
1668 /* XXX: only flush what is necessary */
1675 static void cpu_unlink_tb(CPUArchState *env)
1677 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1678 problem and hope the cpu will stop of its own accord. For userspace
1679 emulation this often isn't actually as bad as it sounds. Often
1680 signals are used primarily to interrupt blocking syscalls. */
1681 TranslationBlock *tb;
1682 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1684 spin_lock(&interrupt_lock);
1685 tb = env->current_tb;
1686 /* if the cpu is currently executing code, we must unlink it and
1687 all the potentially executing TB */
1689 env->current_tb = NULL;
1690 tb_reset_jump_recursive(tb);
1692 spin_unlock(&interrupt_lock);
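/* Unchaining only breaks the direct jumps between TBs; the CPU keeps
   executing the current block and drops back into the main execution loop
   at the next TB boundary, where the pending request is noticed. */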
1695 #ifndef CONFIG_USER_ONLY
1696 /* mask must never be zero, except for A20 change call */
1697 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1699 CPUState *cpu = ENV_GET_CPU(env);
1702 old_mask = env->interrupt_request;
1703 env->interrupt_request |= mask;
1706 * If called from iothread context, wake the target cpu in
1709 if (!qemu_cpu_is_self(cpu)) {
1715 env->icount_decr.u16.high = 0xffff;
1717 && (mask & ~old_mask) != 0) {
1718 cpu_abort(env, "Raised interrupt while not in I/O function");
1725 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1727 #else /* CONFIG_USER_ONLY */
1729 void cpu_interrupt(CPUArchState *env, int mask)
1731 env->interrupt_request |= mask;
1734 #endif /* CONFIG_USER_ONLY */
1736 void cpu_reset_interrupt(CPUArchState *env, int mask)
1738 env->interrupt_request &= ~mask;
1741 void cpu_exit(CPUArchState *env)
1743 env->exit_request = 1;
1747 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1754 fprintf(stderr, "qemu: fatal: ");
1755 vfprintf(stderr, fmt, ap);
1756 fprintf(stderr, "\n");
1757 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1758 if (qemu_log_enabled()) {
1759 qemu_log("qemu: fatal: ");
1760 qemu_log_vprintf(fmt, ap2);
1762 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1768 #if defined(CONFIG_USER_ONLY)
1770 struct sigaction act;
1771 sigfillset(&act.sa_mask);
1772 act.sa_handler = SIG_DFL;
1773 sigaction(SIGABRT, &act, NULL);
1779 CPUArchState *cpu_copy(CPUArchState *env)
1781 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1782 CPUArchState *next_cpu = new_env->next_cpu;
1783 int cpu_index = new_env->cpu_index;
1784 #if defined(TARGET_HAS_ICE)
1789 memcpy(new_env, env, sizeof(CPUArchState));
1791 /* Preserve chaining and index. */
1792 new_env->next_cpu = next_cpu;
1793 new_env->cpu_index = cpu_index;
1795 /* Clone all break/watchpoints.
1796 Note: Once we support ptrace with hw-debug register access, make sure
1797 BP_CPU break/watchpoints are handled correctly on clone. */
1798 QTAILQ_INIT(&env->breakpoints);
1799 QTAILQ_INIT(&env->watchpoints);
1800 #if defined(TARGET_HAS_ICE)
1801 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1802 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1804 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1805 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1813 #if !defined(CONFIG_USER_ONLY)
1814 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1818 /* Discard jump cache entries for any tb which might potentially
1819 overlap the flushed page. */
1820 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1821 memset (&env->tb_jmp_cache[i], 0,
1822 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1824 i = tb_jmp_cache_hash_page(addr);
1825 memset (&env->tb_jmp_cache[i], 0,
1826 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1829 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1834 /* we modify the TLB cache so that the dirty bit will be set again
1835 when accessing the range */
1836 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1837 /* Check that we don't span multiple blocks - this breaks the
1838 address comparisons below. */
1839 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1840 != (end - 1) - start) {
1843 cpu_tlb_reset_dirty_all(start1, length);
1847 /* Note: start and end must be within the same ram block. */
1848 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1853 start &= TARGET_PAGE_MASK;
1854 end = TARGET_PAGE_ALIGN(end);
1856 length = end - start;
1859 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1861 if (tcg_enabled()) {
1862 tlb_reset_dirty_range_all(start, end, length);
1866 int cpu_physical_memory_set_dirty_tracking(int enable)
1869 in_migration = enable;
1873 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1874 MemoryRegionSection *section,
1878 target_ulong *address)
1883 if (memory_region_is_ram(section->mr)) {
1885 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1886 + memory_region_section_addr(section, paddr);
1887 if (!section->readonly) {
1888 iotlb |= phys_section_notdirty;
1890 iotlb |= phys_section_rom;
1893 /* IO handlers are currently passed a physical address.
1894 It would be nice to pass an offset from the base address
1895 of that region. This would avoid having to special case RAM,
1896 and avoid full address decoding in every device.
1897 We can't use the high bits of pd for this because
1898 IO_MEM_ROMD uses these as a ram address. */
1899 iotlb = section - phys_sections;
1900 iotlb += memory_region_section_addr(section, paddr);
1903 /* Make accesses to pages with watchpoints go via the
1904 watchpoint trap routines. */
1905 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1906 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1907 /* Avoid trapping reads of pages with a write breakpoint. */
1908 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1909 iotlb = phys_section_watch + paddr;
1910 *address |= TLB_MMIO;
1921 * Walks guest process memory "regions" one by one
1922 * and calls callback function 'fn' for each region.
1925 struct walk_memory_regions_data
1927 walk_memory_regions_fn fn;
1933 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1934 abi_ulong end, int new_prot)
1936 if (data->start != -1ul) {
1937 int rc = data->fn(data->priv, data->start, end, data->prot);
1943 data->start = (new_prot ? end : -1ul);
1944 data->prot = new_prot;
1949 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1950 abi_ulong base, int level, void **lp)
1956 return walk_memory_regions_end(data, base, 0);
1961 for (i = 0; i < L2_SIZE; ++i) {
1962 int prot = pd[i].flags;
1964 pa = base | (i << TARGET_PAGE_BITS);
1965 if (prot != data->prot) {
1966 rc = walk_memory_regions_end(data, pa, prot);
1974 for (i = 0; i < L2_SIZE; ++i) {
1975 pa = base | ((abi_ulong)i <<
1976 (TARGET_PAGE_BITS + L2_BITS * level));
1977 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1987 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1989 struct walk_memory_regions_data data;
1997 for (i = 0; i < V_L1_SIZE; i++) {
1998 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1999 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2005 return walk_memory_regions_end(&data, 0, 0);
2008 static int dump_region(void *priv, abi_ulong start,
2009 abi_ulong end, unsigned long prot)
2011 FILE *f = (FILE *)priv;
2013 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2014 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2015 start, end, end - start,
2016 ((prot & PAGE_READ) ? 'r' : '-'),
2017 ((prot & PAGE_WRITE) ? 'w' : '-'),
2018 ((prot & PAGE_EXEC) ? 'x' : '-'));
2023 /* dump memory mappings */
2024 void page_dump(FILE *f)
2026 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2027 "start", "end", "size", "prot");
2028 walk_memory_regions(f, dump_region);
2031 int page_get_flags(target_ulong address)
2035 p = page_find(address >> TARGET_PAGE_BITS);
2041 /* Modify the flags of a page and invalidate the code if necessary.
2042 The flag PAGE_WRITE_ORG is positioned automatically depending
2043 on PAGE_WRITE. The mmap_lock should already be held. */
2044 void page_set_flags(target_ulong start, target_ulong end, int flags)
2046 target_ulong addr, len;
2048 /* This function should never be called with addresses outside the
2049 guest address space. If this assert fires, it probably indicates
2050 a missing call to h2g_valid. */
2051 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2052 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2054 assert(start < end);
2056 start = start & TARGET_PAGE_MASK;
2057 end = TARGET_PAGE_ALIGN(end);
2059 if (flags & PAGE_WRITE) {
2060 flags |= PAGE_WRITE_ORG;
2063 for (addr = start, len = end - start;
2065 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2066 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2068 /* If the write protection bit is set, then we invalidate
2070 if (!(p->flags & PAGE_WRITE) &&
2071 (flags & PAGE_WRITE) &&
2073 tb_invalidate_phys_page(addr, 0, NULL);
2079 int page_check_range(target_ulong start, target_ulong len, int flags)
2085 /* This function should never be called with addresses outside the
2086 guest address space. If this assert fires, it probably indicates
2087 a missing call to h2g_valid. */
2088 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2089 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2095 if (start + len - 1 < start) {
2096 /* We've wrapped around. */
2100 end = TARGET_PAGE_ALIGN(start + len); /* must do this before we lose bits in the next step */
2101 start = start & TARGET_PAGE_MASK;
2103 for (addr = start, len = end - start;
2105 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2106 p = page_find(addr >> TARGET_PAGE_BITS);
2109 if (!(p->flags & PAGE_VALID))
2112 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2114 if (flags & PAGE_WRITE) {
2115 if (!(p->flags & PAGE_WRITE_ORG))
2117 /* unprotect the page if it was put read-only because it
2118 contains translated code */
2119 if (!(p->flags & PAGE_WRITE)) {
2120 if (!page_unprotect(addr, 0, NULL))
2129 /* called from signal handler: invalidate the code and unprotect the
2130 page. Return TRUE if the fault was successfully handled. */
2131 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2135 target_ulong host_start, host_end, addr;
2137 /* Technically this isn't safe inside a signal handler. However we
2138 know this only ever happens in a synchronous SEGV handler, so in
2139 practice it seems to be ok. */
2142 p = page_find(address >> TARGET_PAGE_BITS);
2148 /* if the page was really writable, then we change its
2149 protection back to writable */
2150 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2151 host_start = address & qemu_host_page_mask;
2152 host_end = host_start + qemu_host_page_size;
2155 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2156 p = page_find(addr >> TARGET_PAGE_BITS);
2157 p->flags |= PAGE_WRITE;
2160 /* and since the content will be modified, we must invalidate
2161 the corresponding translated code. */
2162 tb_invalidate_phys_page(addr, pc, puc);
2163 #ifdef DEBUG_TB_CHECK
2164 tb_invalidate_check(addr);
2167 mprotect((void *)g2h(host_start), qemu_host_page_size,
2176 #endif /* defined(CONFIG_USER_ONLY) */
2178 #if !defined(CONFIG_USER_ONLY)
2180 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2181 typedef struct subpage_t {
2184 uint16_t sub_section[TARGET_PAGE_SIZE];
2187 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2189 static subpage_t *subpage_init(hwaddr base);
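/* A subpage is used when a memory region does not cover a whole target
   page: the page's leaf entry then points at a subpage_t whose
   sub_section[] array holds one phys_sections index per byte offset within
   the page, see register_subpage() and SUBPAGE_IDX(). */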
2190 static void destroy_page_desc(uint16_t section_index)
2192 MemoryRegionSection *section = &phys_sections[section_index];
2193 MemoryRegion *mr = section->mr;
2196 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2197 memory_region_destroy(&subpage->iomem);
2202 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2207 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2211 p = phys_map_nodes[lp->ptr];
2212 for (i = 0; i < L2_SIZE; ++i) {
2213 if (!p[i].is_leaf) {
2214 destroy_l2_mapping(&p[i], level - 1);
2216 destroy_page_desc(p[i].ptr);
2220 lp->ptr = PHYS_MAP_NODE_NIL;
2223 static void destroy_all_mappings(AddressSpaceDispatch *d)
2225 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2226 phys_map_nodes_reset();
2229 static uint16_t phys_section_add(MemoryRegionSection *section)
2231 if (phys_sections_nb == phys_sections_nb_alloc) {
2232 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2233 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2234 phys_sections_nb_alloc);
2236 phys_sections[phys_sections_nb] = *section;
2237 return phys_sections_nb++;
2240 static void phys_sections_clear(void)
2242 phys_sections_nb = 0;
2245 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2248 hwaddr base = section->offset_within_address_space
2250 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2251 MemoryRegionSection subsection = {
2252 .offset_within_address_space = base,
2253 .size = TARGET_PAGE_SIZE,
2257 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2259 if (!(existing->mr->subpage)) {
2260 subpage = subpage_init(base);
2261 subsection.mr = &subpage->iomem;
2262 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2263 phys_section_add(&subsection));
2265 subpage = container_of(existing->mr, subpage_t, iomem);
2267 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2268 end = start + section->size - 1;
2269 subpage_register(subpage, start, end, phys_section_add(section));
2273 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2275 hwaddr start_addr = section->offset_within_address_space;
2276 ram_addr_t size = section->size;
2278 uint16_t section_index = phys_section_add(section);
2283 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2287 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2289 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2290 MemoryRegionSection now = *section, remain = *section;
2292 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2293 || (now.size < TARGET_PAGE_SIZE)) {
2294 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2295 - now.offset_within_address_space,
2297 register_subpage(d, &now);
2298 remain.size -= now.size;
2299 remain.offset_within_address_space += now.size;
2300 remain.offset_within_region += now.size;
2302 while (remain.size >= TARGET_PAGE_SIZE) {
2304 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2305 now.size = TARGET_PAGE_SIZE;
2306 register_subpage(d, &now);
2308 now.size &= TARGET_PAGE_MASK;
2309 register_multipage(d, &now);
2311 remain.size -= now.size;
2312 remain.offset_within_address_space += now.size;
2313 remain.offset_within_region += now.size;
2317 register_subpage(d, &now);
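/* mem_add() therefore splits every section into up to three pieces: an
   unaligned head and tail that go through register_subpage(), and a
   page-aligned middle registered in one go via register_multipage(). */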
2321 void qemu_flush_coalesced_mmio_buffer(void)
2324 kvm_flush_coalesced_mmio_buffer();
2327 #if defined(__linux__) && !defined(TARGET_S390X)
2329 #include <sys/vfs.h>
2331 #define HUGETLBFS_MAGIC 0x958458f6
2333 static long gethugepagesize(const char *path)
2339 ret = statfs(path, &fs);
2340 } while (ret != 0 && errno == EINTR);
2347 if (fs.f_type != HUGETLBFS_MAGIC)
2348 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2353 static void *file_ram_alloc(RAMBlock *block,
2363 unsigned long hpagesize;
2365 hpagesize = gethugepagesize(path);
2370 if (memory < hpagesize) {
2374 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2375 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2379 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2383 fd = mkstemp(filename);
2385 perror("unable to create backing store for hugepages");
2392 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2395 * ftruncate is not supported by hugetlbfs in older
2396 * hosts, so don't bother bailing out on errors.
2397 * If anything goes wrong with it under other filesystems,
2400 if (ftruncate(fd, memory))
2401 perror("ftruncate");
2404 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2405 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2406 * to sidestep this quirk.
2408 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2409 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2411 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2413 if (area == MAP_FAILED) {
2414 perror("file_ram_alloc: can't mmap RAM pages");
2423 static ram_addr_t find_ram_offset(ram_addr_t size)
2425 RAMBlock *block, *next_block;
2426 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2428 if (QLIST_EMPTY(&ram_list.blocks))
2431 QLIST_FOREACH(block, &ram_list.blocks, next) {
2432 ram_addr_t end, next = RAM_ADDR_MAX;
2434 end = block->offset + block->length;
2436 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2437 if (next_block->offset >= end) {
2438 next = MIN(next, next_block->offset);
2441 if (next - end >= size && next - end < mingap) {
2443 mingap = next - end;
2447 if (offset == RAM_ADDR_MAX) {
2448 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
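/* find_ram_offset() is a best-fit search: among the gaps between existing
   RAMBlocks it picks the smallest one that still fits the requested size,
   which limits fragmentation of the ram_addr_t space. */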
2456 ram_addr_t last_ram_offset(void)
2459 ram_addr_t last = 0;
2461 QLIST_FOREACH(block, &ram_list.blocks, next)
2462 last = MAX(last, block->offset + block->length);
2467 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2470 QemuOpts *machine_opts;
2472 /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
2473 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2475 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2476 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2478 perror("qemu_madvise");
2479 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2480 "but dump_guest_core=off specified\n");
2485 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2487 RAMBlock *new_block, *block;
2490 QLIST_FOREACH(block, &ram_list.blocks, next) {
2491 if (block->offset == addr) {
2497 assert(!new_block->idstr[0]);
2500 char *id = qdev_get_dev_path(dev);
2502 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2506 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2508 QLIST_FOREACH(block, &ram_list.blocks, next) {
2509 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2510 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2517 static int memory_try_enable_merging(void *addr, size_t len)
2521 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2522 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2523 /* disabled by the user */
2527 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2530 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2533 RAMBlock *new_block;
2535 size = TARGET_PAGE_ALIGN(size);
2536 new_block = g_malloc0(sizeof(*new_block));
2539 new_block->offset = find_ram_offset(size);
2541 new_block->host = host;
2542 new_block->flags |= RAM_PREALLOC_MASK;
2545 #if defined (__linux__) && !defined(TARGET_S390X)
2546 new_block->host = file_ram_alloc(new_block, size, mem_path);
2547 if (!new_block->host) {
2548 new_block->host = qemu_vmalloc(size);
2549 memory_try_enable_merging(new_block->host, size);
2552 fprintf(stderr, "-mem-path option unsupported\n");
2556 if (xen_enabled()) {
2557 xen_ram_alloc(new_block->offset, size, mr);
2558 } else if (kvm_enabled()) {
2559 /* some s390/kvm configurations have special constraints */
2560 new_block->host = kvm_vmalloc(size);
2562 new_block->host = qemu_vmalloc(size);
2564 memory_try_enable_merging(new_block->host, size);
2567 new_block->length = size;
2569 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2571 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2572 last_ram_offset() >> TARGET_PAGE_BITS);
2573 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2574 0, size >> TARGET_PAGE_BITS);
2575 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2577 qemu_ram_setup_dump(new_block->host, size);
2578 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2581 kvm_setup_guest_memory(new_block->host, size);
2583 return new_block->offset;
2586 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2588 return qemu_ram_alloc_from_ptr(size, NULL, mr);
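/* Editor's sketch of typical use of the allocator above. Boards normally go
 * through memory_region_init_ram() rather than calling these helpers directly;
 * "mr" and the id string are hypothetical. */
#if 0
ram_addr_t offset = qemu_ram_alloc(64 * 1024 * 1024, mr);   /* 64 MiB block */
qemu_ram_set_idstr(offset, "example.ram", NULL);            /* stable id for migration */
uint8_t *host = qemu_get_ram_ptr(offset);                   /* host view of the block */
#endif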
2591 void qemu_ram_free_from_ptr(ram_addr_t addr)
2595 QLIST_FOREACH(block, &ram_list.blocks, next) {
2596 if (addr == block->offset) {
2597 QLIST_REMOVE(block, next);
2604 void qemu_ram_free(ram_addr_t addr)
2608 QLIST_FOREACH(block, &ram_list.blocks, next) {
2609 if (addr == block->offset) {
2610 QLIST_REMOVE(block, next);
2611 if (block->flags & RAM_PREALLOC_MASK) {
2613 } else if (mem_path) {
2614 #if defined (__linux__) && !defined(TARGET_S390X)
2616 munmap(block->host, block->length);
2619 qemu_vfree(block->host);
2625 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2626 munmap(block->host, block->length);
2628 if (xen_enabled()) {
2629 xen_invalidate_map_cache_entry(block->host);
2631 qemu_vfree(block->host);
2643 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2650 QLIST_FOREACH(block, &ram_list.blocks, next) {
2651 offset = addr - block->offset;
2652 if (offset < block->length) {
2653 vaddr = block->host + offset;
2654 if (block->flags & RAM_PREALLOC_MASK) {
2658 munmap(vaddr, length);
2660 #if defined(__linux__) && !defined(TARGET_S390X)
2663 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2666 flags |= MAP_PRIVATE;
2668 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2669 flags, block->fd, offset);
2671 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2672 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2679 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2680 flags |= MAP_SHARED | MAP_ANONYMOUS;
2681 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2684 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2685 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2689 if (area != vaddr) {
2690 fprintf(stderr, "Could not remap addr: "
2691 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2695 memory_try_enable_merging(vaddr, length);
2696 qemu_ram_setup_dump(vaddr, length);
2702 #endif /* !_WIN32 */
2704 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2705 With the exception of the softmmu code in this file, this should
2706 only be used for local memory (e.g. video ram) that the device owns,
2707 and knows it isn't going to access beyond the end of the block.
2709 It should not be used for general purpose DMA.
2710 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2712 void *qemu_get_ram_ptr(ram_addr_t addr)
2716 QLIST_FOREACH(block, &ram_list.blocks, next) {
2717 if (addr - block->offset < block->length) {
2718             /* Move this entry to the start of the list.  */
2719 if (block != QLIST_FIRST(&ram_list.blocks)) {
2720 QLIST_REMOVE(block, next);
2721 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2723 if (xen_enabled()) {
2724             /* We need to check whether the requested address is in RAM
2725              * because we don't want to map the entire guest memory in QEMU.
2726              * In that case, just map until the end of the page.
2728 if (block->offset == 0) {
2729 return xen_map_cache(addr, 0, 0);
2730 } else if (block->host == NULL) {
2732 xen_map_cache(block->offset, block->length, 1);
2735 return block->host + (addr - block->offset);
2739 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
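/* Editor's sketch of the contract described above: a display device that owns
 * its video RAM may keep a direct host pointer, but guest-visible DMA should go
 * through cpu_physical_memory_rw()/cpu_physical_memory_map(). "vram_offset" is
 * a hypothetical ram_addr_t previously returned by qemu_ram_alloc(). */
#if 0
uint8_t *vram = qemu_get_ram_ptr(vram_offset);   /* fine: device-local RAM only */
#endif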
2745 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2746 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2748 void *qemu_safe_ram_ptr(ram_addr_t addr)
2752 QLIST_FOREACH(block, &ram_list.blocks, next) {
2753 if (addr - block->offset < block->length) {
2754 if (xen_enabled()) {
2755             /* We need to check whether the requested address is in RAM
2756              * because we don't want to map the entire guest memory in QEMU.
2757              * In that case, just map until the end of the page.
2759 if (block->offset == 0) {
2760 return xen_map_cache(addr, 0, 0);
2761 } else if (block->host == NULL) {
2763 xen_map_cache(block->offset, block->length, 1);
2766 return block->host + (addr - block->offset);
2770 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2776 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2777 * but takes a size argument */
2778 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2783 if (xen_enabled()) {
2784 return xen_map_cache(addr, *size, 1);
2788 QLIST_FOREACH(block, &ram_list.blocks, next) {
2789 if (addr - block->offset < block->length) {
2790 if (addr - block->offset + *size > block->length)
2791 *size = block->length - addr + block->offset;
2792 return block->host + (addr - block->offset);
2796 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2801 void qemu_put_ram_ptr(void *addr)
2803 trace_qemu_put_ram_ptr(addr);
2806 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2809 uint8_t *host = ptr;
2811 if (xen_enabled()) {
2812 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2816 QLIST_FOREACH(block, &ram_list.blocks, next) {
2817         /* This case can happen when the block is not mapped. */
2818 if (block->host == NULL) {
2821 if (host - block->host < block->length) {
2822 *ram_addr = block->offset + (host - block->host);
2830 /* Some of the softmmu routines need to translate from a host pointer
2831 (typically a TLB entry) back to a ram offset. */
2832 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2834 ram_addr_t ram_addr;
2836 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2837 fprintf(stderr, "Bad ram pointer %p\n", ptr);
2843 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2846 #ifdef DEBUG_UNASSIGNED
2847 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2849 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2850 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2855 static void unassigned_mem_write(void *opaque, hwaddr addr,
2856 uint64_t val, unsigned size)
2858 #ifdef DEBUG_UNASSIGNED
2859 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2861 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2862 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2866 static const MemoryRegionOps unassigned_mem_ops = {
2867 .read = unassigned_mem_read,
2868 .write = unassigned_mem_write,
2869 .endianness = DEVICE_NATIVE_ENDIAN,
2872 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2878 static void error_mem_write(void *opaque, hwaddr addr,
2879 uint64_t value, unsigned size)
2884 static const MemoryRegionOps error_mem_ops = {
2885 .read = error_mem_read,
2886 .write = error_mem_write,
2887 .endianness = DEVICE_NATIVE_ENDIAN,
2890 static const MemoryRegionOps rom_mem_ops = {
2891 .read = error_mem_read,
2892 .write = unassigned_mem_write,
2893 .endianness = DEVICE_NATIVE_ENDIAN,
2896 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2897 uint64_t val, unsigned size)
2900 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2901 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2902 #if !defined(CONFIG_USER_ONLY)
2903 tb_invalidate_phys_page_fast(ram_addr, size);
2904 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2909 stb_p(qemu_get_ram_ptr(ram_addr), val);
2912 stw_p(qemu_get_ram_ptr(ram_addr), val);
2915 stl_p(qemu_get_ram_ptr(ram_addr), val);
2920 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2921 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2922     /* we remove the notdirty callback only if the code has been flushed */
2924 if (dirty_flags == 0xff)
2925 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2928 static const MemoryRegionOps notdirty_mem_ops = {
2929 .read = error_mem_read,
2930 .write = notdirty_mem_write,
2931 .endianness = DEVICE_NATIVE_ENDIAN,
2934 /* Generate a debug exception if a watchpoint has been hit. */
2935 static void check_watchpoint(int offset, int len_mask, int flags)
2937 CPUArchState *env = cpu_single_env;
2938 target_ulong pc, cs_base;
2939 TranslationBlock *tb;
2944 if (env->watchpoint_hit) {
2945 /* We re-entered the check after replacing the TB. Now raise
2946          * the debug interrupt so that it will trigger after the
2947 * current instruction. */
2948 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2951 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2952 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2953 if ((vaddr == (wp->vaddr & len_mask) ||
2954 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2955 wp->flags |= BP_WATCHPOINT_HIT;
2956 if (!env->watchpoint_hit) {
2957 env->watchpoint_hit = wp;
2958 tb = tb_find_pc(env->mem_io_pc);
2960 cpu_abort(env, "check_watchpoint: could not find TB for "
2961 "pc=%p", (void *)env->mem_io_pc);
2963 cpu_restore_state(tb, env, env->mem_io_pc);
2964 tb_phys_invalidate(tb, -1);
2965 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2966 env->exception_index = EXCP_DEBUG;
2969 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2970 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2971 cpu_resume_from_signal(env, NULL);
2975 wp->flags &= ~BP_WATCHPOINT_HIT;
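/* Editor's sketch of how a watchpoint that reaches the check above is normally
 * installed (e.g. by the gdbstub); the address, length and flags are
 * illustrative only. */
#if 0
CPUWatchpoint *wp;
cpu_watchpoint_insert(env, 0x1000, 4, BP_MEM_WRITE | BP_GDB, &wp);
#endif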
2980 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2981    so these check for a hit then pass through to the normal out-of-line device code.  */
2983 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2986 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2988 case 1: return ldub_phys(addr);
2989 case 2: return lduw_phys(addr);
2990 case 4: return ldl_phys(addr);
2995 static void watch_mem_write(void *opaque, hwaddr addr,
2996 uint64_t val, unsigned size)
2998 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
3001 stb_phys(addr, val);
3004 stw_phys(addr, val);
3007 stl_phys(addr, val);
3013 static const MemoryRegionOps watch_mem_ops = {
3014 .read = watch_mem_read,
3015 .write = watch_mem_write,
3016 .endianness = DEVICE_NATIVE_ENDIAN,
3019 static uint64_t subpage_read(void *opaque, hwaddr addr,
3022 subpage_t *mmio = opaque;
3023 unsigned int idx = SUBPAGE_IDX(addr);
3024 MemoryRegionSection *section;
3025 #if defined(DEBUG_SUBPAGE)
3026 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3027 mmio, len, addr, idx);
3030 section = &phys_sections[mmio->sub_section[idx]];
3032 addr -= section->offset_within_address_space;
3033 addr += section->offset_within_region;
3034 return io_mem_read(section->mr, addr, len);
3037 static void subpage_write(void *opaque, hwaddr addr,
3038 uint64_t value, unsigned len)
3040 subpage_t *mmio = opaque;
3041 unsigned int idx = SUBPAGE_IDX(addr);
3042 MemoryRegionSection *section;
3043 #if defined(DEBUG_SUBPAGE)
3044 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3045 " idx %d value %"PRIx64"\n",
3046 __func__, mmio, len, addr, idx, value);
3049 section = &phys_sections[mmio->sub_section[idx]];
3051 addr -= section->offset_within_address_space;
3052 addr += section->offset_within_region;
3053 io_mem_write(section->mr, addr, value, len);
3056 static const MemoryRegionOps subpage_ops = {
3057 .read = subpage_read,
3058 .write = subpage_write,
3059 .endianness = DEVICE_NATIVE_ENDIAN,
3062 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3065 ram_addr_t raddr = addr;
3066 void *ptr = qemu_get_ram_ptr(raddr);
3068 case 1: return ldub_p(ptr);
3069 case 2: return lduw_p(ptr);
3070 case 4: return ldl_p(ptr);
3075 static void subpage_ram_write(void *opaque, hwaddr addr,
3076 uint64_t value, unsigned size)
3078 ram_addr_t raddr = addr;
3079 void *ptr = qemu_get_ram_ptr(raddr);
3081 case 1: return stb_p(ptr, value);
3082 case 2: return stw_p(ptr, value);
3083 case 4: return stl_p(ptr, value);
3088 static const MemoryRegionOps subpage_ram_ops = {
3089 .read = subpage_ram_read,
3090 .write = subpage_ram_write,
3091 .endianness = DEVICE_NATIVE_ENDIAN,
3094 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3099 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3101 idx = SUBPAGE_IDX(start);
3102 eidx = SUBPAGE_IDX(end);
3103 #if defined(DEBUG_SUBPAGE)
3104 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3105 mmio, start, end, idx, eidx, memory);
3107 if (memory_region_is_ram(phys_sections[section].mr)) {
3108 MemoryRegionSection new_section = phys_sections[section];
3109 new_section.mr = &io_mem_subpage_ram;
3110 section = phys_section_add(&new_section);
3112 for (; idx <= eidx; idx++) {
3113 mmio->sub_section[idx] = section;
3119 static subpage_t *subpage_init(hwaddr base)
3123 mmio = g_malloc0(sizeof(subpage_t));
3126 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3127 "subpage", TARGET_PAGE_SIZE);
3128 mmio->iomem.subpage = true;
3129 #if defined(DEBUG_SUBPAGE)
3130 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3131 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3133 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
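/* Editor's illustration (hypothetical layout): a 1 KiB MMIO region starting at
 * offset 0x200 within a 4 KiB page gets a subpage_t whose sub_section[] entries
 * 0x200..0x5ff point at that region's section, while the remaining entries stay
 * at phys_section_unassigned as initialised by subpage_init() above. */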
3138 static uint16_t dummy_section(MemoryRegion *mr)
3140 MemoryRegionSection section = {
3142 .offset_within_address_space = 0,
3143 .offset_within_region = 0,
3147     return phys_section_add(&section);
3150 MemoryRegion *iotlb_to_region(hwaddr index)
3152 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3155 static void io_mem_init(void)
3157 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3158 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3159 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3160 "unassigned", UINT64_MAX);
3161     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3162 "notdirty", UINT64_MAX);
3163 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3164 "subpage-ram", UINT64_MAX);
3165 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3166 "watch", UINT64_MAX);
3169 static void mem_begin(MemoryListener *listener)
3171 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3173 destroy_all_mappings(d);
3174 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3177 static void core_begin(MemoryListener *listener)
3179 phys_sections_clear();
3180 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3181 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3182 phys_section_rom = dummy_section(&io_mem_rom);
3183 phys_section_watch = dummy_section(&io_mem_watch);
3186 static void tcg_commit(MemoryListener *listener)
3190 /* since each CPU stores ram addresses in its TLB cache, we must
3191 reset the modified entries */
3193 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3198 static void core_log_global_start(MemoryListener *listener)
3200 cpu_physical_memory_set_dirty_tracking(1);
3203 static void core_log_global_stop(MemoryListener *listener)
3205 cpu_physical_memory_set_dirty_tracking(0);
3208 static void io_region_add(MemoryListener *listener,
3209 MemoryRegionSection *section)
3211 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3213 mrio->mr = section->mr;
3214 mrio->offset = section->offset_within_region;
3215 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3216 section->offset_within_address_space, section->size);
3217 ioport_register(&mrio->iorange);
3220 static void io_region_del(MemoryListener *listener,
3221 MemoryRegionSection *section)
3223 isa_unassign_ioport(section->offset_within_address_space, section->size);
3226 static MemoryListener core_memory_listener = {
3227 .begin = core_begin,
3228 .log_global_start = core_log_global_start,
3229 .log_global_stop = core_log_global_stop,
3233 static MemoryListener io_memory_listener = {
3234 .region_add = io_region_add,
3235 .region_del = io_region_del,
3239 static MemoryListener tcg_memory_listener = {
3240 .commit = tcg_commit,
3243 void address_space_init_dispatch(AddressSpace *as)
3245 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3247 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3248 d->listener = (MemoryListener) {
3250 .region_add = mem_add,
3251 .region_nop = mem_add,
3255 memory_listener_register(&d->listener, as);
3258 void address_space_destroy_dispatch(AddressSpace *as)
3260 AddressSpaceDispatch *d = as->dispatch;
3262 memory_listener_unregister(&d->listener);
3263 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3265 as->dispatch = NULL;
3268 static void memory_map_init(void)
3270 system_memory = g_malloc(sizeof(*system_memory));
3271 memory_region_init(system_memory, "system", INT64_MAX);
3272 address_space_init(&address_space_memory, system_memory);
3273 address_space_memory.name = "memory";
3275 system_io = g_malloc(sizeof(*system_io));
3276 memory_region_init(system_io, "io", 65536);
3277 address_space_init(&address_space_io, system_io);
3278 address_space_io.name = "I/O";
3280 memory_listener_register(&core_memory_listener, &address_space_memory);
3281 memory_listener_register(&io_memory_listener, &address_space_io);
3282 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3285 MemoryRegion *get_system_memory(void)
3287 return system_memory;
3290 MemoryRegion *get_system_io(void)
3295 #endif /* !defined(CONFIG_USER_ONLY) */
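/* Editor's sketch of how a board wires a device's MemoryRegion into the system
 * memory tree built above; "dev_mr" and the base address are hypothetical. */
#if 0
memory_region_add_subregion(get_system_memory(), 0xfeed0000, dev_mr);
#endif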
3297 /* physical memory access (slow version, mainly for debug) */
3298 #if defined(CONFIG_USER_ONLY)
3299 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3300 uint8_t *buf, int len, int is_write)
3307 page = addr & TARGET_PAGE_MASK;
3308 l = (page + TARGET_PAGE_SIZE) - addr;
3311 flags = page_get_flags(page);
3312 if (!(flags & PAGE_VALID))
3315 if (!(flags & PAGE_WRITE))
3317 /* XXX: this code should not depend on lock_user */
3318 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3321 unlock_user(p, addr, l);
3323 if (!(flags & PAGE_READ))
3325 /* XXX: this code should not depend on lock_user */
3326 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3329 unlock_user(p, addr, 0);
3340 static void invalidate_and_set_dirty(hwaddr addr,
3343 if (!cpu_physical_memory_is_dirty(addr)) {
3344 /* invalidate code */
3345 tb_invalidate_phys_page_range(addr, addr + length, 0);
3347 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3349 xen_modified_memory(addr, length);
3352 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3353 int len, bool is_write)
3355 AddressSpaceDispatch *d = as->dispatch;
3360 MemoryRegionSection *section;
3363 page = addr & TARGET_PAGE_MASK;
3364 l = (page + TARGET_PAGE_SIZE) - addr;
3367 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3370 if (!memory_region_is_ram(section->mr)) {
3372 addr1 = memory_region_section_addr(section, addr);
3373             /* XXX: could force cpu_single_env to NULL to avoid potential bugs */
3375 if (l >= 4 && ((addr1 & 3) == 0)) {
3376 /* 32 bit write access */
3378 io_mem_write(section->mr, addr1, val, 4);
3380 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3381 /* 16 bit write access */
3383 io_mem_write(section->mr, addr1, val, 2);
3386 /* 8 bit write access */
3388 io_mem_write(section->mr, addr1, val, 1);
3391 } else if (!section->readonly) {
3393 addr1 = memory_region_get_ram_addr(section->mr)
3394 + memory_region_section_addr(section, addr);
3396 ptr = qemu_get_ram_ptr(addr1);
3397 memcpy(ptr, buf, l);
3398 invalidate_and_set_dirty(addr1, l);
3399 qemu_put_ram_ptr(ptr);
3402 if (!(memory_region_is_ram(section->mr) ||
3403 memory_region_is_romd(section->mr))) {
3406 addr1 = memory_region_section_addr(section, addr);
3407 if (l >= 4 && ((addr1 & 3) == 0)) {
3408 /* 32 bit read access */
3409 val = io_mem_read(section->mr, addr1, 4);
3412 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3413 /* 16 bit read access */
3414 val = io_mem_read(section->mr, addr1, 2);
3418 /* 8 bit read access */
3419 val = io_mem_read(section->mr, addr1, 1);
3425 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3426 + memory_region_section_addr(section,
3428 memcpy(buf, ptr, l);
3429 qemu_put_ram_ptr(ptr);
3438 void address_space_write(AddressSpace *as, hwaddr addr,
3439 const uint8_t *buf, int len)
3441 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3445 * address_space_read: read from an address space.
3447 * @as: #AddressSpace to be accessed
3448 * @addr: address within that address space
3449 * @buf: buffer with the data transferred
3451 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3453 address_space_rw(as, addr, buf, len, false);
3457 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3458 int len, int is_write)
3460 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
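/* Editor's sketch of DMA-style access through the helper above; "guest_pa" is
 * a hypothetical guest physical address. */
#if 0
uint8_t buf[4];
cpu_physical_memory_rw(guest_pa, buf, sizeof(buf), 0);   /* read 4 bytes from guest RAM */
cpu_physical_memory_rw(guest_pa, buf, sizeof(buf), 1);   /* write them back */
#endif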
3463 /* used for ROM loading : can write in RAM and ROM */
3464 void cpu_physical_memory_write_rom(hwaddr addr,
3465 const uint8_t *buf, int len)
3467 AddressSpaceDispatch *d = address_space_memory.dispatch;
3471 MemoryRegionSection *section;
3474 page = addr & TARGET_PAGE_MASK;
3475 l = (page + TARGET_PAGE_SIZE) - addr;
3478 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3480 if (!(memory_region_is_ram(section->mr) ||
3481 memory_region_is_romd(section->mr))) {
3484 unsigned long addr1;
3485 addr1 = memory_region_get_ram_addr(section->mr)
3486 + memory_region_section_addr(section, addr);
3488 ptr = qemu_get_ram_ptr(addr1);
3489 memcpy(ptr, buf, l);
3490 invalidate_and_set_dirty(addr1, l);
3491 qemu_put_ram_ptr(ptr);
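/* Editor's sketch of ROM loading through the helper above; the address, buffer
 * and size are hypothetical. */
#if 0
cpu_physical_memory_write_rom(0xfffc0000, rom_data, rom_size);
#endif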
3505 static BounceBuffer bounce;
3507 typedef struct MapClient {
3509 void (*callback)(void *opaque);
3510 QLIST_ENTRY(MapClient) link;
3513 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3514 = QLIST_HEAD_INITIALIZER(map_client_list);
3516 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3518 MapClient *client = g_malloc(sizeof(*client));
3520 client->opaque = opaque;
3521 client->callback = callback;
3522 QLIST_INSERT_HEAD(&map_client_list, client, link);
3526 void cpu_unregister_map_client(void *_client)
3528 MapClient *client = (MapClient *)_client;
3530 QLIST_REMOVE(client, link);
3534 static void cpu_notify_map_clients(void)
3538 while (!QLIST_EMPTY(&map_client_list)) {
3539 client = QLIST_FIRST(&map_client_list);
3540 client->callback(client->opaque);
3541 cpu_unregister_map_client(client);
3545 /* Map a physical memory region into a host virtual address.
3546 * May map a subset of the requested range, given by and returned in *plen.
3547 * May return NULL if resources needed to perform the mapping are exhausted.
3548 * Use only for reads OR writes - not for read-modify-write operations.
3549 * Use cpu_register_map_client() to know when retrying the map operation is
3550 * likely to succeed.
3552 void *address_space_map(AddressSpace *as,
3557 AddressSpaceDispatch *d = as->dispatch;
3562 MemoryRegionSection *section;
3563 ram_addr_t raddr = RAM_ADDR_MAX;
3568 page = addr & TARGET_PAGE_MASK;
3569 l = (page + TARGET_PAGE_SIZE) - addr;
3572 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3574 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3575 if (todo || bounce.buffer) {
3578 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3582 address_space_read(as, addr, bounce.buffer, l);
3586 return bounce.buffer;
3589 raddr = memory_region_get_ram_addr(section->mr)
3590 + memory_region_section_addr(section, addr);
3598 ret = qemu_ram_ptr_length(raddr, &rlen);
3603 /* Unmaps a memory region previously mapped by address_space_map().
3604 * Will also mark the memory as dirty if is_write == 1. access_len gives
3605 * the amount of memory that was actually read or written by the caller.
3607 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3608 int is_write, hwaddr access_len)
3610 if (buffer != bounce.buffer) {
3612 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3613 while (access_len) {
3615 l = TARGET_PAGE_SIZE;
3618 invalidate_and_set_dirty(addr1, l);
3623 if (xen_enabled()) {
3624 xen_invalidate_map_cache_entry(buffer);
3629 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3631 qemu_vfree(bounce.buffer);
3632 bounce.buffer = NULL;
3633 cpu_notify_map_clients();
3636 void *cpu_physical_memory_map(hwaddr addr,
3640 return address_space_map(&address_space_memory, addr, plen, is_write);
3643 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3644 int is_write, hwaddr access_len)
3646 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
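/* Editor's sketch of the map/unmap pattern documented above. The mapping may be
 * shortened or backed by the bounce buffer, so the returned length must be
 * honoured; "guest_pa" is a hypothetical guest physical address. */
#if 0
hwaddr maplen = 4096;
void *p = cpu_physical_memory_map(guest_pa, &maplen, 1 /* is_write */);
if (p) {
    memset(p, 0, maplen);                            /* maplen may have shrunk */
    cpu_physical_memory_unmap(p, maplen, 1, maplen); /* marks the range dirty */
}
#endif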
3649 /* warning: addr must be aligned */
3650 static inline uint32_t ldl_phys_internal(hwaddr addr,
3651 enum device_endian endian)
3655 MemoryRegionSection *section;
3657 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3659 if (!(memory_region_is_ram(section->mr) ||
3660 memory_region_is_romd(section->mr))) {
3662 addr = memory_region_section_addr(section, addr);
3663 val = io_mem_read(section->mr, addr, 4);
3664 #if defined(TARGET_WORDS_BIGENDIAN)
3665 if (endian == DEVICE_LITTLE_ENDIAN) {
3669 if (endian == DEVICE_BIG_ENDIAN) {
3675 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3677 + memory_region_section_addr(section, addr));
3679 case DEVICE_LITTLE_ENDIAN:
3680 val = ldl_le_p(ptr);
3682 case DEVICE_BIG_ENDIAN:
3683 val = ldl_be_p(ptr);
3693 uint32_t ldl_phys(hwaddr addr)
3695 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3698 uint32_t ldl_le_phys(hwaddr addr)
3700 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3703 uint32_t ldl_be_phys(hwaddr addr)
3705 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3708 /* warning: addr must be aligned */
3709 static inline uint64_t ldq_phys_internal(hwaddr addr,
3710 enum device_endian endian)
3714 MemoryRegionSection *section;
3716 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3718 if (!(memory_region_is_ram(section->mr) ||
3719 memory_region_is_romd(section->mr))) {
3721 addr = memory_region_section_addr(section, addr);
3723 /* XXX This is broken when device endian != cpu endian.
3724 Fix and add "endian" variable check */
3725 #ifdef TARGET_WORDS_BIGENDIAN
3726 val = io_mem_read(section->mr, addr, 4) << 32;
3727 val |= io_mem_read(section->mr, addr + 4, 4);
3729 val = io_mem_read(section->mr, addr, 4);
3730 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3734 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3736 + memory_region_section_addr(section, addr));
3738 case DEVICE_LITTLE_ENDIAN:
3739 val = ldq_le_p(ptr);
3741 case DEVICE_BIG_ENDIAN:
3742 val = ldq_be_p(ptr);
3752 uint64_t ldq_phys(hwaddr addr)
3754 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3757 uint64_t ldq_le_phys(hwaddr addr)
3759 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3762 uint64_t ldq_be_phys(hwaddr addr)
3764 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3768 uint32_t ldub_phys(hwaddr addr)
3771 cpu_physical_memory_read(addr, &val, 1);
3775 /* warning: addr must be aligned */
3776 static inline uint32_t lduw_phys_internal(hwaddr addr,
3777 enum device_endian endian)
3781 MemoryRegionSection *section;
3783 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3785 if (!(memory_region_is_ram(section->mr) ||
3786 memory_region_is_romd(section->mr))) {
3788 addr = memory_region_section_addr(section, addr);
3789 val = io_mem_read(section->mr, addr, 2);
3790 #if defined(TARGET_WORDS_BIGENDIAN)
3791 if (endian == DEVICE_LITTLE_ENDIAN) {
3795 if (endian == DEVICE_BIG_ENDIAN) {
3801 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3803 + memory_region_section_addr(section, addr));
3805 case DEVICE_LITTLE_ENDIAN:
3806 val = lduw_le_p(ptr);
3808 case DEVICE_BIG_ENDIAN:
3809 val = lduw_be_p(ptr);
3819 uint32_t lduw_phys(hwaddr addr)
3821 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3824 uint32_t lduw_le_phys(hwaddr addr)
3826 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3829 uint32_t lduw_be_phys(hwaddr addr)
3831 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3834 /* warning: addr must be aligned. The ram page is not marked as dirty
3835 and the code inside is not invalidated. It is useful if the dirty
3836 bits are used to track modified PTEs */
3837 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3840 MemoryRegionSection *section;
3842 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3844 if (!memory_region_is_ram(section->mr) || section->readonly) {
3845 addr = memory_region_section_addr(section, addr);
3846 if (memory_region_is_ram(section->mr)) {
3847 section = &phys_sections[phys_section_rom];
3849 io_mem_write(section->mr, addr, val, 4);
3851 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3853 + memory_region_section_addr(section, addr);
3854 ptr = qemu_get_ram_ptr(addr1);
3857 if (unlikely(in_migration)) {
3858 if (!cpu_physical_memory_is_dirty(addr1)) {
3859 /* invalidate code */
3860 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3862 cpu_physical_memory_set_dirty_flags(
3863 addr1, (0xff & ~CODE_DIRTY_FLAG));
3869 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3872 MemoryRegionSection *section;
3874 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3876 if (!memory_region_is_ram(section->mr) || section->readonly) {
3877 addr = memory_region_section_addr(section, addr);
3878 if (memory_region_is_ram(section->mr)) {
3879 section = &phys_sections[phys_section_rom];
3881 #ifdef TARGET_WORDS_BIGENDIAN
3882 io_mem_write(section->mr, addr, val >> 32, 4);
3883 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3885 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3886 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3889 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3891 + memory_region_section_addr(section, addr));
3896 /* warning: addr must be aligned */
3897 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3898 enum device_endian endian)
3901 MemoryRegionSection *section;
3903 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3905 if (!memory_region_is_ram(section->mr) || section->readonly) {
3906 addr = memory_region_section_addr(section, addr);
3907 if (memory_region_is_ram(section->mr)) {
3908 section = &phys_sections[phys_section_rom];
3910 #if defined(TARGET_WORDS_BIGENDIAN)
3911 if (endian == DEVICE_LITTLE_ENDIAN) {
3915 if (endian == DEVICE_BIG_ENDIAN) {
3919 io_mem_write(section->mr, addr, val, 4);
3921 unsigned long addr1;
3922 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3923 + memory_region_section_addr(section, addr);
3925 ptr = qemu_get_ram_ptr(addr1);
3927 case DEVICE_LITTLE_ENDIAN:
3930 case DEVICE_BIG_ENDIAN:
3937 invalidate_and_set_dirty(addr1, 4);
3941 void stl_phys(hwaddr addr, uint32_t val)
3943 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3946 void stl_le_phys(hwaddr addr, uint32_t val)
3948 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3951 void stl_be_phys(hwaddr addr, uint32_t val)
3953 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
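/* Editor's sketch pairing the fixed-endian accessors above; the address is
 * hypothetical and assumed to be backed by RAM. */
#if 0
stl_le_phys(0x1000, 0x12345678);      /* store 32 bits, guest-visible little-endian */
uint32_t v = ldl_le_phys(0x1000);     /* v == 0x12345678 on any host endianness */
#endif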
3957 void stb_phys(hwaddr addr, uint32_t val)
3960 cpu_physical_memory_write(addr, &v, 1);
3963 /* warning: addr must be aligned */
3964 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3965 enum device_endian endian)
3968 MemoryRegionSection *section;
3970 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3972 if (!memory_region_is_ram(section->mr) || section->readonly) {
3973 addr = memory_region_section_addr(section, addr);
3974 if (memory_region_is_ram(section->mr)) {
3975 section = &phys_sections[phys_section_rom];
3977 #if defined(TARGET_WORDS_BIGENDIAN)
3978 if (endian == DEVICE_LITTLE_ENDIAN) {
3982 if (endian == DEVICE_BIG_ENDIAN) {
3986 io_mem_write(section->mr, addr, val, 2);
3988 unsigned long addr1;
3989 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3990 + memory_region_section_addr(section, addr);
3992 ptr = qemu_get_ram_ptr(addr1);
3994 case DEVICE_LITTLE_ENDIAN:
3997 case DEVICE_BIG_ENDIAN:
4004 invalidate_and_set_dirty(addr1, 2);
4008 void stw_phys(hwaddr addr, uint32_t val)
4010 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4013 void stw_le_phys(hwaddr addr, uint32_t val)
4015 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4018 void stw_be_phys(hwaddr addr, uint32_t val)
4020 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4024 void stq_phys(hwaddr addr, uint64_t val)
4027 cpu_physical_memory_write(addr, &val, 8);
4030 void stq_le_phys(hwaddr addr, uint64_t val)
4032 val = cpu_to_le64(val);
4033 cpu_physical_memory_write(addr, &val, 8);
4036 void stq_be_phys(hwaddr addr, uint64_t val)
4038 val = cpu_to_be64(val);
4039 cpu_physical_memory_write(addr, &val, 8);
4042 /* virtual memory access for debug (includes writing to ROM) */
4043 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4044 uint8_t *buf, int len, int is_write)
4051 page = addr & TARGET_PAGE_MASK;
4052 phys_addr = cpu_get_phys_page_debug(env, page);
4053 /* if no physical page mapped, return an error */
4054 if (phys_addr == -1)
4056 l = (page + TARGET_PAGE_SIZE) - addr;
4059 phys_addr += (addr & ~TARGET_PAGE_MASK);
4061 cpu_physical_memory_write_rom(phys_addr, buf, l);
4063 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
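/* Editor's sketch of debugger-style access via the routine above (the path the
 * gdbstub takes); "pc" is a hypothetical guest virtual address. */
#if 0
uint8_t insn[4];
cpu_memory_rw_debug(env, pc, insn, sizeof(insn), 0);   /* read through the MMU */
#endif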
4072 /* in deterministic execution mode, instructions doing device I/Os
4073 must be at the end of the TB */
4074 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4076 TranslationBlock *tb;
4078 target_ulong pc, cs_base;
4081 tb = tb_find_pc(retaddr);
4083 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4086 n = env->icount_decr.u16.low + tb->icount;
4087 cpu_restore_state(tb, env, retaddr);
4088     /* Calculate how many instructions had been executed before the fault occurred.  */
4090 n = n - env->icount_decr.u16.low;
4091 /* Generate a new TB ending on the I/O insn. */
4093 /* On MIPS and SH, delay slot instructions can only be restarted if
4094 they were already the first instruction in the TB. If this is not
4095        the first instruction in a TB then re-execute the preceding branch.  */
4097 #if defined(TARGET_MIPS)
4098 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4099 env->active_tc.PC -= 4;
4100 env->icount_decr.u16.low++;
4101 env->hflags &= ~MIPS_HFLAG_BMASK;
4103 #elif defined(TARGET_SH4)
4104 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4107 env->icount_decr.u16.low++;
4108 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4111 /* This should never happen. */
4112 if (n > CF_COUNT_MASK)
4113 cpu_abort(env, "TB too big during recompile");
4115 cflags = n | CF_LAST_IO;
4117 cs_base = tb->cs_base;
4119 tb_phys_invalidate(tb, -1);
4120 /* FIXME: In theory this could raise an exception. In practice
4121 we have already translated the block once so it's probably ok. */
4122 tb_gen_code(env, pc, cs_base, flags, cflags);
4123 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4124 the first in the TB) then we end up generating a whole new TB and
4125 repeating the fault, which is horribly inefficient.
4126        Better would be to execute just this insn uncached, or generate a second new TB.  */
4128 cpu_resume_from_signal(env, NULL);
4131 #if !defined(CONFIG_USER_ONLY)
4133 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4135 int i, target_code_size, max_target_code_size;
4136 int direct_jmp_count, direct_jmp2_count, cross_page;
4137 TranslationBlock *tb;
4139 target_code_size = 0;
4140 max_target_code_size = 0;
4142 direct_jmp_count = 0;
4143 direct_jmp2_count = 0;
4144 for(i = 0; i < nb_tbs; i++) {
4146 target_code_size += tb->size;
4147 if (tb->size > max_target_code_size)
4148 max_target_code_size = tb->size;
4149 if (tb->page_addr[1] != -1)
4151 if (tb->tb_next_offset[0] != 0xffff) {
4153 if (tb->tb_next_offset[1] != 0xffff) {
4154 direct_jmp2_count++;
4158 /* XXX: avoid using doubles ? */
4159 cpu_fprintf(f, "Translation buffer state:\n");
4160 cpu_fprintf(f, "gen code size %td/%zd\n",
4161 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4162 cpu_fprintf(f, "TB count %d/%d\n",
4163 nb_tbs, code_gen_max_blocks);
4164 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4165 nb_tbs ? target_code_size / nb_tbs : 0,
4166 max_target_code_size);
4167 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4168 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4169 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4170 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4172 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4173 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4175 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4177 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4178 cpu_fprintf(f, "\nStatistics:\n");
4179 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4180 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4181 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4182 tcg_dump_info(f, cpu_fprintf);
4186 * A helper function for the _utterly broken_ virtio device model to find out if
4187 * it's running on a big endian machine. Don't do this at home kids!
4189 bool virtio_is_big_endian(void);
4190 bool virtio_is_big_endian(void)
4192 #if defined(TARGET_WORDS_BIGENDIAN)
4201 #ifndef CONFIG_USER_ONLY
4202 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4204 MemoryRegionSection *section;
4206 section = phys_page_find(address_space_memory.dispatch,
4207 phys_addr >> TARGET_PAGE_BITS);
4209 return !(memory_region_is_ram(section->mr) ||
4210 memory_region_is_romd(section->mr));