2 * virtual page mapping and translated block handling
4 * Copyright (c) 2003 Fabrice Bellard
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
23 #include <sys/types.h>
27 #include "qemu-common.h"
35 #include "qemu-timer.h"
37 #include "exec-memory.h"
38 #if defined(CONFIG_USER_ONLY)
40 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
41 #include <sys/param.h>
42 #if __FreeBSD_version >= 700104
43 #define HAVE_KINFO_GETVMMAP
44 #define sigqueue sigqueue_freebsd /* avoid redefinition */
47 #include <machine/profile.h>
55 #else /* !CONFIG_USER_ONLY */
56 #include "xen-mapcache.h"
62 #include "memory-internal.h"
64 //#define DEBUG_TB_INVALIDATE
66 //#define DEBUG_UNASSIGNED
68 /* make various TB consistency checks */
69 //#define DEBUG_TB_CHECK
71 //#define DEBUG_IOPORT
72 //#define DEBUG_SUBPAGE
74 #if !defined(CONFIG_USER_ONLY)
75 /* TB consistency checks only implemented for usermode emulation. */
79 #define SMC_BITMAP_USE_THRESHOLD 10
81 static TranslationBlock *tbs;
82 static int code_gen_max_blocks;
83 TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
85 /* any access to the tbs or the page table must use this lock */
86 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
88 uint8_t *code_gen_prologue;
89 static uint8_t *code_gen_buffer;
90 static size_t code_gen_buffer_size;
91 /* threshold to flush the translated code buffer */
92 static size_t code_gen_buffer_max_size;
93 static uint8_t *code_gen_ptr;
95 #if !defined(CONFIG_USER_ONLY)
97 static int in_migration;
99 RAMList ram_list = { .blocks = QLIST_HEAD_INITIALIZER(ram_list.blocks) };
101 static MemoryRegion *system_memory;
102 static MemoryRegion *system_io;
104 AddressSpace address_space_io;
105 AddressSpace address_space_memory;
107 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
108 static MemoryRegion io_mem_subpage_ram;
112 CPUArchState *first_cpu;
113 /* current CPU in the current thread. It is only valid inside cpu_exec() */
115 DEFINE_TLS(CPUArchState *,cpu_single_env);
116 /* 0 = Do not count executed instructions.
117 1 = Precise instruction counting.
118 2 = Adaptive rate instruction counting. */
121 typedef struct PageDesc {
122 /* list of TBs intersecting this ram page */
123 TranslationBlock *first_tb;
124 /* in order to optimize self modifying code, we count the number
125 of lookups we do to a given page to use a bitmap */
126 unsigned int code_write_count;
127 uint8_t *code_bitmap;
128 #if defined(CONFIG_USER_ONLY)
133 /* In system mode we want L1_MAP to be based on ram offsets,
134 while in user mode we want it to be based on virtual addresses. */
135 #if !defined(CONFIG_USER_ONLY)
136 #if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
137 # define L1_MAP_ADDR_SPACE_BITS HOST_LONG_BITS
139 # define L1_MAP_ADDR_SPACE_BITS TARGET_PHYS_ADDR_SPACE_BITS
142 # define L1_MAP_ADDR_SPACE_BITS TARGET_VIRT_ADDR_SPACE_BITS
145 /* Size of the L2 (and L3, etc) page tables. */
147 #define L2_SIZE (1 << L2_BITS)
149 #define P_L2_LEVELS \
150 (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
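/* For example, assuming 4 KiB target pages (TARGET_PAGE_BITS == 12),
   L2_BITS == 10 and a 52-bit physical address space, this works out to
   ((52 - 12 - 1) / 10) + 1 == 4 levels. */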
152 /* The bits remaining after N lower levels of page tables. */
153 #define V_L1_BITS_REM \
154 ((L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % L2_BITS)
156 #if V_L1_BITS_REM < 4
157 #define V_L1_BITS (V_L1_BITS_REM + L2_BITS)
159 #define V_L1_BITS V_L1_BITS_REM
162 #define V_L1_SIZE ((target_ulong)1 << V_L1_BITS)
164 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
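/* Worked example, assuming TARGET_PAGE_BITS == 12 and L2_BITS == 10 with a
   32-bit L1_MAP_ADDR_SPACE_BITS: 32 - 12 = 20 index bits remain to be mapped,
   20 % 10 == 0 (which is < 4), so V_L1_BITS = 0 + 10 = 10, V_L1_SIZE = 1024
   entries and V_L1_SHIFT = 32 - 12 - 10 = 10. */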
166 uintptr_t qemu_real_host_page_size;
167 uintptr_t qemu_host_page_size;
168 uintptr_t qemu_host_page_mask;
170 /* This is a multi-level map on the virtual address space.
171 The bottom level has pointers to PageDesc. */
172 static void *l1_map[V_L1_SIZE];
174 #if !defined(CONFIG_USER_ONLY)
176 static MemoryRegionSection *phys_sections;
177 static unsigned phys_sections_nb, phys_sections_nb_alloc;
178 static uint16_t phys_section_unassigned;
179 static uint16_t phys_section_notdirty;
180 static uint16_t phys_section_rom;
181 static uint16_t phys_section_watch;
183 /* Simple allocator for PhysPageEntry nodes */
184 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
185 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
187 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
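/* Sentinel "no node/section attached" value: the largest index that fits in
   the PhysPageEntry ptr bitfield. */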
189 static void io_mem_init(void);
190 static void memory_map_init(void);
192 static MemoryRegion io_mem_watch;
196 static int tb_flush_count;
197 static int tb_phys_invalidate_count;
200 static inline void map_exec(void *addr, long size)
203 VirtualProtect(addr, size,
204 PAGE_EXECUTE_READWRITE, &old_protect);
208 static inline void map_exec(void *addr, long size)
210 unsigned long start, end, page_size;
212 page_size = getpagesize();
213 start = (unsigned long)addr;
214 start &= ~(page_size - 1);
216 end = (unsigned long)addr + size;
217 end += page_size - 1;
218 end &= ~(page_size - 1);
220 mprotect((void *)start, end - start,
221 PROT_READ | PROT_WRITE | PROT_EXEC);
225 static void page_init(void)
227 /* NOTE: we can always suppose that qemu_host_page_size >= TARGET_PAGE_SIZE */
231 SYSTEM_INFO system_info;
233 GetSystemInfo(&system_info);
234 qemu_real_host_page_size = system_info.dwPageSize;
237 qemu_real_host_page_size = getpagesize();
239 if (qemu_host_page_size == 0)
240 qemu_host_page_size = qemu_real_host_page_size;
241 if (qemu_host_page_size < TARGET_PAGE_SIZE)
242 qemu_host_page_size = TARGET_PAGE_SIZE;
243 qemu_host_page_mask = ~(qemu_host_page_size - 1);
245 #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY)
247 #ifdef HAVE_KINFO_GETVMMAP
248 struct kinfo_vmentry *freep;
251 freep = kinfo_getvmmap(getpid(), &cnt);
254 for (i = 0; i < cnt; i++) {
255 unsigned long startaddr, endaddr;
257 startaddr = freep[i].kve_start;
258 endaddr = freep[i].kve_end;
259 if (h2g_valid(startaddr)) {
260 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
262 if (h2g_valid(endaddr)) {
263 endaddr = h2g(endaddr);
264 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
266 #if TARGET_ABI_BITS <= L1_MAP_ADDR_SPACE_BITS
268 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
279 last_brk = (unsigned long)sbrk(0);
281 f = fopen("/compat/linux/proc/self/maps", "r");
286 unsigned long startaddr, endaddr;
289 n = fscanf (f, "%lx-%lx %*[^\n]\n", &startaddr, &endaddr);
291 if (n == 2 && h2g_valid(startaddr)) {
292 startaddr = h2g(startaddr) & TARGET_PAGE_MASK;
294 if (h2g_valid(endaddr)) {
295 endaddr = h2g(endaddr);
299 page_set_flags(startaddr, endaddr, PAGE_RESERVED);
311 static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc)
317 #if defined(CONFIG_USER_ONLY)
318 /* We can't use g_malloc because it may recurse into a locked mutex. */
319 # define ALLOC(P, SIZE) \
321 P = mmap(NULL, SIZE, PROT_READ | PROT_WRITE, \
322 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); \
325 # define ALLOC(P, SIZE) \
326 do { P = g_malloc0(SIZE); } while (0)
329 /* Level 1. Always allocated. */
330 lp = l1_map + ((index >> V_L1_SHIFT) & (V_L1_SIZE - 1));
333 for (i = V_L1_SHIFT / L2_BITS - 1; i > 0; i--) {
340 ALLOC(p, sizeof(void *) * L2_SIZE);
344 lp = p + ((index >> (i * L2_BITS)) & (L2_SIZE - 1));
352 ALLOC(pd, sizeof(PageDesc) * L2_SIZE);
358 return pd + (index & (L2_SIZE - 1));
361 static inline PageDesc *page_find(tb_page_addr_t index)
363 return page_find_alloc(index, 0);
366 #if !defined(CONFIG_USER_ONLY)
368 static void phys_map_node_reserve(unsigned nodes)
370 if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
371 typedef PhysPageEntry Node[L2_SIZE];
372 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
373 phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
374 phys_map_nodes_nb + nodes);
375 phys_map_nodes = g_renew(Node, phys_map_nodes,
376 phys_map_nodes_nb_alloc);
380 static uint16_t phys_map_node_alloc(void)
385 ret = phys_map_nodes_nb++;
386 assert(ret != PHYS_MAP_NODE_NIL);
387 assert(ret != phys_map_nodes_nb_alloc);
388 for (i = 0; i < L2_SIZE; ++i) {
389 phys_map_nodes[ret][i].is_leaf = 0;
390 phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
395 static void phys_map_nodes_reset(void)
397 phys_map_nodes_nb = 0;
401 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
402 hwaddr *nb, uint16_t leaf,
407 hwaddr step = (hwaddr)1 << (level * L2_BITS);
409 if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
410 lp->ptr = phys_map_node_alloc();
411 p = phys_map_nodes[lp->ptr];
413 for (i = 0; i < L2_SIZE; i++) {
415 p[i].ptr = phys_section_unassigned;
419 p = phys_map_nodes[lp->ptr];
421 lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
423 while (*nb && lp < &p[L2_SIZE]) {
424 if ((*index & (step - 1)) == 0 && *nb >= step) {
430 phys_page_set_level(lp, index, nb, leaf, level - 1);
436 static void phys_page_set(AddressSpaceDispatch *d,
437 hwaddr index, hwaddr nb,
440 /* Wildly overreserve - it doesn't matter much. */
441 phys_map_node_reserve(3 * P_L2_LEVELS);
443 phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
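/* Walk the phys_map radix tree for the given page index and return the
   MemoryRegionSection that covers it; unmapped pages fall back to the
   unassigned section. */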
446 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
448 PhysPageEntry lp = d->phys_map;
451 uint16_t s_index = phys_section_unassigned;
453 for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
454 if (lp.ptr == PHYS_MAP_NODE_NIL) {
457 p = phys_map_nodes[lp.ptr];
458 lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
463 return &phys_sections[s_index];
466 bool memory_region_is_unassigned(MemoryRegion *mr)
468 return mr != &io_mem_ram && mr != &io_mem_rom
469 && mr != &io_mem_notdirty && !mr->rom_device
470 && mr != &io_mem_watch;
473 #define mmap_lock() do { } while(0)
474 #define mmap_unlock() do { } while(0)
477 #if defined(CONFIG_USER_ONLY)
478 /* Currently it is not recommended to allocate big chunks of data in
479 user mode. It will change when a dedicated libc is used. */
480 /* ??? 64-bit hosts ought to have no problem mmaping data outside the
481 region in which the guest needs to run. Revisit this. */
482 #define USE_STATIC_CODE_GEN_BUFFER
485 /* ??? Should configure for this, not list operating systems here. */
486 #if (defined(__linux__) \
487 || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
488 || defined(__DragonFly__) || defined(__OpenBSD__) \
489 || defined(__NetBSD__))
493 /* Minimum size of the code gen buffer. This number is randomly chosen,
494 but not so small that we can't have a fair number of TB's live. */
495 #define MIN_CODE_GEN_BUFFER_SIZE (1024u * 1024)
497 /* Maximum size of the code gen buffer we'd like to use. Unless otherwise
498 indicated, this is constrained by the range of direct branches on the
499 host cpu, as used by the TCG implementation of goto_tb. */
500 #if defined(__x86_64__)
501 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
502 #elif defined(__sparc__)
503 # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
504 #elif defined(__arm__)
505 # define MAX_CODE_GEN_BUFFER_SIZE (16u * 1024 * 1024)
506 #elif defined(__s390x__)
507 /* We have a +- 4GB range on the branches; leave some slop. */
508 # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
510 # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
513 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
515 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
516 (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
517 ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
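/* For example, on an x86_64 host the default is MIN(32 MiB, 2 GiB) = 32 MiB;
   an explicit tb_size is clamped between MIN_CODE_GEN_BUFFER_SIZE and
   MAX_CODE_GEN_BUFFER_SIZE by size_code_gen_buffer() below. */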
519 static inline size_t size_code_gen_buffer(size_t tb_size)
521 /* Size the buffer. */
523 #ifdef USE_STATIC_CODE_GEN_BUFFER
524 tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
526 /* ??? Needs adjustments. */
527 /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
528 static buffer, we could size this on RESERVED_VA, on the text
529 segment size of the executable, or continue to use the default. */
530 tb_size = (unsigned long)(ram_size / 4);
533 if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
534 tb_size = MIN_CODE_GEN_BUFFER_SIZE;
536 if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
537 tb_size = MAX_CODE_GEN_BUFFER_SIZE;
539 code_gen_buffer_size = tb_size;
543 #ifdef USE_STATIC_CODE_GEN_BUFFER
544 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
545 __attribute__((aligned(CODE_GEN_ALIGN)));
547 static inline void *alloc_code_gen_buffer(void)
549 map_exec(static_code_gen_buffer, code_gen_buffer_size);
550 return static_code_gen_buffer;
552 #elif defined(USE_MMAP)
553 static inline void *alloc_code_gen_buffer(void)
555 int flags = MAP_PRIVATE | MAP_ANONYMOUS;
559 /* Constrain the position of the buffer based on the host cpu.
560 Note that these addresses are chosen in concert with the
561 addresses assigned in the relevant linker script file. */
562 # if defined(__PIE__) || defined(__PIC__)
563 /* Don't bother setting a preferred location if we're building
564 a position-independent executable. We're more likely to get
565 an address near the main executable if we let the kernel
566 choose the address. */
567 # elif defined(__x86_64__) && defined(MAP_32BIT)
568 /* Force the memory down into low memory with the executable.
569 Leave the choice of exact location with the kernel. */
571 /* Cannot expect to map more than 800MB in low memory. */
572 if (code_gen_buffer_size > 800u * 1024 * 1024) {
573 code_gen_buffer_size = 800u * 1024 * 1024;
575 # elif defined(__sparc__)
576 start = 0x40000000ul;
577 # elif defined(__s390x__)
578 start = 0x90000000ul;
581 buf = mmap((void *)start, code_gen_buffer_size,
582 PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
583 return buf == MAP_FAILED ? NULL : buf;
586 static inline void *alloc_code_gen_buffer(void)
588 void *buf = g_malloc(code_gen_buffer_size);
590 map_exec(buf, code_gen_buffer_size);
594 #endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
596 static inline void code_gen_alloc(size_t tb_size)
598 code_gen_buffer_size = size_code_gen_buffer(tb_size);
599 code_gen_buffer = alloc_code_gen_buffer();
600 if (code_gen_buffer == NULL) {
601 fprintf(stderr, "Could not allocate dynamic translator buffer\n");
605 /* Steal room for the prologue at the end of the buffer. This ensures
606 (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
607 from TB's to the prologue are going to be in range. It also means
608 that we don't need to mark (additional) portions of the data segment as executable. */
610 code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
611 code_gen_buffer_size -= 1024;
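/* Flushing is triggered once code_gen_ptr passes code_gen_buffer_max_size,
   so keep enough headroom past that threshold for one worst-case translation
   (TCG_MAX_OP_SIZE * OPC_BUF_SIZE bytes). */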
613 code_gen_buffer_max_size = code_gen_buffer_size -
614 (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
615 code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
616 tbs = g_malloc(code_gen_max_blocks * sizeof(TranslationBlock));
619 /* Must be called before using the QEMU cpus. 'tb_size' is the size
620 (in bytes) allocated to the translation buffer. Zero means the default size. */
622 void tcg_exec_init(unsigned long tb_size)
625 code_gen_alloc(tb_size);
626 code_gen_ptr = code_gen_buffer;
627 tcg_register_jit(code_gen_buffer, code_gen_buffer_size);
629 #if !defined(CONFIG_USER_ONLY) || !defined(CONFIG_USE_GUEST_BASE)
630 /* There's no guest base to take into account, so go ahead and
631 initialize the prologue now. */
632 tcg_prologue_init(&tcg_ctx);
636 bool tcg_enabled(void)
638 return code_gen_buffer != NULL;
641 void cpu_exec_init_all(void)
643 #if !defined(CONFIG_USER_ONLY)
649 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
651 static int cpu_common_post_load(void *opaque, int version_id)
653 CPUArchState *env = opaque;
655 /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
656 version_id is increased. */
657 env->interrupt_request &= ~0x01;
663 static const VMStateDescription vmstate_cpu_common = {
664 .name = "cpu_common",
666 .minimum_version_id = 1,
667 .minimum_version_id_old = 1,
668 .post_load = cpu_common_post_load,
669 .fields = (VMStateField []) {
670 VMSTATE_UINT32(halted, CPUArchState),
671 VMSTATE_UINT32(interrupt_request, CPUArchState),
672 VMSTATE_END_OF_LIST()
677 CPUArchState *qemu_get_cpu(int cpu)
679 CPUArchState *env = first_cpu;
682 if (env->cpu_index == cpu)
690 void cpu_exec_init(CPUArchState *env)
695 #if defined(CONFIG_USER_ONLY)
698 env->next_cpu = NULL;
701 while (*penv != NULL) {
702 penv = &(*penv)->next_cpu;
705 env->cpu_index = cpu_index;
707 QTAILQ_INIT(&env->breakpoints);
708 QTAILQ_INIT(&env->watchpoints);
709 #ifndef CONFIG_USER_ONLY
710 env->thread_id = qemu_get_thread_id();
713 #if defined(CONFIG_USER_ONLY)
716 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
717 vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
718 register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
719 cpu_save, cpu_load, env);
723 /* Allocate a new translation block. Flush the translation buffer if
724 too many translation blocks or too much generated code. */
725 static TranslationBlock *tb_alloc(target_ulong pc)
727 TranslationBlock *tb;
729 if (nb_tbs >= code_gen_max_blocks ||
730 (code_gen_ptr - code_gen_buffer) >= code_gen_buffer_max_size)
738 void tb_free(TranslationBlock *tb)
740 /* In practice this is mostly used for single-use temporary TBs.
741 Ignore the hard cases and just back up if this TB happens to
742 be the last one generated. */
743 if (nb_tbs > 0 && tb == &tbs[nb_tbs - 1]) {
744 code_gen_ptr = tb->tc_ptr;
749 static inline void invalidate_page_bitmap(PageDesc *p)
751 if (p->code_bitmap) {
752 g_free(p->code_bitmap);
753 p->code_bitmap = NULL;
755 p->code_write_count = 0;
758 /* Set to NULL all the 'first_tb' fields in all PageDescs. */
760 static void page_flush_tb_1 (int level, void **lp)
769 for (i = 0; i < L2_SIZE; ++i) {
770 pd[i].first_tb = NULL;
771 invalidate_page_bitmap(pd + i);
775 for (i = 0; i < L2_SIZE; ++i) {
776 page_flush_tb_1 (level - 1, pp + i);
781 static void page_flush_tb(void)
784 for (i = 0; i < V_L1_SIZE; i++) {
785 page_flush_tb_1(V_L1_SHIFT / L2_BITS - 1, l1_map + i);
789 /* flush all the translation blocks */
790 /* XXX: tb_flush is currently not thread safe */
791 void tb_flush(CPUArchState *env1)
794 #if defined(DEBUG_FLUSH)
795 printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
796 (unsigned long)(code_gen_ptr - code_gen_buffer),
798 ((unsigned long)(code_gen_ptr - code_gen_buffer)) / nb_tbs : 0);
800 if ((unsigned long)(code_gen_ptr - code_gen_buffer) > code_gen_buffer_size)
801 cpu_abort(env1, "Internal error: code buffer overflow\n");
805 for(env = first_cpu; env != NULL; env = env->next_cpu) {
806 memset (env->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof (void *));
809 memset (tb_phys_hash, 0, CODE_GEN_PHYS_HASH_SIZE * sizeof (void *));
812 code_gen_ptr = code_gen_buffer;
813 /* XXX: flush processor icache at this point if cache flush is expensive */
818 #ifdef DEBUG_TB_CHECK
820 static void tb_invalidate_check(target_ulong address)
822 TranslationBlock *tb;
824 address &= TARGET_PAGE_MASK;
825 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
826 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
827 if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
828 address >= tb->pc + tb->size)) {
829 printf("ERROR invalidate: address=" TARGET_FMT_lx
830 " PC=%08lx size=%04x\n",
831 address, (long)tb->pc, tb->size);
837 /* verify that all the pages have correct rights for code */
838 static void tb_page_check(void)
840 TranslationBlock *tb;
841 int i, flags1, flags2;
843 for(i = 0;i < CODE_GEN_PHYS_HASH_SIZE; i++) {
844 for(tb = tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
845 flags1 = page_get_flags(tb->pc);
846 flags2 = page_get_flags(tb->pc + tb->size - 1);
847 if ((flags1 & PAGE_WRITE) || (flags2 & PAGE_WRITE)) {
848 printf("ERROR page flags: PC=%08lx size=%04x f1=%x f2=%x\n",
849 (long)tb->pc, tb->size, flags1, flags2);
857 /* invalidate one TB */
858 static inline void tb_remove(TranslationBlock **ptb, TranslationBlock *tb,
861 TranslationBlock *tb1;
865 *ptb = *(TranslationBlock **)((char *)tb1 + next_offset);
868 ptb = (TranslationBlock **)((char *)tb1 + next_offset);
872 static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
874 TranslationBlock *tb1;
879 n1 = (uintptr_t)tb1 & 3;
880 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
882 *ptb = tb1->page_next[n1];
885 ptb = &tb1->page_next[n1];
889 static inline void tb_jmp_remove(TranslationBlock *tb, int n)
891 TranslationBlock *tb1, **ptb;
894 ptb = &tb->jmp_next[n];
897 /* find tb(n) in circular list */
900 n1 = (uintptr_t)tb1 & 3;
901 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
902 if (n1 == n && tb1 == tb)
905 ptb = &tb1->jmp_first;
907 ptb = &tb1->jmp_next[n1];
910 /* now we can suppress tb(n) from the list */
911 *ptb = tb->jmp_next[n];
913 tb->jmp_next[n] = NULL;
917 /* reset the jump entry 'n' of a TB so that it is not chained to another TB */
919 static inline void tb_reset_jump(TranslationBlock *tb, int n)
921 tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + tb->tb_next_offset[n]));
924 void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
929 tb_page_addr_t phys_pc;
930 TranslationBlock *tb1, *tb2;
932 /* remove the TB from the hash list */
933 phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
934 h = tb_phys_hash_func(phys_pc);
935 tb_remove(&tb_phys_hash[h], tb,
936 offsetof(TranslationBlock, phys_hash_next));
938 /* remove the TB from the page list */
939 if (tb->page_addr[0] != page_addr) {
940 p = page_find(tb->page_addr[0] >> TARGET_PAGE_BITS);
941 tb_page_remove(&p->first_tb, tb);
942 invalidate_page_bitmap(p);
944 if (tb->page_addr[1] != -1 && tb->page_addr[1] != page_addr) {
945 p = page_find(tb->page_addr[1] >> TARGET_PAGE_BITS);
946 tb_page_remove(&p->first_tb, tb);
947 invalidate_page_bitmap(p);
950 tb_invalidated_flag = 1;
952 /* remove the TB from the hash list */
953 h = tb_jmp_cache_hash_func(tb->pc);
954 for(env = first_cpu; env != NULL; env = env->next_cpu) {
955 if (env->tb_jmp_cache[h] == tb)
956 env->tb_jmp_cache[h] = NULL;
959 /* suppress this TB from the two jump lists */
960 tb_jmp_remove(tb, 0);
961 tb_jmp_remove(tb, 1);
963 /* suppress any remaining jumps to this TB */
966 n1 = (uintptr_t)tb1 & 3;
969 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
970 tb2 = tb1->jmp_next[n1];
971 tb_reset_jump(tb1, n1);
972 tb1->jmp_next[n1] = NULL;
975 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
977 tb_phys_invalidate_count++;
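/* Set bits [start, start + len) in the bitmap 'tab'. Used by
   build_page_bitmap() to record which bytes of a page are covered by
   translated code. */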
980 static inline void set_bits(uint8_t *tab, int start, int len)
986 mask = 0xff << (start & 7);
987 if ((start & ~7) == (end & ~7)) {
989 mask &= ~(0xff << (end & 7));
994 start = (start + 8) & ~7;
996 while (start < end1) {
1001 mask = ~(0xff << (end & 7));
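/* Build the code bitmap of a page from its TB list, so that
   tb_invalidate_phys_page_fast() can tell whether a write actually touches
   translated code. */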
1007 static void build_page_bitmap(PageDesc *p)
1009 int n, tb_start, tb_end;
1010 TranslationBlock *tb;
1012 p->code_bitmap = g_malloc0(TARGET_PAGE_SIZE / 8);
1015 while (tb != NULL) {
1016 n = (uintptr_t)tb & 3;
1017 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1018 /* NOTE: this is subtle as a TB may span two physical pages */
1020 /* NOTE: tb_end may be after the end of the page, but
1021 it is not a problem */
1022 tb_start = tb->pc & ~TARGET_PAGE_MASK;
1023 tb_end = tb_start + tb->size;
1024 if (tb_end > TARGET_PAGE_SIZE)
1025 tb_end = TARGET_PAGE_SIZE;
1028 tb_end = ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1030 set_bits(p->code_bitmap, tb_start, tb_end - tb_start);
1031 tb = tb->page_next[n];
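/* Translate one guest block starting at pc/cs_base, emit its host code into
   the code_gen buffer and link it into the physical hash and page tables,
   registering a second page if the block spans one. */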
1035 TranslationBlock *tb_gen_code(CPUArchState *env,
1036 target_ulong pc, target_ulong cs_base,
1037 int flags, int cflags)
1039 TranslationBlock *tb;
1041 tb_page_addr_t phys_pc, phys_page2;
1042 target_ulong virt_page2;
1045 phys_pc = get_page_addr_code(env, pc);
1048 /* flush must be done */
1050 /* cannot fail at this point */
1052 /* Don't forget to invalidate previous TB info. */
1053 tb_invalidated_flag = 1;
1055 tc_ptr = code_gen_ptr;
1056 tb->tc_ptr = tc_ptr;
1057 tb->cs_base = cs_base;
1059 tb->cflags = cflags;
1060 cpu_gen_code(env, tb, &code_gen_size);
1061 code_gen_ptr = (void *)(((uintptr_t)code_gen_ptr + code_gen_size +
1062 CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
1064 /* check next page if needed */
1065 virt_page2 = (pc + tb->size - 1) & TARGET_PAGE_MASK;
1067 if ((pc & TARGET_PAGE_MASK) != virt_page2) {
1068 phys_page2 = get_page_addr_code(env, virt_page2);
1070 tb_link_page(tb, phys_pc, phys_page2);
1075 * Invalidate all TBs which intersect with the target physical address range
1076 * [start;end[. NOTE: start and end may refer to *different* physical pages.
1077 * 'is_cpu_write_access' should be true if called from a real cpu write
1078 * access: the virtual CPU will exit the current TB if code is modified inside
1081 void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end,
1082 int is_cpu_write_access)
1084 while (start < end) {
1085 tb_invalidate_phys_page_range(start, end, is_cpu_write_access);
1086 start &= TARGET_PAGE_MASK;
1087 start += TARGET_PAGE_SIZE;
1092 * Invalidate all TBs which intersect with the target physical address range
1093 * [start;end[. NOTE: start and end must refer to the *same* physical page.
1094 * 'is_cpu_write_access' should be true if called from a real cpu write
1095 * access: the virtual CPU will exit the current TB if code is modified inside
1098 void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
1099 int is_cpu_write_access)
1101 TranslationBlock *tb, *tb_next, *saved_tb;
1102 CPUArchState *env = cpu_single_env;
1103 tb_page_addr_t tb_start, tb_end;
1106 #ifdef TARGET_HAS_PRECISE_SMC
1107 int current_tb_not_found = is_cpu_write_access;
1108 TranslationBlock *current_tb = NULL;
1109 int current_tb_modified = 0;
1110 target_ulong current_pc = 0;
1111 target_ulong current_cs_base = 0;
1112 int current_flags = 0;
1113 #endif /* TARGET_HAS_PRECISE_SMC */
1115 p = page_find(start >> TARGET_PAGE_BITS);
1118 if (!p->code_bitmap &&
1119 ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD &&
1120 is_cpu_write_access) {
1121 /* build code bitmap */
1122 build_page_bitmap(p);
1125 /* we remove all the TBs in the range [start, end[ */
1126 /* XXX: see if in some cases it could be faster to invalidate all the code */
1128 while (tb != NULL) {
1129 n = (uintptr_t)tb & 3;
1130 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1131 tb_next = tb->page_next[n];
1132 /* NOTE: this is subtle as a TB may span two physical pages */
1134 /* NOTE: tb_end may be after the end of the page, but
1135 it is not a problem */
1136 tb_start = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
1137 tb_end = tb_start + tb->size;
1139 tb_start = tb->page_addr[1];
1140 tb_end = tb_start + ((tb->pc + tb->size) & ~TARGET_PAGE_MASK);
1142 if (!(tb_end <= start || tb_start >= end)) {
1143 #ifdef TARGET_HAS_PRECISE_SMC
1144 if (current_tb_not_found) {
1145 current_tb_not_found = 0;
1147 if (env->mem_io_pc) {
1148 /* now we have a real cpu fault */
1149 current_tb = tb_find_pc(env->mem_io_pc);
1152 if (current_tb == tb &&
1153 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1154 /* If we are modifying the current TB, we must stop
1155 its execution. We could be more precise by checking
1156 that the modification is after the current PC, but it
1157 would require a specialized function to partially
1158 restore the CPU state */
1160 current_tb_modified = 1;
1161 cpu_restore_state(current_tb, env, env->mem_io_pc);
1162 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1165 #endif /* TARGET_HAS_PRECISE_SMC */
1166 /* we need to do that to handle the case where a signal
1167 occurs while doing tb_phys_invalidate() */
1170 saved_tb = env->current_tb;
1171 env->current_tb = NULL;
1173 tb_phys_invalidate(tb, -1);
1175 env->current_tb = saved_tb;
1176 if (env->interrupt_request && env->current_tb)
1177 cpu_interrupt(env, env->interrupt_request);
1182 #if !defined(CONFIG_USER_ONLY)
1183 /* if no code remaining, no need to continue to use slow writes */
1185 invalidate_page_bitmap(p);
1186 if (is_cpu_write_access) {
1187 tlb_unprotect_code_phys(env, start, env->mem_io_vaddr);
1191 #ifdef TARGET_HAS_PRECISE_SMC
1192 if (current_tb_modified) {
1193 /* we generate a block containing just the instruction
1194 modifying the memory. It will ensure that it cannot modify itself. */
1196 env->current_tb = NULL;
1197 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1198 cpu_resume_from_signal(env, NULL);
1203 /* len must be <= 8 and start must be a multiple of len */
1204 static inline void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len)
1210 qemu_log("modifying code at 0x%x size=%d EIP=%x PC=%08x\n",
1211 cpu_single_env->mem_io_vaddr, len,
1212 cpu_single_env->eip,
1213 cpu_single_env->eip +
1214 (intptr_t)cpu_single_env->segs[R_CS].base);
1217 p = page_find(start >> TARGET_PAGE_BITS);
1220 if (p->code_bitmap) {
1221 offset = start & ~TARGET_PAGE_MASK;
1222 b = p->code_bitmap[offset >> 3] >> (offset & 7);
1223 if (b & ((1 << len) - 1))
1227 tb_invalidate_phys_page_range(start, start + len, 1);
1231 #if !defined(CONFIG_SOFTMMU)
1232 static void tb_invalidate_phys_page(tb_page_addr_t addr,
1233 uintptr_t pc, void *puc)
1235 TranslationBlock *tb;
1238 #ifdef TARGET_HAS_PRECISE_SMC
1239 TranslationBlock *current_tb = NULL;
1240 CPUArchState *env = cpu_single_env;
1241 int current_tb_modified = 0;
1242 target_ulong current_pc = 0;
1243 target_ulong current_cs_base = 0;
1244 int current_flags = 0;
1247 addr &= TARGET_PAGE_MASK;
1248 p = page_find(addr >> TARGET_PAGE_BITS);
1252 #ifdef TARGET_HAS_PRECISE_SMC
1253 if (tb && pc != 0) {
1254 current_tb = tb_find_pc(pc);
1257 while (tb != NULL) {
1258 n = (uintptr_t)tb & 3;
1259 tb = (TranslationBlock *)((uintptr_t)tb & ~3);
1260 #ifdef TARGET_HAS_PRECISE_SMC
1261 if (current_tb == tb &&
1262 (current_tb->cflags & CF_COUNT_MASK) != 1) {
1263 /* If we are modifying the current TB, we must stop
1264 its execution. We could be more precise by checking
1265 that the modification is after the current PC, but it
1266 would require a specialized function to partially
1267 restore the CPU state */
1269 current_tb_modified = 1;
1270 cpu_restore_state(current_tb, env, pc);
1271 cpu_get_tb_cpu_state(env, &current_pc, &current_cs_base,
1274 #endif /* TARGET_HAS_PRECISE_SMC */
1275 tb_phys_invalidate(tb, addr);
1276 tb = tb->page_next[n];
1279 #ifdef TARGET_HAS_PRECISE_SMC
1280 if (current_tb_modified) {
1281 /* we generate a block containing just the instruction
1282 modifying the memory. It will ensure that it cannot modify itself. */
1284 env->current_tb = NULL;
1285 tb_gen_code(env, current_pc, current_cs_base, current_flags, 1);
1286 cpu_resume_from_signal(env, puc);
1292 /* add the tb in the target page and protect it if necessary */
1293 static inline void tb_alloc_page(TranslationBlock *tb,
1294 unsigned int n, tb_page_addr_t page_addr)
1297 #ifndef CONFIG_USER_ONLY
1298 bool page_already_protected;
1301 tb->page_addr[n] = page_addr;
1302 p = page_find_alloc(page_addr >> TARGET_PAGE_BITS, 1);
1303 tb->page_next[n] = p->first_tb;
1304 #ifndef CONFIG_USER_ONLY
1305 page_already_protected = p->first_tb != NULL;
1307 p->first_tb = (TranslationBlock *)((uintptr_t)tb | n);
1308 invalidate_page_bitmap(p);
1310 #if defined(TARGET_HAS_SMC) || 1
1312 #if defined(CONFIG_USER_ONLY)
1313 if (p->flags & PAGE_WRITE) {
1318 /* force the host page as non writable (writes will have a
1319 page fault + mprotect overhead) */
1320 page_addr &= qemu_host_page_mask;
1322 for(addr = page_addr; addr < page_addr + qemu_host_page_size;
1323 addr += TARGET_PAGE_SIZE) {
1325 p2 = page_find (addr >> TARGET_PAGE_BITS);
1329 p2->flags &= ~PAGE_WRITE;
1331 mprotect(g2h(page_addr), qemu_host_page_size,
1332 (prot & PAGE_BITS) & ~PAGE_WRITE);
1333 #ifdef DEBUG_TB_INVALIDATE
1334 printf("protecting code page: 0x" TARGET_FMT_lx "\n",
1339 /* if some code is already present, then the pages are already
1340 protected. So we handle the case where only the first TB is
1341 allocated in a physical page */
1342 if (!page_already_protected) {
1343 tlb_protect_code(page_addr);
1347 #endif /* TARGET_HAS_SMC */
1350 /* add a new TB and link it to the physical page tables. phys_page2 is
1351 (-1) to indicate that only one page contains the TB. */
1352 void tb_link_page(TranslationBlock *tb,
1353 tb_page_addr_t phys_pc, tb_page_addr_t phys_page2)
1356 TranslationBlock **ptb;
1358 /* Grab the mmap lock to stop another thread invalidating this TB
1359 before we are done. */
1361 /* add in the physical hash table */
1362 h = tb_phys_hash_func(phys_pc);
1363 ptb = &tb_phys_hash[h];
1364 tb->phys_hash_next = *ptb;
1367 /* add in the page list */
1368 tb_alloc_page(tb, 0, phys_pc & TARGET_PAGE_MASK);
1369 if (phys_page2 != -1)
1370 tb_alloc_page(tb, 1, phys_page2);
1372 tb->page_addr[1] = -1;
1374 tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2);
1375 tb->jmp_next[0] = NULL;
1376 tb->jmp_next[1] = NULL;
1378 /* init original jump addresses */
1379 if (tb->tb_next_offset[0] != 0xffff)
1380 tb_reset_jump(tb, 0);
1381 if (tb->tb_next_offset[1] != 0xffff)
1382 tb_reset_jump(tb, 1);
1384 #ifdef DEBUG_TB_CHECK
1390 /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
1391 tb[1].tc_ptr. Return NULL if not found */
1392 TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
1394 int m_min, m_max, m;
1396 TranslationBlock *tb;
1400 if (tc_ptr < (uintptr_t)code_gen_buffer ||
1401 tc_ptr >= (uintptr_t)code_gen_ptr) {
1404 /* binary search (cf Knuth) */
1407 while (m_min <= m_max) {
1408 m = (m_min + m_max) >> 1;
1410 v = (uintptr_t)tb->tc_ptr;
1413 else if (tc_ptr < v) {
1422 static void tb_reset_jump_recursive(TranslationBlock *tb);
1424 static inline void tb_reset_jump_recursive2(TranslationBlock *tb, int n)
1426 TranslationBlock *tb1, *tb_next, **ptb;
1429 tb1 = tb->jmp_next[n];
1431 /* find head of list */
1433 n1 = (uintptr_t)tb1 & 3;
1434 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1437 tb1 = tb1->jmp_next[n1];
1439 /* we are now sure now that tb jumps to tb1 */
1442 /* remove tb from the jmp_first list */
1443 ptb = &tb_next->jmp_first;
1446 n1 = (uintptr_t)tb1 & 3;
1447 tb1 = (TranslationBlock *)((uintptr_t)tb1 & ~3);
1448 if (n1 == n && tb1 == tb)
1450 ptb = &tb1->jmp_next[n1];
1452 *ptb = tb->jmp_next[n];
1453 tb->jmp_next[n] = NULL;
1455 /* suppress the jump to next tb in generated code */
1456 tb_reset_jump(tb, n);
1458 /* suppress jumps in the tb on which we could have jumped */
1459 tb_reset_jump_recursive(tb_next);
1463 static void tb_reset_jump_recursive(TranslationBlock *tb)
1465 tb_reset_jump_recursive2(tb, 0);
1466 tb_reset_jump_recursive2(tb, 1);
1469 #if defined(TARGET_HAS_ICE)
1470 #if defined(CONFIG_USER_ONLY)
1471 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1473 tb_invalidate_phys_page_range(pc, pc + 1, 0);
1476 void tb_invalidate_phys_addr(hwaddr addr)
1478 ram_addr_t ram_addr;
1479 MemoryRegionSection *section;
1481 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
1482 if (!(memory_region_is_ram(section->mr)
1483 || (section->mr->rom_device && section->mr->readable))) {
1486 ram_addr = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1487 + memory_region_section_addr(section, addr);
1488 tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
1491 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
1493 tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
1494 (pc & ~TARGET_PAGE_MASK));
1497 #endif /* TARGET_HAS_ICE */
1499 #if defined(CONFIG_USER_ONLY)
1500 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1505 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1506 int flags, CPUWatchpoint **watchpoint)
1511 /* Add a watchpoint. */
1512 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
1513 int flags, CPUWatchpoint **watchpoint)
1515 target_ulong len_mask = ~(len - 1);
1518 /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
1519 if ((len & (len - 1)) || (addr & ~len_mask) ||
1520 len == 0 || len > TARGET_PAGE_SIZE) {
1521 fprintf(stderr, "qemu: tried to set invalid watchpoint at "
1522 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
1525 wp = g_malloc(sizeof(*wp));
1528 wp->len_mask = len_mask;
1531 /* keep all GDB-injected watchpoints in front */
1533 QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
1535 QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
1537 tlb_flush_page(env, addr);
1544 /* Remove a specific watchpoint. */
1545 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
1548 target_ulong len_mask = ~(len - 1);
1551 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1552 if (addr == wp->vaddr && len_mask == wp->len_mask
1553 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
1554 cpu_watchpoint_remove_by_ref(env, wp);
1561 /* Remove a specific watchpoint by reference. */
1562 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
1564 QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
1566 tlb_flush_page(env, watchpoint->vaddr);
1571 /* Remove all matching watchpoints. */
1572 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
1574 CPUWatchpoint *wp, *next;
1576 QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
1577 if (wp->flags & mask)
1578 cpu_watchpoint_remove_by_ref(env, wp);
1583 /* Add a breakpoint. */
1584 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
1585 CPUBreakpoint **breakpoint)
1587 #if defined(TARGET_HAS_ICE)
1590 bp = g_malloc(sizeof(*bp));
1595 /* keep all GDB-injected breakpoints in front */
1597 QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
1599 QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
1601 breakpoint_invalidate(env, pc);
1611 /* Remove a specific breakpoint. */
1612 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
1614 #if defined(TARGET_HAS_ICE)
1617 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1618 if (bp->pc == pc && bp->flags == flags) {
1619 cpu_breakpoint_remove_by_ref(env, bp);
1629 /* Remove a specific breakpoint by reference. */
1630 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
1632 #if defined(TARGET_HAS_ICE)
1633 QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
1635 breakpoint_invalidate(env, breakpoint->pc);
1641 /* Remove all matching breakpoints. */
1642 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
1644 #if defined(TARGET_HAS_ICE)
1645 CPUBreakpoint *bp, *next;
1647 QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
1648 if (bp->flags & mask)
1649 cpu_breakpoint_remove_by_ref(env, bp);
1654 /* enable or disable single step mode. EXCP_DEBUG is returned by the
1655 CPU loop after each instruction */
1656 void cpu_single_step(CPUArchState *env, int enabled)
1658 #if defined(TARGET_HAS_ICE)
1659 if (env->singlestep_enabled != enabled) {
1660 env->singlestep_enabled = enabled;
1662 kvm_update_guest_debug(env, 0);
1664 /* must flush all the translated code to avoid inconsistencies */
1665 /* XXX: only flush what is necessary */
1672 static void cpu_unlink_tb(CPUArchState *env)
1674 /* FIXME: TB unchaining isn't SMP safe. For now just ignore the
1675 problem and hope the cpu will stop of its own accord. For userspace
1676 emulation this often isn't actually as bad as it sounds. Often
1677 signals are used primarily to interrupt blocking syscalls. */
1678 TranslationBlock *tb;
1679 static spinlock_t interrupt_lock = SPIN_LOCK_UNLOCKED;
1681 spin_lock(&interrupt_lock);
1682 tb = env->current_tb;
1683 /* if the cpu is currently executing code, we must unlink it and
1684 all the potentially executing TB */
1686 env->current_tb = NULL;
1687 tb_reset_jump_recursive(tb);
1689 spin_unlock(&interrupt_lock);
1692 #ifndef CONFIG_USER_ONLY
1693 /* mask must never be zero, except for A20 change call */
1694 static void tcg_handle_interrupt(CPUArchState *env, int mask)
1696 CPUState *cpu = ENV_GET_CPU(env);
1699 old_mask = env->interrupt_request;
1700 env->interrupt_request |= mask;
1703 * If called from iothread context, wake the target cpu in
1706 if (!qemu_cpu_is_self(cpu)) {
1712 env->icount_decr.u16.high = 0xffff;
1714 && (mask & ~old_mask) != 0) {
1715 cpu_abort(env, "Raised interrupt while not in I/O function");
1722 CPUInterruptHandler cpu_interrupt_handler = tcg_handle_interrupt;
1724 #else /* CONFIG_USER_ONLY */
1726 void cpu_interrupt(CPUArchState *env, int mask)
1728 env->interrupt_request |= mask;
1731 #endif /* CONFIG_USER_ONLY */
1733 void cpu_reset_interrupt(CPUArchState *env, int mask)
1735 env->interrupt_request &= ~mask;
1738 void cpu_exit(CPUArchState *env)
1740 env->exit_request = 1;
1744 void cpu_abort(CPUArchState *env, const char *fmt, ...)
1751 fprintf(stderr, "qemu: fatal: ");
1752 vfprintf(stderr, fmt, ap);
1753 fprintf(stderr, "\n");
1754 cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1755 if (qemu_log_enabled()) {
1756 qemu_log("qemu: fatal: ");
1757 qemu_log_vprintf(fmt, ap2);
1759 log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
1765 #if defined(CONFIG_USER_ONLY)
1767 struct sigaction act;
1768 sigfillset(&act.sa_mask);
1769 act.sa_handler = SIG_DFL;
1770 sigaction(SIGABRT, &act, NULL);
1776 CPUArchState *cpu_copy(CPUArchState *env)
1778 CPUArchState *new_env = cpu_init(env->cpu_model_str);
1779 CPUArchState *next_cpu = new_env->next_cpu;
1780 int cpu_index = new_env->cpu_index;
1781 #if defined(TARGET_HAS_ICE)
1786 memcpy(new_env, env, sizeof(CPUArchState));
1788 /* Preserve chaining and index. */
1789 new_env->next_cpu = next_cpu;
1790 new_env->cpu_index = cpu_index;
1792 /* Clone all break/watchpoints.
1793 Note: Once we support ptrace with hw-debug register access, make sure
1794 BP_CPU break/watchpoints are handled correctly on clone. */
1795 QTAILQ_INIT(&env->breakpoints);
1796 QTAILQ_INIT(&env->watchpoints);
1797 #if defined(TARGET_HAS_ICE)
1798 QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
1799 cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
1801 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1802 cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
1810 #if !defined(CONFIG_USER_ONLY)
1811 void tb_flush_jmp_cache(CPUArchState *env, target_ulong addr)
1815 /* Discard jump cache entries for any tb which might potentially
1816 overlap the flushed page. */
1817 i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
1818 memset (&env->tb_jmp_cache[i], 0,
1819 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1821 i = tb_jmp_cache_hash_page(addr);
1822 memset (&env->tb_jmp_cache[i], 0,
1823 TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
1826 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
1831 /* we modify the TLB cache so that the dirty bit will be set again
1832 when accessing the range */
1833 start1 = (uintptr_t)qemu_safe_ram_ptr(start);
1834 /* Check that we don't span multiple blocks - this breaks the
1835 address comparisons below. */
1836 if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
1837 != (end - 1) - start) {
1840 cpu_tlb_reset_dirty_all(start1, length);
1844 /* Note: start and end must be within the same ram block. */
1845 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
1850 start &= TARGET_PAGE_MASK;
1851 end = TARGET_PAGE_ALIGN(end);
1853 length = end - start;
1856 cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
1858 if (tcg_enabled()) {
1859 tlb_reset_dirty_range_all(start, end, length);
1863 int cpu_physical_memory_set_dirty_tracking(int enable)
1866 in_migration = enable;
1870 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
1871 MemoryRegionSection *section,
1875 target_ulong *address)
1880 if (memory_region_is_ram(section->mr)) {
1882 iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
1883 + memory_region_section_addr(section, paddr);
1884 if (!section->readonly) {
1885 iotlb |= phys_section_notdirty;
1887 iotlb |= phys_section_rom;
1890 /* IO handlers are currently passed a physical address.
1891 It would be nice to pass an offset from the base address
1892 of that region. This would avoid having to special case RAM,
1893 and avoid full address decoding in every device.
1894 We can't use the high bits of pd for this because
1895 IO_MEM_ROMD uses these as a ram address. */
1896 iotlb = section - phys_sections;
1897 iotlb += memory_region_section_addr(section, paddr);
1900 /* Make accesses to pages with watchpoints go via the
1901 watchpoint trap routines. */
1902 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1903 if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
1904 /* Avoid trapping reads of pages with a write breakpoint. */
1905 if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
1906 iotlb = phys_section_watch + paddr;
1907 *address |= TLB_MMIO;
1918 * Walks guest process memory "regions" one by one
1919 * and calls callback function 'fn' for each region.
1922 struct walk_memory_regions_data
1924 walk_memory_regions_fn fn;
1930 static int walk_memory_regions_end(struct walk_memory_regions_data *data,
1931 abi_ulong end, int new_prot)
1933 if (data->start != -1ul) {
1934 int rc = data->fn(data->priv, data->start, end, data->prot);
1940 data->start = (new_prot ? end : -1ul);
1941 data->prot = new_prot;
1946 static int walk_memory_regions_1(struct walk_memory_regions_data *data,
1947 abi_ulong base, int level, void **lp)
1953 return walk_memory_regions_end(data, base, 0);
1958 for (i = 0; i < L2_SIZE; ++i) {
1959 int prot = pd[i].flags;
1961 pa = base | (i << TARGET_PAGE_BITS);
1962 if (prot != data->prot) {
1963 rc = walk_memory_regions_end(data, pa, prot);
1971 for (i = 0; i < L2_SIZE; ++i) {
1972 pa = base | ((abi_ulong)i <<
1973 (TARGET_PAGE_BITS + L2_BITS * level));
1974 rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
1984 int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
1986 struct walk_memory_regions_data data;
1994 for (i = 0; i < V_L1_SIZE; i++) {
1995 int rc = walk_memory_regions_1(&data, (abi_ulong)i << V_L1_SHIFT,
1996 V_L1_SHIFT / L2_BITS - 1, l1_map + i);
2002 return walk_memory_regions_end(&data, 0, 0);
2005 static int dump_region(void *priv, abi_ulong start,
2006 abi_ulong end, unsigned long prot)
2008 FILE *f = (FILE *)priv;
2010 (void) fprintf(f, TARGET_ABI_FMT_lx"-"TARGET_ABI_FMT_lx
2011 " "TARGET_ABI_FMT_lx" %c%c%c\n",
2012 start, end, end - start,
2013 ((prot & PAGE_READ) ? 'r' : '-'),
2014 ((prot & PAGE_WRITE) ? 'w' : '-'),
2015 ((prot & PAGE_EXEC) ? 'x' : '-'));
2020 /* dump memory mappings */
2021 void page_dump(FILE *f)
2023 (void) fprintf(f, "%-8s %-8s %-8s %s\n",
2024 "start", "end", "size", "prot");
2025 walk_memory_regions(f, dump_region);
2028 int page_get_flags(target_ulong address)
2032 p = page_find(address >> TARGET_PAGE_BITS);
2038 /* Modify the flags of a page and invalidate the code if necessary.
2039 The flag PAGE_WRITE_ORG is positioned automatically depending
2040 on PAGE_WRITE. The mmap_lock should already be held. */
2041 void page_set_flags(target_ulong start, target_ulong end, int flags)
2043 target_ulong addr, len;
2045 /* This function should never be called with addresses outside the
2046 guest address space. If this assert fires, it probably indicates
2047 a missing call to h2g_valid. */
2048 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2049 assert(end < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2051 assert(start < end);
2053 start = start & TARGET_PAGE_MASK;
2054 end = TARGET_PAGE_ALIGN(end);
2056 if (flags & PAGE_WRITE) {
2057 flags |= PAGE_WRITE_ORG;
2060 for (addr = start, len = end - start;
2062 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2063 PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, 1);
2065 /* If the write protection bit is set, then we invalidate the code inside. */
2067 if (!(p->flags & PAGE_WRITE) &&
2068 (flags & PAGE_WRITE) &&
2070 tb_invalidate_phys_page(addr, 0, NULL);
2076 int page_check_range(target_ulong start, target_ulong len, int flags)
2082 /* This function should never be called with addresses outside the
2083 guest address space. If this assert fires, it probably indicates
2084 a missing call to h2g_valid. */
2085 #if TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS
2086 assert(start < ((abi_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
2092 if (start + len - 1 < start) {
2093 /* We've wrapped around. */
2097 end = TARGET_PAGE_ALIGN(start + len); /* must do before we lose bits in the next step */
2098 start = start & TARGET_PAGE_MASK;
2100 for (addr = start, len = end - start;
2102 len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
2103 p = page_find(addr >> TARGET_PAGE_BITS);
2106 if (!(p->flags & PAGE_VALID))
2109 if ((flags & PAGE_READ) && !(p->flags & PAGE_READ))
2111 if (flags & PAGE_WRITE) {
2112 if (!(p->flags & PAGE_WRITE_ORG))
2114 /* unprotect the page if it was put read-only because it
2115 contains translated code */
2116 if (!(p->flags & PAGE_WRITE)) {
2117 if (!page_unprotect(addr, 0, NULL))
2126 /* called from signal handler: invalidate the code and unprotect the
2127 page. Return TRUE if the fault was successfully handled. */
2128 int page_unprotect(target_ulong address, uintptr_t pc, void *puc)
2132 target_ulong host_start, host_end, addr;
2134 /* Technically this isn't safe inside a signal handler. However we
2135 know this only ever happens in a synchronous SEGV handler, so in
2136 practice it seems to be ok. */
2139 p = page_find(address >> TARGET_PAGE_BITS);
2145 /* if the page was really writable, then we change its
2146 protection back to writable */
2147 if ((p->flags & PAGE_WRITE_ORG) && !(p->flags & PAGE_WRITE)) {
2148 host_start = address & qemu_host_page_mask;
2149 host_end = host_start + qemu_host_page_size;
2152 for (addr = host_start ; addr < host_end ; addr += TARGET_PAGE_SIZE) {
2153 p = page_find(addr >> TARGET_PAGE_BITS);
2154 p->flags |= PAGE_WRITE;
2157 /* and since the content will be modified, we must invalidate
2158 the corresponding translated code. */
2159 tb_invalidate_phys_page(addr, pc, puc);
2160 #ifdef DEBUG_TB_CHECK
2161 tb_invalidate_check(addr);
2164 mprotect((void *)g2h(host_start), qemu_host_page_size,
2173 #endif /* defined(CONFIG_USER_ONLY) */
2175 #if !defined(CONFIG_USER_ONLY)
2177 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
2178 typedef struct subpage_t {
2181 uint16_t sub_section[TARGET_PAGE_SIZE];
2184 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
2186 static subpage_t *subpage_init(hwaddr base);
2187 static void destroy_page_desc(uint16_t section_index)
2189 MemoryRegionSection *section = &phys_sections[section_index];
2190 MemoryRegion *mr = section->mr;
2193 subpage_t *subpage = container_of(mr, subpage_t, iomem);
2194 memory_region_destroy(&subpage->iomem);
2199 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
2204 if (lp->ptr == PHYS_MAP_NODE_NIL) {
2208 p = phys_map_nodes[lp->ptr];
2209 for (i = 0; i < L2_SIZE; ++i) {
2210 if (!p[i].is_leaf) {
2211 destroy_l2_mapping(&p[i], level - 1);
2213 destroy_page_desc(p[i].ptr);
2217 lp->ptr = PHYS_MAP_NODE_NIL;
2220 static void destroy_all_mappings(AddressSpaceDispatch *d)
2222 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
2223 phys_map_nodes_reset();
2226 static uint16_t phys_section_add(MemoryRegionSection *section)
2228 if (phys_sections_nb == phys_sections_nb_alloc) {
2229 phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
2230 phys_sections = g_renew(MemoryRegionSection, phys_sections,
2231 phys_sections_nb_alloc);
2233 phys_sections[phys_sections_nb] = *section;
2234 return phys_sections_nb++;
2237 static void phys_sections_clear(void)
2239 phys_sections_nb = 0;
2242 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2245 hwaddr base = section->offset_within_address_space
2247 MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
2248 MemoryRegionSection subsection = {
2249 .offset_within_address_space = base,
2250 .size = TARGET_PAGE_SIZE,
2254 assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
2256 if (!(existing->mr->subpage)) {
2257 subpage = subpage_init(base);
2258 subsection.mr = &subpage->iomem;
2259 phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
2260 phys_section_add(&subsection));
2262 subpage = container_of(existing->mr, subpage_t, iomem);
2264 start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
2265 end = start + section->size - 1;
2266 subpage_register(subpage, start, end, phys_section_add(section));
2270 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
2272 hwaddr start_addr = section->offset_within_address_space;
2273 ram_addr_t size = section->size;
2275 uint16_t section_index = phys_section_add(section);
2280 phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
2284 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
2286 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
2287 MemoryRegionSection now = *section, remain = *section;
2289 if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
2290 || (now.size < TARGET_PAGE_SIZE)) {
2291 now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
2292 - now.offset_within_address_space,
2294 register_subpage(d, &now);
2295 remain.size -= now.size;
2296 remain.offset_within_address_space += now.size;
2297 remain.offset_within_region += now.size;
2299 while (remain.size >= TARGET_PAGE_SIZE) {
2301 if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
2302 now.size = TARGET_PAGE_SIZE;
2303 register_subpage(d, &now);
2305 now.size &= TARGET_PAGE_MASK;
2306 register_multipage(d, &now);
2308 remain.size -= now.size;
2309 remain.offset_within_address_space += now.size;
2310 remain.offset_within_region += now.size;
2314 register_subpage(d, &now);
2318 void qemu_flush_coalesced_mmio_buffer(void)
2321 kvm_flush_coalesced_mmio_buffer();
2324 #if defined(__linux__) && !defined(TARGET_S390X)
2326 #include <sys/vfs.h>
2328 #define HUGETLBFS_MAGIC 0x958458f6
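/* statfs() f_type of a hugetlbfs mount; gethugepagesize() below warns when
   the -mem-path directory is not on hugetlbfs and returns the filesystem
   block size as the huge page size. */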
2330 static long gethugepagesize(const char *path)
2336 ret = statfs(path, &fs);
2337 } while (ret != 0 && errno == EINTR);
2344 if (fs.f_type != HUGETLBFS_MAGIC)
2345 fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
2350 static void *file_ram_alloc(RAMBlock *block,
2360 unsigned long hpagesize;
2362 hpagesize = gethugepagesize(path);
2367 if (memory < hpagesize) {
2371 if (kvm_enabled() && !kvm_has_sync_mmu()) {
2372 fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
2376 if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
2380 fd = mkstemp(filename);
2382 perror("unable to create backing store for hugepages");
2389 memory = (memory+hpagesize-1) & ~(hpagesize-1);
2392 * ftruncate is not supported by hugetlbfs in older
2393 * hosts, so don't bother bailing out on errors.
2394 * If anything goes wrong with it under other filesystems,
2397 if (ftruncate(fd, memory))
2398 perror("ftruncate");
2401 /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
2402 * MAP_PRIVATE is requested. For mem_prealloc we mmap as MAP_SHARED
2403 * to sidestep this quirk.
2405 flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
2406 area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
2408 area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
2410 if (area == MAP_FAILED) {
2411 perror("file_ram_alloc: can't mmap RAM pages");
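/* Pick an offset in the ram_addr_t space for a new block of 'size' bytes,
   preferring the smallest existing gap that fits in order to limit
   fragmentation. */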
2420 static ram_addr_t find_ram_offset(ram_addr_t size)
2422 RAMBlock *block, *next_block;
2423 ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
2425 if (QLIST_EMPTY(&ram_list.blocks))
2428 QLIST_FOREACH(block, &ram_list.blocks, next) {
2429 ram_addr_t end, next = RAM_ADDR_MAX;
2431 end = block->offset + block->length;
2433 QLIST_FOREACH(next_block, &ram_list.blocks, next) {
2434 if (next_block->offset >= end) {
2435 next = MIN(next, next_block->offset);
2438 if (next - end >= size && next - end < mingap) {
2440 mingap = next - end;
2444 if (offset == RAM_ADDR_MAX) {
2445 fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
2453 ram_addr_t last_ram_offset(void)
2456 ram_addr_t last = 0;
2458 QLIST_FOREACH(block, &ram_list.blocks, next)
2459 last = MAX(last, block->offset + block->length);
2464 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
2467 QemuOpts *machine_opts;
2469 /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
2470 machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2472 !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
2473 ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
2475 perror("qemu_madvise");
2476 fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
2477 "but dump_guest_core=off specified\n");
2482 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
2484 RAMBlock *new_block, *block;
2487 QLIST_FOREACH(block, &ram_list.blocks, next) {
2488 if (block->offset == addr) {
2494 assert(!new_block->idstr[0]);
2497 char *id = qdev_get_dev_path(dev);
2499 snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
2503 pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
2505 QLIST_FOREACH(block, &ram_list.blocks, next) {
2506 if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
2507 fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
2514 static int memory_try_enable_merging(void *addr, size_t len)
2518 opts = qemu_opts_find(qemu_find_opts("machine"), 0);
2519 if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
2520 /* disabled by the user */
2524 return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
2527 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
2530 RAMBlock *new_block;
2532 size = TARGET_PAGE_ALIGN(size);
2533 new_block = g_malloc0(sizeof(*new_block));
2536 new_block->offset = find_ram_offset(size);
2538 new_block->host = host;
2539 new_block->flags |= RAM_PREALLOC_MASK;
2542 #if defined (__linux__) && !defined(TARGET_S390X)
2543 new_block->host = file_ram_alloc(new_block, size, mem_path);
2544 if (!new_block->host) {
2545 new_block->host = qemu_vmalloc(size);
2546 memory_try_enable_merging(new_block->host, size);
2549 fprintf(stderr, "-mem-path option unsupported\n");
2553 if (xen_enabled()) {
2554 xen_ram_alloc(new_block->offset, size, mr);
2555 } else if (kvm_enabled()) {
2556 /* some s390/kvm configurations have special constraints */
2557 new_block->host = kvm_vmalloc(size);
2559 new_block->host = qemu_vmalloc(size);
2561 memory_try_enable_merging(new_block->host, size);
2564 new_block->length = size;
2566 QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
2568 ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
2569 last_ram_offset() >> TARGET_PAGE_BITS);
2570 memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
2571 0, size >> TARGET_PAGE_BITS);
2572 cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
2574 qemu_ram_setup_dump(new_block->host, size);
2575 qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
2578 kvm_setup_guest_memory(new_block->host, size);
2580 return new_block->offset;
2583 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
2585 return qemu_ram_alloc_from_ptr(size, NULL, mr);
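/* Illustrative sketch (not part of the original file): how a caller might
 * allocate guest RAM and obtain a host pointer to it.  The helper name, the
 * MemoryRegion argument and the memset are made up for the example; real
 * devices normally go through memory_region_init_ram() instead of calling
 * qemu_ram_alloc() directly. */
static void example_alloc_and_touch_ram(MemoryRegion *mr, ram_addr_t size)
{
    ram_addr_t offset = qemu_ram_alloc(size, mr);   /* reserve a ram_addr_t range */
    uint8_t *host = qemu_get_ram_ptr(offset);       /* host pointer into the block */

    memset(host, 0, size);                          /* device-local access only */
    qemu_put_ram_ptr(host);                         /* balance the get */
}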
2588 void qemu_ram_free_from_ptr(ram_addr_t addr)
2592 QLIST_FOREACH(block, &ram_list.blocks, next) {
2593 if (addr == block->offset) {
2594 QLIST_REMOVE(block, next);
2601 void qemu_ram_free(ram_addr_t addr)
2605 QLIST_FOREACH(block, &ram_list.blocks, next) {
2606 if (addr == block->offset) {
2607 QLIST_REMOVE(block, next);
2608 if (block->flags & RAM_PREALLOC_MASK) {
2610 } else if (mem_path) {
2611 #if defined (__linux__) && !defined(TARGET_S390X)
2613 munmap(block->host, block->length);
2616 qemu_vfree(block->host);
2622 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2623 munmap(block->host, block->length);
2625 if (xen_enabled()) {
2626 xen_invalidate_map_cache_entry(block->host);
2628 qemu_vfree(block->host);
2640 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
2647 QLIST_FOREACH(block, &ram_list.blocks, next) {
2648 offset = addr - block->offset;
2649 if (offset < block->length) {
2650 vaddr = block->host + offset;
2651 if (block->flags & RAM_PREALLOC_MASK) {
2655 munmap(vaddr, length);
2657 #if defined(__linux__) && !defined(TARGET_S390X)
2660 flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
2663 flags |= MAP_PRIVATE;
2665 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2666 flags, block->fd, offset);
2668 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2669 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2676 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
2677 flags |= MAP_SHARED | MAP_ANONYMOUS;
2678 area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
2681 flags |= MAP_PRIVATE | MAP_ANONYMOUS;
2682 area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
2686 if (area != vaddr) {
2687 fprintf(stderr, "Could not remap addr: "
2688 RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
2692 memory_try_enable_merging(vaddr, length);
2693 qemu_ram_setup_dump(vaddr, length);
2699 #endif /* !_WIN32 */
2701 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2702 With the exception of the softmmu code in this file, this should
2703 only be used for local memory (e.g. video ram) that the device owns,
2704 and knows it isn't going to access beyond the end of the block.
2706 It should not be used for general purpose DMA.
2707 Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
2709 void *qemu_get_ram_ptr(ram_addr_t addr)
2713 QLIST_FOREACH(block, &ram_list.blocks, next) {
2714 if (addr - block->offset < block->length) {
2715 /* Move this entry to the start of the list. */
2716 if (block != QLIST_FIRST(&ram_list.blocks)) {
2717 QLIST_REMOVE(block, next);
2718 QLIST_INSERT_HEAD(&ram_list.blocks, block, next);
2720 if (xen_enabled()) {
2721 /* We need to check if the requested address is in the RAM
2722 * because we don't want to map the entire memory in QEMU.
2723 * In that case just map until the end of the page.
2725 if (block->offset == 0) {
2726 return xen_map_cache(addr, 0, 0);
2727 } else if (block->host == NULL) {
2729 xen_map_cache(block->offset, block->length, 1);
2732 return block->host + (addr - block->offset);
2736 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2742 /* Return a host pointer to ram allocated with qemu_ram_alloc.
2743 * Same as qemu_get_ram_ptr but avoid reordering ramblocks.
2745 void *qemu_safe_ram_ptr(ram_addr_t addr)
2749 QLIST_FOREACH(block, &ram_list.blocks, next) {
2750 if (addr - block->offset < block->length) {
2751 if (xen_enabled()) {
2752 /* We need to check if the requested address is in the RAM
2753 * because we don't want to map the entire memory in QEMU.
2754 * In that case just map until the end of the page.
2756 if (block->offset == 0) {
2757 return xen_map_cache(addr, 0, 0);
2758 } else if (block->host == NULL) {
2760 xen_map_cache(block->offset, block->length, 1);
2763 return block->host + (addr - block->offset);
2767 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2773 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
2774 * but takes a size argument */
2775 void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
2780 if (xen_enabled()) {
2781 return xen_map_cache(addr, *size, 1);
2785 QLIST_FOREACH(block, &ram_list.blocks, next) {
2786 if (addr - block->offset < block->length) {
2787 if (addr - block->offset + *size > block->length)
2788 *size = block->length - addr + block->offset;
2789 return block->host + (addr - block->offset);
2793 fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
2798 void qemu_put_ram_ptr(void *addr)
2800 trace_qemu_put_ram_ptr(addr);
2803 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
2806 uint8_t *host = ptr;
2808 if (xen_enabled()) {
2809 *ram_addr = xen_ram_addr_from_mapcache(ptr);
2813 QLIST_FOREACH(block, &ram_list.blocks, next) {
2814 /* This case occurs when the block is not mapped. */
2815 if (block->host == NULL) {
2818 if (host - block->host < block->length) {
2819 *ram_addr = block->offset + (host - block->host);
2827 /* Some of the softmmu routines need to translate from a host pointer
2828 (typically a TLB entry) back to a ram offset. */
2829 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
2831 ram_addr_t ram_addr;
2833 if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
2834 fprintf(stderr, "Bad ram pointer %p\n", ptr);
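/* Illustrative sketch (not part of the original file): round-tripping from a
 * host pointer (e.g. taken from a TLB entry) back to its ram_addr_t offset.
 * The function name and the RAM_ADDR_MAX error convention are assumptions
 * made for the example. */
static ram_addr_t example_host_ptr_to_ram_addr(void *ptr)
{
    ram_addr_t ram_addr;

    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
        /* not a RAM pointer; a real caller would handle this error */
        return RAM_ADDR_MAX;
    }
    return ram_addr;
}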
2840 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
2843 #ifdef DEBUG_UNASSIGNED
2844 printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
2846 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2847 cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
2852 static void unassigned_mem_write(void *opaque, hwaddr addr,
2853 uint64_t val, unsigned size)
2855 #ifdef DEBUG_UNASSIGNED
2856 printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
2858 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
2859 cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
2863 static const MemoryRegionOps unassigned_mem_ops = {
2864 .read = unassigned_mem_read,
2865 .write = unassigned_mem_write,
2866 .endianness = DEVICE_NATIVE_ENDIAN,
2869 static uint64_t error_mem_read(void *opaque, hwaddr addr,
2875 static void error_mem_write(void *opaque, hwaddr addr,
2876 uint64_t value, unsigned size)
2881 static const MemoryRegionOps error_mem_ops = {
2882 .read = error_mem_read,
2883 .write = error_mem_write,
2884 .endianness = DEVICE_NATIVE_ENDIAN,
2887 static const MemoryRegionOps rom_mem_ops = {
2888 .read = error_mem_read,
2889 .write = unassigned_mem_write,
2890 .endianness = DEVICE_NATIVE_ENDIAN,
2893 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
2894 uint64_t val, unsigned size)
2897 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2898 if (!(dirty_flags & CODE_DIRTY_FLAG)) {
2899 #if !defined(CONFIG_USER_ONLY)
2900 tb_invalidate_phys_page_fast(ram_addr, size);
2901 dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
2906 stb_p(qemu_get_ram_ptr(ram_addr), val);
2909 stw_p(qemu_get_ram_ptr(ram_addr), val);
2912 stl_p(qemu_get_ram_ptr(ram_addr), val);
2917 dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
2918 cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
2919 /* we remove the notdirty callback only if the code has been
2921 if (dirty_flags == 0xff)
2922 tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
2925 static const MemoryRegionOps notdirty_mem_ops = {
2926 .read = error_mem_read,
2927 .write = notdirty_mem_write,
2928 .endianness = DEVICE_NATIVE_ENDIAN,
2931 /* Generate a debug exception if a watchpoint has been hit. */
2932 static void check_watchpoint(int offset, int len_mask, int flags)
2934 CPUArchState *env = cpu_single_env;
2935 target_ulong pc, cs_base;
2936 TranslationBlock *tb;
2941 if (env->watchpoint_hit) {
2942 /* We re-entered the check after replacing the TB. Now raise
2943 * the debug interrupt so that it will trigger after the
2944 * current instruction. */
2945 cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
2948 vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
2949 QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
2950 if ((vaddr == (wp->vaddr & len_mask) ||
2951 (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
2952 wp->flags |= BP_WATCHPOINT_HIT;
2953 if (!env->watchpoint_hit) {
2954 env->watchpoint_hit = wp;
2955 tb = tb_find_pc(env->mem_io_pc);
2957 cpu_abort(env, "check_watchpoint: could not find TB for "
2958 "pc=%p", (void *)env->mem_io_pc);
2960 cpu_restore_state(tb, env, env->mem_io_pc);
2961 tb_phys_invalidate(tb, -1);
2962 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
2963 env->exception_index = EXCP_DEBUG;
2966 cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
2967 tb_gen_code(env, pc, cs_base, cpu_flags, 1);
2968 cpu_resume_from_signal(env, NULL);
2972 wp->flags &= ~BP_WATCHPOINT_HIT;
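/* Added note (not in the original): the match test above compares the
 * accessed address against the watchpoint range using length masks.  For a
 * 4-byte watchpoint at wp->vaddr == 0x1000, wp->len_mask is ~3, so any access
 * whose address falls in 0x1000..0x1003 satisfies
 * (vaddr & wp->len_mask) == wp->vaddr. */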
2977 /* Watchpoint access routines. Watchpoints are inserted using TLB tricks,
2978 so these check for a hit then pass through to the normal out-of-line
2980 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
2983 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
2985 case 1: return ldub_phys(addr);
2986 case 2: return lduw_phys(addr);
2987 case 4: return ldl_phys(addr);
2992 static void watch_mem_write(void *opaque, hwaddr addr,
2993 uint64_t val, unsigned size)
2995 check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
2998 stb_phys(addr, val);
3001 stw_phys(addr, val);
3004 stl_phys(addr, val);
3010 static const MemoryRegionOps watch_mem_ops = {
3011 .read = watch_mem_read,
3012 .write = watch_mem_write,
3013 .endianness = DEVICE_NATIVE_ENDIAN,
3016 static uint64_t subpage_read(void *opaque, hwaddr addr,
3019 subpage_t *mmio = opaque;
3020 unsigned int idx = SUBPAGE_IDX(addr);
3021 MemoryRegionSection *section;
3022 #if defined(DEBUG_SUBPAGE)
3023 printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
3024 mmio, len, addr, idx);
3027 section = &phys_sections[mmio->sub_section[idx]];
3029 addr -= section->offset_within_address_space;
3030 addr += section->offset_within_region;
3031 return io_mem_read(section->mr, addr, len);
3034 static void subpage_write(void *opaque, hwaddr addr,
3035 uint64_t value, unsigned len)
3037 subpage_t *mmio = opaque;
3038 unsigned int idx = SUBPAGE_IDX(addr);
3039 MemoryRegionSection *section;
3040 #if defined(DEBUG_SUBPAGE)
3041 printf("%s: subpage %p len %d addr " TARGET_FMT_plx
3042 " idx %d value %"PRIx64"\n",
3043 __func__, mmio, len, addr, idx, value);
3046 section = &phys_sections[mmio->sub_section[idx]];
3048 addr -= section->offset_within_address_space;
3049 addr += section->offset_within_region;
3050 io_mem_write(section->mr, addr, value, len);
3053 static const MemoryRegionOps subpage_ops = {
3054 .read = subpage_read,
3055 .write = subpage_write,
3056 .endianness = DEVICE_NATIVE_ENDIAN,
3059 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
3062 ram_addr_t raddr = addr;
3063 void *ptr = qemu_get_ram_ptr(raddr);
3065 case 1: return ldub_p(ptr);
3066 case 2: return lduw_p(ptr);
3067 case 4: return ldl_p(ptr);
3072 static void subpage_ram_write(void *opaque, hwaddr addr,
3073 uint64_t value, unsigned size)
3075 ram_addr_t raddr = addr;
3076 void *ptr = qemu_get_ram_ptr(raddr);
3078 case 1: return stb_p(ptr, value);
3079 case 2: return stw_p(ptr, value);
3080 case 4: return stl_p(ptr, value);
3085 static const MemoryRegionOps subpage_ram_ops = {
3086 .read = subpage_ram_read,
3087 .write = subpage_ram_write,
3088 .endianness = DEVICE_NATIVE_ENDIAN,
3091 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
3096 if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
3098 idx = SUBPAGE_IDX(start);
3099 eidx = SUBPAGE_IDX(end);
3100 #if defined(DEBUG_SUBPAGE)
3101 printf("%s: %p start %08x end %08x idx %08x eidx %08x mem %ld\n", __func__,
3102 mmio, start, end, idx, eidx, memory);
3104 if (memory_region_is_ram(phys_sections[section].mr)) {
3105 MemoryRegionSection new_section = phys_sections[section];
3106 new_section.mr = &io_mem_subpage_ram;
3107 section = phys_section_add(&new_section);
3109 for (; idx <= eidx; idx++) {
3110 mmio->sub_section[idx] = section;
3116 static subpage_t *subpage_init(hwaddr base)
3120 mmio = g_malloc0(sizeof(subpage_t));
3123 memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
3124 "subpage", TARGET_PAGE_SIZE);
3125 mmio->iomem.subpage = true;
3126 #if defined(DEBUG_SUBPAGE)
3127 printf("%s: %p base " TARGET_FMT_plx " len %08x %d\n", __func__,
3128 mmio, base, TARGET_PAGE_SIZE, subpage_memory);
3130 subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
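/* Added note (not in the original): a subpage is used when one target page is
 * covered by more than one MemoryRegionSection, e.g. a small MMIO region that
 * does not start or end on a page boundary.  Offsets within the page index
 * sub_section[] to find the section that really backs them; a freshly created
 * subpage points everything at phys_section_unassigned until
 * subpage_register() fills in the real sections. */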
3135 static uint16_t dummy_section(MemoryRegion *mr)
3137 MemoryRegionSection section = {
3139 .offset_within_address_space = 0,
3140 .offset_within_region = 0,
3144 return phys_section_add(&section);
3147 MemoryRegion *iotlb_to_region(hwaddr index)
3149 return phys_sections[index & ~TARGET_PAGE_MASK].mr;
3152 static void io_mem_init(void)
3154 memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
3155 memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
3156 memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
3157 "unassigned", UINT64_MAX);
3158 memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
3159 "notdirty", UINT64_MAX);
3160 memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
3161 "subpage-ram", UINT64_MAX);
3162 memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
3163 "watch", UINT64_MAX);
3166 static void mem_begin(MemoryListener *listener)
3168 AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
3170 destroy_all_mappings(d);
3171 d->phys_map.ptr = PHYS_MAP_NODE_NIL;
3174 static void core_begin(MemoryListener *listener)
3176 phys_sections_clear();
3177 phys_section_unassigned = dummy_section(&io_mem_unassigned);
3178 phys_section_notdirty = dummy_section(&io_mem_notdirty);
3179 phys_section_rom = dummy_section(&io_mem_rom);
3180 phys_section_watch = dummy_section(&io_mem_watch);
3183 static void tcg_commit(MemoryListener *listener)
3187 /* since each CPU stores ram addresses in its TLB cache, we must
3188 reset the modified entries */
3190 for(env = first_cpu; env != NULL; env = env->next_cpu) {
3195 static void core_log_global_start(MemoryListener *listener)
3197 cpu_physical_memory_set_dirty_tracking(1);
3200 static void core_log_global_stop(MemoryListener *listener)
3202 cpu_physical_memory_set_dirty_tracking(0);
3205 static void io_region_add(MemoryListener *listener,
3206 MemoryRegionSection *section)
3208 MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
3210 mrio->mr = section->mr;
3211 mrio->offset = section->offset_within_region;
3212 iorange_init(&mrio->iorange, &memory_region_iorange_ops,
3213 section->offset_within_address_space, section->size);
3214 ioport_register(&mrio->iorange);
3217 static void io_region_del(MemoryListener *listener,
3218 MemoryRegionSection *section)
3220 isa_unassign_ioport(section->offset_within_address_space, section->size);
3223 static MemoryListener core_memory_listener = {
3224 .begin = core_begin,
3225 .log_global_start = core_log_global_start,
3226 .log_global_stop = core_log_global_stop,
3230 static MemoryListener io_memory_listener = {
3231 .region_add = io_region_add,
3232 .region_del = io_region_del,
3236 static MemoryListener tcg_memory_listener = {
3237 .commit = tcg_commit,
3240 void address_space_init_dispatch(AddressSpace *as)
3242 AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
3244 d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
3245 d->listener = (MemoryListener) {
3247 .region_add = mem_add,
3248 .region_nop = mem_add,
3252 memory_listener_register(&d->listener, as);
3255 void address_space_destroy_dispatch(AddressSpace *as)
3257 AddressSpaceDispatch *d = as->dispatch;
3259 memory_listener_unregister(&d->listener);
3260 destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
3262 as->dispatch = NULL;
3265 static void memory_map_init(void)
3267 system_memory = g_malloc(sizeof(*system_memory));
3268 memory_region_init(system_memory, "system", INT64_MAX);
3269 address_space_init(&address_space_memory, system_memory);
3270 address_space_memory.name = "memory";
3272 system_io = g_malloc(sizeof(*system_io));
3273 memory_region_init(system_io, "io", 65536);
3274 address_space_init(&address_space_io, system_io);
3275 address_space_io.name = "I/O";
3277 memory_listener_register(&core_memory_listener, &address_space_memory);
3278 memory_listener_register(&io_memory_listener, &address_space_io);
3279 memory_listener_register(&tcg_memory_listener, &address_space_memory);
3282 MemoryRegion *get_system_memory(void)
3284 return system_memory;
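/* Illustrative sketch (not part of the original file): how board code might
 * create a RAM region and map it into the system address space built by
 * memory_map_init().  The region name and size are made up for the example. */
static void example_map_main_ram(void)
{
    MemoryRegion *ram = g_malloc(sizeof(*ram));

    memory_region_init_ram(ram, "example.ram", 128 * 1024 * 1024);
    memory_region_add_subregion(get_system_memory(), 0, ram);
}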
3287 MemoryRegion *get_system_io(void)
3292 #endif /* !defined(CONFIG_USER_ONLY) */
3294 /* physical memory access (slow version, mainly for debug) */
3295 #if defined(CONFIG_USER_ONLY)
3296 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
3297 uint8_t *buf, int len, int is_write)
3304 page = addr & TARGET_PAGE_MASK;
3305 l = (page + TARGET_PAGE_SIZE) - addr;
3308 flags = page_get_flags(page);
3309 if (!(flags & PAGE_VALID))
3312 if (!(flags & PAGE_WRITE))
3314 /* XXX: this code should not depend on lock_user */
3315 if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
3318 unlock_user(p, addr, l);
3320 if (!(flags & PAGE_READ))
3322 /* XXX: this code should not depend on lock_user */
3323 if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
3326 unlock_user(p, addr, 0);
3337 static void invalidate_and_set_dirty(hwaddr addr,
3340 if (!cpu_physical_memory_is_dirty(addr)) {
3341 /* invalidate code */
3342 tb_invalidate_phys_page_range(addr, addr + length, 0);
3344 cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
3346 xen_modified_memory(addr, length);
3349 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
3350 int len, bool is_write)
3352 AddressSpaceDispatch *d = as->dispatch;
3357 MemoryRegionSection *section;
3360 page = addr & TARGET_PAGE_MASK;
3361 l = (page + TARGET_PAGE_SIZE) - addr;
3364 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3367 if (!memory_region_is_ram(section->mr)) {
3369 addr1 = memory_region_section_addr(section, addr);
3370 /* XXX: could force cpu_single_env to NULL to avoid
3372 if (l >= 4 && ((addr1 & 3) == 0)) {
3373 /* 32 bit write access */
3375 io_mem_write(section->mr, addr1, val, 4);
3377 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3378 /* 16 bit write access */
3380 io_mem_write(section->mr, addr1, val, 2);
3383 /* 8 bit write access */
3385 io_mem_write(section->mr, addr1, val, 1);
3388 } else if (!section->readonly) {
3390 addr1 = memory_region_get_ram_addr(section->mr)
3391 + memory_region_section_addr(section, addr);
3393 ptr = qemu_get_ram_ptr(addr1);
3394 memcpy(ptr, buf, l);
3395 invalidate_and_set_dirty(addr1, l);
3396 qemu_put_ram_ptr(ptr);
3399 if (!(memory_region_is_ram(section->mr) ||
3400 memory_region_is_romd(section->mr))) {
3403 addr1 = memory_region_section_addr(section, addr);
3404 if (l >= 4 && ((addr1 & 3) == 0)) {
3405 /* 32 bit read access */
3406 val = io_mem_read(section->mr, addr1, 4);
3409 } else if (l >= 2 && ((addr1 & 1) == 0)) {
3410 /* 16 bit read access */
3411 val = io_mem_read(section->mr, addr1, 2);
3415 /* 8 bit read access */
3416 val = io_mem_read(section->mr, addr1, 1);
3422 ptr = qemu_get_ram_ptr(section->mr->ram_addr
3423 + memory_region_section_addr(section,
3425 memcpy(buf, ptr, l);
3426 qemu_put_ram_ptr(ptr);
3435 void address_space_write(AddressSpace *as, hwaddr addr,
3436 const uint8_t *buf, int len)
3438 address_space_rw(as, addr, (uint8_t *)buf, len, true);
3442 * address_space_read: read from an address space.
3444 * @as: #AddressSpace to be accessed
3445 * @addr: address within that address space
3446 * @buf: buffer with the data transferred
3448 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
3450 address_space_rw(as, addr, buf, len, false);
3454 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
3455 int len, int is_write)
3457 return address_space_rw(&address_space_memory, addr, buf, len, is_write);
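/* Illustrative sketch (not part of the original file): copying a buffer into
 * and back out of guest physical memory through the slow-path accessors.  The
 * guest physical address 0x100000 is arbitrary and chosen for the example. */
static void example_phys_rw(void)
{
    uint8_t out[16] = "hello, guest";
    uint8_t in[16];

    cpu_physical_memory_write(0x100000, out, sizeof(out)); /* is_write = 1 */
    cpu_physical_memory_read(0x100000, in, sizeof(in));    /* is_write = 0 */
}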
3460 /* used for ROM loading : can write in RAM and ROM */
3461 void cpu_physical_memory_write_rom(hwaddr addr,
3462 const uint8_t *buf, int len)
3464 AddressSpaceDispatch *d = address_space_memory.dispatch;
3468 MemoryRegionSection *section;
3471 page = addr & TARGET_PAGE_MASK;
3472 l = (page + TARGET_PAGE_SIZE) - addr;
3475 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3477 if (!(memory_region_is_ram(section->mr) ||
3478 memory_region_is_romd(section->mr))) {
3481 unsigned long addr1;
3482 addr1 = memory_region_get_ram_addr(section->mr)
3483 + memory_region_section_addr(section, addr);
3485 ptr = qemu_get_ram_ptr(addr1);
3486 memcpy(ptr, buf, l);
3487 invalidate_and_set_dirty(addr1, l);
3488 qemu_put_ram_ptr(ptr);
3502 static BounceBuffer bounce;
3504 typedef struct MapClient {
3506 void (*callback)(void *opaque);
3507 QLIST_ENTRY(MapClient) link;
3510 static QLIST_HEAD(map_client_list, MapClient) map_client_list
3511 = QLIST_HEAD_INITIALIZER(map_client_list);
3513 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
3515 MapClient *client = g_malloc(sizeof(*client));
3517 client->opaque = opaque;
3518 client->callback = callback;
3519 QLIST_INSERT_HEAD(&map_client_list, client, link);
3523 void cpu_unregister_map_client(void *_client)
3525 MapClient *client = (MapClient *)_client;
3527 QLIST_REMOVE(client, link);
3531 static void cpu_notify_map_clients(void)
3535 while (!QLIST_EMPTY(&map_client_list)) {
3536 client = QLIST_FIRST(&map_client_list);
3537 client->callback(client->opaque);
3538 cpu_unregister_map_client(client);
3542 /* Map a physical memory region into a host virtual address.
3543 * May map a subset of the requested range, given by and returned in *plen.
3544 * May return NULL if resources needed to perform the mapping are exhausted.
3545 * Use only for reads OR writes - not for read-modify-write operations.
3546 * Use cpu_register_map_client() to know when retrying the map operation is
3547 * likely to succeed.
3549 void *address_space_map(AddressSpace *as,
3554 AddressSpaceDispatch *d = as->dispatch;
3559 MemoryRegionSection *section;
3560 ram_addr_t raddr = RAM_ADDR_MAX;
3565 page = addr & TARGET_PAGE_MASK;
3566 l = (page + TARGET_PAGE_SIZE) - addr;
3569 section = phys_page_find(d, page >> TARGET_PAGE_BITS);
3571 if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
3572 if (todo || bounce.buffer) {
3575 bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
3579 address_space_read(as, addr, bounce.buffer, l);
3583 return bounce.buffer;
3586 raddr = memory_region_get_ram_addr(section->mr)
3587 + memory_region_section_addr(section, addr);
3595 ret = qemu_ram_ptr_length(raddr, &rlen);
3600 /* Unmaps a memory region previously mapped by address_space_map().
3601 * Will also mark the memory as dirty if is_write == 1. access_len gives
3602 * the amount of memory that was actually read or written by the caller.
3604 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
3605 int is_write, hwaddr access_len)
3607 if (buffer != bounce.buffer) {
3609 ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
3610 while (access_len) {
3612 l = TARGET_PAGE_SIZE;
3615 invalidate_and_set_dirty(addr1, l);
3620 if (xen_enabled()) {
3621 xen_invalidate_map_cache_entry(buffer);
3626 address_space_write(as, bounce.addr, bounce.buffer, access_len);
3628 qemu_vfree(bounce.buffer);
3629 bounce.buffer = NULL;
3630 cpu_notify_map_clients();
3633 void *cpu_physical_memory_map(hwaddr addr,
3637 return address_space_map(&address_space_memory, addr, plen, is_write);
3640 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
3641 int is_write, hwaddr access_len)
3643 return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
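/* Illustrative sketch (not part of the original file): the typical
 * map / access / unmap pattern for DMA-style access.  The mapped length may
 * come back smaller than requested, and the map can fail (returning NULL)
 * when the bounce buffer is busy; a real caller would then register a map
 * client and retry.  The function name and error handling are assumptions. */
static bool example_dma_write(hwaddr addr, const uint8_t *buf, hwaddr len)
{
    hwaddr plen = len;
    void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (!host || plen < len) {
        if (host) {
            cpu_physical_memory_unmap(host, plen, 1, 0);
        }
        return false;   /* retry later, e.g. via cpu_register_map_client() */
    }
    memcpy(host, buf, len);
    cpu_physical_memory_unmap(host, plen, 1, len);
    return true;
}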
3646 /* warning: addr must be aligned */
3647 static inline uint32_t ldl_phys_internal(hwaddr addr,
3648 enum device_endian endian)
3652 MemoryRegionSection *section;
3654 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3656 if (!(memory_region_is_ram(section->mr) ||
3657 memory_region_is_romd(section->mr))) {
3659 addr = memory_region_section_addr(section, addr);
3660 val = io_mem_read(section->mr, addr, 4);
3661 #if defined(TARGET_WORDS_BIGENDIAN)
3662 if (endian == DEVICE_LITTLE_ENDIAN) {
3666 if (endian == DEVICE_BIG_ENDIAN) {
3672 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3674 + memory_region_section_addr(section, addr));
3676 case DEVICE_LITTLE_ENDIAN:
3677 val = ldl_le_p(ptr);
3679 case DEVICE_BIG_ENDIAN:
3680 val = ldl_be_p(ptr);
3690 uint32_t ldl_phys(hwaddr addr)
3692 return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3695 uint32_t ldl_le_phys(hwaddr addr)
3697 return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3700 uint32_t ldl_be_phys(hwaddr addr)
3702 return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
3705 /* warning: addr must be aligned */
3706 static inline uint64_t ldq_phys_internal(hwaddr addr,
3707 enum device_endian endian)
3711 MemoryRegionSection *section;
3713 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3715 if (!(memory_region_is_ram(section->mr) ||
3716 memory_region_is_romd(section->mr))) {
3718 addr = memory_region_section_addr(section, addr);
3720 /* XXX This is broken when device endian != cpu endian.
3721 Fix and add "endian" variable check */
3722 #ifdef TARGET_WORDS_BIGENDIAN
3723 val = io_mem_read(section->mr, addr, 4) << 32;
3724 val |= io_mem_read(section->mr, addr + 4, 4);
3726 val = io_mem_read(section->mr, addr, 4);
3727 val |= io_mem_read(section->mr, addr + 4, 4) << 32;
3731 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3733 + memory_region_section_addr(section, addr));
3735 case DEVICE_LITTLE_ENDIAN:
3736 val = ldq_le_p(ptr);
3738 case DEVICE_BIG_ENDIAN:
3739 val = ldq_be_p(ptr);
3749 uint64_t ldq_phys(hwaddr addr)
3751 return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3754 uint64_t ldq_le_phys(hwaddr addr)
3756 return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3759 uint64_t ldq_be_phys(hwaddr addr)
3761 return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
3765 uint32_t ldub_phys(hwaddr addr)
3768 cpu_physical_memory_read(addr, &val, 1);
3772 /* warning: addr must be aligned */
3773 static inline uint32_t lduw_phys_internal(hwaddr addr,
3774 enum device_endian endian)
3778 MemoryRegionSection *section;
3780 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3782 if (!(memory_region_is_ram(section->mr) ||
3783 memory_region_is_romd(section->mr))) {
3785 addr = memory_region_section_addr(section, addr);
3786 val = io_mem_read(section->mr, addr, 2);
3787 #if defined(TARGET_WORDS_BIGENDIAN)
3788 if (endian == DEVICE_LITTLE_ENDIAN) {
3792 if (endian == DEVICE_BIG_ENDIAN) {
3798 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3800 + memory_region_section_addr(section, addr));
3802 case DEVICE_LITTLE_ENDIAN:
3803 val = lduw_le_p(ptr);
3805 case DEVICE_BIG_ENDIAN:
3806 val = lduw_be_p(ptr);
3816 uint32_t lduw_phys(hwaddr addr)
3818 return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
3821 uint32_t lduw_le_phys(hwaddr addr)
3823 return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
3826 uint32_t lduw_be_phys(hwaddr addr)
3828 return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
3831 /* warning: addr must be aligned. The ram page is not masked as dirty
3832 and the code inside is not invalidated. It is useful if the dirty
3833 bits are used to track modified PTEs */
3834 void stl_phys_notdirty(hwaddr addr, uint32_t val)
3837 MemoryRegionSection *section;
3839 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3841 if (!memory_region_is_ram(section->mr) || section->readonly) {
3842 addr = memory_region_section_addr(section, addr);
3843 if (memory_region_is_ram(section->mr)) {
3844 section = &phys_sections[phys_section_rom];
3846 io_mem_write(section->mr, addr, val, 4);
3848 unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
3850 + memory_region_section_addr(section, addr);
3851 ptr = qemu_get_ram_ptr(addr1);
3854 if (unlikely(in_migration)) {
3855 if (!cpu_physical_memory_is_dirty(addr1)) {
3856 /* invalidate code */
3857 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
3859 cpu_physical_memory_set_dirty_flags(
3860 addr1, (0xff & ~CODE_DIRTY_FLAG));
3866 void stq_phys_notdirty(hwaddr addr, uint64_t val)
3869 MemoryRegionSection *section;
3871 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3873 if (!memory_region_is_ram(section->mr) || section->readonly) {
3874 addr = memory_region_section_addr(section, addr);
3875 if (memory_region_is_ram(section->mr)) {
3876 section = &phys_sections[phys_section_rom];
3878 #ifdef TARGET_WORDS_BIGENDIAN
3879 io_mem_write(section->mr, addr, val >> 32, 4);
3880 io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
3882 io_mem_write(section->mr, addr, (uint32_t)val, 4);
3883 io_mem_write(section->mr, addr + 4, val >> 32, 4);
3886 ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
3888 + memory_region_section_addr(section, addr));
3893 /* warning: addr must be aligned */
3894 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
3895 enum device_endian endian)
3898 MemoryRegionSection *section;
3900 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3902 if (!memory_region_is_ram(section->mr) || section->readonly) {
3903 addr = memory_region_section_addr(section, addr);
3904 if (memory_region_is_ram(section->mr)) {
3905 section = &phys_sections[phys_section_rom];
3907 #if defined(TARGET_WORDS_BIGENDIAN)
3908 if (endian == DEVICE_LITTLE_ENDIAN) {
3912 if (endian == DEVICE_BIG_ENDIAN) {
3916 io_mem_write(section->mr, addr, val, 4);
3918 unsigned long addr1;
3919 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3920 + memory_region_section_addr(section, addr);
3922 ptr = qemu_get_ram_ptr(addr1);
3924 case DEVICE_LITTLE_ENDIAN:
3927 case DEVICE_BIG_ENDIAN:
3934 invalidate_and_set_dirty(addr1, 4);
3938 void stl_phys(hwaddr addr, uint32_t val)
3940 stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
3943 void stl_le_phys(hwaddr addr, uint32_t val)
3945 stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
3948 void stl_be_phys(hwaddr addr, uint32_t val)
3950 stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
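/* Illustrative sketch (not part of the original file): using the _notdirty
 * store when target MMU code updates accessed/dirty bits in a guest page
 * table entry, as described above stl_phys_notdirty().  The bit position is a
 * made-up example, not any particular architecture's PTE layout. */
static void example_mark_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    pte |= (1u << 5);                   /* hypothetical "accessed" bit */
    stl_phys_notdirty(pte_addr, pte);   /* no dirty flag, no TB invalidation */
}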
3954 void stb_phys(hwaddr addr, uint32_t val)
3957 cpu_physical_memory_write(addr, &v, 1);
3960 /* warning: addr must be aligned */
3961 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
3962 enum device_endian endian)
3965 MemoryRegionSection *section;
3967 section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
3969 if (!memory_region_is_ram(section->mr) || section->readonly) {
3970 addr = memory_region_section_addr(section, addr);
3971 if (memory_region_is_ram(section->mr)) {
3972 section = &phys_sections[phys_section_rom];
3974 #if defined(TARGET_WORDS_BIGENDIAN)
3975 if (endian == DEVICE_LITTLE_ENDIAN) {
3979 if (endian == DEVICE_BIG_ENDIAN) {
3983 io_mem_write(section->mr, addr, val, 2);
3985 unsigned long addr1;
3986 addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
3987 + memory_region_section_addr(section, addr);
3989 ptr = qemu_get_ram_ptr(addr1);
3991 case DEVICE_LITTLE_ENDIAN:
3994 case DEVICE_BIG_ENDIAN:
4001 invalidate_and_set_dirty(addr1, 2);
4005 void stw_phys(hwaddr addr, uint32_t val)
4007 stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
4010 void stw_le_phys(hwaddr addr, uint32_t val)
4012 stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
4015 void stw_be_phys(hwaddr addr, uint32_t val)
4017 stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
4021 void stq_phys(hwaddr addr, uint64_t val)
4024 cpu_physical_memory_write(addr, &val, 8);
4027 void stq_le_phys(hwaddr addr, uint64_t val)
4029 val = cpu_to_le64(val);
4030 cpu_physical_memory_write(addr, &val, 8);
4033 void stq_be_phys(hwaddr addr, uint64_t val)
4035 val = cpu_to_be64(val);
4036 cpu_physical_memory_write(addr, &val, 8);
4039 /* virtual memory access for debug (includes writing to ROM) */
4040 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
4041 uint8_t *buf, int len, int is_write)
4048 page = addr & TARGET_PAGE_MASK;
4049 phys_addr = cpu_get_phys_page_debug(env, page);
4050 /* if no physical page mapped, return an error */
4051 if (phys_addr == -1)
4053 l = (page + TARGET_PAGE_SIZE) - addr;
4056 phys_addr += (addr & ~TARGET_PAGE_MASK);
4058 cpu_physical_memory_write_rom(phys_addr, buf, l);
4060 cpu_physical_memory_rw(phys_addr, buf, l, is_write);
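/* Illustrative sketch (not part of the original file): how a debugger front
 * end such as the gdbstub might read guest virtual memory with the helper
 * above.  The function name is made up; env is assumed to be a valid CPU. */
static bool example_read_guest_virt(CPUArchState *env, target_ulong vaddr,
                                    uint8_t *buf, int len)
{
    /* is_write == 0: read; returns -1 if no physical page is mapped */
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0) == 0;
}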
4069 /* in deterministic execution mode, instructions doing device I/Os
4070 must be at the end of the TB */
4071 void cpu_io_recompile(CPUArchState *env, uintptr_t retaddr)
4073 TranslationBlock *tb;
4075 target_ulong pc, cs_base;
4078 tb = tb_find_pc(retaddr);
4080 cpu_abort(env, "cpu_io_recompile: could not find TB for pc=%p",
4083 n = env->icount_decr.u16.low + tb->icount;
4084 cpu_restore_state(tb, env, retaddr);
4085 /* Calculate how many instructions had been executed before the fault
4087 n = n - env->icount_decr.u16.low;
4088 /* Generate a new TB ending on the I/O insn. */
4090 /* On MIPS and SH, delay slot instructions can only be restarted if
4091 they were already the first instruction in the TB. If this is not
4092 the first instruction in a TB then re-execute the preceding
4094 #if defined(TARGET_MIPS)
4095 if ((env->hflags & MIPS_HFLAG_BMASK) != 0 && n > 1) {
4096 env->active_tc.PC -= 4;
4097 env->icount_decr.u16.low++;
4098 env->hflags &= ~MIPS_HFLAG_BMASK;
4100 #elif defined(TARGET_SH4)
4101 if ((env->flags & ((DELAY_SLOT | DELAY_SLOT_CONDITIONAL))) != 0
4104 env->icount_decr.u16.low++;
4105 env->flags &= ~(DELAY_SLOT | DELAY_SLOT_CONDITIONAL);
4108 /* This should never happen. */
4109 if (n > CF_COUNT_MASK)
4110 cpu_abort(env, "TB too big during recompile");
4112 cflags = n | CF_LAST_IO;
4114 cs_base = tb->cs_base;
4116 tb_phys_invalidate(tb, -1);
4117 /* FIXME: In theory this could raise an exception. In practice
4118 we have already translated the block once so it's probably ok. */
4119 tb_gen_code(env, pc, cs_base, flags, cflags);
4120 /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
4121 the first in the TB) then we end up generating a whole new TB and
4122 repeating the fault, which is horribly inefficient.
4123 Better would be to execute just this insn uncached, or generate a
4125 cpu_resume_from_signal(env, NULL);
4128 #if !defined(CONFIG_USER_ONLY)
4130 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
4132 int i, target_code_size, max_target_code_size;
4133 int direct_jmp_count, direct_jmp2_count, cross_page;
4134 TranslationBlock *tb;
4136 target_code_size = 0;
4137 max_target_code_size = 0;
4139 direct_jmp_count = 0;
4140 direct_jmp2_count = 0;
4141 for(i = 0; i < nb_tbs; i++) {
4143 target_code_size += tb->size;
4144 if (tb->size > max_target_code_size)
4145 max_target_code_size = tb->size;
4146 if (tb->page_addr[1] != -1)
4148 if (tb->tb_next_offset[0] != 0xffff) {
4150 if (tb->tb_next_offset[1] != 0xffff) {
4151 direct_jmp2_count++;
4155 /* XXX: avoid using doubles ? */
4156 cpu_fprintf(f, "Translation buffer state:\n");
4157 cpu_fprintf(f, "gen code size %td/%zd\n",
4158 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
4159 cpu_fprintf(f, "TB count %d/%d\n",
4160 nb_tbs, code_gen_max_blocks);
4161 cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
4162 nb_tbs ? target_code_size / nb_tbs : 0,
4163 max_target_code_size);
4164 cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
4165 nb_tbs ? (code_gen_ptr - code_gen_buffer) / nb_tbs : 0,
4166 target_code_size ? (double) (code_gen_ptr - code_gen_buffer) / target_code_size : 0);
4167 cpu_fprintf(f, "cross page TB count %d (%d%%)\n",
4169 nb_tbs ? (cross_page * 100) / nb_tbs : 0);
4170 cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
4172 nb_tbs ? (direct_jmp_count * 100) / nb_tbs : 0,
4174 nb_tbs ? (direct_jmp2_count * 100) / nb_tbs : 0);
4175 cpu_fprintf(f, "\nStatistics:\n");
4176 cpu_fprintf(f, "TB flush count %d\n", tb_flush_count);
4177 cpu_fprintf(f, "TB invalidate count %d\n", tb_phys_invalidate_count);
4178 cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
4179 tcg_dump_info(f, cpu_fprintf);
4183 * A helper function for the _utterly broken_ virtio device model to find out if
4184 * it's running on a big endian machine. Don't do this at home kids!
4186 bool virtio_is_big_endian(void);
4187 bool virtio_is_big_endian(void)
4189 #if defined(TARGET_WORDS_BIGENDIAN)
4198 #ifndef CONFIG_USER_ONLY
4199 bool cpu_physical_memory_is_io(hwaddr phys_addr)
4201 MemoryRegionSection *section;
4203 section = phys_page_find(address_space_memory.dispatch,
4204 phys_addr >> TARGET_PAGE_BITS);
4206 return !(memory_region_is_ram(section->mr) ||
4207 memory_region_is_romd(section->mr));