/*
 *  Virtual page mapping
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "config.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/types.h>
#include <sys/mman.h>
#endif

#include "qemu-common.h"
#include "cpu.h"
#include "tcg.h"
#include "hw/hw.h"
#include "hw/qdev.h"
#include "qemu/osdep.h"
#include "sysemu/kvm.h"
#include "hw/xen.h"
#include "qemu/timer.h"
#include "qemu/config-file.h"
#include "exec/memory.h"
#include "sysemu/dma.h"
#include "exec/address-spaces.h"
#if defined(CONFIG_USER_ONLY)
#include <qemu.h>
#else /* !CONFIG_USER_ONLY */
#include "sysemu/xen-mapcache.h"
#include "trace.h"
#endif
#include "exec/cpu-all.h"

#include "exec/cputlb.h"
#include "translate-all.h"

#include "exec/memory-internal.h"

//#define DEBUG_UNASSIGNED
//#define DEBUG_SUBPAGE

#if !defined(CONFIG_USER_ONLY)
int phys_ram_fd;
static int in_migration;

RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };

static MemoryRegion *system_memory;
static MemoryRegion *system_io;

AddressSpace address_space_io;
AddressSpace address_space_memory;
DMAContext dma_context_memory;

MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
static MemoryRegion io_mem_subpage_ram;

#endif

CPUArchState *first_cpu;
/* current CPU in the current thread. It is only valid inside
   cpu_exec() */
DEFINE_TLS(CPUArchState *,cpu_single_env);
/* 0 = Do not count executed instructions.
   1 = Precise instruction counting.
   2 = Adaptive rate instruction counting.  */
int use_icount;

#if !defined(CONFIG_USER_ONLY)

static MemoryRegionSection *phys_sections;
static unsigned phys_sections_nb, phys_sections_nb_alloc;
static uint16_t phys_section_unassigned;
static uint16_t phys_section_notdirty;
static uint16_t phys_section_rom;
static uint16_t phys_section_watch;

/* Simple allocator for PhysPageEntry nodes */
static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;

#define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)

static void io_mem_init(void);
static void memory_map_init(void);
static void *qemu_safe_ram_ptr(ram_addr_t addr);

static MemoryRegion io_mem_watch;
#endif

#if !defined(CONFIG_USER_ONLY)

static void phys_map_node_reserve(unsigned nodes)
{
    if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
        typedef PhysPageEntry Node[L2_SIZE];
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
        phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
                                      phys_map_nodes_nb + nodes);
        phys_map_nodes = g_renew(Node, phys_map_nodes,
                                 phys_map_nodes_nb_alloc);
    }
}

static uint16_t phys_map_node_alloc(void)
{
    unsigned i;
    uint16_t ret;

    ret = phys_map_nodes_nb++;
    assert(ret != PHYS_MAP_NODE_NIL);
    assert(ret != phys_map_nodes_nb_alloc);
    for (i = 0; i < L2_SIZE; ++i) {
        phys_map_nodes[ret][i].is_leaf = 0;
        phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
    }
    return ret;
}

static void phys_map_nodes_reset(void)
{
    phys_map_nodes_nb = 0;
}

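/* Map the pages in [*index, *index + *nb) to the given leaf (an index into
   phys_sections), descending one radix-tree level per recursive call and
   allocating intermediate nodes on demand.  */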
static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
                                hwaddr *nb, uint16_t leaf,
                                int level)
{
    PhysPageEntry *p;
    int i;
    hwaddr step = (hwaddr)1 << (level * L2_BITS);

    if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
        lp->ptr = phys_map_node_alloc();
        p = phys_map_nodes[lp->ptr];
        if (level == 0) {
            for (i = 0; i < L2_SIZE; i++) {
                p[i].is_leaf = 1;
                p[i].ptr = phys_section_unassigned;
            }
        }
    } else {
        p = phys_map_nodes[lp->ptr];
    }
    lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];

    while (*nb && lp < &p[L2_SIZE]) {
        if ((*index & (step - 1)) == 0 && *nb >= step) {
            lp->is_leaf = true;
            lp->ptr = leaf;
            *index += step;
            *nb -= step;
        } else {
            phys_page_set_level(lp, index, nb, leaf, level - 1);
        }
        ++lp;
    }
}

static void phys_page_set(AddressSpaceDispatch *d,
                          hwaddr index, hwaddr nb,
                          uint16_t leaf)
{
    /* Wildly overreserve - it doesn't matter much. */
    phys_map_node_reserve(3 * P_L2_LEVELS);

    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
}

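/* Walk the radix tree and return the section registered for the given page
   index, or the unassigned section if no mapping exists.  */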
MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
{
    PhysPageEntry lp = d->phys_map;
    PhysPageEntry *p;
    int i;
    uint16_t s_index = phys_section_unassigned;

    for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
        if (lp.ptr == PHYS_MAP_NODE_NIL) {
            goto not_found;
        }
        p = phys_map_nodes[lp.ptr];
        lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
    }

    s_index = lp.ptr;
not_found:
    return &phys_sections[s_index];
}

bool memory_region_is_unassigned(MemoryRegion *mr)
{
    return mr != &io_mem_ram && mr != &io_mem_rom
        && mr != &io_mem_notdirty && !mr->rom_device
        && mr != &io_mem_watch;
}
#endif

void cpu_exec_init_all(void)
{
#if !defined(CONFIG_USER_ONLY)
    qemu_mutex_init(&ram_list.mutex);
    memory_map_init();
    io_mem_init();
#endif
}

#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)

static int cpu_common_post_load(void *opaque, int version_id)
{
    CPUArchState *env = opaque;

    /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
       version_id is increased. */
    env->interrupt_request &= ~0x01;
    tlb_flush(env, 1);

    return 0;
}

static const VMStateDescription vmstate_cpu_common = {
    .name = "cpu_common",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .post_load = cpu_common_post_load,
    .fields      = (VMStateField []) {
        VMSTATE_UINT32(halted, CPUArchState),
        VMSTATE_UINT32(interrupt_request, CPUArchState),
        VMSTATE_END_OF_LIST()
    }
};
#endif

CPUState *qemu_get_cpu(int index)
{
    CPUArchState *env = first_cpu;
    CPUState *cpu = NULL;

    while (env) {
        cpu = ENV_GET_CPU(env);
        if (cpu->cpu_index == index) {
            break;
        }
        env = env->next_cpu;
    }

    return cpu;
}

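/* Register a new CPU on the global first_cpu list, assign it the next free
   cpu_index and hook up its savevm/vmstate handlers where applicable.  */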
void cpu_exec_init(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);
    CPUArchState **penv;
    int cpu_index;

#if defined(CONFIG_USER_ONLY)
    cpu_list_lock();
#endif
    env->next_cpu = NULL;
    penv = &first_cpu;
    cpu_index = 0;
    while (*penv != NULL) {
        penv = &(*penv)->next_cpu;
        cpu_index++;
    }
    cpu->cpu_index = cpu_index;
    cpu->numa_node = 0;
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#ifndef CONFIG_USER_ONLY
    cpu->thread_id = qemu_get_thread_id();
#endif
    *penv = env;
#if defined(CONFIG_USER_ONLY)
    cpu_list_unlock();
#endif
#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
    vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
    register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
                    cpu_save, cpu_load, env);
#endif
}

#if defined(TARGET_HAS_ICE)
#if defined(CONFIG_USER_ONLY)
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_page_range(pc, pc + 1, 0);
}
#else
static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
{
    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
            (pc & ~TARGET_PAGE_MASK));
}
#endif
#endif /* TARGET_HAS_ICE */

#if defined(CONFIG_USER_ONLY)
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
}

int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    return -ENOSYS;
}
#else
/* Add a watchpoint.  */
int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags, CPUWatchpoint **watchpoint)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
    if ((len & (len - 1)) || (addr & ~len_mask) ||
            len == 0 || len > TARGET_PAGE_SIZE) {
        fprintf(stderr, "qemu: tried to set invalid watchpoint at "
                TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
        return -EINVAL;
    }
    wp = g_malloc(sizeof(*wp));

    wp->vaddr = addr;
    wp->len_mask = len_mask;
    wp->flags = flags;

    /* keep all GDB-injected watchpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);

    tlb_flush_page(env, addr);

    if (watchpoint)
        *watchpoint = wp;
    return 0;
}

/* Remove a specific watchpoint.  */
int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
                          int flags)
{
    target_ulong len_mask = ~(len - 1);
    CPUWatchpoint *wp;

    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (addr == wp->vaddr && len_mask == wp->len_mask
                && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
            cpu_watchpoint_remove_by_ref(env, wp);
            return 0;
        }
    }
    return -ENOENT;
}

/* Remove a specific watchpoint by reference.  */
void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
{
    QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);

    tlb_flush_page(env, watchpoint->vaddr);

    g_free(watchpoint);
}

/* Remove all matching watchpoints.  */
void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
{
    CPUWatchpoint *wp, *next;

    QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
        if (wp->flags & mask)
            cpu_watchpoint_remove_by_ref(env, wp);
    }
}
#endif

/* Add a breakpoint.  */
int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
                          CPUBreakpoint **breakpoint)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    bp = g_malloc(sizeof(*bp));

    bp->pc = pc;
    bp->flags = flags;

    /* keep all GDB-injected breakpoints in front */
    if (flags & BP_GDB)
        QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
    else
        QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);

    breakpoint_invalidate(env, pc);

    if (breakpoint)
        *breakpoint = bp;
    return 0;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint.  */
int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;

    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        if (bp->pc == pc && bp->flags == flags) {
            cpu_breakpoint_remove_by_ref(env, bp);
            return 0;
        }
    }
    return -ENOENT;
#else
    return -ENOSYS;
#endif
}

/* Remove a specific breakpoint by reference.  */
void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
{
#if defined(TARGET_HAS_ICE)
    QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);

    breakpoint_invalidate(env, breakpoint->pc);

    g_free(breakpoint);
#endif
}

/* Remove all matching breakpoints. */
void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
{
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp, *next;

    QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
        if (bp->flags & mask)
            cpu_breakpoint_remove_by_ref(env, bp);
    }
#endif
}

/* enable or disable single step mode. EXCP_DEBUG is returned by the
   CPU loop after each instruction */
void cpu_single_step(CPUArchState *env, int enabled)
{
#if defined(TARGET_HAS_ICE)
    if (env->singlestep_enabled != enabled) {
        env->singlestep_enabled = enabled;
        if (kvm_enabled())
            kvm_update_guest_debug(env, 0);
        else {
            /* must flush all the translated code to avoid inconsistencies */
            /* XXX: only flush what is necessary */
            tb_flush(env);
        }
    }
#endif
}

void cpu_reset_interrupt(CPUArchState *env, int mask)
{
    env->interrupt_request &= ~mask;
}

void cpu_exit(CPUArchState *env)
{
    CPUState *cpu = ENV_GET_CPU(env);

    cpu->exit_request = 1;
    cpu->tcg_exit_req = 1;
}

void cpu_abort(CPUArchState *env, const char *fmt, ...)
{
    va_list ap;
    va_list ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);
    fprintf(stderr, "qemu: fatal: ");
    vfprintf(stderr, fmt, ap);
    fprintf(stderr, "\n");
    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
    if (qemu_log_enabled()) {
        qemu_log("qemu: fatal: ");
        qemu_log_vprintf(fmt, ap2);
        qemu_log("\n");
        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
        qemu_log_flush();
        qemu_log_close();
    }
    va_end(ap2);
    va_end(ap);
#if defined(CONFIG_USER_ONLY)
    {
        struct sigaction act;
        sigfillset(&act.sa_mask);
        act.sa_handler = SIG_DFL;
        sigaction(SIGABRT, &act, NULL);
    }
#endif
    abort();
}

CPUArchState *cpu_copy(CPUArchState *env)
{
    CPUArchState *new_env = cpu_init(env->cpu_model_str);
    CPUArchState *next_cpu = new_env->next_cpu;
#if defined(TARGET_HAS_ICE)
    CPUBreakpoint *bp;
    CPUWatchpoint *wp;
#endif

    memcpy(new_env, env, sizeof(CPUArchState));

    /* Preserve chaining. */
    new_env->next_cpu = next_cpu;

    /* Clone all break/watchpoints.
       Note: Once we support ptrace with hw-debug register access, make sure
       BP_CPU break/watchpoints are handled correctly on clone. */
    QTAILQ_INIT(&env->breakpoints);
    QTAILQ_INIT(&env->watchpoints);
#if defined(TARGET_HAS_ICE)
    QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
        cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
    }
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
                              wp->flags, NULL);
    }
#endif

    return new_env;
}

#if !defined(CONFIG_USER_ONLY)
static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
                                      uintptr_t length)
{
    uintptr_t start1;

    /* we modify the TLB cache so that the dirty bit will be set again
       when accessing the range */
    start1 = (uintptr_t)qemu_safe_ram_ptr(start);
    /* Check that we don't span multiple blocks - this breaks the
       address comparisons below.  */
    if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
            != (end - 1) - start) {
        abort();
    }
    cpu_tlb_reset_dirty_all(start1, length);

}

/* Note: start and end must be within the same ram block.  */
void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
                                     int dirty_flags)
{
    uintptr_t length;

    start &= TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    length = end - start;
    if (length == 0)
        return;
    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);

    if (tcg_enabled()) {
        tlb_reset_dirty_range_all(start, end, length);
    }
}

static int cpu_physical_memory_set_dirty_tracking(int enable)
{
    int ret = 0;
    in_migration = enable;
    return ret;
}

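/* Compute the value stored in the IOTLB for a page: RAM pages get their
   ram_addr tagged with the notdirty or rom section, MMIO pages get the
   phys_sections index of their region.  Pages covered by a watchpoint are
   redirected to the watch section so the access traps.  */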
hwaddr memory_region_section_get_iotlb(CPUArchState *env,
                                       MemoryRegionSection *section,
                                       target_ulong vaddr,
                                       hwaddr paddr,
                                       int prot,
                                       target_ulong *address)
{
    hwaddr iotlb;
    CPUWatchpoint *wp;

    if (memory_region_is_ram(section->mr)) {
        /* Normal RAM.  */
        iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
            + memory_region_section_addr(section, paddr);
        if (!section->readonly) {
            iotlb |= phys_section_notdirty;
        } else {
            iotlb |= phys_section_rom;
        }
    } else {
        /* IO handlers are currently passed a physical address.
           It would be nice to pass an offset from the base address
           of that region.  This would avoid having to special case RAM,
           and avoid full address decoding in every device.
           We can't use the high bits of pd for this because
           IO_MEM_ROMD uses these as a ram address.  */
        iotlb = section - phys_sections;
        iotlb += memory_region_section_addr(section, paddr);
    }

    /* Make accesses to pages with watchpoints go via the
       watchpoint trap routines.  */
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
            /* Avoid trapping reads of pages with a write breakpoint. */
            if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
                iotlb = phys_section_watch + paddr;
                *address |= TLB_MMIO;
                break;
            }
        }
    }

    return iotlb;
}
#endif /* !defined(CONFIG_USER_ONLY) */

#if !defined(CONFIG_USER_ONLY)

#define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
typedef struct subpage_t {
    MemoryRegion iomem;
    hwaddr base;
    uint16_t sub_section[TARGET_PAGE_SIZE];
} subpage_t;

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section);
static subpage_t *subpage_init(hwaddr base);
static void destroy_page_desc(uint16_t section_index)
{
    MemoryRegionSection *section = &phys_sections[section_index];
    MemoryRegion *mr = section->mr;

    if (mr->subpage) {
        subpage_t *subpage = container_of(mr, subpage_t, iomem);
        memory_region_destroy(&subpage->iomem);
        g_free(subpage);
    }
}

static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
{
    unsigned i;
    PhysPageEntry *p;

    if (lp->ptr == PHYS_MAP_NODE_NIL) {
        return;
    }

    p = phys_map_nodes[lp->ptr];
    for (i = 0; i < L2_SIZE; ++i) {
        if (!p[i].is_leaf) {
            destroy_l2_mapping(&p[i], level - 1);
        } else {
            destroy_page_desc(p[i].ptr);
        }
    }
    lp->is_leaf = 0;
    lp->ptr = PHYS_MAP_NODE_NIL;
}

static void destroy_all_mappings(AddressSpaceDispatch *d)
{
    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
    phys_map_nodes_reset();
}

static uint16_t phys_section_add(MemoryRegionSection *section)
{
    if (phys_sections_nb == phys_sections_nb_alloc) {
        phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
        phys_sections = g_renew(MemoryRegionSection, phys_sections,
                                phys_sections_nb_alloc);
    }
    phys_sections[phys_sections_nb] = *section;
    return phys_sections_nb++;
}

static void phys_sections_clear(void)
{
    phys_sections_nb = 0;
}

static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    subpage_t *subpage;
    hwaddr base = section->offset_within_address_space
        & TARGET_PAGE_MASK;
    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
    MemoryRegionSection subsection = {
        .offset_within_address_space = base,
        .size = TARGET_PAGE_SIZE,
    };
    hwaddr start, end;

    assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);

    if (!(existing->mr->subpage)) {
        subpage = subpage_init(base);
        subsection.mr = &subpage->iomem;
        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                      phys_section_add(&subsection));
    } else {
        subpage = container_of(existing->mr, subpage_t, iomem);
    }
    start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
    end = start + section->size - 1;
    subpage_register(subpage, start, end, phys_section_add(section));
}


static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
{
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = section->size;
    hwaddr addr;
    uint16_t section_index = phys_section_add(section);

    assert(size);

    addr = start_addr;
    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                  section_index);
}

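/* MemoryListener callback: add a section to the dispatch tree.  Partial
   pages at the start and end are routed through subpage containers; the
   page-aligned middle part is registered directly as full pages.  */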
static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
    MemoryRegionSection now = *section, remain = *section;

    if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
        || (now.size < TARGET_PAGE_SIZE)) {
        now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                       - now.offset_within_address_space,
                       now.size);
        register_subpage(d, &now);
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    while (remain.size >= TARGET_PAGE_SIZE) {
        now = remain;
        if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
            now.size = TARGET_PAGE_SIZE;
            register_subpage(d, &now);
        } else {
            now.size &= TARGET_PAGE_MASK;
            register_multipage(d, &now);
        }
        remain.size -= now.size;
        remain.offset_within_address_space += now.size;
        remain.offset_within_region += now.size;
    }
    now = remain;
    if (now.size) {
        register_subpage(d, &now);
    }
}

void qemu_flush_coalesced_mmio_buffer(void)
{
    if (kvm_enabled())
        kvm_flush_coalesced_mmio_buffer();
}

void qemu_mutex_lock_ramlist(void)
{
    qemu_mutex_lock(&ram_list.mutex);
}

void qemu_mutex_unlock_ramlist(void)
{
    qemu_mutex_unlock(&ram_list.mutex);
}

#if defined(__linux__) && !defined(TARGET_S390X)

#include <sys/vfs.h>

#define HUGETLBFS_MAGIC       0x958458f6

static long gethugepagesize(const char *path)
{
    struct statfs fs;
    int ret;

    do {
        ret = statfs(path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        perror(path);
        return 0;
    }

    if (fs.f_type != HUGETLBFS_MAGIC)
        fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);

    return fs.f_bsize;
}

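/* Back a RAM block with a file on a hugetlbfs mount: create an unlinked
   temporary file under 'path', round the size up to a hugepage multiple and
   mmap it.  Returns NULL on failure so the caller can fall back to an
   ordinary allocation.  */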
static void *file_ram_alloc(RAMBlock *block,
                            ram_addr_t memory,
                            const char *path)
{
    char *filename;
    void *area;
    int fd;
#ifdef MAP_POPULATE
    int flags;
#endif
    unsigned long hpagesize;

    hpagesize = gethugepagesize(path);
    if (!hpagesize) {
        return NULL;
    }

    if (memory < hpagesize) {
        return NULL;
    }

    if (kvm_enabled() && !kvm_has_sync_mmu()) {
        fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
        return NULL;
    }

    filename = g_strdup_printf("%s/qemu_back_mem.XXXXXX", path);

    fd = mkstemp(filename);
    if (fd < 0) {
        perror("unable to create backing store for hugepages");
        g_free(filename);
        return NULL;
    }
    unlink(filename);
    g_free(filename);

    memory = (memory+hpagesize-1) & ~(hpagesize-1);

    /*
     * ftruncate is not supported by hugetlbfs in older
     * hosts, so don't bother bailing out on errors.
     * If anything goes wrong with it under other filesystems,
     * mmap will fail.
     */
    if (ftruncate(fd, memory))
        perror("ftruncate");

#ifdef MAP_POPULATE
    /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
     * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
     * to sidestep this quirk.
     */
    flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
    area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
#else
    area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
#endif
    if (area == MAP_FAILED) {
        perror("file_ram_alloc: can't mmap RAM pages");
        close(fd);
        return NULL;
    }
    block->fd = fd;
    return area;
}
#endif

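/* Find a gap in the RAM address space large enough for 'size' bytes,
   preferring the smallest gap that fits so blocks stay densely packed.  */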
static ram_addr_t find_ram_offset(ram_addr_t size)
{
    RAMBlock *block, *next_block;
    ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;

    if (QTAILQ_EMPTY(&ram_list.blocks))
        return 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        ram_addr_t end, next = RAM_ADDR_MAX;

        end = block->offset + block->length;

        QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
            if (next_block->offset >= end) {
                next = MIN(next, next_block->offset);
            }
        }
        if (next - end >= size && next - end < mingap) {
            offset = end;
            mingap = next - end;
        }
    }

    if (offset == RAM_ADDR_MAX) {
        fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
                (uint64_t)size);
        abort();
    }

    return offset;
}

ram_addr_t last_ram_offset(void)
{
    RAMBlock *block;
    ram_addr_t last = 0;

    QTAILQ_FOREACH(block, &ram_list.blocks, next)
        last = MAX(last, block->offset + block->length);

    return last;
}

static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
{
    int ret;
    QemuOpts *machine_opts;

    /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    if (machine_opts &&
        !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
        ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
        if (ret) {
            perror("qemu_madvise");
            fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
                            "but dump_guest_core=off specified\n");
        }
    }
}

void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
{
    RAMBlock *new_block, *block;

    new_block = NULL;
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->offset == addr) {
            new_block = block;
            break;
        }
    }
    assert(new_block);
    assert(!new_block->idstr[0]);

    if (dev) {
        char *id = qdev_get_dev_path(dev);
        if (id) {
            snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
            g_free(id);
        }
    }
    pstrcat(new_block->idstr, sizeof(new_block->idstr), name);

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
            fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
                    new_block->idstr);
            abort();
        }
    }
    qemu_mutex_unlock_ramlist();
}

static int memory_try_enable_merging(void *addr, size_t len)
{
    QemuOpts *opts;

    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
        /* disabled by the user */
        return 0;
    }

    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
}

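/* Allocate a new RAM block of 'size' bytes (optionally backed by an existing
   host pointer), pick an offset for it, insert it into the size-sorted block
   list and extend the dirty bitmap to cover it.  Returns the block's
   ram_addr.  */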
ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                   MemoryRegion *mr)
{
    RAMBlock *block, *new_block;

    size = TARGET_PAGE_ALIGN(size);
    new_block = g_malloc0(sizeof(*new_block));

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    new_block->mr = mr;
    new_block->offset = find_ram_offset(size);
    if (host) {
        new_block->host = host;
        new_block->flags |= RAM_PREALLOC_MASK;
    } else {
        if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
            new_block->host = file_ram_alloc(new_block, size, mem_path);
            if (!new_block->host) {
                new_block->host = qemu_vmalloc(size);
                memory_try_enable_merging(new_block->host, size);
            }
#else
            fprintf(stderr, "-mem-path option unsupported\n");
            exit(1);
#endif
        } else {
            if (xen_enabled()) {
                xen_ram_alloc(new_block->offset, size, mr);
            } else if (kvm_enabled()) {
                /* some s390/kvm configurations have special constraints */
                new_block->host = kvm_vmalloc(size);
            } else {
                new_block->host = qemu_vmalloc(size);
            }
            memory_try_enable_merging(new_block->host, size);
        }
    }
    new_block->length = size;

    /* Keep the list sorted from biggest to smallest block.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (block->length < new_block->length) {
            break;
        }
    }
    if (block) {
        QTAILQ_INSERT_BEFORE(block, new_block, next);
    } else {
        QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
    }
    ram_list.mru_block = NULL;

    ram_list.version++;
    qemu_mutex_unlock_ramlist();

    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
                                       last_ram_offset() >> TARGET_PAGE_BITS);
    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
           0, size >> TARGET_PAGE_BITS);
    cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);

    qemu_ram_setup_dump(new_block->host, size);
    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);

    if (kvm_enabled())
        kvm_setup_guest_memory(new_block->host, size);

    return new_block->offset;
}

ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
{
    return qemu_ram_alloc_from_ptr(size, NULL, mr);
}

void qemu_ram_free_from_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();
}

void qemu_ram_free(ram_addr_t addr)
{
    RAMBlock *block;

    /* This assumes the iothread lock is taken here too.  */
    qemu_mutex_lock_ramlist();
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr == block->offset) {
            QTAILQ_REMOVE(&ram_list.blocks, block, next);
            ram_list.mru_block = NULL;
            ram_list.version++;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else if (mem_path) {
#if defined (__linux__) && !defined(TARGET_S390X)
                if (block->fd) {
                    munmap(block->host, block->length);
                    close(block->fd);
                } else {
                    qemu_vfree(block->host);
                }
#else
                abort();
#endif
            } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                munmap(block->host, block->length);
#else
                if (xen_enabled()) {
                    xen_invalidate_map_cache_entry(block->host);
                } else {
                    qemu_vfree(block->host);
                }
#endif
            }
            g_free(block);
            break;
        }
    }
    qemu_mutex_unlock_ramlist();

}

#ifndef _WIN32
void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
{
    RAMBlock *block;
    ram_addr_t offset;
    int flags;
    void *area, *vaddr;

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        offset = addr - block->offset;
        if (offset < block->length) {
            vaddr = block->host + offset;
            if (block->flags & RAM_PREALLOC_MASK) {
                ;
            } else {
                flags = MAP_FIXED;
                munmap(vaddr, length);
                if (mem_path) {
#if defined(__linux__) && !defined(TARGET_S390X)
                    if (block->fd) {
#ifdef MAP_POPULATE
                        flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
                            MAP_PRIVATE;
#else
                        flags |= MAP_PRIVATE;
#endif
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, block->fd, offset);
                    } else {
                        flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                        area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                    flags, -1, 0);
                    }
#else
                    abort();
#endif
                } else {
#if defined(TARGET_S390X) && defined(CONFIG_KVM)
                    flags |= MAP_SHARED | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
                                flags, -1, 0);
#else
                    flags |= MAP_PRIVATE | MAP_ANONYMOUS;
                    area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
                                flags, -1, 0);
#endif
                }
                if (area != vaddr) {
                    fprintf(stderr, "Could not remap addr: "
                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                            length, addr);
                    exit(1);
                }
                memory_try_enable_merging(vaddr, length);
                qemu_ram_setup_dump(vaddr, length);
            }
            return;
        }
    }
}
#endif /* !_WIN32 */

/* Return a host pointer to ram allocated with qemu_ram_alloc.
   With the exception of the softmmu code in this file, this should
   only be used for local memory (e.g. video ram) that the device owns,
   and knows it isn't going to access beyond the end of the block.

   It should not be used for general purpose DMA.
   Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
 */
void *qemu_get_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    block = ram_list.mru_block;
    if (block && addr - block->offset < block->length) {
        goto found;
    }
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            goto found;
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

found:
    ram_list.mru_block = block;
    if (xen_enabled()) {
        /* We need to check if the requested address is in the RAM
         * because we don't want to map the entire memory in QEMU.
         * In that case just map until the end of the page.
         */
        if (block->offset == 0) {
            return xen_map_cache(addr, 0, 0);
        } else if (block->host == NULL) {
            block->host =
                xen_map_cache(block->offset, block->length, 1);
        }
    }
    return block->host + (addr - block->offset);
}

/* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
 * qemu_get_ram_ptr but do not touch ram_list.mru_block.
 *
 * ??? Is this still necessary?
 */
static void *qemu_safe_ram_ptr(ram_addr_t addr)
{
    RAMBlock *block;

    /* The list is protected by the iothread lock here.  */
    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        if (addr - block->offset < block->length) {
            if (xen_enabled()) {
                /* We need to check if the requested address is in the RAM
                 * because we don't want to map the entire memory in QEMU.
                 * In that case just map until the end of the page.
                 */
                if (block->offset == 0) {
                    return xen_map_cache(addr, 0, 0);
                } else if (block->host == NULL) {
                    block->host =
                        xen_map_cache(block->offset, block->length, 1);
                }
            }
            return block->host + (addr - block->offset);
        }
    }

    fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
    abort();

    return NULL;
}

/* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
 * but takes a size argument */
static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
{
    if (*size == 0) {
        return NULL;
    }
    if (xen_enabled()) {
        return xen_map_cache(addr, *size, 1);
    } else {
        RAMBlock *block;

        QTAILQ_FOREACH(block, &ram_list.blocks, next) {
            if (addr - block->offset < block->length) {
                if (addr - block->offset + *size > block->length)
                    *size = block->length - addr + block->offset;
                return block->host + (addr - block->offset);
            }
        }

        fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
        abort();
    }
}

void qemu_put_ram_ptr(void *addr)
{
    trace_qemu_put_ram_ptr(addr);
}

int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
{
    RAMBlock *block;
    uint8_t *host = ptr;

    if (xen_enabled()) {
        *ram_addr = xen_ram_addr_from_mapcache(ptr);
        return 0;
    }

    QTAILQ_FOREACH(block, &ram_list.blocks, next) {
        /* This case happens when the block is not mapped. */
        if (block->host == NULL) {
            continue;
        }
        if (host - block->host < block->length) {
            *ram_addr = block->offset + (host - block->host);
            return 0;
        }
    }

    return -1;
}

/* Some of the softmmu routines need to translate from a host pointer
   (typically a TLB entry) back to a ram offset.  */
ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
{
    ram_addr_t ram_addr;

    if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
        fprintf(stderr, "Bad ram pointer %p\n", ptr);
        abort();
    }
    return ram_addr;
}

static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
                                    unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
#endif
    return 0;
}

static void unassigned_mem_write(void *opaque, hwaddr addr,
                                 uint64_t val, unsigned size)
{
#ifdef DEBUG_UNASSIGNED
    printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
#endif
#if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
    cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
#endif
}

static const MemoryRegionOps unassigned_mem_ops = {
    .read = unassigned_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t error_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    abort();
}

static void error_mem_write(void *opaque, hwaddr addr,
                            uint64_t value, unsigned size)
{
    abort();
}

static const MemoryRegionOps error_mem_ops = {
    .read = error_mem_read,
    .write = error_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static const MemoryRegionOps rom_mem_ops = {
    .read = error_mem_read,
    .write = unassigned_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

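/* Write handler installed for RAM pages that are not fully dirty: invalidate
   any TBs derived from the page when needed, perform the store, then update
   the dirty flags; once the page is fully dirty the fast write path is
   restored.  */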
static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
                               uint64_t val, unsigned size)
{
    int dirty_flags;
    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
#if !defined(CONFIG_USER_ONLY)
        tb_invalidate_phys_page_fast(ram_addr, size);
        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
#endif
    }
    switch (size) {
    case 1:
        stb_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 2:
        stw_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    case 4:
        stl_p(qemu_get_ram_ptr(ram_addr), val);
        break;
    default:
        abort();
    }
    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
    /* we remove the notdirty callback only if the code has been
       flushed */
    if (dirty_flags == 0xff)
        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
}

static const MemoryRegionOps notdirty_mem_ops = {
    .read = error_mem_read,
    .write = notdirty_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

/* Generate a debug exception if a watchpoint has been hit.  */
static void check_watchpoint(int offset, int len_mask, int flags)
{
    CPUArchState *env = cpu_single_env;
    target_ulong pc, cs_base;
    target_ulong vaddr;
    CPUWatchpoint *wp;
    int cpu_flags;

    if (env->watchpoint_hit) {
        /* We re-entered the check after replacing the TB. Now raise
         * the debug interrupt so that it will trigger after the
         * current instruction. */
        cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
        return;
    }
    vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
    QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
        if ((vaddr == (wp->vaddr & len_mask) ||
             (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
            wp->flags |= BP_WATCHPOINT_HIT;
            if (!env->watchpoint_hit) {
                env->watchpoint_hit = wp;
                tb_check_watchpoint(env);
                if (wp->flags & BP_STOP_BEFORE_ACCESS) {
                    env->exception_index = EXCP_DEBUG;
                    cpu_loop_exit(env);
                } else {
                    cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
                    tb_gen_code(env, pc, cs_base, cpu_flags, 1);
                    cpu_resume_from_signal(env, NULL);
                }
            }
        } else {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }
}

/* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
   so these check for a hit then pass through to the normal out-of-line
   phys routines.  */
static uint64_t watch_mem_read(void *opaque, hwaddr addr,
                               unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
    switch (size) {
    case 1: return ldub_phys(addr);
    case 2: return lduw_phys(addr);
    case 4: return ldl_phys(addr);
    default: abort();
    }
}

static void watch_mem_write(void *opaque, hwaddr addr,
                            uint64_t val, unsigned size)
{
    check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
    switch (size) {
    case 1:
        stb_phys(addr, val);
        break;
    case 2:
        stw_phys(addr, val);
        break;
    case 4:
        stl_phys(addr, val);
        break;
    default: abort();
    }
}

static const MemoryRegionOps watch_mem_ops = {
    .read = watch_mem_read,
    .write = watch_mem_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

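/* Subpage dispatch: when a single target page is shared by more than one
   region, a subpage_t container backs the page and forwards each access to
   the section that owns the accessed offset within the page.  */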
static uint64_t subpage_read(void *opaque, hwaddr addr,
                             unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
           mmio, len, addr, idx);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    return io_mem_read(section->mr, addr, len);
}

static void subpage_write(void *opaque, hwaddr addr,
                          uint64_t value, unsigned len)
{
    subpage_t *mmio = opaque;
    unsigned int idx = SUBPAGE_IDX(addr);
    MemoryRegionSection *section;
#if defined(DEBUG_SUBPAGE)
    printf("%s: subpage %p len %d addr " TARGET_FMT_plx
           " idx %d value %"PRIx64"\n",
           __func__, mmio, len, addr, idx, value);
#endif

    section = &phys_sections[mmio->sub_section[idx]];
    addr += mmio->base;
    addr -= section->offset_within_address_space;
    addr += section->offset_within_region;
    io_mem_write(section->mr, addr, value, len);
}

static const MemoryRegionOps subpage_ops = {
    .read = subpage_read,
    .write = subpage_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
                                 unsigned size)
{
    ram_addr_t raddr = addr;
    void *ptr = qemu_get_ram_ptr(raddr);
    switch (size) {
    case 1: return ldub_p(ptr);
    case 2: return lduw_p(ptr);
    case 4: return ldl_p(ptr);
    default: abort();
    }
}

static void subpage_ram_write(void *opaque, hwaddr addr,
                              uint64_t value, unsigned size)
{
    ram_addr_t raddr = addr;
    void *ptr = qemu_get_ram_ptr(raddr);
    switch (size) {
    case 1: return stb_p(ptr, value);
    case 2: return stw_p(ptr, value);
    case 4: return stl_p(ptr, value);
    default: abort();
    }
}

static const MemoryRegionOps subpage_ram_ops = {
    .read = subpage_ram_read,
    .write = subpage_ram_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
                             uint16_t section)
{
    int idx, eidx;

    if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
        return -1;
    idx = SUBPAGE_IDX(start);
    eidx = SUBPAGE_IDX(end);
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
           __func__, mmio, start, end, idx, eidx, section);
#endif
    if (memory_region_is_ram(phys_sections[section].mr)) {
        MemoryRegionSection new_section = phys_sections[section];
        new_section.mr = &io_mem_subpage_ram;
        section = phys_section_add(&new_section);
    }
    for (; idx <= eidx; idx++) {
        mmio->sub_section[idx] = section;
    }

    return 0;
}

static subpage_t *subpage_init(hwaddr base)
{
    subpage_t *mmio;

    mmio = g_malloc0(sizeof(subpage_t));

    mmio->base = base;
    memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
                          "subpage", TARGET_PAGE_SIZE);
    mmio->iomem.subpage = true;
#if defined(DEBUG_SUBPAGE)
    printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
           mmio, base, TARGET_PAGE_SIZE);
#endif
    subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);

    return mmio;
}

static uint16_t dummy_section(MemoryRegion *mr)
{
    MemoryRegionSection section = {
        .mr = mr,
        .offset_within_address_space = 0,
        .offset_within_region = 0,
        .size = UINT64_MAX,
    };

    return phys_section_add(&section);
}

MemoryRegion *iotlb_to_region(hwaddr index)
{
    return phys_sections[index & ~TARGET_PAGE_MASK].mr;
}

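/* Create the fixed I/O MemoryRegions used by the dispatch code: RAM, ROM,
   unassigned, notdirty, subpage-RAM and watchpoint regions.  */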
static void io_mem_init(void)
{
    memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
    memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
    memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
                          "unassigned", UINT64_MAX);
    memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
                          "notdirty", UINT64_MAX);
    memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
                          "subpage-ram", UINT64_MAX);
    memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
                          "watch", UINT64_MAX);
}

static void mem_begin(MemoryListener *listener)
{
    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);

    destroy_all_mappings(d);
    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
}

static void core_begin(MemoryListener *listener)
{
    phys_sections_clear();
    phys_section_unassigned = dummy_section(&io_mem_unassigned);
    phys_section_notdirty = dummy_section(&io_mem_notdirty);
    phys_section_rom = dummy_section(&io_mem_rom);
    phys_section_watch = dummy_section(&io_mem_watch);
}

static void tcg_commit(MemoryListener *listener)
{
    CPUArchState *env;

    /* since each CPU stores ram addresses in its TLB cache, we must
       reset the modified entries */
    /* XXX: slow ! */
    for(env = first_cpu; env != NULL; env = env->next_cpu) {
        tlb_flush(env, 1);
    }
}

static void core_log_global_start(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(1);
}

static void core_log_global_stop(MemoryListener *listener)
{
    cpu_physical_memory_set_dirty_tracking(0);
}

static void io_region_add(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);

    mrio->mr = section->mr;
    mrio->offset = section->offset_within_region;
    iorange_init(&mrio->iorange, &memory_region_iorange_ops,
                 section->offset_within_address_space, section->size);
    ioport_register(&mrio->iorange);
}

static void io_region_del(MemoryListener *listener,
                          MemoryRegionSection *section)
{
    isa_unassign_ioport(section->offset_within_address_space, section->size);
}

static MemoryListener core_memory_listener = {
    .begin = core_begin,
    .log_global_start = core_log_global_start,
    .log_global_stop = core_log_global_stop,
    .priority = 1,
};

static MemoryListener io_memory_listener = {
    .region_add = io_region_add,
    .region_del = io_region_del,
    .priority = 0,
};

static MemoryListener tcg_memory_listener = {
    .commit = tcg_commit,
};

1757 void address_space_init_dispatch(AddressSpace *as)
1758 {
1759     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1760
1761     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1762     d->listener = (MemoryListener) {
1763         .begin = mem_begin,
1764         .region_add = mem_add,
1765         .region_nop = mem_add,
1766         .priority = 0,
1767     };
1768     as->dispatch = d;
1769     memory_listener_register(&d->listener, as);
1770 }
1771
1772 void address_space_destroy_dispatch(AddressSpace *as)
1773 {
1774     AddressSpaceDispatch *d = as->dispatch;
1775
1776     memory_listener_unregister(&d->listener);
1777     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1778     g_free(d);
1779     as->dispatch = NULL;
1780 }
1781
1782 static void memory_map_init(void)
1783 {
1784     system_memory = g_malloc(sizeof(*system_memory));
1785     memory_region_init(system_memory, "system", INT64_MAX);
1786     address_space_init(&address_space_memory, system_memory);
1787     address_space_memory.name = "memory";
1788
1789     system_io = g_malloc(sizeof(*system_io));
1790     memory_region_init(system_io, "io", 65536);
1791     address_space_init(&address_space_io, system_io);
1792     address_space_io.name = "I/O";
1793
1794     memory_listener_register(&core_memory_listener, &address_space_memory);
1795     memory_listener_register(&io_memory_listener, &address_space_io);
1796     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1797
1798     dma_context_init(&dma_context_memory, &address_space_memory,
1799                      NULL, NULL, NULL);
1800 }
1801
1802 MemoryRegion *get_system_memory(void)
1803 {
1804     return system_memory;
1805 }
1806
1807 MemoryRegion *get_system_io(void)
1808 {
1809     return system_io;
1810 }
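/* Illustrative sketch (assumptions: memory_region_init_ram() and
 * memory_region_add_subregion() from the memory API, and a board wanting
 * 128 MB of RAM at guest physical address 0).  Board code typically builds
 * on the regions returned above roughly like this:
 *
 *     MemoryRegion *ram = g_new(MemoryRegion, 1);
 *     memory_region_init_ram(ram, "board.ram", 128 * 1024 * 1024);
 *     memory_region_add_subregion(get_system_memory(), 0, ram);
 *
 * The core/io/tcg listeners registered in memory_map_init() then see the new
 * region and rebuild their dispatch state accordingly.
 */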
1811
1812 #endif /* !defined(CONFIG_USER_ONLY) */
1813
1814 /* physical memory access (slow version, mainly for debug) */
1815 #if defined(CONFIG_USER_ONLY)
1816 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1817                         uint8_t *buf, int len, int is_write)
1818 {
1819     int l, flags;
1820     target_ulong page;
1821     void * p;
1822
1823     while (len > 0) {
1824         page = addr & TARGET_PAGE_MASK;
1825         l = (page + TARGET_PAGE_SIZE) - addr;
1826         if (l > len)
1827             l = len;
1828         flags = page_get_flags(page);
1829         if (!(flags & PAGE_VALID))
1830             return -1;
1831         if (is_write) {
1832             if (!(flags & PAGE_WRITE))
1833                 return -1;
1834             /* XXX: this code should not depend on lock_user */
1835             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1836                 return -1;
1837             memcpy(p, buf, l);
1838             unlock_user(p, addr, l);
1839         } else {
1840             if (!(flags & PAGE_READ))
1841                 return -1;
1842             /* XXX: this code should not depend on lock_user */
1843             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1844                 return -1;
1845             memcpy(buf, p, l);
1846             unlock_user(p, addr, 0);
1847         }
1848         len -= l;
1849         buf += l;
1850         addr += l;
1851     }
1852     return 0;
1853 }
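/* Illustrative sketch (assumes a caller that already has a CPUArchState
 * "env" and a guest virtual address "pc"): a debugger front end fetches
 * guest memory through the helper above and must check for unmapped pages:
 *
 *     uint8_t insn[4];
 *     if (cpu_memory_rw_debug(env, pc, insn, sizeof(insn), 0) < 0) {
 *         ... page not mapped or not readable ...
 *     }
 */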
1854
1855 #else
1856
1857 static void invalidate_and_set_dirty(hwaddr addr,
1858                                      hwaddr length)
1859 {
1860     if (!cpu_physical_memory_is_dirty(addr)) {
1861         /* invalidate code */
1862         tb_invalidate_phys_page_range(addr, addr + length, 0);
1863         /* set dirty bit */
1864         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1865     }
1866     xen_modified_memory(addr, length);
1867 }
1868
1869 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1870                       int len, bool is_write)
1871 {
1872     AddressSpaceDispatch *d = as->dispatch;
1873     int l;
1874     uint8_t *ptr;
1875     uint32_t val;
1876     hwaddr page;
1877     MemoryRegionSection *section;
1878
1879     while (len > 0) {
1880         page = addr & TARGET_PAGE_MASK;
1881         l = (page + TARGET_PAGE_SIZE) - addr;
1882         if (l > len)
1883             l = len;
1884         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1885
1886         if (is_write) {
1887             if (!memory_region_is_ram(section->mr)) {
1888                 hwaddr addr1;
1889                 addr1 = memory_region_section_addr(section, addr);
1890                 /* XXX: could force cpu_single_env to NULL to avoid
1891                    potential bugs */
1892                 if (l >= 4 && ((addr1 & 3) == 0)) {
1893                     /* 32 bit write access */
1894                     val = ldl_p(buf);
1895                     io_mem_write(section->mr, addr1, val, 4);
1896                     l = 4;
1897                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1898                     /* 16 bit write access */
1899                     val = lduw_p(buf);
1900                     io_mem_write(section->mr, addr1, val, 2);
1901                     l = 2;
1902                 } else {
1903                     /* 8 bit write access */
1904                     val = ldub_p(buf);
1905                     io_mem_write(section->mr, addr1, val, 1);
1906                     l = 1;
1907                 }
1908             } else if (!section->readonly) {
1909                 ram_addr_t addr1;
1910                 addr1 = memory_region_get_ram_addr(section->mr)
1911                     + memory_region_section_addr(section, addr);
1912                 /* RAM case */
1913                 ptr = qemu_get_ram_ptr(addr1);
1914                 memcpy(ptr, buf, l);
1915                 invalidate_and_set_dirty(addr1, l);
1916                 qemu_put_ram_ptr(ptr);
1917             }
1918         } else {
1919             if (!(memory_region_is_ram(section->mr) ||
1920                   memory_region_is_romd(section->mr))) {
1921                 hwaddr addr1;
1922                 /* I/O case */
1923                 addr1 = memory_region_section_addr(section, addr);
1924                 if (l >= 4 && ((addr1 & 3) == 0)) {
1925                     /* 32 bit read access */
1926                     val = io_mem_read(section->mr, addr1, 4);
1927                     stl_p(buf, val);
1928                     l = 4;
1929                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1930                     /* 16 bit read access */
1931                     val = io_mem_read(section->mr, addr1, 2);
1932                     stw_p(buf, val);
1933                     l = 2;
1934                 } else {
1935                     /* 8 bit read access */
1936                     val = io_mem_read(section->mr, addr1, 1);
1937                     stb_p(buf, val);
1938                     l = 1;
1939                 }
1940             } else {
1941                 /* RAM case */
1942                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1943                                        + memory_region_section_addr(section,
1944                                                                     addr));
1945                 memcpy(buf, ptr, l);
1946                 qemu_put_ram_ptr(ptr);
1947             }
1948         }
1949         len -= l;
1950         buf += l;
1951         addr += l;
1952     }
1953 }
1954
1955 void address_space_write(AddressSpace *as, hwaddr addr,
1956                          const uint8_t *buf, int len)
1957 {
1958     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1959 }
1960
1961 /**
1962  * address_space_read: read from an address space.
1963  *
1964  * @as: #AddressSpace to be accessed
1965  * @addr: address within that address space
1966  * @buf: buffer into which the data is read; @len gives its length in bytes
1967  */
1968 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1969 {
1970     address_space_rw(as, addr, buf, len, false);
1971 }
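/* Illustrative sketch (assumes a device model that already knows a guest
 * physical address "gpa"): the two wrappers above are the usual way to copy
 * small amounts of data to and from guest memory, e.g. a descriptor:
 *
 *     struct { uint64_t addr; uint32_t len; } desc;
 *     address_space_read(&address_space_memory, gpa,
 *                        (uint8_t *)&desc, sizeof(desc));
 *     desc.len = 0;
 *     address_space_write(&address_space_memory, gpa,
 *                         (const uint8_t *)&desc, sizeof(desc));
 *
 * The fields arrive in guest layout; real code would convert endianness with
 * the ld*_p/st*_p helpers rather than copying raw structs.
 */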
1972
1973
1974 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1975                             int len, int is_write)
1976 {
1977     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1978 }
1979
1980 /* used for ROM loading: can write to RAM and ROM */
1981 void cpu_physical_memory_write_rom(hwaddr addr,
1982                                    const uint8_t *buf, int len)
1983 {
1984     AddressSpaceDispatch *d = address_space_memory.dispatch;
1985     int l;
1986     uint8_t *ptr;
1987     hwaddr page;
1988     MemoryRegionSection *section;
1989
1990     while (len > 0) {
1991         page = addr & TARGET_PAGE_MASK;
1992         l = (page + TARGET_PAGE_SIZE) - addr;
1993         if (l > len)
1994             l = len;
1995         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1996
1997         if (!(memory_region_is_ram(section->mr) ||
1998               memory_region_is_romd(section->mr))) {
1999             /* do nothing */
2000         } else {
2001             unsigned long addr1;
2002             addr1 = memory_region_get_ram_addr(section->mr)
2003                 + memory_region_section_addr(section, addr);
2004             /* ROM/RAM case */
2005             ptr = qemu_get_ram_ptr(addr1);
2006             memcpy(ptr, buf, l);
2007             invalidate_and_set_dirty(addr1, l);
2008             qemu_put_ram_ptr(ptr);
2009         }
2010         len -= l;
2011         buf += l;
2012         addr += l;
2013     }
2014 }
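/* Illustrative sketch (assumes a firmware blob "blob"/"blob_size" loaded by
 * the caller and a hypothetical flash base address): ROM loaders use this
 * helper precisely because a plain cpu_physical_memory_write() would be
 * silently dropped on a readonly section:
 *
 *     cpu_physical_memory_write_rom(0xfffc0000, blob, blob_size);
 */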
2015
2016 typedef struct {
2017     void *buffer;
2018     hwaddr addr;
2019     hwaddr len;
2020 } BounceBuffer;
2021
2022 static BounceBuffer bounce;
2023
2024 typedef struct MapClient {
2025     void *opaque;
2026     void (*callback)(void *opaque);
2027     QLIST_ENTRY(MapClient) link;
2028 } MapClient;
2029
2030 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2031     = QLIST_HEAD_INITIALIZER(map_client_list);
2032
2033 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2034 {
2035     MapClient *client = g_malloc(sizeof(*client));
2036
2037     client->opaque = opaque;
2038     client->callback = callback;
2039     QLIST_INSERT_HEAD(&map_client_list, client, link);
2040     return client;
2041 }
2042
2043 static void cpu_unregister_map_client(void *_client)
2044 {
2045     MapClient *client = (MapClient *)_client;
2046
2047     QLIST_REMOVE(client, link);
2048     g_free(client);
2049 }
2050
2051 static void cpu_notify_map_clients(void)
2052 {
2053     MapClient *client;
2054
2055     while (!QLIST_EMPTY(&map_client_list)) {
2056         client = QLIST_FIRST(&map_client_list);
2057         client->callback(client->opaque);
2058         cpu_unregister_map_client(client);
2059     }
2060 }
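/* Illustrative sketch (hypothetical "retry_dma" callback and "dev" opaque):
 * when address_space_map() below fails because the single bounce buffer is
 * busy, a device can ask to be called back once it is released:
 *
 *     static void retry_dma(void *opaque)
 *     {
 *         MyDevice *dev = opaque;
 *         ... try address_space_map() again ...
 *     }
 *
 *     if (!(ptr = address_space_map(as, addr, &len, is_write))) {
 *         cpu_register_map_client(dev, retry_dma);
 *     }
 *
 * cpu_notify_map_clients() runs each registered callback exactly once, from
 * address_space_unmap(), when the bounce buffer becomes free again.
 */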
2061
2062 /* Map a physical memory region into a host virtual address.
2063  * May map a subset of the requested range, given by and returned in *plen.
2064  * May return NULL if resources needed to perform the mapping are exhausted.
2065  * Use only for reads OR writes - not for read-modify-write operations.
2066  * Use cpu_register_map_client() to know when retrying the map operation is
2067  * likely to succeed.
2068  */
2069 void *address_space_map(AddressSpace *as,
2070                         hwaddr addr,
2071                         hwaddr *plen,
2072                         bool is_write)
2073 {
2074     AddressSpaceDispatch *d = as->dispatch;
2075     hwaddr len = *plen;
2076     hwaddr todo = 0;
2077     int l;
2078     hwaddr page;
2079     MemoryRegionSection *section;
2080     ram_addr_t raddr = RAM_ADDR_MAX;
2081     ram_addr_t rlen;
2082     void *ret;
2083
2084     while (len > 0) {
2085         page = addr & TARGET_PAGE_MASK;
2086         l = (page + TARGET_PAGE_SIZE) - addr;
2087         if (l > len)
2088             l = len;
2089         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2090
2091         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2092             if (todo || bounce.buffer) {
2093                 break;
2094             }
2095             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2096             bounce.addr = addr;
2097             bounce.len = l;
2098             if (!is_write) {
2099                 address_space_read(as, addr, bounce.buffer, l);
2100             }
2101
2102             *plen = l;
2103             return bounce.buffer;
2104         }
2105         if (!todo) {
2106             raddr = memory_region_get_ram_addr(section->mr)
2107                 + memory_region_section_addr(section, addr);
2108         }
2109
2110         len -= l;
2111         addr += l;
2112         todo += l;
2113     }
2114     rlen = todo;
2115     ret = qemu_ram_ptr_length(raddr, &rlen);
2116     *plen = rlen;
2117     return ret;
2118 }
2119
2120 /* Unmaps a memory region previously mapped by address_space_map().
2121  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2122  * the amount of memory that was actually read or written by the caller.
2123  */
2124 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2125                          int is_write, hwaddr access_len)
2126 {
2127     if (buffer != bounce.buffer) {
2128         if (is_write) {
2129             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2130             while (access_len) {
2131                 unsigned l;
2132                 l = TARGET_PAGE_SIZE;
2133                 if (l > access_len)
2134                     l = access_len;
2135                 invalidate_and_set_dirty(addr1, l);
2136                 addr1 += l;
2137                 access_len -= l;
2138             }
2139         }
2140         if (xen_enabled()) {
2141             xen_invalidate_map_cache_entry(buffer);
2142         }
2143         return;
2144     }
2145     if (is_write) {
2146         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2147     }
2148     qemu_vfree(bounce.buffer);
2149     bounce.buffer = NULL;
2150     cpu_notify_map_clients();
2151 }
2152
2153 void *cpu_physical_memory_map(hwaddr addr,
2154                               hwaddr *plen,
2155                               int is_write)
2156 {
2157     return address_space_map(&address_space_memory, addr, plen, is_write);
2158 }
2159
2160 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2161                                int is_write, hwaddr access_len)
2162 {
2163     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2164 }
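/* Illustrative sketch (assumes a guest physical buffer at "gpa" of "size"
 * bytes that a device wants to fill): the usual zero-copy pattern is map,
 * access, then unmap with the number of bytes actually touched:
 *
 *     hwaddr len = size;
 *     void *host = cpu_physical_memory_map(gpa, &len, 1);
 *     if (host) {
 *         memset(host, 0, len);            device fills the buffer
 *         cpu_physical_memory_unmap(host, len, 1, len);
 *     }
 *
 * len may come back smaller than requested (or the call may fall back to the
 * bounce buffer), so callers must loop or retry for the remainder.
 */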
2165
2166 /* warning: addr must be aligned */
2167 static inline uint32_t ldl_phys_internal(hwaddr addr,
2168                                          enum device_endian endian)
2169 {
2170     uint8_t *ptr;
2171     uint32_t val;
2172     MemoryRegionSection *section;
2173
2174     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2175
2176     if (!(memory_region_is_ram(section->mr) ||
2177           memory_region_is_romd(section->mr))) {
2178         /* I/O case */
2179         addr = memory_region_section_addr(section, addr);
2180         val = io_mem_read(section->mr, addr, 4);
2181 #if defined(TARGET_WORDS_BIGENDIAN)
2182         if (endian == DEVICE_LITTLE_ENDIAN) {
2183             val = bswap32(val);
2184         }
2185 #else
2186         if (endian == DEVICE_BIG_ENDIAN) {
2187             val = bswap32(val);
2188         }
2189 #endif
2190     } else {
2191         /* RAM case */
2192         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2193                                 & TARGET_PAGE_MASK)
2194                                + memory_region_section_addr(section, addr));
2195         switch (endian) {
2196         case DEVICE_LITTLE_ENDIAN:
2197             val = ldl_le_p(ptr);
2198             break;
2199         case DEVICE_BIG_ENDIAN:
2200             val = ldl_be_p(ptr);
2201             break;
2202         default:
2203             val = ldl_p(ptr);
2204             break;
2205         }
2206     }
2207     return val;
2208 }
2209
2210 uint32_t ldl_phys(hwaddr addr)
2211 {
2212     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2213 }
2214
2215 uint32_t ldl_le_phys(hwaddr addr)
2216 {
2217     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2218 }
2219
2220 uint32_t ldl_be_phys(hwaddr addr)
2221 {
2222     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2223 }
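/* Illustrative sketch (hypothetical MMIO register block at guest physical
 * address "REG_BASE"): callers pick the variant matching the bus endianness
 * rather than byte-swapping by hand:
 *
 *     uint32_t id     = ldl_be_phys(REG_BASE + 0x00);    big-endian bus
 *     uint32_t status = ldl_le_phys(REG_BASE + 0x04);    little-endian bus
 *     uint32_t raw    = ldl_phys(REG_BASE + 0x08);       target-native order
 */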
2224
2225 /* warning: addr must be aligned */
2226 static inline uint64_t ldq_phys_internal(hwaddr addr,
2227                                          enum device_endian endian)
2228 {
2229     uint8_t *ptr;
2230     uint64_t val;
2231     MemoryRegionSection *section;
2232
2233     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2234
2235     if (!(memory_region_is_ram(section->mr) ||
2236           memory_region_is_romd(section->mr))) {
2237         /* I/O case */
2238         addr = memory_region_section_addr(section, addr);
2239
2240         /* XXX This is broken when device endian != cpu endian.
2241                Fix and add "endian" variable check */
2242 #ifdef TARGET_WORDS_BIGENDIAN
2243         val = io_mem_read(section->mr, addr, 4) << 32;
2244         val |= io_mem_read(section->mr, addr + 4, 4);
2245 #else
2246         val = io_mem_read(section->mr, addr, 4);
2247         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2248 #endif
2249     } else {
2250         /* RAM case */
2251         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2252                                 & TARGET_PAGE_MASK)
2253                                + memory_region_section_addr(section, addr));
2254         switch (endian) {
2255         case DEVICE_LITTLE_ENDIAN:
2256             val = ldq_le_p(ptr);
2257             break;
2258         case DEVICE_BIG_ENDIAN:
2259             val = ldq_be_p(ptr);
2260             break;
2261         default:
2262             val = ldq_p(ptr);
2263             break;
2264         }
2265     }
2266     return val;
2267 }
2268
2269 uint64_t ldq_phys(hwaddr addr)
2270 {
2271     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2272 }
2273
2274 uint64_t ldq_le_phys(hwaddr addr)
2275 {
2276     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2277 }
2278
2279 uint64_t ldq_be_phys(hwaddr addr)
2280 {
2281     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2282 }
2283
2284 /* XXX: optimize */
2285 uint32_t ldub_phys(hwaddr addr)
2286 {
2287     uint8_t val;
2288     cpu_physical_memory_read(addr, &val, 1);
2289     return val;
2290 }
2291
2292 /* warning: addr must be aligned */
2293 static inline uint32_t lduw_phys_internal(hwaddr addr,
2294                                           enum device_endian endian)
2295 {
2296     uint8_t *ptr;
2297     uint64_t val;
2298     MemoryRegionSection *section;
2299
2300     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2301
2302     if (!(memory_region_is_ram(section->mr) ||
2303           memory_region_is_romd(section->mr))) {
2304         /* I/O case */
2305         addr = memory_region_section_addr(section, addr);
2306         val = io_mem_read(section->mr, addr, 2);
2307 #if defined(TARGET_WORDS_BIGENDIAN)
2308         if (endian == DEVICE_LITTLE_ENDIAN) {
2309             val = bswap16(val);
2310         }
2311 #else
2312         if (endian == DEVICE_BIG_ENDIAN) {
2313             val = bswap16(val);
2314         }
2315 #endif
2316     } else {
2317         /* RAM case */
2318         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2319                                 & TARGET_PAGE_MASK)
2320                                + memory_region_section_addr(section, addr));
2321         switch (endian) {
2322         case DEVICE_LITTLE_ENDIAN:
2323             val = lduw_le_p(ptr);
2324             break;
2325         case DEVICE_BIG_ENDIAN:
2326             val = lduw_be_p(ptr);
2327             break;
2328         default:
2329             val = lduw_p(ptr);
2330             break;
2331         }
2332     }
2333     return val;
2334 }
2335
2336 uint32_t lduw_phys(hwaddr addr)
2337 {
2338     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2339 }
2340
2341 uint32_t lduw_le_phys(hwaddr addr)
2342 {
2343     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2344 }
2345
2346 uint32_t lduw_be_phys(hwaddr addr)
2347 {
2348     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2349 }
2350
2351 /* warning: addr must be aligned. The RAM page is not marked as dirty
2352    and the code inside is not invalidated. It is useful if the dirty
2353    bits are used to track modified PTEs */
2354 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2355 {
2356     uint8_t *ptr;
2357     MemoryRegionSection *section;
2358
2359     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2360
2361     if (!memory_region_is_ram(section->mr) || section->readonly) {
2362         addr = memory_region_section_addr(section, addr);
2363         if (memory_region_is_ram(section->mr)) {
2364             section = &phys_sections[phys_section_rom];
2365         }
2366         io_mem_write(section->mr, addr, val, 4);
2367     } else {
2368         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2369                                & TARGET_PAGE_MASK)
2370             + memory_region_section_addr(section, addr);
2371         ptr = qemu_get_ram_ptr(addr1);
2372         stl_p(ptr, val);
2373
2374         if (unlikely(in_migration)) {
2375             if (!cpu_physical_memory_is_dirty(addr1)) {
2376                 /* invalidate code */
2377                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2378                 /* set dirty bit */
2379                 cpu_physical_memory_set_dirty_flags(
2380                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2381             }
2382         }
2383     }
2384 }
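/* Illustrative sketch (assumes target MMU code that has the guest physical
 * address "pte_addr" of a page table entry and an updated value "pte";
 * PG_ACCESSED_MASK stands in for whatever bit the target defines):
 * hardware-style accessed/dirty bit updates use the _notdirty store so the
 * write itself does not flag the page as modified for TCG or migration:
 *
 *     stl_phys_notdirty(pte_addr, pte | PG_ACCESSED_MASK);
 */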
2385
2386 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2387 {
2388     uint8_t *ptr;
2389     MemoryRegionSection *section;
2390
2391     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2392
2393     if (!memory_region_is_ram(section->mr) || section->readonly) {
2394         addr = memory_region_section_addr(section, addr);
2395         if (memory_region_is_ram(section->mr)) {
2396             section = &phys_sections[phys_section_rom];
2397         }
2398 #ifdef TARGET_WORDS_BIGENDIAN
2399         io_mem_write(section->mr, addr, val >> 32, 4);
2400         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2401 #else
2402         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2403         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2404 #endif
2405     } else {
2406         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2407                                 & TARGET_PAGE_MASK)
2408                                + memory_region_section_addr(section, addr));
2409         stq_p(ptr, val);
2410     }
2411 }
2412
2413 /* warning: addr must be aligned */
2414 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2415                                      enum device_endian endian)
2416 {
2417     uint8_t *ptr;
2418     MemoryRegionSection *section;
2419
2420     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2421
2422     if (!memory_region_is_ram(section->mr) || section->readonly) {
2423         addr = memory_region_section_addr(section, addr);
2424         if (memory_region_is_ram(section->mr)) {
2425             section = &phys_sections[phys_section_rom];
2426         }
2427 #if defined(TARGET_WORDS_BIGENDIAN)
2428         if (endian == DEVICE_LITTLE_ENDIAN) {
2429             val = bswap32(val);
2430         }
2431 #else
2432         if (endian == DEVICE_BIG_ENDIAN) {
2433             val = bswap32(val);
2434         }
2435 #endif
2436         io_mem_write(section->mr, addr, val, 4);
2437     } else {
2438         unsigned long addr1;
2439         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2440             + memory_region_section_addr(section, addr);
2441         /* RAM case */
2442         ptr = qemu_get_ram_ptr(addr1);
2443         switch (endian) {
2444         case DEVICE_LITTLE_ENDIAN:
2445             stl_le_p(ptr, val);
2446             break;
2447         case DEVICE_BIG_ENDIAN:
2448             stl_be_p(ptr, val);
2449             break;
2450         default:
2451             stl_p(ptr, val);
2452             break;
2453         }
2454         invalidate_and_set_dirty(addr1, 4);
2455     }
2456 }
2457
2458 void stl_phys(hwaddr addr, uint32_t val)
2459 {
2460     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2461 }
2462
2463 void stl_le_phys(hwaddr addr, uint32_t val)
2464 {
2465     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2466 }
2467
2468 void stl_be_phys(hwaddr addr, uint32_t val)
2469 {
2470     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2471 }
2472
2473 /* XXX: optimize */
2474 void stb_phys(hwaddr addr, uint32_t val)
2475 {
2476     uint8_t v = val;
2477     cpu_physical_memory_write(addr, &v, 1);
2478 }
2479
2480 /* warning: addr must be aligned */
2481 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2482                                      enum device_endian endian)
2483 {
2484     uint8_t *ptr;
2485     MemoryRegionSection *section;
2486
2487     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2488
2489     if (!memory_region_is_ram(section->mr) || section->readonly) {
2490         addr = memory_region_section_addr(section, addr);
2491         if (memory_region_is_ram(section->mr)) {
2492             section = &phys_sections[phys_section_rom];
2493         }
2494 #if defined(TARGET_WORDS_BIGENDIAN)
2495         if (endian == DEVICE_LITTLE_ENDIAN) {
2496             val = bswap16(val);
2497         }
2498 #else
2499         if (endian == DEVICE_BIG_ENDIAN) {
2500             val = bswap16(val);
2501         }
2502 #endif
2503         io_mem_write(section->mr, addr, val, 2);
2504     } else {
2505         unsigned long addr1;
2506         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2507             + memory_region_section_addr(section, addr);
2508         /* RAM case */
2509         ptr = qemu_get_ram_ptr(addr1);
2510         switch (endian) {
2511         case DEVICE_LITTLE_ENDIAN:
2512             stw_le_p(ptr, val);
2513             break;
2514         case DEVICE_BIG_ENDIAN:
2515             stw_be_p(ptr, val);
2516             break;
2517         default:
2518             stw_p(ptr, val);
2519             break;
2520         }
2521         invalidate_and_set_dirty(addr1, 2);
2522     }
2523 }
2524
2525 void stw_phys(hwaddr addr, uint32_t val)
2526 {
2527     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2528 }
2529
2530 void stw_le_phys(hwaddr addr, uint32_t val)
2531 {
2532     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2533 }
2534
2535 void stw_be_phys(hwaddr addr, uint32_t val)
2536 {
2537     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2538 }
2539
2540 /* XXX: optimize */
2541 void stq_phys(hwaddr addr, uint64_t val)
2542 {
2543     val = tswap64(val);
2544     cpu_physical_memory_write(addr, &val, 8);
2545 }
2546
2547 void stq_le_phys(hwaddr addr, uint64_t val)
2548 {
2549     val = cpu_to_le64(val);
2550     cpu_physical_memory_write(addr, &val, 8);
2551 }
2552
2553 void stq_be_phys(hwaddr addr, uint64_t val)
2554 {
2555     val = cpu_to_be64(val);
2556     cpu_physical_memory_write(addr, &val, 8);
2557 }
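/* Illustrative sketch (hypothetical ring buffer whose base guest physical
 * address is "ring_gpa"): the store helpers above mirror the loads and are
 * typically used to publish small fields back to the guest:
 *
 *     stw_le_phys(ring_gpa + 2, new_idx);        16-bit index, LE layout
 *     stq_le_phys(ring_gpa + 8, desc_addr);      64-bit address, LE layout
 *     stb_phys(ring_gpa + 1, 0x01);              single status byte
 *
 * Each call goes through the same RAM-vs-I/O dispatch as address_space_rw(),
 * so it works whether the address is backed by RAM or by an MMIO region.
 */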
2558
2559 /* virtual memory access for debug (includes writing to ROM) */
2560 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2561                         uint8_t *buf, int len, int is_write)
2562 {
2563     int l;
2564     hwaddr phys_addr;
2565     target_ulong page;
2566
2567     while (len > 0) {
2568         page = addr & TARGET_PAGE_MASK;
2569         phys_addr = cpu_get_phys_page_debug(env, page);
2570         /* if no physical page mapped, return an error */
2571         if (phys_addr == -1)
2572             return -1;
2573         l = (page + TARGET_PAGE_SIZE) - addr;
2574         if (l > len)
2575             l = len;
2576         phys_addr += (addr & ~TARGET_PAGE_MASK);
2577         if (is_write)
2578             cpu_physical_memory_write_rom(phys_addr, buf, l);
2579         else
2580             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2581         len -= l;
2582         buf += l;
2583         addr += l;
2584     }
2585     return 0;
2586 }
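/* Illustrative sketch (assumes a gdbstub-style caller with the current
 * CPUArchState "env" and a guest *virtual* address "vaddr"): unlike the
 * physical accessors above, this helper walks the guest page tables first
 * and can also patch ROM, which is what software breakpoints rely on:
 *
 *     uint8_t bp = 0xcc;                             hypothetical opcode
 *     cpu_memory_rw_debug(env, vaddr, &bp, 1, 1);    write the breakpoint
 */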
2587 #endif
2588
2589 #if !defined(CONFIG_USER_ONLY)
2590
2591 /*
2592  * A helper function for the _utterly broken_ virtio device model to find out if
2593  * it's running on a big endian machine. Don't do this at home kids!
2594  */
2595 bool virtio_is_big_endian(void);
2596 bool virtio_is_big_endian(void)
2597 {
2598 #if defined(TARGET_WORDS_BIGENDIAN)
2599     return true;
2600 #else
2601     return false;
2602 #endif
2603 }
2604
2605 #endif
2606
2607 #ifndef CONFIG_USER_ONLY
2608 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2609 {
2610     MemoryRegionSection *section;
2611
2612     section = phys_page_find(address_space_memory.dispatch,
2613                              phys_addr >> TARGET_PAGE_BITS);
2614
2615     return !(memory_region_is_ram(section->mr) ||
2616              memory_region_is_romd(section->mr));
2617 }
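/* Illustrative sketch (assumes migration-style code walking guest physical
 * addresses "gpa"): this predicate lets callers skip ranges backed by device
 * MMIO rather than RAM or ROM-device memory:
 *
 *     if (!cpu_physical_memory_is_io(gpa)) {
 *         ... treat gpa as ordinary memory ...
 *     }
 */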
2618 #endif