[qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include "qemu.h"
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
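/*
 * Illustrative sketch (editor's addition, not part of exec.c): a toy
 * fixed-depth table in the spirit of phys_page_set_level() and
 * phys_page_find() above -- interior entries hold child node indices,
 * leaf entries hold section numbers.  TOY_BITS, TOY_LEVELS and the fixed
 * node pool are made-up values, not QEMU's L2_BITS/P_L2_LEVELS.
 */
#include <stdint.h>
#include <stdio.h>

#define TOY_BITS   4
#define TOY_SIZE   (1 << TOY_BITS)
#define TOY_LEVELS 2
#define TOY_NIL    ((uint16_t)0xffff)

typedef struct ToyEntry {
    uint16_t is_leaf;
    uint16_t ptr;                   /* child node index, or leaf value */
} ToyEntry;

static ToyEntry toy_nodes[64][TOY_SIZE];   /* fixed pool, no growth logic */
static unsigned toy_nodes_nb;

static uint16_t toy_node_alloc(void)
{
    uint16_t ret = toy_nodes_nb++;
    unsigned i;

    for (i = 0; i < TOY_SIZE; i++) {
        toy_nodes[ret][i].is_leaf = 0;
        toy_nodes[ret][i].ptr = TOY_NIL;
    }
    return ret;
}

/* Single-entry analogue of phys_page_set_level(): walk down from the root,
 * allocating interior nodes on demand, and record 'leaf' for 'index'. */
static void toy_set(ToyEntry *root, uint32_t index, uint16_t leaf)
{
    ToyEntry *lp = root;
    int level;

    for (level = TOY_LEVELS - 1; level >= 0; level--) {
        if (lp->ptr == TOY_NIL) {
            lp->ptr = toy_node_alloc();
        }
        lp = &toy_nodes[lp->ptr][(index >> (level * TOY_BITS)) & (TOY_SIZE - 1)];
    }
    lp->is_leaf = 1;
    lp->ptr = leaf;
}

/* Mirrors phys_page_find(): bail out to 'missing' on an unpopulated path. */
static uint16_t toy_find(ToyEntry root, uint32_t index, uint16_t missing)
{
    ToyEntry lp = root;
    int level;

    for (level = TOY_LEVELS - 1; level >= 0 && !lp.is_leaf; level--) {
        if (lp.ptr == TOY_NIL) {
            return missing;
        }
        lp = toy_nodes[lp.ptr][(index >> (level * TOY_BITS)) & (TOY_SIZE - 1)];
    }
    return lp.is_leaf ? lp.ptr : missing;
}

int main(void)
{
    ToyEntry root = { 0, TOY_NIL };

    toy_set(&root, 0x2a, 7);
    printf("0x2a -> %u\n", (unsigned)toy_find(root, 0x2a, 0));  /* 7 */
    printf("0x2b -> %u\n", (unsigned)toy_find(root, 0x2b, 0));  /* 0: missing */
    return 0;
}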
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUState *cpu = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     cpu->interrupt_request &= ~0x01;
231     tlb_flush(cpu->env_ptr, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUState),
244         VMSTATE_UINT32(interrupt_request, CPUState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #else
249 #define vmstate_cpu_common vmstate_dummy
250 #endif
251
252 CPUState *qemu_get_cpu(int index)
253 {
254     CPUArchState *env = first_cpu;
255     CPUState *cpu = NULL;
256
257     while (env) {
258         cpu = ENV_GET_CPU(env);
259         if (cpu->cpu_index == index) {
260             break;
261         }
262         env = env->next_cpu;
263     }
264
265     return env ? cpu : NULL;
266 }
267
268 void cpu_exec_init(CPUArchState *env)
269 {
270     CPUState *cpu = ENV_GET_CPU(env);
271     CPUClass *cc = CPU_GET_CLASS(cpu);
272     CPUArchState **penv;
273     int cpu_index;
274
275 #if defined(CONFIG_USER_ONLY)
276     cpu_list_lock();
277 #endif
278     env->next_cpu = NULL;
279     penv = &first_cpu;
280     cpu_index = 0;
281     while (*penv != NULL) {
282         penv = &(*penv)->next_cpu;
283         cpu_index++;
284     }
285     cpu->cpu_index = cpu_index;
286     cpu->numa_node = 0;
287     QTAILQ_INIT(&env->breakpoints);
288     QTAILQ_INIT(&env->watchpoints);
289 #ifndef CONFIG_USER_ONLY
290     cpu->thread_id = qemu_get_thread_id();
291 #endif
292     *penv = env;
293 #if defined(CONFIG_USER_ONLY)
294     cpu_list_unlock();
295 #endif
296     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
297 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
298     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
299                     cpu_save, cpu_load, env);
300     assert(cc->vmsd == NULL);
301 #endif
302     if (cc->vmsd != NULL) {
303         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
304     }
305 }
306
307 #if defined(TARGET_HAS_ICE)
308 #if defined(CONFIG_USER_ONLY)
309 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
310 {
311     tb_invalidate_phys_page_range(pc, pc + 1, 0);
312 }
313 #else
314 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
315 {
316     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
317             (pc & ~TARGET_PAGE_MASK));
318 }
319 #endif
320 #endif /* TARGET_HAS_ICE */
321
322 #if defined(CONFIG_USER_ONLY)
323 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
324 {
325 }
327
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     return -ENOSYS;
332 }
333 #else
334 /* Add a watchpoint.  */
335 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
336                           int flags, CPUWatchpoint **watchpoint)
337 {
338     target_ulong len_mask = ~(len - 1);
339     CPUWatchpoint *wp;
340
341     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
342     if ((len & (len - 1)) || (addr & ~len_mask) ||
343             len == 0 || len > TARGET_PAGE_SIZE) {
344         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
345                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
346         return -EINVAL;
347     }
348     wp = g_malloc(sizeof(*wp));
349
350     wp->vaddr = addr;
351     wp->len_mask = len_mask;
352     wp->flags = flags;
353
354     /* keep all GDB-injected watchpoints in front */
355     if (flags & BP_GDB)
356         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
357     else
358         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
359
360     tlb_flush_page(env, addr);
361
362     if (watchpoint)
363         *watchpoint = wp;
364     return 0;
365 }
366
367 /* Remove a specific watchpoint.  */
368 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
369                           int flags)
370 {
371     target_ulong len_mask = ~(len - 1);
372     CPUWatchpoint *wp;
373
374     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
375         if (addr == wp->vaddr && len_mask == wp->len_mask
376                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
377             cpu_watchpoint_remove_by_ref(env, wp);
378             return 0;
379         }
380     }
381     return -ENOENT;
382 }
383
384 /* Remove a specific watchpoint by reference.  */
385 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
386 {
387     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
388
389     tlb_flush_page(env, watchpoint->vaddr);
390
391     g_free(watchpoint);
392 }
393
394 /* Remove all matching watchpoints.  */
395 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
396 {
397     CPUWatchpoint *wp, *next;
398
399     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
400         if (wp->flags & mask)
401             cpu_watchpoint_remove_by_ref(env, wp);
402     }
403 }
404 #endif
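/*
 * Illustrative sketch (editor's addition): how a caller such as the gdbstub
 * might drive the watchpoint API above.  'env' is assumed to be a valid
 * CPUArchState; the helper name is hypothetical.  Under CONFIG_USER_ONLY
 * the insert stub above simply returns -ENOSYS.
 */
static int example_set_write_watch(CPUArchState *env, target_ulong addr)
{
    CPUWatchpoint *wp;
    int ret;

    /* The length must be a power of two and 'addr' aligned to it
     * (see the sanity checks in cpu_watchpoint_insert()). */
    ret = cpu_watchpoint_insert(env, addr, 4, BP_GDB | BP_MEM_WRITE, &wp);
    if (ret < 0) {
        return ret;                 /* -EINVAL or -ENOSYS */
    }

    /* ... run the guest; writes to this address now trap ... */

    cpu_watchpoint_remove_by_ref(env, wp);
    return 0;
}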
405
406 /* Add a breakpoint.  */
407 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
408                           CPUBreakpoint **breakpoint)
409 {
410 #if defined(TARGET_HAS_ICE)
411     CPUBreakpoint *bp;
412
413     bp = g_malloc(sizeof(*bp));
414
415     bp->pc = pc;
416     bp->flags = flags;
417
418     /* keep all GDB-injected breakpoints in front */
419     if (flags & BP_GDB)
420         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
421     else
422         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
423
424     breakpoint_invalidate(env, pc);
425
426     if (breakpoint)
427         *breakpoint = bp;
428     return 0;
429 #else
430     return -ENOSYS;
431 #endif
432 }
433
434 /* Remove a specific breakpoint.  */
435 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
436 {
437 #if defined(TARGET_HAS_ICE)
438     CPUBreakpoint *bp;
439
440     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
441         if (bp->pc == pc && bp->flags == flags) {
442             cpu_breakpoint_remove_by_ref(env, bp);
443             return 0;
444         }
445     }
446     return -ENOENT;
447 #else
448     return -ENOSYS;
449 #endif
450 }
451
452 /* Remove a specific breakpoint by reference.  */
453 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
454 {
455 #if defined(TARGET_HAS_ICE)
456     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
457
458     breakpoint_invalidate(env, breakpoint->pc);
459
460     g_free(breakpoint);
461 #endif
462 }
463
464 /* Remove all matching breakpoints. */
465 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
466 {
467 #if defined(TARGET_HAS_ICE)
468     CPUBreakpoint *bp, *next;
469
470     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
471         if (bp->flags & mask)
472             cpu_breakpoint_remove_by_ref(env, bp);
473     }
474 #endif
475 }
476
477 /* enable or disable single step mode. EXCP_DEBUG is returned by the
478    CPU loop after each instruction */
479 void cpu_single_step(CPUArchState *env, int enabled)
480 {
481 #if defined(TARGET_HAS_ICE)
482     if (env->singlestep_enabled != enabled) {
483         env->singlestep_enabled = enabled;
484         if (kvm_enabled())
485             kvm_update_guest_debug(env, 0);
486         else {
487             /* must flush all the translated code to avoid inconsistencies */
488             /* XXX: only flush what is necessary */
489             tb_flush(env);
490         }
491     }
492 #endif
493 }
494
495 void cpu_exit(CPUArchState *env)
496 {
497     CPUState *cpu = ENV_GET_CPU(env);
498
499     cpu->exit_request = 1;
500     cpu->tcg_exit_req = 1;
501 }
502
503 void cpu_abort(CPUArchState *env, const char *fmt, ...)
504 {
505     va_list ap;
506     va_list ap2;
507
508     va_start(ap, fmt);
509     va_copy(ap2, ap);
510     fprintf(stderr, "qemu: fatal: ");
511     vfprintf(stderr, fmt, ap);
512     fprintf(stderr, "\n");
513     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
514     if (qemu_log_enabled()) {
515         qemu_log("qemu: fatal: ");
516         qemu_log_vprintf(fmt, ap2);
517         qemu_log("\n");
518         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
519         qemu_log_flush();
520         qemu_log_close();
521     }
522     va_end(ap2);
523     va_end(ap);
524 #if defined(CONFIG_USER_ONLY)
525     {
526         struct sigaction act;
527         sigfillset(&act.sa_mask);
528         act.sa_handler = SIG_DFL;
529         sigaction(SIGABRT, &act, NULL);
530     }
531 #endif
532     abort();
533 }
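/*
 * Illustrative sketch (editor's addition): the va_copy() pattern cpu_abort()
 * relies on above.  A va_list may only be traversed once, so reporting the
 * same arguments to two sinks (stderr and the log) needs a copy.  Standalone
 * C, not QEMU code.
 */
#include <stdarg.h>
#include <stdio.h>

void report_twice(FILE *log, const char *fmt, ...)
{
    va_list ap, ap2;

    va_start(ap, fmt);
    va_copy(ap2, ap);           /* the second traversal needs its own copy */
    vfprintf(stderr, fmt, ap);
    vfprintf(log, fmt, ap2);
    va_end(ap2);
    va_end(ap);
}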
534
535 CPUArchState *cpu_copy(CPUArchState *env)
536 {
537     CPUArchState *new_env = cpu_init(env->cpu_model_str);
538     CPUArchState *next_cpu = new_env->next_cpu;
539 #if defined(TARGET_HAS_ICE)
540     CPUBreakpoint *bp;
541     CPUWatchpoint *wp;
542 #endif
543
544     memcpy(new_env, env, sizeof(CPUArchState));
545
546     /* Preserve chaining. */
547     new_env->next_cpu = next_cpu;
548
549     /* Clone all break/watchpoints.
550        Note: Once we support ptrace with hw-debug register access, make sure
551        BP_CPU break/watchpoints are handled correctly on clone. */
552     QTAILQ_INIT(&env->breakpoints);
553     QTAILQ_INIT(&env->watchpoints);
554 #if defined(TARGET_HAS_ICE)
555     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
556         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
557     }
558     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
559         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
560                               wp->flags, NULL);
561     }
562 #endif
563
564     return new_env;
565 }
566
567 #if !defined(CONFIG_USER_ONLY)
568 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
569                                       uintptr_t length)
570 {
571     uintptr_t start1;
572
573     /* we modify the TLB cache so that the dirty bit will be set again
574        when accessing the range */
575     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
576     /* Check that we don't span multiple blocks - this breaks the
577        address comparisons below.  */
578     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
579             != (end - 1) - start) {
580         abort();
581     }
582     cpu_tlb_reset_dirty_all(start1, length);
583
584 }
585
586 /* Note: start and end must be within the same ram block.  */
587 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
588                                      int dirty_flags)
589 {
590     uintptr_t length;
591
592     start &= TARGET_PAGE_MASK;
593     end = TARGET_PAGE_ALIGN(end);
594
595     length = end - start;
596     if (length == 0)
597         return;
598     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
599
600     if (tcg_enabled()) {
601         tlb_reset_dirty_range_all(start, end, length);
602     }
603 }
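/*
 * Illustrative sketch (editor's addition): the align-down/align-up arithmetic
 * behind 'start &= TARGET_PAGE_MASK' and TARGET_PAGE_ALIGN() above.  The
 * 4 KiB page size is only an example; the real value is target dependent.
 */
#include <stdint.h>
#include <assert.h>

#define TOY_PAGE_BITS 12
#define TOY_PAGE_SIZE (1u << TOY_PAGE_BITS)
#define TOY_PAGE_MASK (~(uint64_t)(TOY_PAGE_SIZE - 1))
#define TOY_PAGE_ALIGN(a) (((a) + TOY_PAGE_SIZE - 1) & TOY_PAGE_MASK)

void toy_page_align_demo(void)
{
    uint64_t start = 0x1234, end = 0x5678;

    start &= TOY_PAGE_MASK;         /* 0x1000: round down to a page boundary */
    end = TOY_PAGE_ALIGN(end);      /* 0x6000: round up to a page boundary   */
    assert(start == 0x1000 && end == 0x6000);
}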
604
605 static int cpu_physical_memory_set_dirty_tracking(int enable)
606 {
607     int ret = 0;
608     in_migration = enable;
609     return ret;
610 }
611
612 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
613                                                    MemoryRegionSection *section,
614                                                    target_ulong vaddr,
615                                                    hwaddr paddr,
616                                                    int prot,
617                                                    target_ulong *address)
618 {
619     hwaddr iotlb;
620     CPUWatchpoint *wp;
621
622     if (memory_region_is_ram(section->mr)) {
623         /* Normal RAM.  */
624         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
625             + memory_region_section_addr(section, paddr);
626         if (!section->readonly) {
627             iotlb |= phys_section_notdirty;
628         } else {
629             iotlb |= phys_section_rom;
630         }
631     } else {
632         /* IO handlers are currently passed a physical address.
633            It would be nice to pass an offset from the base address
634            of that region.  This would avoid having to special case RAM,
635            and avoid full address decoding in every device.
636            We can't use the high bits of pd for this because
637            IO_MEM_ROMD uses these as a ram address.  */
638         iotlb = section - phys_sections;
639         iotlb += memory_region_section_addr(section, paddr);
640     }
641
642     /* Make accesses to pages with watchpoints go via the
643        watchpoint trap routines.  */
644     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
645         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
646             /* Avoid trapping reads of pages with a write breakpoint. */
647             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
648                 iotlb = phys_section_watch + paddr;
649                 *address |= TLB_MMIO;
650                 break;
651             }
652         }
653     }
654
655     return iotlb;
656 }
657 #endif /* !defined(CONFIG_USER_ONLY) */
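/*
 * Illustrative sketch (editor's addition): the packing scheme behind the
 * iotlb values built above -- a page-aligned address in the high bits and a
 * small phys_sections index in the otherwise-unused sub-page bits, which is
 * what iotlb_to_region() later unpacks.  Toy constants, standalone C.
 */
#include <stdint.h>
#include <assert.h>

#define PACK_PAGE_BITS 12
#define PACK_PAGE_MASK (~(uint64_t)((1u << PACK_PAGE_BITS) - 1))

uint64_t toy_iotlb_pack(uint64_t page_addr, uint16_t section_idx)
{
    assert((page_addr & ~PACK_PAGE_MASK) == 0);      /* page aligned        */
    assert(section_idx < (1u << PACK_PAGE_BITS));    /* fits below the page */
    return page_addr | section_idx;
}

uint16_t toy_iotlb_section(uint64_t iotlb)
{
    return (uint16_t)(iotlb & ~PACK_PAGE_MASK);      /* cf. iotlb_to_region() */
}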
658
659 #if !defined(CONFIG_USER_ONLY)
660
661 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
662 typedef struct subpage_t {
663     MemoryRegion iomem;
664     hwaddr base;
665     uint16_t sub_section[TARGET_PAGE_SIZE];
666 } subpage_t;
667
668 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
669                              uint16_t section);
670 static subpage_t *subpage_init(hwaddr base);
671 static void destroy_page_desc(uint16_t section_index)
672 {
673     MemoryRegionSection *section = &phys_sections[section_index];
674     MemoryRegion *mr = section->mr;
675
676     if (mr->subpage) {
677         subpage_t *subpage = container_of(mr, subpage_t, iomem);
678         memory_region_destroy(&subpage->iomem);
679         g_free(subpage);
680     }
681 }
682
683 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
684 {
685     unsigned i;
686     PhysPageEntry *p;
687
688     if (lp->ptr == PHYS_MAP_NODE_NIL) {
689         return;
690     }
691
692     p = phys_map_nodes[lp->ptr];
693     for (i = 0; i < L2_SIZE; ++i) {
694         if (!p[i].is_leaf) {
695             destroy_l2_mapping(&p[i], level - 1);
696         } else {
697             destroy_page_desc(p[i].ptr);
698         }
699     }
700     lp->is_leaf = 0;
701     lp->ptr = PHYS_MAP_NODE_NIL;
702 }
703
704 static void destroy_all_mappings(AddressSpaceDispatch *d)
705 {
706     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
707     phys_map_nodes_reset();
708 }
709
710 static uint16_t phys_section_add(MemoryRegionSection *section)
711 {
712     if (phys_sections_nb == phys_sections_nb_alloc) {
713         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
714         phys_sections = g_renew(MemoryRegionSection, phys_sections,
715                                 phys_sections_nb_alloc);
716     }
717     phys_sections[phys_sections_nb] = *section;
718     return phys_sections_nb++;
719 }
720
721 static void phys_sections_clear(void)
722 {
723     phys_sections_nb = 0;
724 }
725
726 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
727 {
728     subpage_t *subpage;
729     hwaddr base = section->offset_within_address_space
730         & TARGET_PAGE_MASK;
731     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
732     MemoryRegionSection subsection = {
733         .offset_within_address_space = base,
734         .size = TARGET_PAGE_SIZE,
735     };
736     hwaddr start, end;
737
738     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
739
740     if (!(existing->mr->subpage)) {
741         subpage = subpage_init(base);
742         subsection.mr = &subpage->iomem;
743         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
744                       phys_section_add(&subsection));
745     } else {
746         subpage = container_of(existing->mr, subpage_t, iomem);
747     }
748     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
749     end = start + section->size - 1;
750     subpage_register(subpage, start, end, phys_section_add(section));
751 }
752
753
754 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
755 {
756     hwaddr start_addr = section->offset_within_address_space;
757     ram_addr_t size = section->size;
758     hwaddr addr;
759     uint16_t section_index = phys_section_add(section);
760
761     assert(size);
762
763     addr = start_addr;
764     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
765                   section_index);
766 }
767
768 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
769 {
770     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
771     MemoryRegionSection now = *section, remain = *section;
772
773     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
774         || (now.size < TARGET_PAGE_SIZE)) {
775         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
776                        - now.offset_within_address_space,
777                        now.size);
778         register_subpage(d, &now);
779         remain.size -= now.size;
780         remain.offset_within_address_space += now.size;
781         remain.offset_within_region += now.size;
782     }
783     while (remain.size >= TARGET_PAGE_SIZE) {
784         now = remain;
785         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
786             now.size = TARGET_PAGE_SIZE;
787             register_subpage(d, &now);
788         } else {
789             now.size &= TARGET_PAGE_MASK;
790             register_multipage(d, &now);
791         }
792         remain.size -= now.size;
793         remain.offset_within_address_space += now.size;
794         remain.offset_within_region += now.size;
795     }
796     now = remain;
797     if (now.size) {
798         register_subpage(d, &now);
799     }
800 }
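/*
 * Illustrative sketch (editor's addition): the head/middle/tail split that
 * mem_add() above applies to a section.  Unaligned head and tail pieces take
 * the register_subpage() path, the page-aligned middle takes
 * register_multipage().  Simplified: the real code also keeps page-sized
 * pieces on the subpage path when offset_within_region is unaligned.
 * Standalone C with a toy page size.
 */
#include <stdint.h>
#include <stdio.h>

#define SPLIT_PAGE_SIZE 0x1000ull

void toy_split(uint64_t start, uint64_t size)
{
    uint64_t head = (SPLIT_PAGE_SIZE - (start % SPLIT_PAGE_SIZE)) % SPLIT_PAGE_SIZE;

    if (head) {
        if (head > size) {
            head = size;
        }
        printf("subpage  : 0x%llx + 0x%llx\n",
               (unsigned long long)start, (unsigned long long)head);
        start += head;
        size -= head;
    }
    if (size >= SPLIT_PAGE_SIZE) {
        uint64_t mid = size & ~(SPLIT_PAGE_SIZE - 1);

        printf("multipage: 0x%llx + 0x%llx\n",
               (unsigned long long)start, (unsigned long long)mid);
        start += mid;
        size -= mid;
    }
    if (size) {
        printf("subpage  : 0x%llx + 0x%llx\n",
               (unsigned long long)start, (unsigned long long)size);
    }
}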
801
802 void qemu_flush_coalesced_mmio_buffer(void)
803 {
804     if (kvm_enabled())
805         kvm_flush_coalesced_mmio_buffer();
806 }
807
808 void qemu_mutex_lock_ramlist(void)
809 {
810     qemu_mutex_lock(&ram_list.mutex);
811 }
812
813 void qemu_mutex_unlock_ramlist(void)
814 {
815     qemu_mutex_unlock(&ram_list.mutex);
816 }
817
818 #if defined(__linux__) && !defined(TARGET_S390X)
819
820 #include <sys/vfs.h>
821
822 #define HUGETLBFS_MAGIC       0x958458f6
823
824 static long gethugepagesize(const char *path)
825 {
826     struct statfs fs;
827     int ret;
828
829     do {
830         ret = statfs(path, &fs);
831     } while (ret != 0 && errno == EINTR);
832
833     if (ret != 0) {
834         perror(path);
835         return 0;
836     }
837
838     if (fs.f_type != HUGETLBFS_MAGIC)
839         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
840
841     return fs.f_bsize;
842 }
843
844 static void *file_ram_alloc(RAMBlock *block,
845                             ram_addr_t memory,
846                             const char *path)
847 {
848     char *filename;
849     char *sanitized_name;
850     char *c;
851     void *area;
852     int fd;
853 #ifdef MAP_POPULATE
854     int flags;
855 #endif
856     unsigned long hpagesize;
857
858     hpagesize = gethugepagesize(path);
859     if (!hpagesize) {
860         return NULL;
861     }
862
863     if (memory < hpagesize) {
864         return NULL;
865     }
866
867     if (kvm_enabled() && !kvm_has_sync_mmu()) {
868         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
869         return NULL;
870     }
871
872     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
873     sanitized_name = g_strdup(block->mr->name);
874     for (c = sanitized_name; *c != '\0'; c++) {
875         if (*c == '/')
876             *c = '_';
877     }
878
879     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
880                                sanitized_name);
881     g_free(sanitized_name);
882
883     fd = mkstemp(filename);
884     if (fd < 0) {
885         perror("unable to create backing store for hugepages");
886         g_free(filename);
887         return NULL;
888     }
889     unlink(filename);
890     g_free(filename);
891
892     memory = (memory+hpagesize-1) & ~(hpagesize-1);
893
894     /*
895      * ftruncate is not supported by hugetlbfs in older
896      * hosts, so don't bother bailing out on errors.
897      * If anything goes wrong with it under other filesystems,
898      * mmap will fail.
899      */
900     if (ftruncate(fd, memory))
901         perror("ftruncate");
902
903 #ifdef MAP_POPULATE
904     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
905      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
906      * to sidestep this quirk.
907      */
908     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
909     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
910 #else
911     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
912 #endif
913     if (area == MAP_FAILED) {
914         perror("file_ram_alloc: can't mmap RAM pages");
915         close(fd);
916         return (NULL);
917     }
918     block->fd = fd;
919     return area;
920 }
921 #endif
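/*
 * Illustrative sketch (editor's addition): the create/unlink/ftruncate/mmap
 * pattern used by file_ram_alloc() above, outside of QEMU.  The
 * "/dev/hugepages" default below is only an example mount point; any
 * hugetlbfs (or tmpfs) directory works.  Error handling is reduced to
 * returning NULL.
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>

void *toy_file_backed_alloc(const char *dir, size_t size)
{
    char path[4096];
    void *area;
    int fd;

    snprintf(path, sizeof(path), "%s/toy_back_mem.XXXXXX",
             dir ? dir : "/dev/hugepages");
    fd = mkstemp(path);
    if (fd < 0) {
        return NULL;
    }
    unlink(path);                   /* keep the fd, drop the directory entry */

    /* Older hugetlbfs may not support ftruncate(); as in file_ram_alloc(),
     * a failure here is not treated as fatal -- mmap() catches real errors. */
    (void)ftruncate(fd, size);

    area = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
    close(fd);                      /* the mapping keeps the backing alive */
    return area == MAP_FAILED ? NULL : area;
}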
922
923 static ram_addr_t find_ram_offset(ram_addr_t size)
924 {
925     RAMBlock *block, *next_block;
926     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
927
928     assert(size != 0); /* it would hand out same offset multiple times */
929
930     if (QTAILQ_EMPTY(&ram_list.blocks))
931         return 0;
932
933     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
934         ram_addr_t end, next = RAM_ADDR_MAX;
935
936         end = block->offset + block->length;
937
938         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
939             if (next_block->offset >= end) {
940                 next = MIN(next, next_block->offset);
941             }
942         }
943         if (next - end >= size && next - end < mingap) {
944             offset = end;
945             mingap = next - end;
946         }
947     }
948
949     if (offset == RAM_ADDR_MAX) {
950         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
951                 (uint64_t)size);
952         abort();
953     }
954
955     return offset;
956 }
957
958 ram_addr_t last_ram_offset(void)
959 {
960     RAMBlock *block;
961     ram_addr_t last = 0;
962
963     QTAILQ_FOREACH(block, &ram_list.blocks, next)
964         last = MAX(last, block->offset + block->length);
965
966     return last;
967 }
968
969 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
970 {
971     int ret;
972     QemuOpts *machine_opts;
973
974     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
975     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
976     if (machine_opts &&
977         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
978         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
979         if (ret) {
980             perror("qemu_madvise");
981             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
982                             "but dump_guest_core=off specified\n");
983         }
984     }
985 }
986
987 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
988 {
989     RAMBlock *new_block, *block;
990
991     new_block = NULL;
992     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
993         if (block->offset == addr) {
994             new_block = block;
995             break;
996         }
997     }
998     assert(new_block);
999     assert(!new_block->idstr[0]);
1000
1001     if (dev) {
1002         char *id = qdev_get_dev_path(dev);
1003         if (id) {
1004             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1005             g_free(id);
1006         }
1007     }
1008     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1009
1010     /* This assumes the iothread lock is taken here too.  */
1011     qemu_mutex_lock_ramlist();
1012     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1013         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1014             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1015                     new_block->idstr);
1016             abort();
1017         }
1018     }
1019     qemu_mutex_unlock_ramlist();
1020 }
1021
1022 static int memory_try_enable_merging(void *addr, size_t len)
1023 {
1024     QemuOpts *opts;
1025
1026     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1027     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1028         /* disabled by the user */
1029         return 0;
1030     }
1031
1032     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1033 }
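/*
 * Illustrative sketch (editor's addition): what memory_try_enable_merging()
 * amounts to on Linux -- marking an anonymous region as a KSM candidate with
 * madvise(MADV_MERGEABLE).  Standalone C; MADV_MERGEABLE is Linux specific
 * and may be absent on other hosts, hence the #ifdef.
 */
#include <stddef.h>
#include <sys/mman.h>

void *toy_alloc_mergeable(size_t size)
{
    void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (p == MAP_FAILED) {
        return NULL;
    }
#ifdef MADV_MERGEABLE
    madvise(p, size, MADV_MERGEABLE);   /* best effort; failure is harmless */
#endif
    return p;
}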
1034
1035 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1036                                    MemoryRegion *mr)
1037 {
1038     RAMBlock *block, *new_block;
1039
1040     size = TARGET_PAGE_ALIGN(size);
1041     new_block = g_malloc0(sizeof(*new_block));
1042
1043     /* This assumes the iothread lock is taken here too.  */
1044     qemu_mutex_lock_ramlist();
1045     new_block->mr = mr;
1046     new_block->offset = find_ram_offset(size);
1047     if (host) {
1048         new_block->host = host;
1049         new_block->flags |= RAM_PREALLOC_MASK;
1050     } else {
1051         if (mem_path) {
1052 #if defined (__linux__) && !defined(TARGET_S390X)
1053             new_block->host = file_ram_alloc(new_block, size, mem_path);
1054             if (!new_block->host) {
1055                 new_block->host = qemu_vmalloc(size);
1056                 memory_try_enable_merging(new_block->host, size);
1057             }
1058 #else
1059             fprintf(stderr, "-mem-path option unsupported\n");
1060             exit(1);
1061 #endif
1062         } else {
1063             if (xen_enabled()) {
1064                 xen_ram_alloc(new_block->offset, size, mr);
1065             } else if (kvm_enabled()) {
1066                 /* some s390/kvm configurations have special constraints */
1067                 new_block->host = kvm_vmalloc(size);
1068             } else {
1069                 new_block->host = qemu_vmalloc(size);
1070             }
1071             memory_try_enable_merging(new_block->host, size);
1072         }
1073     }
1074     new_block->length = size;
1075
1076     /* Keep the list sorted from biggest to smallest block.  */
1077     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1078         if (block->length < new_block->length) {
1079             break;
1080         }
1081     }
1082     if (block) {
1083         QTAILQ_INSERT_BEFORE(block, new_block, next);
1084     } else {
1085         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1086     }
1087     ram_list.mru_block = NULL;
1088
1089     ram_list.version++;
1090     qemu_mutex_unlock_ramlist();
1091
1092     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1093                                        last_ram_offset() >> TARGET_PAGE_BITS);
1094     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1095            0, size >> TARGET_PAGE_BITS);
1096     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1097
1098     qemu_ram_setup_dump(new_block->host, size);
1099     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1100
1101     if (kvm_enabled())
1102         kvm_setup_guest_memory(new_block->host, size);
1103
1104     return new_block->offset;
1105 }
1106
1107 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1108 {
1109     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1110 }
1111
1112 void qemu_ram_free_from_ptr(ram_addr_t addr)
1113 {
1114     RAMBlock *block;
1115
1116     /* This assumes the iothread lock is taken here too.  */
1117     qemu_mutex_lock_ramlist();
1118     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1119         if (addr == block->offset) {
1120             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1121             ram_list.mru_block = NULL;
1122             ram_list.version++;
1123             g_free(block);
1124             break;
1125         }
1126     }
1127     qemu_mutex_unlock_ramlist();
1128 }
1129
1130 void qemu_ram_free(ram_addr_t addr)
1131 {
1132     RAMBlock *block;
1133
1134     /* This assumes the iothread lock is taken here too.  */
1135     qemu_mutex_lock_ramlist();
1136     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1137         if (addr == block->offset) {
1138             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1139             ram_list.mru_block = NULL;
1140             ram_list.version++;
1141             if (block->flags & RAM_PREALLOC_MASK) {
1142                 ;
1143             } else if (mem_path) {
1144 #if defined (__linux__) && !defined(TARGET_S390X)
1145                 if (block->fd) {
1146                     munmap(block->host, block->length);
1147                     close(block->fd);
1148                 } else {
1149                     qemu_vfree(block->host);
1150                 }
1151 #else
1152                 abort();
1153 #endif
1154             } else {
1155 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1156                 munmap(block->host, block->length);
1157 #else
1158                 if (xen_enabled()) {
1159                     xen_invalidate_map_cache_entry(block->host);
1160                 } else {
1161                     qemu_vfree(block->host);
1162                 }
1163 #endif
1164             }
1165             g_free(block);
1166             break;
1167         }
1168     }
1169     qemu_mutex_unlock_ramlist();
1170
1171 }
1172
1173 #ifndef _WIN32
1174 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1175 {
1176     RAMBlock *block;
1177     ram_addr_t offset;
1178     int flags;
1179     void *area, *vaddr;
1180
1181     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1182         offset = addr - block->offset;
1183         if (offset < block->length) {
1184             vaddr = block->host + offset;
1185             if (block->flags & RAM_PREALLOC_MASK) {
1186                 ;
1187             } else {
1188                 flags = MAP_FIXED;
1189                 munmap(vaddr, length);
1190                 if (mem_path) {
1191 #if defined(__linux__) && !defined(TARGET_S390X)
1192                     if (block->fd) {
1193 #ifdef MAP_POPULATE
1194                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1195                             MAP_PRIVATE;
1196 #else
1197                         flags |= MAP_PRIVATE;
1198 #endif
1199                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1200                                     flags, block->fd, offset);
1201                     } else {
1202                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1203                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1204                                     flags, -1, 0);
1205                     }
1206 #else
1207                     abort();
1208 #endif
1209                 } else {
1210 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1211                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1212                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1213                                 flags, -1, 0);
1214 #else
1215                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1216                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1217                                 flags, -1, 0);
1218 #endif
1219                 }
1220                 if (area != vaddr) {
1221                     fprintf(stderr, "Could not remap addr: "
1222                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1223                             length, addr);
1224                     exit(1);
1225                 }
1226                 memory_try_enable_merging(vaddr, length);
1227                 qemu_ram_setup_dump(vaddr, length);
1228             }
1229             return;
1230         }
1231     }
1232 }
1233 #endif /* !_WIN32 */
1234
1235 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1236    With the exception of the softmmu code in this file, this should
1237    only be used for local memory (e.g. video ram) that the device owns,
1238    and knows it isn't going to access beyond the end of the block.
1239
1240    It should not be used for general purpose DMA.
1241    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1242  */
1243 void *qemu_get_ram_ptr(ram_addr_t addr)
1244 {
1245     RAMBlock *block;
1246
1247     /* The list is protected by the iothread lock here.  */
1248     block = ram_list.mru_block;
1249     if (block && addr - block->offset < block->length) {
1250         goto found;
1251     }
1252     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1253         if (addr - block->offset < block->length) {
1254             goto found;
1255         }
1256     }
1257
1258     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1259     abort();
1260
1261 found:
1262     ram_list.mru_block = block;
1263     if (xen_enabled()) {
1264         /* We need to check if the requested address is in the RAM
1265          * because we don't want to map the entire memory in QEMU.
1266          * In that case just map until the end of the page.
1267          */
1268         if (block->offset == 0) {
1269             return xen_map_cache(addr, 0, 0);
1270         } else if (block->host == NULL) {
1271             block->host =
1272                 xen_map_cache(block->offset, block->length, 1);
1273         }
1274     }
1275     return block->host + (addr - block->offset);
1276 }
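/*
 * Illustrative sketch (editor's addition): per the comment above, device
 * models doing general purpose DMA should go through the address-space
 * helpers rather than hold a raw pointer from qemu_get_ram_ptr().  The
 * helper name here is hypothetical and assumes the cpu_physical_memory_rw()
 * prototype (address, buffer, length, is_write) used elsewhere in the tree.
 */
static void example_dma_write_to_guest(hwaddr dma_addr,
                                       const uint8_t *data, int len)
{
    /* Handles RAM, MMIO and watchpointed pages alike; the cast is needed
     * because the helper takes a non-const buffer for both directions. */
    cpu_physical_memory_rw(dma_addr, (uint8_t *)data, len, 1);
}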
1277
1278 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1279  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1280  *
1281  * ??? Is this still necessary?
1282  */
1283 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1284 {
1285     RAMBlock *block;
1286
1287     /* The list is protected by the iothread lock here.  */
1288     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1289         if (addr - block->offset < block->length) {
1290             if (xen_enabled()) {
1291                 /* We need to check if the requested address is in the RAM
1292                  * because we don't want to map the entire memory in QEMU.
1293                  * In that case just map until the end of the page.
1294                  */
1295                 if (block->offset == 0) {
1296                     return xen_map_cache(addr, 0, 0);
1297                 } else if (block->host == NULL) {
1298                     block->host =
1299                         xen_map_cache(block->offset, block->length, 1);
1300                 }
1301             }
1302             return block->host + (addr - block->offset);
1303         }
1304     }
1305
1306     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1307     abort();
1308
1309     return NULL;
1310 }
1311
1312 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1313  * but takes a size argument */
1314 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1315 {
1316     if (*size == 0) {
1317         return NULL;
1318     }
1319     if (xen_enabled()) {
1320         return xen_map_cache(addr, *size, 1);
1321     } else {
1322         RAMBlock *block;
1323
1324         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1325             if (addr - block->offset < block->length) {
1326                 if (addr - block->offset + *size > block->length)
1327                     *size = block->length - addr + block->offset;
1328                 return block->host + (addr - block->offset);
1329             }
1330         }
1331
1332         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1333         abort();
1334     }
1335 }
1336
1337 void qemu_put_ram_ptr(void *addr)
1338 {
1339     trace_qemu_put_ram_ptr(addr);
1340 }
1341
1342 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1343 {
1344     RAMBlock *block;
1345     uint8_t *host = ptr;
1346
1347     if (xen_enabled()) {
1348         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1349         return 0;
1350     }
1351
1352     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1353         /* This case appears when the block is not mapped. */
1354         if (block->host == NULL) {
1355             continue;
1356         }
1357         if (host - block->host < block->length) {
1358             *ram_addr = block->offset + (host - block->host);
1359             return 0;
1360         }
1361     }
1362
1363     return -1;
1364 }
1365
1366 /* Some of the softmmu routines need to translate from a host pointer
1367    (typically a TLB entry) back to a ram offset.  */
1368 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1369 {
1370     ram_addr_t ram_addr;
1371
1372     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1373         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1374         abort();
1375     }
1376     return ram_addr;
1377 }
1378
1379 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1380                                     unsigned size)
1381 {
1382 #ifdef DEBUG_UNASSIGNED
1383     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1384 #endif
1385 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1386     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1387 #endif
1388     return 0;
1389 }
1390
1391 static void unassigned_mem_write(void *opaque, hwaddr addr,
1392                                  uint64_t val, unsigned size)
1393 {
1394 #ifdef DEBUG_UNASSIGNED
1395     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1396 #endif
1397 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1398     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1399 #endif
1400 }
1401
1402 static const MemoryRegionOps unassigned_mem_ops = {
1403     .read = unassigned_mem_read,
1404     .write = unassigned_mem_write,
1405     .endianness = DEVICE_NATIVE_ENDIAN,
1406 };
1407
1408 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1409                                unsigned size)
1410 {
1411     abort();
1412 }
1413
1414 static void error_mem_write(void *opaque, hwaddr addr,
1415                             uint64_t value, unsigned size)
1416 {
1417     abort();
1418 }
1419
1420 static const MemoryRegionOps error_mem_ops = {
1421     .read = error_mem_read,
1422     .write = error_mem_write,
1423     .endianness = DEVICE_NATIVE_ENDIAN,
1424 };
1425
1426 static const MemoryRegionOps rom_mem_ops = {
1427     .read = error_mem_read,
1428     .write = unassigned_mem_write,
1429     .endianness = DEVICE_NATIVE_ENDIAN,
1430 };
1431
1432 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1433                                uint64_t val, unsigned size)
1434 {
1435     int dirty_flags;
1436     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1437     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1438 #if !defined(CONFIG_USER_ONLY)
1439         tb_invalidate_phys_page_fast(ram_addr, size);
1440         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1441 #endif
1442     }
1443     switch (size) {
1444     case 1:
1445         stb_p(qemu_get_ram_ptr(ram_addr), val);
1446         break;
1447     case 2:
1448         stw_p(qemu_get_ram_ptr(ram_addr), val);
1449         break;
1450     case 4:
1451         stl_p(qemu_get_ram_ptr(ram_addr), val);
1452         break;
1453     default:
1454         abort();
1455     }
1456     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1457     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1458     /* we remove the notdirty callback only if the code has been
1459        flushed */
1460     if (dirty_flags == 0xff)
1461         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1462 }
1463
1464 static const MemoryRegionOps notdirty_mem_ops = {
1465     .read = error_mem_read,
1466     .write = notdirty_mem_write,
1467     .endianness = DEVICE_NATIVE_ENDIAN,
1468 };
1469
1470 /* Generate a debug exception if a watchpoint has been hit.  */
1471 static void check_watchpoint(int offset, int len_mask, int flags)
1472 {
1473     CPUArchState *env = cpu_single_env;
1474     target_ulong pc, cs_base;
1475     target_ulong vaddr;
1476     CPUWatchpoint *wp;
1477     int cpu_flags;
1478
1479     if (env->watchpoint_hit) {
1480         /* We re-entered the check after replacing the TB. Now raise
1481          * the debug interrupt so that it will trigger after the
1482          * current instruction. */
1483         cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1484         return;
1485     }
1486     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1487     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1488         if ((vaddr == (wp->vaddr & len_mask) ||
1489              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1490             wp->flags |= BP_WATCHPOINT_HIT;
1491             if (!env->watchpoint_hit) {
1492                 env->watchpoint_hit = wp;
1493                 tb_check_watchpoint(env);
1494                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1495                     env->exception_index = EXCP_DEBUG;
1496                     cpu_loop_exit(env);
1497                 } else {
1498                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1499                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1500                     cpu_resume_from_signal(env, NULL);
1501                 }
1502             }
1503         } else {
1504             wp->flags &= ~BP_WATCHPOINT_HIT;
1505         }
1506     }
1507 }
1508
1509 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1510    so these check for a hit then pass through to the normal out-of-line
1511    phys routines.  */
1512 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1513                                unsigned size)
1514 {
1515     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1516     switch (size) {
1517     case 1: return ldub_phys(addr);
1518     case 2: return lduw_phys(addr);
1519     case 4: return ldl_phys(addr);
1520     default: abort();
1521     }
1522 }
1523
1524 static void watch_mem_write(void *opaque, hwaddr addr,
1525                             uint64_t val, unsigned size)
1526 {
1527     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1528     switch (size) {
1529     case 1:
1530         stb_phys(addr, val);
1531         break;
1532     case 2:
1533         stw_phys(addr, val);
1534         break;
1535     case 4:
1536         stl_phys(addr, val);
1537         break;
1538     default: abort();
1539     }
1540 }
1541
1542 static const MemoryRegionOps watch_mem_ops = {
1543     .read = watch_mem_read,
1544     .write = watch_mem_write,
1545     .endianness = DEVICE_NATIVE_ENDIAN,
1546 };
1547
1548 static uint64_t subpage_read(void *opaque, hwaddr addr,
1549                              unsigned len)
1550 {
1551     subpage_t *mmio = opaque;
1552     unsigned int idx = SUBPAGE_IDX(addr);
1553     MemoryRegionSection *section;
1554 #if defined(DEBUG_SUBPAGE)
1555     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1556            mmio, len, addr, idx);
1557 #endif
1558
1559     section = &phys_sections[mmio->sub_section[idx]];
1560     addr += mmio->base;
1561     addr -= section->offset_within_address_space;
1562     addr += section->offset_within_region;
1563     return io_mem_read(section->mr, addr, len);
1564 }
1565
1566 static void subpage_write(void *opaque, hwaddr addr,
1567                           uint64_t value, unsigned len)
1568 {
1569     subpage_t *mmio = opaque;
1570     unsigned int idx = SUBPAGE_IDX(addr);
1571     MemoryRegionSection *section;
1572 #if defined(DEBUG_SUBPAGE)
1573     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1574            " idx %d value %"PRIx64"\n",
1575            __func__, mmio, len, addr, idx, value);
1576 #endif
1577
1578     section = &phys_sections[mmio->sub_section[idx]];
1579     addr += mmio->base;
1580     addr -= section->offset_within_address_space;
1581     addr += section->offset_within_region;
1582     io_mem_write(section->mr, addr, value, len);
1583 }
1584
1585 static const MemoryRegionOps subpage_ops = {
1586     .read = subpage_read,
1587     .write = subpage_write,
1588     .endianness = DEVICE_NATIVE_ENDIAN,
1589 };
1590
1591 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1592                                  unsigned size)
1593 {
1594     ram_addr_t raddr = addr;
1595     void *ptr = qemu_get_ram_ptr(raddr);
1596     switch (size) {
1597     case 1: return ldub_p(ptr);
1598     case 2: return lduw_p(ptr);
1599     case 4: return ldl_p(ptr);
1600     default: abort();
1601     }
1602 }
1603
1604 static void subpage_ram_write(void *opaque, hwaddr addr,
1605                               uint64_t value, unsigned size)
1606 {
1607     ram_addr_t raddr = addr;
1608     void *ptr = qemu_get_ram_ptr(raddr);
1609     switch (size) {
1610     case 1: return stb_p(ptr, value);
1611     case 2: return stw_p(ptr, value);
1612     case 4: return stl_p(ptr, value);
1613     default: abort();
1614     }
1615 }
1616
1617 static const MemoryRegionOps subpage_ram_ops = {
1618     .read = subpage_ram_read,
1619     .write = subpage_ram_write,
1620     .endianness = DEVICE_NATIVE_ENDIAN,
1621 };
1622
1623 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1624                              uint16_t section)
1625 {
1626     int idx, eidx;
1627
1628     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1629         return -1;
1630     idx = SUBPAGE_IDX(start);
1631     eidx = SUBPAGE_IDX(end);
1632 #if defined(DEBUG_SUBPAGE)
1633     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1634            mmio, start, end, idx, eidx, section);
1635 #endif
1636     if (memory_region_is_ram(phys_sections[section].mr)) {
1637         MemoryRegionSection new_section = phys_sections[section];
1638         new_section.mr = &io_mem_subpage_ram;
1639         section = phys_section_add(&new_section);
1640     }
1641     for (; idx <= eidx; idx++) {
1642         mmio->sub_section[idx] = section;
1643     }
1644
1645     return 0;
1646 }
1647
1648 static subpage_t *subpage_init(hwaddr base)
1649 {
1650     subpage_t *mmio;
1651
1652     mmio = g_malloc0(sizeof(subpage_t));
1653
1654     mmio->base = base;
1655     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1656                           "subpage", TARGET_PAGE_SIZE);
1657     mmio->iomem.subpage = true;
1658 #if defined(DEBUG_SUBPAGE)
1659     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1660            mmio, base, TARGET_PAGE_SIZE);
1661 #endif
1662     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1663
1664     return mmio;
1665 }
1666
1667 static uint16_t dummy_section(MemoryRegion *mr)
1668 {
1669     MemoryRegionSection section = {
1670         .mr = mr,
1671         .offset_within_address_space = 0,
1672         .offset_within_region = 0,
1673         .size = UINT64_MAX,
1674     };
1675
1676     return phys_section_add(&section);
1677 }
1678
1679 MemoryRegion *iotlb_to_region(hwaddr index)
1680 {
1681     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1682 }
1683
1684 static void io_mem_init(void)
1685 {
1686     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1687     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1688     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1689                           "unassigned", UINT64_MAX);
1690     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1691                           "notdirty", UINT64_MAX);
1692     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1693                           "subpage-ram", UINT64_MAX);
1694     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1695                           "watch", UINT64_MAX);
1696 }
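/*
 * Illustrative sketch (editor's addition): the MemoryRegionOps +
 * memory_region_init_io() pattern used for the io_mem_* regions above, as a
 * device model would use it.  The device, its register decoding and the
 * 0x100 size are hypothetical.
 */
static uint64_t toy_dev_read(void *opaque, hwaddr addr, unsigned size)
{
    /* A real device would decode 'addr' into one of its registers. */
    return 0;
}

static void toy_dev_write(void *opaque, hwaddr addr,
                          uint64_t val, unsigned size)
{
    /* ... latch 'val' into the register selected by 'addr' ... */
}

static const MemoryRegionOps toy_dev_ops = {
    .read = toy_dev_read,
    .write = toy_dev_write,
    .endianness = DEVICE_NATIVE_ENDIAN,
};

static void toy_dev_init_mmio(MemoryRegion *mr, void *opaque)
{
    memory_region_init_io(mr, &toy_dev_ops, opaque, "toy-dev", 0x100);
}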
1697
1698 static void mem_begin(MemoryListener *listener)
1699 {
1700     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1701
1702     destroy_all_mappings(d);
1703     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1704 }
1705
1706 static void core_begin(MemoryListener *listener)
1707 {
1708     phys_sections_clear();
1709     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1710     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1711     phys_section_rom = dummy_section(&io_mem_rom);
1712     phys_section_watch = dummy_section(&io_mem_watch);
1713 }
1714
1715 static void tcg_commit(MemoryListener *listener)
1716 {
1717     CPUArchState *env;
1718
1719     /* since each CPU stores ram addresses in its TLB cache, we must
1720        reset the modified entries */
1721     /* XXX: slow ! */
1722     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1723         tlb_flush(env, 1);
1724     }
1725 }
1726
1727 static void core_log_global_start(MemoryListener *listener)
1728 {
1729     cpu_physical_memory_set_dirty_tracking(1);
1730 }
1731
1732 static void core_log_global_stop(MemoryListener *listener)
1733 {
1734     cpu_physical_memory_set_dirty_tracking(0);
1735 }
1736
1737 static void io_region_add(MemoryListener *listener,
1738                           MemoryRegionSection *section)
1739 {
1740     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1741
1742     mrio->mr = section->mr;
1743     mrio->offset = section->offset_within_region;
1744     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1745                  section->offset_within_address_space, section->size);
1746     ioport_register(&mrio->iorange);
1747 }
1748
1749 static void io_region_del(MemoryListener *listener,
1750                           MemoryRegionSection *section)
1751 {
1752     isa_unassign_ioport(section->offset_within_address_space, section->size);
1753 }
1754
1755 static MemoryListener core_memory_listener = {
1756     .begin = core_begin,
1757     .log_global_start = core_log_global_start,
1758     .log_global_stop = core_log_global_stop,
1759     .priority = 1,
1760 };
1761
1762 static MemoryListener io_memory_listener = {
1763     .region_add = io_region_add,
1764     .region_del = io_region_del,
1765     .priority = 0,
1766 };
1767
1768 static MemoryListener tcg_memory_listener = {
1769     .commit = tcg_commit,
1770 };
1771
1772 void address_space_init_dispatch(AddressSpace *as)
1773 {
1774     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1775
1776     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1777     d->listener = (MemoryListener) {
1778         .begin = mem_begin,
1779         .region_add = mem_add,
1780         .region_nop = mem_add,
1781         .priority = 0,
1782     };
1783     as->dispatch = d;
1784     memory_listener_register(&d->listener, as);
1785 }
1786
1787 void address_space_destroy_dispatch(AddressSpace *as)
1788 {
1789     AddressSpaceDispatch *d = as->dispatch;
1790
1791     memory_listener_unregister(&d->listener);
1792     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1793     g_free(d);
1794     as->dispatch = NULL;
1795 }
1796
1797 static void memory_map_init(void)
1798 {
1799     system_memory = g_malloc(sizeof(*system_memory));
1800     memory_region_init(system_memory, "system", INT64_MAX);
1801     address_space_init(&address_space_memory, system_memory);
1802     address_space_memory.name = "memory";
1803
1804     system_io = g_malloc(sizeof(*system_io));
1805     memory_region_init(system_io, "io", 65536);
1806     address_space_init(&address_space_io, system_io);
1807     address_space_io.name = "I/O";
1808
1809     memory_listener_register(&core_memory_listener, &address_space_memory);
1810     memory_listener_register(&io_memory_listener, &address_space_io);
1811     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1812
1813     dma_context_init(&dma_context_memory, &address_space_memory,
1814                      NULL, NULL, NULL);
1815 }
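
/* Illustrative sketch added for clarity; not part of the original file.
 * It mirrors what memory_map_init() does for the system bus to show how a
 * hypothetical device model could build its own address space.  All
 * "example_*" names are invented for this illustration.
 */
static void __attribute__((unused)) example_address_space_setup(void)
{
    static MemoryRegion example_root;
    static AddressSpace example_as;

    /* a large root container, analogous to "system" above */
    memory_region_init(&example_root, "example-root", INT64_MAX);
    /* address_space_init() wires up the dispatch table via
     * address_space_init_dispatch() above */
    address_space_init(&example_as, &example_root);
    example_as.name = "example";
    /* example_as can now be used with address_space_rw() and friends */
}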
1816
1817 MemoryRegion *get_system_memory(void)
1818 {
1819     return system_memory;
1820 }
1821
1822 MemoryRegion *get_system_io(void)
1823 {
1824     return system_io;
1825 }
1826
1827 #endif /* !defined(CONFIG_USER_ONLY) */
1828
1829 /* physical memory access (slow version, mainly for debug) */
1830 #if defined(CONFIG_USER_ONLY)
1831 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1832                         uint8_t *buf, int len, int is_write)
1833 {
1834     int l, flags;
1835     target_ulong page;
1836     void * p;
1837
1838     while (len > 0) {
1839         page = addr & TARGET_PAGE_MASK;
1840         l = (page + TARGET_PAGE_SIZE) - addr;
1841         if (l > len)
1842             l = len;
1843         flags = page_get_flags(page);
1844         if (!(flags & PAGE_VALID))
1845             return -1;
1846         if (is_write) {
1847             if (!(flags & PAGE_WRITE))
1848                 return -1;
1849             /* XXX: this code should not depend on lock_user */
1850             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1851                 return -1;
1852             memcpy(p, buf, l);
1853             unlock_user(p, addr, l);
1854         } else {
1855             if (!(flags & PAGE_READ))
1856                 return -1;
1857             /* XXX: this code should not depend on lock_user */
1858             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1859                 return -1;
1860             memcpy(buf, p, l);
1861             unlock_user(p, addr, 0);
1862         }
1863         len -= l;
1864         buf += l;
1865         addr += l;
1866     }
1867     return 0;
1868 }
1869
1870 #else
1871
1872 static void invalidate_and_set_dirty(hwaddr addr,
1873                                      hwaddr length)
1874 {
1875     if (!cpu_physical_memory_is_dirty(addr)) {
1876         /* invalidate code */
1877         tb_invalidate_phys_page_range(addr, addr + length, 0);
1878         /* set dirty bit */
1879         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1880     }
1881     xen_modified_memory(addr, length);
1882 }
1883
1884 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1885                       int len, bool is_write)
1886 {
1887     AddressSpaceDispatch *d = as->dispatch;
1888     int l;
1889     uint8_t *ptr;
1890     uint32_t val;
1891     hwaddr page;
1892     MemoryRegionSection *section;
1893
1894     while (len > 0) {
1895         page = addr & TARGET_PAGE_MASK;
1896         l = (page + TARGET_PAGE_SIZE) - addr;
1897         if (l > len)
1898             l = len;
1899         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1900
1901         if (is_write) {
1902             if (!memory_region_is_ram(section->mr)) {
1903                 hwaddr addr1;
1904                 addr1 = memory_region_section_addr(section, addr);
1905                 /* XXX: could force cpu_single_env to NULL to avoid
1906                    potential bugs */
1907                 if (l >= 4 && ((addr1 & 3) == 0)) {
1908                     /* 32 bit write access */
1909                     val = ldl_p(buf);
1910                     io_mem_write(section->mr, addr1, val, 4);
1911                     l = 4;
1912                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1913                     /* 16 bit write access */
1914                     val = lduw_p(buf);
1915                     io_mem_write(section->mr, addr1, val, 2);
1916                     l = 2;
1917                 } else {
1918                     /* 8 bit write access */
1919                     val = ldub_p(buf);
1920                     io_mem_write(section->mr, addr1, val, 1);
1921                     l = 1;
1922                 }
1923             } else if (!section->readonly) {
1924                 ram_addr_t addr1;
1925                 addr1 = memory_region_get_ram_addr(section->mr)
1926                     + memory_region_section_addr(section, addr);
1927                 /* RAM case */
1928                 ptr = qemu_get_ram_ptr(addr1);
1929                 memcpy(ptr, buf, l);
1930                 invalidate_and_set_dirty(addr1, l);
1931                 qemu_put_ram_ptr(ptr);
1932             }
1933         } else {
1934             if (!(memory_region_is_ram(section->mr) ||
1935                   memory_region_is_romd(section->mr))) {
1936                 hwaddr addr1;
1937                 /* I/O case */
1938                 addr1 = memory_region_section_addr(section, addr);
1939                 if (l >= 4 && ((addr1 & 3) == 0)) {
1940                     /* 32 bit read access */
1941                     val = io_mem_read(section->mr, addr1, 4);
1942                     stl_p(buf, val);
1943                     l = 4;
1944                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1945                     /* 16 bit read access */
1946                     val = io_mem_read(section->mr, addr1, 2);
1947                     stw_p(buf, val);
1948                     l = 2;
1949                 } else {
1950                     /* 8 bit read access */
1951                     val = io_mem_read(section->mr, addr1, 1);
1952                     stb_p(buf, val);
1953                     l = 1;
1954                 }
1955             } else {
1956                 /* RAM case */
1957                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1958                                        + memory_region_section_addr(section,
1959                                                                     addr));
1960                 memcpy(buf, ptr, l);
1961                 qemu_put_ram_ptr(ptr);
1962             }
1963         }
1964         len -= l;
1965         buf += l;
1966         addr += l;
1967     }
1968 }
1969
1970 void address_space_write(AddressSpace *as, hwaddr addr,
1971                          const uint8_t *buf, int len)
1972 {
1973     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1974 }
1975
1976 /**
1977  * address_space_read: read from an address space.
1978  *
1979  * @as: #AddressSpace to be accessed
1980  * @addr: address within that address space
1981  * @buf: buffer with the data transferred
 * @len: number of bytes to read
1982  */
1983 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1984 {
1985     address_space_rw(as, addr, buf, len, false);
1986 }
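
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows a round trip through the system memory address space using the
 * wrappers above.  The guest physical address 0x1000 and the fill pattern
 * are arbitrary values chosen for the example.
 */
static void __attribute__((unused)) example_address_space_copy(void)
{
    uint8_t pattern[16] = { 0xde, 0xad, 0xbe, 0xef };
    uint8_t readback[16];

    /* write 16 bytes at guest physical address 0x1000 ... */
    address_space_write(&address_space_memory, 0x1000,
                        pattern, sizeof(pattern));
    /* ... and read them back; both calls funnel into address_space_rw() */
    address_space_read(&address_space_memory, 0x1000,
                       readback, sizeof(readback));
}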
1987
1988
1989 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1990                             int len, int is_write)
1991 {
1992     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1993 }
1994
1995 /* used for ROM loading : can write in RAM and ROM */
1996 void cpu_physical_memory_write_rom(hwaddr addr,
1997                                    const uint8_t *buf, int len)
1998 {
1999     AddressSpaceDispatch *d = address_space_memory.dispatch;
2000     int l;
2001     uint8_t *ptr;
2002     hwaddr page;
2003     MemoryRegionSection *section;
2004
2005     while (len > 0) {
2006         page = addr & TARGET_PAGE_MASK;
2007         l = (page + TARGET_PAGE_SIZE) - addr;
2008         if (l > len)
2009             l = len;
2010         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2011
2012         if (!(memory_region_is_ram(section->mr) ||
2013               memory_region_is_romd(section->mr))) {
2014             /* do nothing */
2015         } else {
2016             unsigned long addr1;
2017             addr1 = memory_region_get_ram_addr(section->mr)
2018                 + memory_region_section_addr(section, addr);
2019             /* ROM/RAM case */
2020             ptr = qemu_get_ram_ptr(addr1);
2021             memcpy(ptr, buf, l);
2022             invalidate_and_set_dirty(addr1, l);
2023             qemu_put_ram_ptr(ptr);
2024         }
2025         len -= l;
2026         buf += l;
2027         addr += l;
2028     }
2029 }
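
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows how a board model might copy a firmware blob into guest memory:
 * unlike cpu_physical_memory_write(), the ROM variant above also succeeds
 * for read-only regions.  The load address 0xfffc0000 is invented for the
 * example; real callers get it from the ROM loader.
 */
static void __attribute__((unused)) example_load_firmware(const uint8_t *blob,
                                                          int size)
{
    cpu_physical_memory_write_rom(0xfffc0000, blob, size);
}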
2030
2031 typedef struct {
2032     void *buffer;
2033     hwaddr addr;
2034     hwaddr len;
2035 } BounceBuffer;
2036
2037 static BounceBuffer bounce;
2038
2039 typedef struct MapClient {
2040     void *opaque;
2041     void (*callback)(void *opaque);
2042     QLIST_ENTRY(MapClient) link;
2043 } MapClient;
2044
2045 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2046     = QLIST_HEAD_INITIALIZER(map_client_list);
2047
2048 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2049 {
2050     MapClient *client = g_malloc(sizeof(*client));
2051
2052     client->opaque = opaque;
2053     client->callback = callback;
2054     QLIST_INSERT_HEAD(&map_client_list, client, link);
2055     return client;
2056 }
2057
2058 static void cpu_unregister_map_client(void *_client)
2059 {
2060     MapClient *client = (MapClient *)_client;
2061
2062     QLIST_REMOVE(client, link);
2063     g_free(client);
2064 }
2065
2066 static void cpu_notify_map_clients(void)
2067 {
2068     MapClient *client;
2069
2070     while (!QLIST_EMPTY(&map_client_list)) {
2071         client = QLIST_FIRST(&map_client_list);
2072         client->callback(client->opaque);
2073         cpu_unregister_map_client(client);
2074     }
2075 }
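
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows the map-client mechanism: when address_space_map() below returns
 * NULL because the single bounce buffer is busy, a device registers a
 * callback here and cpu_notify_map_clients() invokes it once the buffer is
 * released, so the map can be retried.  Names are invented for the example.
 */
static void __attribute__((unused)) example_map_retry_cb(void *opaque)
{
    /* 'opaque' is the state handed to cpu_register_map_client(); a real
     * callback would restart the stalled transfer here */
}

static void __attribute__((unused)) example_wait_for_bounce_buffer(void *dev)
{
    /* the client record is freed automatically once the callback fires */
    cpu_register_map_client(dev, example_map_retry_cb);
}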
2076
2077 /* Map a physical memory region into a host virtual address.
2078  * May map a subset of the requested range, given by and returned in *plen.
2079  * May return NULL if resources needed to perform the mapping are exhausted.
2080  * Use only for reads OR writes - not for read-modify-write operations.
2081  * Use cpu_register_map_client() to know when retrying the map operation is
2082  * likely to succeed.
2083  */
2084 void *address_space_map(AddressSpace *as,
2085                         hwaddr addr,
2086                         hwaddr *plen,
2087                         bool is_write)
2088 {
2089     AddressSpaceDispatch *d = as->dispatch;
2090     hwaddr len = *plen;
2091     hwaddr todo = 0;
2092     int l;
2093     hwaddr page;
2094     MemoryRegionSection *section;
2095     ram_addr_t raddr = RAM_ADDR_MAX;
2096     ram_addr_t rlen;
2097     void *ret;
2098
2099     while (len > 0) {
2100         page = addr & TARGET_PAGE_MASK;
2101         l = (page + TARGET_PAGE_SIZE) - addr;
2102         if (l > len)
2103             l = len;
2104         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2105
2106         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2107             if (todo || bounce.buffer) {
2108                 break;
2109             }
2110             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2111             bounce.addr = addr;
2112             bounce.len = l;
2113             if (!is_write) {
2114                 address_space_read(as, addr, bounce.buffer, l);
2115             }
2116
2117             *plen = l;
2118             return bounce.buffer;
2119         }
2120         if (!todo) {
2121             raddr = memory_region_get_ram_addr(section->mr)
2122                 + memory_region_section_addr(section, addr);
2123         }
2124
2125         len -= l;
2126         addr += l;
2127         todo += l;
2128     }
2129     rlen = todo;
2130     ret = qemu_ram_ptr_length(raddr, &rlen);
2131     *plen = rlen;
2132     return ret;
2133 }
2134
2135 /* Unmaps a memory region previously mapped by address_space_map().
2136  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2137  * the amount of memory that was actually read or written by the caller.
2138  */
2139 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2140                          int is_write, hwaddr access_len)
2141 {
2142     if (buffer != bounce.buffer) {
2143         if (is_write) {
2144             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2145             while (access_len) {
2146                 unsigned l;
2147                 l = TARGET_PAGE_SIZE;
2148                 if (l > access_len)
2149                     l = access_len;
2150                 invalidate_and_set_dirty(addr1, l);
2151                 addr1 += l;
2152                 access_len -= l;
2153             }
2154         }
2155         if (xen_enabled()) {
2156             xen_invalidate_map_cache_entry(buffer);
2157         }
2158         return;
2159     }
2160     if (is_write) {
2161         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2162     }
2163     qemu_vfree(bounce.buffer);
2164     bounce.buffer = NULL;
2165     cpu_notify_map_clients();
2166 }
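
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows the canonical map / access / unmap sequence for a DMA-style
 * write into guest memory.  The zero-fill stands in for whatever a real
 * device would copy from its internal state.
 */
static void __attribute__((unused)) example_dma_write(hwaddr addr, hwaddr len)
{
    hwaddr mapped = len;
    void *host = address_space_map(&address_space_memory, addr, &mapped,
                                   true /* is_write */);

    if (host == NULL) {
        /* resources exhausted; see cpu_register_map_client() above */
        return;
    }
    /* 'mapped' may be smaller than 'len' if the range runs into MMIO */
    memset(host, 0, mapped);
    /* pass the bytes actually written so the right pages are marked dirty */
    address_space_unmap(&address_space_memory, host, mapped, 1, mapped);
}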
2167
2168 void *cpu_physical_memory_map(hwaddr addr,
2169                               hwaddr *plen,
2170                               int is_write)
2171 {
2172     return address_space_map(&address_space_memory, addr, plen, is_write);
2173 }
2174
2175 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2176                                int is_write, hwaddr access_len)
2177 {
2178     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2179 }
2180
2181 /* warning: addr must be aligned */
2182 static inline uint32_t ldl_phys_internal(hwaddr addr,
2183                                          enum device_endian endian)
2184 {
2185     uint8_t *ptr;
2186     uint32_t val;
2187     MemoryRegionSection *section;
2188
2189     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2190
2191     if (!(memory_region_is_ram(section->mr) ||
2192           memory_region_is_romd(section->mr))) {
2193         /* I/O case */
2194         addr = memory_region_section_addr(section, addr);
2195         val = io_mem_read(section->mr, addr, 4);
2196 #if defined(TARGET_WORDS_BIGENDIAN)
2197         if (endian == DEVICE_LITTLE_ENDIAN) {
2198             val = bswap32(val);
2199         }
2200 #else
2201         if (endian == DEVICE_BIG_ENDIAN) {
2202             val = bswap32(val);
2203         }
2204 #endif
2205     } else {
2206         /* RAM case */
2207         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2208                                 & TARGET_PAGE_MASK)
2209                                + memory_region_section_addr(section, addr));
2210         switch (endian) {
2211         case DEVICE_LITTLE_ENDIAN:
2212             val = ldl_le_p(ptr);
2213             break;
2214         case DEVICE_BIG_ENDIAN:
2215             val = ldl_be_p(ptr);
2216             break;
2217         default:
2218             val = ldl_p(ptr);
2219             break;
2220         }
2221     }
2222     return val;
2223 }
2224
2225 uint32_t ldl_phys(hwaddr addr)
2226 {
2227     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2228 }
2229
2230 uint32_t ldl_le_phys(hwaddr addr)
2231 {
2232     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2233 }
2234
2235 uint32_t ldl_be_phys(hwaddr addr)
2236 {
2237     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2238 }
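
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows the fixed-width physical accessors: reading a 32-bit descriptor
 * word with an explicit endianness, then writing back a status word.  The
 * descriptor layout is invented for the example; note that the addresses
 * must be naturally aligned, as the warning above says.
 */
static void __attribute__((unused)) example_descriptor_access(hwaddr desc)
{
    uint32_t ctrl = ldl_le_phys(desc);          /* explicit little endian */
    uint32_t status = ldl_phys(desc + 4);       /* target-native endian */

    stl_le_phys(desc + 8, ctrl | 1);            /* e.g. set a "done" bit */
    (void)status;
}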
2239
2240 /* warning: addr must be aligned */
2241 static inline uint64_t ldq_phys_internal(hwaddr addr,
2242                                          enum device_endian endian)
2243 {
2244     uint8_t *ptr;
2245     uint64_t val;
2246     MemoryRegionSection *section;
2247
2248     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2249
2250     if (!(memory_region_is_ram(section->mr) ||
2251           memory_region_is_romd(section->mr))) {
2252         /* I/O case */
2253         addr = memory_region_section_addr(section, addr);
2254
2255         /* XXX This is broken when device endian != cpu endian.
2256                Fix and add "endian" variable check */
2257 #ifdef TARGET_WORDS_BIGENDIAN
2258         val = io_mem_read(section->mr, addr, 4) << 32;
2259         val |= io_mem_read(section->mr, addr + 4, 4);
2260 #else
2261         val = io_mem_read(section->mr, addr, 4);
2262         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2263 #endif
2264     } else {
2265         /* RAM case */
2266         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2267                                 & TARGET_PAGE_MASK)
2268                                + memory_region_section_addr(section, addr));
2269         switch (endian) {
2270         case DEVICE_LITTLE_ENDIAN:
2271             val = ldq_le_p(ptr);
2272             break;
2273         case DEVICE_BIG_ENDIAN:
2274             val = ldq_be_p(ptr);
2275             break;
2276         default:
2277             val = ldq_p(ptr);
2278             break;
2279         }
2280     }
2281     return val;
2282 }
2283
2284 uint64_t ldq_phys(hwaddr addr)
2285 {
2286     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2287 }
2288
2289 uint64_t ldq_le_phys(hwaddr addr)
2290 {
2291     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2292 }
2293
2294 uint64_t ldq_be_phys(hwaddr addr)
2295 {
2296     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2297 }
2298
2299 /* XXX: optimize */
2300 uint32_t ldub_phys(hwaddr addr)
2301 {
2302     uint8_t val;
2303     cpu_physical_memory_read(addr, &val, 1);
2304     return val;
2305 }
2306
2307 /* warning: addr must be aligned */
2308 static inline uint32_t lduw_phys_internal(hwaddr addr,
2309                                           enum device_endian endian)
2310 {
2311     uint8_t *ptr;
2312     uint64_t val;
2313     MemoryRegionSection *section;
2314
2315     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2316
2317     if (!(memory_region_is_ram(section->mr) ||
2318           memory_region_is_romd(section->mr))) {
2319         /* I/O case */
2320         addr = memory_region_section_addr(section, addr);
2321         val = io_mem_read(section->mr, addr, 2);
2322 #if defined(TARGET_WORDS_BIGENDIAN)
2323         if (endian == DEVICE_LITTLE_ENDIAN) {
2324             val = bswap16(val);
2325         }
2326 #else
2327         if (endian == DEVICE_BIG_ENDIAN) {
2328             val = bswap16(val);
2329         }
2330 #endif
2331     } else {
2332         /* RAM case */
2333         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2334                                 & TARGET_PAGE_MASK)
2335                                + memory_region_section_addr(section, addr));
2336         switch (endian) {
2337         case DEVICE_LITTLE_ENDIAN:
2338             val = lduw_le_p(ptr);
2339             break;
2340         case DEVICE_BIG_ENDIAN:
2341             val = lduw_be_p(ptr);
2342             break;
2343         default:
2344             val = lduw_p(ptr);
2345             break;
2346         }
2347     }
2348     return val;
2349 }
2350
2351 uint32_t lduw_phys(hwaddr addr)
2352 {
2353     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2354 }
2355
2356 uint32_t lduw_le_phys(hwaddr addr)
2357 {
2358     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2359 }
2360
2361 uint32_t lduw_be_phys(hwaddr addr)
2362 {
2363     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2364 }
2365
2366 /* warning: addr must be aligned. The ram page is not masked as dirty
2367    and the code inside is not invalidated. It is useful if the dirty
2368    bits are used to track modified PTEs */
2369 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2370 {
2371     uint8_t *ptr;
2372     MemoryRegionSection *section;
2373
2374     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2375
2376     if (!memory_region_is_ram(section->mr) || section->readonly) {
2377         addr = memory_region_section_addr(section, addr);
2378         if (memory_region_is_ram(section->mr)) {
2379             section = &phys_sections[phys_section_rom];
2380         }
2381         io_mem_write(section->mr, addr, val, 4);
2382     } else {
2383         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2384                                & TARGET_PAGE_MASK)
2385             + memory_region_section_addr(section, addr);
2386         ptr = qemu_get_ram_ptr(addr1);
2387         stl_p(ptr, val);
2388
2389         if (unlikely(in_migration)) {
2390             if (!cpu_physical_memory_is_dirty(addr1)) {
2391                 /* invalidate code */
2392                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2393                 /* set dirty bit */
2394                 cpu_physical_memory_set_dirty_flags(
2395                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2396             }
2397         }
2398     }
2399 }
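
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows the intended use of stl_phys_notdirty(): target MMU emulation
 * updating a flag in a guest PTE without marking the RAM page dirty or
 * invalidating translated code, so the dirty bits can still be used to
 * detect PTEs modified by the guest itself.  The bit value 0x20 is a
 * made-up stand-in for a target-specific "accessed" flag.
 */
static void __attribute__((unused)) example_set_pte_accessed(hwaddr pte_addr)
{
    const uint32_t accessed_bit = 0x20;   /* hypothetical PTE flag */
    uint32_t pte = ldl_phys(pte_addr);

    if (!(pte & accessed_bit)) {
        stl_phys_notdirty(pte_addr, pte | accessed_bit);
    }
}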
2400
2401 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2402 {
2403     uint8_t *ptr;
2404     MemoryRegionSection *section;
2405
2406     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2407
2408     if (!memory_region_is_ram(section->mr) || section->readonly) {
2409         addr = memory_region_section_addr(section, addr);
2410         if (memory_region_is_ram(section->mr)) {
2411             section = &phys_sections[phys_section_rom];
2412         }
2413 #ifdef TARGET_WORDS_BIGENDIAN
2414         io_mem_write(section->mr, addr, val >> 32, 4);
2415         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2416 #else
2417         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2418         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2419 #endif
2420     } else {
2421         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2422                                 & TARGET_PAGE_MASK)
2423                                + memory_region_section_addr(section, addr));
2424         stq_p(ptr, val);
2425     }
2426 }
2427
2428 /* warning: addr must be aligned */
2429 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2430                                      enum device_endian endian)
2431 {
2432     uint8_t *ptr;
2433     MemoryRegionSection *section;
2434
2435     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2436
2437     if (!memory_region_is_ram(section->mr) || section->readonly) {
2438         addr = memory_region_section_addr(section, addr);
2439         if (memory_region_is_ram(section->mr)) {
2440             section = &phys_sections[phys_section_rom];
2441         }
2442 #if defined(TARGET_WORDS_BIGENDIAN)
2443         if (endian == DEVICE_LITTLE_ENDIAN) {
2444             val = bswap32(val);
2445         }
2446 #else
2447         if (endian == DEVICE_BIG_ENDIAN) {
2448             val = bswap32(val);
2449         }
2450 #endif
2451         io_mem_write(section->mr, addr, val, 4);
2452     } else {
2453         unsigned long addr1;
2454         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2455             + memory_region_section_addr(section, addr);
2456         /* RAM case */
2457         ptr = qemu_get_ram_ptr(addr1);
2458         switch (endian) {
2459         case DEVICE_LITTLE_ENDIAN:
2460             stl_le_p(ptr, val);
2461             break;
2462         case DEVICE_BIG_ENDIAN:
2463             stl_be_p(ptr, val);
2464             break;
2465         default:
2466             stl_p(ptr, val);
2467             break;
2468         }
2469         invalidate_and_set_dirty(addr1, 4);
2470     }
2471 }
2472
2473 void stl_phys(hwaddr addr, uint32_t val)
2474 {
2475     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2476 }
2477
2478 void stl_le_phys(hwaddr addr, uint32_t val)
2479 {
2480     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2481 }
2482
2483 void stl_be_phys(hwaddr addr, uint32_t val)
2484 {
2485     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2486 }
2487
2488 /* XXX: optimize */
2489 void stb_phys(hwaddr addr, uint32_t val)
2490 {
2491     uint8_t v = val;
2492     cpu_physical_memory_write(addr, &v, 1);
2493 }
2494
2495 /* warning: addr must be aligned */
2496 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2497                                      enum device_endian endian)
2498 {
2499     uint8_t *ptr;
2500     MemoryRegionSection *section;
2501
2502     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2503
2504     if (!memory_region_is_ram(section->mr) || section->readonly) {
2505         addr = memory_region_section_addr(section, addr);
2506         if (memory_region_is_ram(section->mr)) {
2507             section = &phys_sections[phys_section_rom];
2508         }
2509 #if defined(TARGET_WORDS_BIGENDIAN)
2510         if (endian == DEVICE_LITTLE_ENDIAN) {
2511             val = bswap16(val);
2512         }
2513 #else
2514         if (endian == DEVICE_BIG_ENDIAN) {
2515             val = bswap16(val);
2516         }
2517 #endif
2518         io_mem_write(section->mr, addr, val, 2);
2519     } else {
2520         unsigned long addr1;
2521         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2522             + memory_region_section_addr(section, addr);
2523         /* RAM case */
2524         ptr = qemu_get_ram_ptr(addr1);
2525         switch (endian) {
2526         case DEVICE_LITTLE_ENDIAN:
2527             stw_le_p(ptr, val);
2528             break;
2529         case DEVICE_BIG_ENDIAN:
2530             stw_be_p(ptr, val);
2531             break;
2532         default:
2533             stw_p(ptr, val);
2534             break;
2535         }
2536         invalidate_and_set_dirty(addr1, 2);
2537     }
2538 }
2539
2540 void stw_phys(hwaddr addr, uint32_t val)
2541 {
2542     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2543 }
2544
2545 void stw_le_phys(hwaddr addr, uint32_t val)
2546 {
2547     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2548 }
2549
2550 void stw_be_phys(hwaddr addr, uint32_t val)
2551 {
2552     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2553 }
2554
2555 /* XXX: optimize */
2556 void stq_phys(hwaddr addr, uint64_t val)
2557 {
2558     val = tswap64(val);
2559     cpu_physical_memory_write(addr, &val, 8);
2560 }
2561
2562 void stq_le_phys(hwaddr addr, uint64_t val)
2563 {
2564     val = cpu_to_le64(val);
2565     cpu_physical_memory_write(addr, &val, 8);
2566 }
2567
2568 void stq_be_phys(hwaddr addr, uint64_t val)
2569 {
2570     val = cpu_to_be64(val);
2571     cpu_physical_memory_write(addr, &val, 8);
2572 }
2573
2574 /* virtual memory access for debug (includes writing to ROM) */
2575 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2576                         uint8_t *buf, int len, int is_write)
2577 {
2578     int l;
2579     hwaddr phys_addr;
2580     target_ulong page;
2581
2582     while (len > 0) {
2583         page = addr & TARGET_PAGE_MASK;
2584         phys_addr = cpu_get_phys_page_debug(env, page);
2585         /* if no physical page mapped, return an error */
2586         if (phys_addr == -1)
2587             return -1;
2588         l = (page + TARGET_PAGE_SIZE) - addr;
2589         if (l > len)
2590             l = len;
2591         phys_addr += (addr & ~TARGET_PAGE_MASK);
2592         if (is_write)
2593             cpu_physical_memory_write_rom(phys_addr, buf, l);
2594         else
2595             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2596         len -= l;
2597         buf += l;
2598         addr += l;
2599     }
2600     return 0;
2601 }
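
/* Illustrative sketch added for clarity; not part of the original file.
 * It shows how a debugger front end (e.g. the gdbstub) reads guest
 * *virtual* memory: cpu_memory_rw_debug() above translates the address
 * per CPU with cpu_get_phys_page_debug() and can even write to ROM.
 */
static int __attribute__((unused)) example_debug_peek(CPUArchState *env,
                                                      target_ulong vaddr,
                                                      uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return -1;  /* no physical page mapped at this virtual address */
    }
    *out = ldl_p(buf);  /* interpret in target-native byte order */
    return 0;
}
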
2602 #endif
2603
2604 #if !defined(CONFIG_USER_ONLY)
2605
2606 /*
2607  * A helper function for the _utterly broken_ virtio device model to find out if
2608  * it's running on a big endian machine. Don't do this at home kids!
2609  */
2610 bool virtio_is_big_endian(void);
2611 bool virtio_is_big_endian(void)
2612 {
2613 #if defined(TARGET_WORDS_BIGENDIAN)
2614     return true;
2615 #else
2616     return false;
2617 #endif
2618 }
2619
2620 #endif
2621
2622 #ifndef CONFIG_USER_ONLY
2623 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2624 {
2625     MemoryRegionSection *section;
2626
2627     section = phys_page_find(address_space_memory.dispatch,
2628                              phys_addr >> TARGET_PAGE_BITS);
2629
2630     return !(memory_region_is_ram(section->mr) ||
2631              memory_region_is_romd(section->mr));
2632 }
2633 #endif