[qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
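/* Make sure the node pool can hold at least 'nodes' more PhysPageEntry
   nodes, growing the backing array geometrically when needed.  */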
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
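/* Take the next node from the pool and initialize all of its entries as
   empty non-leaf pointers (PHYS_MAP_NODE_NIL).  */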
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
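/* Recursively descend the multi-level page map, allocating intermediate
   nodes on demand, and point every entry covering [*index, *index + *nb)
   at the section 'leaf'.  A whole sub-tree is short-circuited with a single
   leaf entry when the range covers it completely.  */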
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
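/* Walk the page map from the root for page 'index'; entries that were never
   mapped fall back to the unassigned section.  */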
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUState *cpu = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     cpu->interrupt_request &= ~0x01;
231     tlb_flush(cpu->env_ptr, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUState),
244         VMSTATE_UINT32(interrupt_request, CPUState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #else
249 #define vmstate_cpu_common vmstate_dummy
250 #endif
251
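/* Return the CPUState whose cpu_index equals 'index', or NULL if there is
   no such CPU.  */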
252 CPUState *qemu_get_cpu(int index)
253 {
254     CPUArchState *env = first_cpu;
255     CPUState *cpu = NULL;
256
257     while (env) {
258         cpu = ENV_GET_CPU(env);
259         if (cpu->cpu_index == index) {
260             break;
261         }
262         env = env->next_cpu;
263     }
264
265     return env ? cpu : NULL;
266 }
267
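/* Register a newly created CPU: append it to the global CPU list, assign
   the next free cpu_index and hook up its migration/savevm state.  */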
268 void cpu_exec_init(CPUArchState *env)
269 {
270     CPUState *cpu = ENV_GET_CPU(env);
271     CPUClass *cc = CPU_GET_CLASS(cpu);
272     CPUArchState **penv;
273     int cpu_index;
274
275 #if defined(CONFIG_USER_ONLY)
276     cpu_list_lock();
277 #endif
278     env->next_cpu = NULL;
279     penv = &first_cpu;
280     cpu_index = 0;
281     while (*penv != NULL) {
282         penv = &(*penv)->next_cpu;
283         cpu_index++;
284     }
285     cpu->cpu_index = cpu_index;
286     cpu->numa_node = 0;
287     QTAILQ_INIT(&env->breakpoints);
288     QTAILQ_INIT(&env->watchpoints);
289 #ifndef CONFIG_USER_ONLY
290     cpu->thread_id = qemu_get_thread_id();
291 #endif
292     *penv = env;
293 #if defined(CONFIG_USER_ONLY)
294     cpu_list_unlock();
295 #endif
296     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
297 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
298     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
299                     cpu_save, cpu_load, env);
300     assert(cc->vmsd == NULL);
301 #endif
302     if (cc->vmsd != NULL) {
303         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
304     }
305 }
306
307 #if defined(TARGET_HAS_ICE)
308 #if defined(CONFIG_USER_ONLY)
309 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
310 {
311     tb_invalidate_phys_page_range(pc, pc + 1, 0);
312 }
313 #else
314 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
315 {
316     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
317             (pc & ~TARGET_PAGE_MASK));
318 }
319 #endif
320 #endif /* TARGET_HAS_ICE */
321
322 #if defined(CONFIG_USER_ONLY)
323 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
324
325 {
326 }
327
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     return -ENOSYS;
332 }
333 #else
334 /* Add a watchpoint.  */
335 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
336                           int flags, CPUWatchpoint **watchpoint)
337 {
338     target_ulong len_mask = ~(len - 1);
339     CPUWatchpoint *wp;
340
341     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
342     if ((len & (len - 1)) || (addr & ~len_mask) ||
343             len == 0 || len > TARGET_PAGE_SIZE) {
344         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
345                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
346         return -EINVAL;
347     }
348     wp = g_malloc(sizeof(*wp));
349
350     wp->vaddr = addr;
351     wp->len_mask = len_mask;
352     wp->flags = flags;
353
354     /* keep all GDB-injected watchpoints in front */
355     if (flags & BP_GDB)
356         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
357     else
358         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
359
360     tlb_flush_page(env, addr);
361
362     if (watchpoint)
363         *watchpoint = wp;
364     return 0;
365 }
366
367 /* Remove a specific watchpoint.  */
368 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
369                           int flags)
370 {
371     target_ulong len_mask = ~(len - 1);
372     CPUWatchpoint *wp;
373
374     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
375         if (addr == wp->vaddr && len_mask == wp->len_mask
376                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
377             cpu_watchpoint_remove_by_ref(env, wp);
378             return 0;
379         }
380     }
381     return -ENOENT;
382 }
383
384 /* Remove a specific watchpoint by reference.  */
385 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
386 {
387     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
388
389     tlb_flush_page(env, watchpoint->vaddr);
390
391     g_free(watchpoint);
392 }
393
394 /* Remove all matching watchpoints.  */
395 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
396 {
397     CPUWatchpoint *wp, *next;
398
399     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
400         if (wp->flags & mask)
401             cpu_watchpoint_remove_by_ref(env, wp);
402     }
403 }
404 #endif
405
406 /* Add a breakpoint.  */
407 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
408                           CPUBreakpoint **breakpoint)
409 {
410 #if defined(TARGET_HAS_ICE)
411     CPUBreakpoint *bp;
412
413     bp = g_malloc(sizeof(*bp));
414
415     bp->pc = pc;
416     bp->flags = flags;
417
418     /* keep all GDB-injected breakpoints in front */
419     if (flags & BP_GDB)
420         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
421     else
422         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
423
424     breakpoint_invalidate(env, pc);
425
426     if (breakpoint)
427         *breakpoint = bp;
428     return 0;
429 #else
430     return -ENOSYS;
431 #endif
432 }
433
434 /* Remove a specific breakpoint.  */
435 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
436 {
437 #if defined(TARGET_HAS_ICE)
438     CPUBreakpoint *bp;
439
440     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
441         if (bp->pc == pc && bp->flags == flags) {
442             cpu_breakpoint_remove_by_ref(env, bp);
443             return 0;
444         }
445     }
446     return -ENOENT;
447 #else
448     return -ENOSYS;
449 #endif
450 }
451
452 /* Remove a specific breakpoint by reference.  */
453 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
454 {
455 #if defined(TARGET_HAS_ICE)
456     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
457
458     breakpoint_invalidate(env, breakpoint->pc);
459
460     g_free(breakpoint);
461 #endif
462 }
463
464 /* Remove all matching breakpoints. */
465 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
466 {
467 #if defined(TARGET_HAS_ICE)
468     CPUBreakpoint *bp, *next;
469
470     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
471         if (bp->flags & mask)
472             cpu_breakpoint_remove_by_ref(env, bp);
473     }
474 #endif
475 }
476
477 /* enable or disable single step mode. EXCP_DEBUG is returned by the
478    CPU loop after each instruction */
479 void cpu_single_step(CPUArchState *env, int enabled)
480 {
481 #if defined(TARGET_HAS_ICE)
482     if (env->singlestep_enabled != enabled) {
483         env->singlestep_enabled = enabled;
484         if (kvm_enabled())
485             kvm_update_guest_debug(env, 0);
486         else {
487             /* must flush all the translated code to avoid inconsistencies */
488             /* XXX: only flush what is necessary */
489             tb_flush(env);
490         }
491     }
492 #endif
493 }
494
495 void cpu_exit(CPUArchState *env)
496 {
497     CPUState *cpu = ENV_GET_CPU(env);
498
499     cpu->exit_request = 1;
500     cpu->tcg_exit_req = 1;
501 }
502
503 void cpu_abort(CPUArchState *env, const char *fmt, ...)
504 {
505     va_list ap;
506     va_list ap2;
507
508     va_start(ap, fmt);
509     va_copy(ap2, ap);
510     fprintf(stderr, "qemu: fatal: ");
511     vfprintf(stderr, fmt, ap);
512     fprintf(stderr, "\n");
513     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
514     if (qemu_log_enabled()) {
515         qemu_log("qemu: fatal: ");
516         qemu_log_vprintf(fmt, ap2);
517         qemu_log("\n");
518         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
519         qemu_log_flush();
520         qemu_log_close();
521     }
522     va_end(ap2);
523     va_end(ap);
524 #if defined(CONFIG_USER_ONLY)
525     {
526         struct sigaction act;
527         sigfillset(&act.sa_mask);
528         act.sa_handler = SIG_DFL;
529         sigaction(SIGABRT, &act, NULL);
530     }
531 #endif
532     abort();
533 }
534
535 CPUArchState *cpu_copy(CPUArchState *env)
536 {
537     CPUArchState *new_env = cpu_init(env->cpu_model_str);
538     CPUArchState *next_cpu = new_env->next_cpu;
539 #if defined(TARGET_HAS_ICE)
540     CPUBreakpoint *bp;
541     CPUWatchpoint *wp;
542 #endif
543
544     memcpy(new_env, env, sizeof(CPUArchState));
545
546     /* Preserve chaining. */
547     new_env->next_cpu = next_cpu;
548
549     /* Clone all break/watchpoints.
550        Note: Once we support ptrace with hw-debug register access, make sure
551        BP_CPU break/watchpoints are handled correctly on clone. */
552     QTAILQ_INIT(&new_env->breakpoints);
553     QTAILQ_INIT(&new_env->watchpoints);
554 #if defined(TARGET_HAS_ICE)
555     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
556         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
557     }
558     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
559         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
560                               wp->flags, NULL);
561     }
562 #endif
563
564     return new_env;
565 }
566
567 #if !defined(CONFIG_USER_ONLY)
568 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
569                                       uintptr_t length)
570 {
571     uintptr_t start1;
572
573     /* we modify the TLB cache so that the dirty bit will be set again
574        when accessing the range */
575     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
576     /* Check that we don't span multiple blocks - this breaks the
577        address comparisons below.  */
578     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
579             != (end - 1) - start) {
580         abort();
581     }
582     cpu_tlb_reset_dirty_all(start1, length);
583
584 }
585
586 /* Note: start and end must be within the same ram block.  */
587 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
588                                      int dirty_flags)
589 {
590     uintptr_t length;
591
592     start &= TARGET_PAGE_MASK;
593     end = TARGET_PAGE_ALIGN(end);
594
595     length = end - start;
596     if (length == 0)
597         return;
598     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
599
600     if (tcg_enabled()) {
601         tlb_reset_dirty_range_all(start, end, length);
602     }
603 }
604
605 static int cpu_physical_memory_set_dirty_tracking(int enable)
606 {
607     int ret = 0;
608     in_migration = enable;
609     return ret;
610 }
611
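/* Compute the iotlb value for a TLB entry: for RAM this is the ram address
   combined with the notdirty or rom section so writes are trapped as needed;
   for MMIO it is the index of the section itself.  Pages with a relevant
   watchpoint are redirected through the watch section and marked TLB_MMIO.  */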
612 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
613                                                    MemoryRegionSection *section,
614                                                    target_ulong vaddr,
615                                                    hwaddr paddr,
616                                                    int prot,
617                                                    target_ulong *address)
618 {
619     hwaddr iotlb;
620     CPUWatchpoint *wp;
621
622     if (memory_region_is_ram(section->mr)) {
623         /* Normal RAM.  */
624         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
625             + memory_region_section_addr(section, paddr);
626         if (!section->readonly) {
627             iotlb |= phys_section_notdirty;
628         } else {
629             iotlb |= phys_section_rom;
630         }
631     } else {
632         /* IO handlers are currently passed a physical address.
633            It would be nice to pass an offset from the base address
634            of that region.  This would avoid having to special case RAM,
635            and avoid full address decoding in every device.
636            We can't use the high bits of pd for this because
637            IO_MEM_ROMD uses these as a ram address.  */
638         iotlb = section - phys_sections;
639         iotlb += memory_region_section_addr(section, paddr);
640     }
641
642     /* Make accesses to pages with watchpoints go via the
643        watchpoint trap routines.  */
644     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
645         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
646             /* Avoid trapping reads of pages with a write breakpoint. */
647             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
648                 iotlb = phys_section_watch + paddr;
649                 *address |= TLB_MMIO;
650                 break;
651             }
652         }
653     }
654
655     return iotlb;
656 }
657 #endif /* !defined(CONFIG_USER_ONLY) */
658
659 #if !defined(CONFIG_USER_ONLY)
660
661 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
662 typedef struct subpage_t {
663     MemoryRegion iomem;
664     hwaddr base;
665     uint16_t sub_section[TARGET_PAGE_SIZE];
666 } subpage_t;
667
668 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
669                              uint16_t section);
670 static subpage_t *subpage_init(hwaddr base);
671 static void destroy_page_desc(uint16_t section_index)
672 {
673     MemoryRegionSection *section = &phys_sections[section_index];
674     MemoryRegion *mr = section->mr;
675
676     if (mr->subpage) {
677         subpage_t *subpage = container_of(mr, subpage_t, iomem);
678         memory_region_destroy(&subpage->iomem);
679         g_free(subpage);
680     }
681 }
682
683 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
684 {
685     unsigned i;
686     PhysPageEntry *p;
687
688     if (lp->ptr == PHYS_MAP_NODE_NIL) {
689         return;
690     }
691
692     p = phys_map_nodes[lp->ptr];
693     for (i = 0; i < L2_SIZE; ++i) {
694         if (!p[i].is_leaf) {
695             destroy_l2_mapping(&p[i], level - 1);
696         } else {
697             destroy_page_desc(p[i].ptr);
698         }
699     }
700     lp->is_leaf = 0;
701     lp->ptr = PHYS_MAP_NODE_NIL;
702 }
703
704 static void destroy_all_mappings(AddressSpaceDispatch *d)
705 {
706     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
707     phys_map_nodes_reset();
708 }
709
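/* Append a copy of 'section' to the phys_sections array, growing it as
   needed, and return its index.  */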
710 static uint16_t phys_section_add(MemoryRegionSection *section)
711 {
712     if (phys_sections_nb == phys_sections_nb_alloc) {
713         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
714         phys_sections = g_renew(MemoryRegionSection, phys_sections,
715                                 phys_sections_nb_alloc);
716     }
717     phys_sections[phys_sections_nb] = *section;
718     return phys_sections_nb++;
719 }
720
721 static void phys_sections_clear(void)
722 {
723     phys_sections_nb = 0;
724 }
725
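/* Map a section smaller than a target page: create (or reuse) the subpage
   container for that page and register the section within it.  */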
726 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
727 {
728     subpage_t *subpage;
729     hwaddr base = section->offset_within_address_space
730         & TARGET_PAGE_MASK;
731     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
732     MemoryRegionSection subsection = {
733         .offset_within_address_space = base,
734         .size = TARGET_PAGE_SIZE,
735     };
736     hwaddr start, end;
737
738     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
739
740     if (!(existing->mr->subpage)) {
741         subpage = subpage_init(base);
742         subsection.mr = &subpage->iomem;
743         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
744                       phys_section_add(&subsection));
745     } else {
746         subpage = container_of(existing->mr, subpage_t, iomem);
747     }
748     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
749     end = start + section->size - 1;
750     subpage_register(subpage, start, end, phys_section_add(section));
751 }
752
753
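/* Map a page-aligned section covering whole pages directly into the
   page map, one leaf entry for the whole run.  */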
754 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
755 {
756     hwaddr start_addr = section->offset_within_address_space;
757     ram_addr_t size = section->size;
758     hwaddr addr;
759     uint16_t section_index = phys_section_add(section);
760
761     assert(size);
762
763     addr = start_addr;
764     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
765                   section_index);
766 }
767
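/* MemoryListener region_add/region_nop hook: split the incoming section
   into an unaligned head, page-aligned middle and unaligned tail, and
   register each part as a subpage or multipage mapping.  */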
768 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
769 {
770     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
771     MemoryRegionSection now = *section, remain = *section;
772
773     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
774         || (now.size < TARGET_PAGE_SIZE)) {
775         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
776                        - now.offset_within_address_space,
777                        now.size);
778         register_subpage(d, &now);
779         remain.size -= now.size;
780         remain.offset_within_address_space += now.size;
781         remain.offset_within_region += now.size;
782     }
783     while (remain.size >= TARGET_PAGE_SIZE) {
784         now = remain;
785         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
786             now.size = TARGET_PAGE_SIZE;
787             register_subpage(d, &now);
788         } else {
789             now.size &= TARGET_PAGE_MASK;
790             register_multipage(d, &now);
791         }
792         remain.size -= now.size;
793         remain.offset_within_address_space += now.size;
794         remain.offset_within_region += now.size;
795     }
796     now = remain;
797     if (now.size) {
798         register_subpage(d, &now);
799     }
800 }
801
802 void qemu_flush_coalesced_mmio_buffer(void)
803 {
804     if (kvm_enabled())
805         kvm_flush_coalesced_mmio_buffer();
806 }
807
808 void qemu_mutex_lock_ramlist(void)
809 {
810     qemu_mutex_lock(&ram_list.mutex);
811 }
812
813 void qemu_mutex_unlock_ramlist(void)
814 {
815     qemu_mutex_unlock(&ram_list.mutex);
816 }
817
818 #if defined(__linux__) && !defined(TARGET_S390X)
819
820 #include <sys/vfs.h>
821
822 #define HUGETLBFS_MAGIC       0x958458f6
823
824 static long gethugepagesize(const char *path)
825 {
826     struct statfs fs;
827     int ret;
828
829     do {
830         ret = statfs(path, &fs);
831     } while (ret != 0 && errno == EINTR);
832
833     if (ret != 0) {
834         perror(path);
835         return 0;
836     }
837
838     if (fs.f_type != HUGETLBFS_MAGIC)
839         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
840
841     return fs.f_bsize;
842 }
843
844 static void *file_ram_alloc(RAMBlock *block,
845                             ram_addr_t memory,
846                             const char *path)
847 {
848     char *filename;
849     char *sanitized_name;
850     char *c;
851     void *area;
852     int fd;
853 #ifdef MAP_POPULATE
854     int flags;
855 #endif
856     unsigned long hpagesize;
857
858     hpagesize = gethugepagesize(path);
859     if (!hpagesize) {
860         return NULL;
861     }
862
863     if (memory < hpagesize) {
864         return NULL;
865     }
866
867     if (kvm_enabled() && !kvm_has_sync_mmu()) {
868         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
869         return NULL;
870     }
871
872     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
873     sanitized_name = g_strdup(block->mr->name);
874     for (c = sanitized_name; *c != '\0'; c++) {
875         if (*c == '/')
876             *c = '_';
877     }
878
879     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
880                                sanitized_name);
881     g_free(sanitized_name);
882
883     fd = mkstemp(filename);
884     if (fd < 0) {
885         perror("unable to create backing store for hugepages");
886         g_free(filename);
887         return NULL;
888     }
889     unlink(filename);
890     g_free(filename);
891
892     memory = (memory+hpagesize-1) & ~(hpagesize-1);
893
894     /*
895      * ftruncate is not supported by hugetlbfs in older
896      * hosts, so don't bother bailing out on errors.
897      * If anything goes wrong with it under other filesystems,
898      * mmap will fail.
899      */
900     if (ftruncate(fd, memory))
901         perror("ftruncate");
902
903 #ifdef MAP_POPULATE
904     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
905      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
906      * to sidestep this quirk.
907      */
908     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
909     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
910 #else
911     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
912 #endif
913     if (area == MAP_FAILED) {
914         perror("file_ram_alloc: can't mmap RAM pages");
915         close(fd);
916         return (NULL);
917     }
918     block->fd = fd;
919     return area;
920 }
921 #endif
922
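/* Find the smallest gap between existing RAM blocks that can hold 'size'
   bytes (best fit) and return its starting offset.  */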
923 static ram_addr_t find_ram_offset(ram_addr_t size)
924 {
925     RAMBlock *block, *next_block;
926     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
927
928     if (QTAILQ_EMPTY(&ram_list.blocks))
929         return 0;
930
931     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
932         ram_addr_t end, next = RAM_ADDR_MAX;
933
934         end = block->offset + block->length;
935
936         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
937             if (next_block->offset >= end) {
938                 next = MIN(next, next_block->offset);
939             }
940         }
941         if (next - end >= size && next - end < mingap) {
942             offset = end;
943             mingap = next - end;
944         }
945     }
946
947     if (offset == RAM_ADDR_MAX) {
948         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
949                 (uint64_t)size);
950         abort();
951     }
952
953     return offset;
954 }
955
956 ram_addr_t last_ram_offset(void)
957 {
958     RAMBlock *block;
959     ram_addr_t last = 0;
960
961     QTAILQ_FOREACH(block, &ram_list.blocks, next)
962         last = MAX(last, block->offset + block->length);
963
964     return last;
965 }
966
967 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
968 {
969     int ret;
970     QemuOpts *machine_opts;
971
972     /* Use MADV_DONTDUMP, if user doesn't want the guest memory in the core */
973     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
974     if (machine_opts &&
975         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
976         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
977         if (ret) {
978             perror("qemu_madvise");
979             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
980                             "but dump_guest_core=off specified\n");
981         }
982     }
983 }
984
985 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
986 {
987     RAMBlock *new_block, *block;
988
989     new_block = NULL;
990     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
991         if (block->offset == addr) {
992             new_block = block;
993             break;
994         }
995     }
996     assert(new_block);
997     assert(!new_block->idstr[0]);
998
999     if (dev) {
1000         char *id = qdev_get_dev_path(dev);
1001         if (id) {
1002             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1003             g_free(id);
1004         }
1005     }
1006     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1007
1008     /* This assumes the iothread lock is taken here too.  */
1009     qemu_mutex_lock_ramlist();
1010     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1011         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1012             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1013                     new_block->idstr);
1014             abort();
1015         }
1016     }
1017     qemu_mutex_unlock_ramlist();
1018 }
1019
1020 static int memory_try_enable_merging(void *addr, size_t len)
1021 {
1022     QemuOpts *opts;
1023
1024     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1025     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1026         /* disabled by the user */
1027         return 0;
1028     }
1029
1030     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1031 }
1032
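/* Allocate a new RAMBlock of 'size' bytes for 'mr'.  The backing memory is
   either the caller-provided 'host' pointer, a hugetlbfs file when -mem-path
   is given, or memory obtained from Xen, KVM or qemu_vmalloc.  The block is
   inserted into ram_list (kept sorted by decreasing size) and the dirty
   bitmap is grown to cover it.  */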
1033 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1034                                    MemoryRegion *mr)
1035 {
1036     RAMBlock *block, *new_block;
1037
1038     size = TARGET_PAGE_ALIGN(size);
1039     new_block = g_malloc0(sizeof(*new_block));
1040
1041     /* This assumes the iothread lock is taken here too.  */
1042     qemu_mutex_lock_ramlist();
1043     new_block->mr = mr;
1044     new_block->offset = find_ram_offset(size);
1045     if (host) {
1046         new_block->host = host;
1047         new_block->flags |= RAM_PREALLOC_MASK;
1048     } else {
1049         if (mem_path) {
1050 #if defined (__linux__) && !defined(TARGET_S390X)
1051             new_block->host = file_ram_alloc(new_block, size, mem_path);
1052             if (!new_block->host) {
1053                 new_block->host = qemu_vmalloc(size);
1054                 memory_try_enable_merging(new_block->host, size);
1055             }
1056 #else
1057             fprintf(stderr, "-mem-path option unsupported\n");
1058             exit(1);
1059 #endif
1060         } else {
1061             if (xen_enabled()) {
1062                 xen_ram_alloc(new_block->offset, size, mr);
1063             } else if (kvm_enabled()) {
1064                 /* some s390/kvm configurations have special constraints */
1065                 new_block->host = kvm_vmalloc(size);
1066             } else {
1067                 new_block->host = qemu_vmalloc(size);
1068             }
1069             memory_try_enable_merging(new_block->host, size);
1070         }
1071     }
1072     new_block->length = size;
1073
1074     /* Keep the list sorted from biggest to smallest block.  */
1075     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1076         if (block->length < new_block->length) {
1077             break;
1078         }
1079     }
1080     if (block) {
1081         QTAILQ_INSERT_BEFORE(block, new_block, next);
1082     } else {
1083         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1084     }
1085     ram_list.mru_block = NULL;
1086
1087     ram_list.version++;
1088     qemu_mutex_unlock_ramlist();
1089
1090     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1091                                        last_ram_offset() >> TARGET_PAGE_BITS);
1092     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1093            0, size >> TARGET_PAGE_BITS);
1094     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1095
1096     qemu_ram_setup_dump(new_block->host, size);
1097     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1098
1099     if (kvm_enabled())
1100         kvm_setup_guest_memory(new_block->host, size);
1101
1102     return new_block->offset;
1103 }
1104
1105 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1106 {
1107     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1108 }
1109
1110 void qemu_ram_free_from_ptr(ram_addr_t addr)
1111 {
1112     RAMBlock *block;
1113
1114     /* This assumes the iothread lock is taken here too.  */
1115     qemu_mutex_lock_ramlist();
1116     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1117         if (addr == block->offset) {
1118             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1119             ram_list.mru_block = NULL;
1120             ram_list.version++;
1121             g_free(block);
1122             break;
1123         }
1124     }
1125     qemu_mutex_unlock_ramlist();
1126 }
1127
1128 void qemu_ram_free(ram_addr_t addr)
1129 {
1130     RAMBlock *block;
1131
1132     /* This assumes the iothread lock is taken here too.  */
1133     qemu_mutex_lock_ramlist();
1134     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1135         if (addr == block->offset) {
1136             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1137             ram_list.mru_block = NULL;
1138             ram_list.version++;
1139             if (block->flags & RAM_PREALLOC_MASK) {
1140                 ;
1141             } else if (mem_path) {
1142 #if defined (__linux__) && !defined(TARGET_S390X)
1143                 if (block->fd) {
1144                     munmap(block->host, block->length);
1145                     close(block->fd);
1146                 } else {
1147                     qemu_vfree(block->host);
1148                 }
1149 #else
1150                 abort();
1151 #endif
1152             } else {
1153 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1154                 munmap(block->host, block->length);
1155 #else
1156                 if (xen_enabled()) {
1157                     xen_invalidate_map_cache_entry(block->host);
1158                 } else {
1159                     qemu_vfree(block->host);
1160                 }
1161 #endif
1162             }
1163             g_free(block);
1164             break;
1165         }
1166     }
1167     qemu_mutex_unlock_ramlist();
1168
1169 }
1170
1171 #ifndef _WIN32
1172 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1173 {
1174     RAMBlock *block;
1175     ram_addr_t offset;
1176     int flags;
1177     void *area, *vaddr;
1178
1179     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1180         offset = addr - block->offset;
1181         if (offset < block->length) {
1182             vaddr = block->host + offset;
1183             if (block->flags & RAM_PREALLOC_MASK) {
1184                 ;
1185             } else {
1186                 flags = MAP_FIXED;
1187                 munmap(vaddr, length);
1188                 if (mem_path) {
1189 #if defined(__linux__) && !defined(TARGET_S390X)
1190                     if (block->fd) {
1191 #ifdef MAP_POPULATE
1192                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1193                             MAP_PRIVATE;
1194 #else
1195                         flags |= MAP_PRIVATE;
1196 #endif
1197                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1198                                     flags, block->fd, offset);
1199                     } else {
1200                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1201                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1202                                     flags, -1, 0);
1203                     }
1204 #else
1205                     abort();
1206 #endif
1207                 } else {
1208 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1209                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1210                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1211                                 flags, -1, 0);
1212 #else
1213                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1214                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1215                                 flags, -1, 0);
1216 #endif
1217                 }
1218                 if (area != vaddr) {
1219                     fprintf(stderr, "Could not remap addr: "
1220                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1221                             length, addr);
1222                     exit(1);
1223                 }
1224                 memory_try_enable_merging(vaddr, length);
1225                 qemu_ram_setup_dump(vaddr, length);
1226             }
1227             return;
1228         }
1229     }
1230 }
1231 #endif /* !_WIN32 */
1232
1233 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1234    With the exception of the softmmu code in this file, this should
1235    only be used for local memory (e.g. video ram) that the device owns,
1236    and knows it isn't going to access beyond the end of the block.
1237
1238    It should not be used for general purpose DMA.
1239    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1240  */
1241 void *qemu_get_ram_ptr(ram_addr_t addr)
1242 {
1243     RAMBlock *block;
1244
1245     /* The list is protected by the iothread lock here.  */
1246     block = ram_list.mru_block;
1247     if (block && addr - block->offset < block->length) {
1248         goto found;
1249     }
1250     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1251         if (addr - block->offset < block->length) {
1252             goto found;
1253         }
1254     }
1255
1256     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1257     abort();
1258
1259 found:
1260     ram_list.mru_block = block;
1261     if (xen_enabled()) {
1262         /* We need to check if the requested address is in the RAM
1263          * because we don't want to map the entire memory in QEMU.
1264          * In that case just map until the end of the page.
1265          */
1266         if (block->offset == 0) {
1267             return xen_map_cache(addr, 0, 0);
1268         } else if (block->host == NULL) {
1269             block->host =
1270                 xen_map_cache(block->offset, block->length, 1);
1271         }
1272     }
1273     return block->host + (addr - block->offset);
1274 }
1275
1276 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1277  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1278  *
1279  * ??? Is this still necessary?
1280  */
1281 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1282 {
1283     RAMBlock *block;
1284
1285     /* The list is protected by the iothread lock here.  */
1286     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1287         if (addr - block->offset < block->length) {
1288             if (xen_enabled()) {
1289                 /* We need to check if the requested address is in the RAM
1290                  * because we don't want to map the entire memory in QEMU.
1291                  * In that case just map until the end of the page.
1292                  */
1293                 if (block->offset == 0) {
1294                     return xen_map_cache(addr, 0, 0);
1295                 } else if (block->host == NULL) {
1296                     block->host =
1297                         xen_map_cache(block->offset, block->length, 1);
1298                 }
1299             }
1300             return block->host + (addr - block->offset);
1301         }
1302     }
1303
1304     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1305     abort();
1306
1307     return NULL;
1308 }
1309
1310 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1311  * but takes a size argument */
1312 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1313 {
1314     if (*size == 0) {
1315         return NULL;
1316     }
1317     if (xen_enabled()) {
1318         return xen_map_cache(addr, *size, 1);
1319     } else {
1320         RAMBlock *block;
1321
1322         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1323             if (addr - block->offset < block->length) {
1324                 if (addr - block->offset + *size > block->length)
1325                     *size = block->length - addr + block->offset;
1326                 return block->host + (addr - block->offset);
1327             }
1328         }
1329
1330         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1331         abort();
1332     }
1333 }
1334
1335 void qemu_put_ram_ptr(void *addr)
1336 {
1337     trace_qemu_put_ram_ptr(addr);
1338 }
1339
1340 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1341 {
1342     RAMBlock *block;
1343     uint8_t *host = ptr;
1344
1345     if (xen_enabled()) {
1346         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1347         return 0;
1348     }
1349
1350     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1351         /* This case happens when the block is not mapped. */
1352         if (block->host == NULL) {
1353             continue;
1354         }
1355         if (host - block->host < block->length) {
1356             *ram_addr = block->offset + (host - block->host);
1357             return 0;
1358         }
1359     }
1360
1361     return -1;
1362 }
1363
1364 /* Some of the softmmu routines need to translate from a host pointer
1365    (typically a TLB entry) back to a ram offset.  */
1366 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1367 {
1368     ram_addr_t ram_addr;
1369
1370     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1371         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1372         abort();
1373     }
1374     return ram_addr;
1375 }
1376
1377 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1378                                     unsigned size)
1379 {
1380 #ifdef DEBUG_UNASSIGNED
1381     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1382 #endif
1383 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1384     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1385 #endif
1386     return 0;
1387 }
1388
1389 static void unassigned_mem_write(void *opaque, hwaddr addr,
1390                                  uint64_t val, unsigned size)
1391 {
1392 #ifdef DEBUG_UNASSIGNED
1393     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1394 #endif
1395 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1396     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1397 #endif
1398 }
1399
1400 static const MemoryRegionOps unassigned_mem_ops = {
1401     .read = unassigned_mem_read,
1402     .write = unassigned_mem_write,
1403     .endianness = DEVICE_NATIVE_ENDIAN,
1404 };
1405
1406 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1407                                unsigned size)
1408 {
1409     abort();
1410 }
1411
1412 static void error_mem_write(void *opaque, hwaddr addr,
1413                             uint64_t value, unsigned size)
1414 {
1415     abort();
1416 }
1417
1418 static const MemoryRegionOps error_mem_ops = {
1419     .read = error_mem_read,
1420     .write = error_mem_write,
1421     .endianness = DEVICE_NATIVE_ENDIAN,
1422 };
1423
1424 static const MemoryRegionOps rom_mem_ops = {
1425     .read = error_mem_read,
1426     .write = unassigned_mem_write,
1427     .endianness = DEVICE_NATIVE_ENDIAN,
1428 };
1429
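/* Write handler for pages whose TLB entries trap writes for code
   invalidation: flush any translated code for the page, perform the store,
   then mark the page dirty so later writes can go straight to RAM.  */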
1430 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1431                                uint64_t val, unsigned size)
1432 {
1433     int dirty_flags;
1434     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1435     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1436 #if !defined(CONFIG_USER_ONLY)
1437         tb_invalidate_phys_page_fast(ram_addr, size);
1438         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1439 #endif
1440     }
1441     switch (size) {
1442     case 1:
1443         stb_p(qemu_get_ram_ptr(ram_addr), val);
1444         break;
1445     case 2:
1446         stw_p(qemu_get_ram_ptr(ram_addr), val);
1447         break;
1448     case 4:
1449         stl_p(qemu_get_ram_ptr(ram_addr), val);
1450         break;
1451     default:
1452         abort();
1453     }
1454     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1455     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1456     /* we remove the notdirty callback only if the code has been
1457        flushed */
1458     if (dirty_flags == 0xff)
1459         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1460 }
1461
1462 static const MemoryRegionOps notdirty_mem_ops = {
1463     .read = error_mem_read,
1464     .write = notdirty_mem_write,
1465     .endianness = DEVICE_NATIVE_ENDIAN,
1466 };
1467
1468 /* Generate a debug exception if a watchpoint has been hit.  */
1469 static void check_watchpoint(int offset, int len_mask, int flags)
1470 {
1471     CPUArchState *env = cpu_single_env;
1472     target_ulong pc, cs_base;
1473     target_ulong vaddr;
1474     CPUWatchpoint *wp;
1475     int cpu_flags;
1476
1477     if (env->watchpoint_hit) {
1478         /* We re-entered the check after replacing the TB. Now raise
1479          * the debug interrupt so that it will trigger after the
1480          * current instruction. */
1481         cpu_interrupt(ENV_GET_CPU(env), CPU_INTERRUPT_DEBUG);
1482         return;
1483     }
1484     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1485     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1486         if ((vaddr == (wp->vaddr & len_mask) ||
1487              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1488             wp->flags |= BP_WATCHPOINT_HIT;
1489             if (!env->watchpoint_hit) {
1490                 env->watchpoint_hit = wp;
1491                 tb_check_watchpoint(env);
1492                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1493                     env->exception_index = EXCP_DEBUG;
1494                     cpu_loop_exit(env);
1495                 } else {
1496                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1497                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1498                     cpu_resume_from_signal(env, NULL);
1499                 }
1500             }
1501         } else {
1502             wp->flags &= ~BP_WATCHPOINT_HIT;
1503         }
1504     }
1505 }
1506
1507 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1508    so these check for a hit then pass through to the normal out-of-line
1509    phys routines.  */
1510 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1511                                unsigned size)
1512 {
1513     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1514     switch (size) {
1515     case 1: return ldub_phys(addr);
1516     case 2: return lduw_phys(addr);
1517     case 4: return ldl_phys(addr);
1518     default: abort();
1519     }
1520 }
1521
1522 static void watch_mem_write(void *opaque, hwaddr addr,
1523                             uint64_t val, unsigned size)
1524 {
1525     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1526     switch (size) {
1527     case 1:
1528         stb_phys(addr, val);
1529         break;
1530     case 2:
1531         stw_phys(addr, val);
1532         break;
1533     case 4:
1534         stl_phys(addr, val);
1535         break;
1536     default: abort();
1537     }
1538 }
1539
1540 static const MemoryRegionOps watch_mem_ops = {
1541     .read = watch_mem_read,
1542     .write = watch_mem_write,
1543     .endianness = DEVICE_NATIVE_ENDIAN,
1544 };
1545
1546 static uint64_t subpage_read(void *opaque, hwaddr addr,
1547                              unsigned len)
1548 {
1549     subpage_t *mmio = opaque;
1550     unsigned int idx = SUBPAGE_IDX(addr);
1551     MemoryRegionSection *section;
1552 #if defined(DEBUG_SUBPAGE)
1553     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1554            mmio, len, addr, idx);
1555 #endif
1556
1557     section = &phys_sections[mmio->sub_section[idx]];
1558     addr += mmio->base;
1559     addr -= section->offset_within_address_space;
1560     addr += section->offset_within_region;
1561     return io_mem_read(section->mr, addr, len);
1562 }
1563
1564 static void subpage_write(void *opaque, hwaddr addr,
1565                           uint64_t value, unsigned len)
1566 {
1567     subpage_t *mmio = opaque;
1568     unsigned int idx = SUBPAGE_IDX(addr);
1569     MemoryRegionSection *section;
1570 #if defined(DEBUG_SUBPAGE)
1571     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1572            " idx %d value %"PRIx64"\n",
1573            __func__, mmio, len, addr, idx, value);
1574 #endif
1575
1576     section = &phys_sections[mmio->sub_section[idx]];
1577     addr += mmio->base;
1578     addr -= section->offset_within_address_space;
1579     addr += section->offset_within_region;
1580     io_mem_write(section->mr, addr, value, len);
1581 }
1582
1583 static const MemoryRegionOps subpage_ops = {
1584     .read = subpage_read,
1585     .write = subpage_write,
1586     .endianness = DEVICE_NATIVE_ENDIAN,
1587 };
1588
1589 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1590                                  unsigned size)
1591 {
1592     ram_addr_t raddr = addr;
1593     void *ptr = qemu_get_ram_ptr(raddr);
1594     switch (size) {
1595     case 1: return ldub_p(ptr);
1596     case 2: return lduw_p(ptr);
1597     case 4: return ldl_p(ptr);
1598     default: abort();
1599     }
1600 }
1601
1602 static void subpage_ram_write(void *opaque, hwaddr addr,
1603                               uint64_t value, unsigned size)
1604 {
1605     ram_addr_t raddr = addr;
1606     void *ptr = qemu_get_ram_ptr(raddr);
1607     switch (size) {
1608     case 1: return stb_p(ptr, value);
1609     case 2: return stw_p(ptr, value);
1610     case 4: return stl_p(ptr, value);
1611     default: abort();
1612     }
1613 }
1614
1615 static const MemoryRegionOps subpage_ram_ops = {
1616     .read = subpage_ram_read,
1617     .write = subpage_ram_write,
1618     .endianness = DEVICE_NATIVE_ENDIAN,
1619 };
1620
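/* Point the sub_section slots covering [start, end] at 'section'.  RAM-backed
   sections are re-pointed at io_mem_subpage_ram so accesses go through the
   read/write callbacks instead of being mapped directly.  */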
1621 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1622                              uint16_t section)
1623 {
1624     int idx, eidx;
1625
1626     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1627         return -1;
1628     idx = SUBPAGE_IDX(start);
1629     eidx = SUBPAGE_IDX(end);
1630 #if defined(DEBUG_SUBPAGE)
1631     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1632            __func__, mmio, start, end, idx, eidx, section);
1633 #endif
1634     if (memory_region_is_ram(phys_sections[section].mr)) {
1635         MemoryRegionSection new_section = phys_sections[section];
1636         new_section.mr = &io_mem_subpage_ram;
1637         section = phys_section_add(&new_section);
1638     }
1639     for (; idx <= eidx; idx++) {
1640         mmio->sub_section[idx] = section;
1641     }
1642
1643     return 0;
1644 }
1645
1646 static subpage_t *subpage_init(hwaddr base)
1647 {
1648     subpage_t *mmio;
1649
1650     mmio = g_malloc0(sizeof(subpage_t));
1651
1652     mmio->base = base;
1653     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1654                           "subpage", TARGET_PAGE_SIZE);
1655     mmio->iomem.subpage = true;
1656 #if defined(DEBUG_SUBPAGE)
1657     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1658            mmio, base, TARGET_PAGE_SIZE);
1659 #endif
1660     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1661
1662     return mmio;
1663 }
1664
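/* Register a catch-all section mapping the whole address space onto 'mr';
   used for the unassigned, notdirty, rom and watch pseudo-regions.  */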
1665 static uint16_t dummy_section(MemoryRegion *mr)
1666 {
1667     MemoryRegionSection section = {
1668         .mr = mr,
1669         .offset_within_address_space = 0,
1670         .offset_within_region = 0,
1671         .size = UINT64_MAX,
1672     };
1673
1674     return phys_section_add(&section);
1675 }
1676
1677 MemoryRegion *iotlb_to_region(hwaddr index)
1678 {
1679     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1680 }
1681
1682 static void io_mem_init(void)
1683 {
1684     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1685     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1686     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1687                           "unassigned", UINT64_MAX);
1688     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1689                           "notdirty", UINT64_MAX);
1690     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1691                           "subpage-ram", UINT64_MAX);
1692     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1693                           "watch", UINT64_MAX);
1694 }
1695
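/* MemoryListener 'begin' hook: throw away the previous dispatch tree so the
   following region_add/region_nop callbacks can rebuild it from scratch.  */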
1696 static void mem_begin(MemoryListener *listener)
1697 {
1698     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1699
1700     destroy_all_mappings(d);
1701     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1702 }
1703
1704 static void core_begin(MemoryListener *listener)
1705 {
1706     phys_sections_clear();
1707     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1708     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1709     phys_section_rom = dummy_section(&io_mem_rom);
1710     phys_section_watch = dummy_section(&io_mem_watch);
1711 }
1712
1713 static void tcg_commit(MemoryListener *listener)
1714 {
1715     CPUArchState *env;
1716
1717     /* since each CPU stores ram addresses in its TLB cache, we must
1718        reset the modified entries */
1719     /* XXX: slow ! */
1720     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1721         tlb_flush(env, 1);
1722     }
1723 }
1724
1725 static void core_log_global_start(MemoryListener *listener)
1726 {
1727     cpu_physical_memory_set_dirty_tracking(1);
1728 }
1729
1730 static void core_log_global_stop(MemoryListener *listener)
1731 {
1732     cpu_physical_memory_set_dirty_tracking(0);
1733 }
1734
1735 static void io_region_add(MemoryListener *listener,
1736                           MemoryRegionSection *section)
1737 {
1738     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1739
1740     mrio->mr = section->mr;
1741     mrio->offset = section->offset_within_region;
1742     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1743                  section->offset_within_address_space, section->size);
1744     ioport_register(&mrio->iorange);
1745 }
1746
1747 static void io_region_del(MemoryListener *listener,
1748                           MemoryRegionSection *section)
1749 {
1750     isa_unassign_ioport(section->offset_within_address_space, section->size);
1751 }
1752
1753 static MemoryListener core_memory_listener = {
1754     .begin = core_begin,
1755     .log_global_start = core_log_global_start,
1756     .log_global_stop = core_log_global_stop,
1757     .priority = 1,
1758 };
1759
1760 static MemoryListener io_memory_listener = {
1761     .region_add = io_region_add,
1762     .region_del = io_region_del,
1763     .priority = 0,
1764 };
1765
1766 static MemoryListener tcg_memory_listener = {
1767     .commit = tcg_commit,
1768 };
1769
1770 void address_space_init_dispatch(AddressSpace *as)
1771 {
1772     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1773
1774     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1775     d->listener = (MemoryListener) {
1776         .begin = mem_begin,
1777         .region_add = mem_add,
1778         .region_nop = mem_add,
1779         .priority = 0,
1780     };
1781     as->dispatch = d;
1782     memory_listener_register(&d->listener, as);
1783 }
1784
1785 void address_space_destroy_dispatch(AddressSpace *as)
1786 {
1787     AddressSpaceDispatch *d = as->dispatch;
1788
1789     memory_listener_unregister(&d->listener);
1790     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1791     g_free(d);
1792     as->dispatch = NULL;
1793 }
1794
1795 static void memory_map_init(void)
1796 {
1797     system_memory = g_malloc(sizeof(*system_memory));
1798     memory_region_init(system_memory, "system", INT64_MAX);
1799     address_space_init(&address_space_memory, system_memory);
1800     address_space_memory.name = "memory";
1801
1802     system_io = g_malloc(sizeof(*system_io));
1803     memory_region_init(system_io, "io", 65536);
1804     address_space_init(&address_space_io, system_io);
1805     address_space_io.name = "I/O";
1806
1807     memory_listener_register(&core_memory_listener, &address_space_memory);
1808     memory_listener_register(&io_memory_listener, &address_space_io);
1809     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1810
1811     dma_context_init(&dma_context_memory, &address_space_memory,
1812                      NULL, NULL, NULL);
1813 }
1814
1815 MemoryRegion *get_system_memory(void)
1816 {
1817     return system_memory;
1818 }
1819
1820 MemoryRegion *get_system_io(void)
1821 {
1822     return system_io;
1823 }
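
/*
 * Illustrative sketch (editor's addition, not part of exec.c): board code
 * typically obtains these container regions and maps its own regions into
 * them.  The region name, size and offset below are hypothetical:
 *
 *     MemoryRegion *ram = g_new(MemoryRegion, 1);
 *
 *     memory_region_init_ram(ram, "example.ram", 128 * 1024 * 1024);
 *     memory_region_add_subregion(get_system_memory(), 0, ram);
 */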
1824
1825 #endif /* !defined(CONFIG_USER_ONLY) */
1826
1827 /* physical memory access (slow version, mainly for debug) */
1828 #if defined(CONFIG_USER_ONLY)
1829 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1830                         uint8_t *buf, int len, int is_write)
1831 {
1832     int l, flags;
1833     target_ulong page;
1834     void * p;
1835
1836     while (len > 0) {
1837         page = addr & TARGET_PAGE_MASK;
1838         l = (page + TARGET_PAGE_SIZE) - addr;
1839         if (l > len)
1840             l = len;
1841         flags = page_get_flags(page);
1842         if (!(flags & PAGE_VALID))
1843             return -1;
1844         if (is_write) {
1845             if (!(flags & PAGE_WRITE))
1846                 return -1;
1847             /* XXX: this code should not depend on lock_user */
1848             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1849                 return -1;
1850             memcpy(p, buf, l);
1851             unlock_user(p, addr, l);
1852         } else {
1853             if (!(flags & PAGE_READ))
1854                 return -1;
1855             /* XXX: this code should not depend on lock_user */
1856             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1857                 return -1;
1858             memcpy(buf, p, l);
1859             unlock_user(p, addr, 0);
1860         }
1861         len -= l;
1862         buf += l;
1863         addr += l;
1864     }
1865     return 0;
1866 }
1867
1868 #else
1869
1870 static void invalidate_and_set_dirty(hwaddr addr,
1871                                      hwaddr length)
1872 {
1873     if (!cpu_physical_memory_is_dirty(addr)) {
1874         /* invalidate code */
1875         tb_invalidate_phys_page_range(addr, addr + length, 0);
1876         /* set dirty bit */
1877         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1878     }
1879     xen_modified_memory(addr, length);
1880 }
1881
1882 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1883                       int len, bool is_write)
1884 {
1885     AddressSpaceDispatch *d = as->dispatch;
1886     int l;
1887     uint8_t *ptr;
1888     uint32_t val;
1889     hwaddr page;
1890     MemoryRegionSection *section;
1891
1892     while (len > 0) {
1893         page = addr & TARGET_PAGE_MASK;
1894         l = (page + TARGET_PAGE_SIZE) - addr;
1895         if (l > len)
1896             l = len;
1897         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1898
1899         if (is_write) {
1900             if (!memory_region_is_ram(section->mr)) {
1901                 hwaddr addr1;
1902                 addr1 = memory_region_section_addr(section, addr);
1903                 /* XXX: could force cpu_single_env to NULL to avoid
1904                    potential bugs */
1905                 if (l >= 4 && ((addr1 & 3) == 0)) {
1906                     /* 32 bit write access */
1907                     val = ldl_p(buf);
1908                     io_mem_write(section->mr, addr1, val, 4);
1909                     l = 4;
1910                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1911                     /* 16 bit write access */
1912                     val = lduw_p(buf);
1913                     io_mem_write(section->mr, addr1, val, 2);
1914                     l = 2;
1915                 } else {
1916                     /* 8 bit write access */
1917                     val = ldub_p(buf);
1918                     io_mem_write(section->mr, addr1, val, 1);
1919                     l = 1;
1920                 }
1921             } else if (!section->readonly) {
1922                 ram_addr_t addr1;
1923                 addr1 = memory_region_get_ram_addr(section->mr)
1924                     + memory_region_section_addr(section, addr);
1925                 /* RAM case */
1926                 ptr = qemu_get_ram_ptr(addr1);
1927                 memcpy(ptr, buf, l);
1928                 invalidate_and_set_dirty(addr1, l);
1929                 qemu_put_ram_ptr(ptr);
1930             }
1931         } else {
1932             if (!(memory_region_is_ram(section->mr) ||
1933                   memory_region_is_romd(section->mr))) {
1934                 hwaddr addr1;
1935                 /* I/O case */
1936                 addr1 = memory_region_section_addr(section, addr);
1937                 if (l >= 4 && ((addr1 & 3) == 0)) {
1938                     /* 32 bit read access */
1939                     val = io_mem_read(section->mr, addr1, 4);
1940                     stl_p(buf, val);
1941                     l = 4;
1942                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1943                     /* 16 bit read access */
1944                     val = io_mem_read(section->mr, addr1, 2);
1945                     stw_p(buf, val);
1946                     l = 2;
1947                 } else {
1948                     /* 8 bit read access */
1949                     val = io_mem_read(section->mr, addr1, 1);
1950                     stb_p(buf, val);
1951                     l = 1;
1952                 }
1953             } else {
1954                 /* RAM case */
1955                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1956                                        + memory_region_section_addr(section,
1957                                                                     addr));
1958                 memcpy(buf, ptr, l);
1959                 qemu_put_ram_ptr(ptr);
1960             }
1961         }
1962         len -= l;
1963         buf += l;
1964         addr += l;
1965     }
1966 }
1967
1968 void address_space_write(AddressSpace *as, hwaddr addr,
1969                          const uint8_t *buf, int len)
1970 {
1971     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1972 }
1973
1974 /**
1975  * address_space_read: read from an address space.
1976  *
1977  * @as: #AddressSpace to be accessed
1978  * @addr: address within that address space
1979  * @buf: buffer that receives the data read
 * @len: length of the data to read, in bytes
1980  */
1981 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1982 {
1983     address_space_rw(as, addr, buf, len, false);
1984 }
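
/*
 * Illustrative sketch (editor's addition): a device model can transfer data
 * to and from guest memory through the global memory address space.  The
 * descriptor layout and guest address (desc_gpa) are hypothetical:
 *
 *     uint8_t desc[16];
 *
 *     address_space_read(&address_space_memory, desc_gpa, desc, sizeof(desc));
 *     desc[12] |= 0x80;                           (hypothetical status bit)
 *     address_space_write(&address_space_memory, desc_gpa, desc, sizeof(desc));
 */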
1985
1986
1987 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1988                             int len, int is_write)
1989 {
1990     address_space_rw(&address_space_memory, addr, buf, len, is_write);
1991 }
1992
1993 /* used for ROM loading : can write in RAM and ROM */
1994 void cpu_physical_memory_write_rom(hwaddr addr,
1995                                    const uint8_t *buf, int len)
1996 {
1997     AddressSpaceDispatch *d = address_space_memory.dispatch;
1998     int l;
1999     uint8_t *ptr;
2000     hwaddr page;
2001     MemoryRegionSection *section;
2002
2003     while (len > 0) {
2004         page = addr & TARGET_PAGE_MASK;
2005         l = (page + TARGET_PAGE_SIZE) - addr;
2006         if (l > len)
2007             l = len;
2008         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2009
2010         if (!(memory_region_is_ram(section->mr) ||
2011               memory_region_is_romd(section->mr))) {
2012             /* do nothing */
2013         } else {
2014             unsigned long addr1;
2015             addr1 = memory_region_get_ram_addr(section->mr)
2016                 + memory_region_section_addr(section, addr);
2017             /* ROM/RAM case */
2018             ptr = qemu_get_ram_ptr(addr1);
2019             memcpy(ptr, buf, l);
2020             invalidate_and_set_dirty(addr1, l);
2021             qemu_put_ram_ptr(ptr);
2022         }
2023         len -= l;
2024         buf += l;
2025         addr += l;
2026     }
2027 }
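
/*
 * Illustrative sketch (editor's addition): firmware loading code uses this
 * helper to place a blob into memory that the guest otherwise sees as
 * read-only.  The blob contents and load address are hypothetical:
 *
 *     static const uint8_t boot_code[] = { 0xea, 0x00, 0x00, 0x00 };
 *
 *     cpu_physical_memory_write_rom(0xfffffff0, boot_code, sizeof(boot_code));
 */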
2028
2029 typedef struct {
2030     void *buffer;
2031     hwaddr addr;
2032     hwaddr len;
2033 } BounceBuffer;
2034
2035 static BounceBuffer bounce;
2036
2037 typedef struct MapClient {
2038     void *opaque;
2039     void (*callback)(void *opaque);
2040     QLIST_ENTRY(MapClient) link;
2041 } MapClient;
2042
2043 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2044     = QLIST_HEAD_INITIALIZER(map_client_list);
2045
2046 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2047 {
2048     MapClient *client = g_malloc(sizeof(*client));
2049
2050     client->opaque = opaque;
2051     client->callback = callback;
2052     QLIST_INSERT_HEAD(&map_client_list, client, link);
2053     return client;
2054 }
2055
2056 static void cpu_unregister_map_client(void *_client)
2057 {
2058     MapClient *client = (MapClient *)_client;
2059
2060     QLIST_REMOVE(client, link);
2061     g_free(client);
2062 }
2063
2064 static void cpu_notify_map_clients(void)
2065 {
2066     MapClient *client;
2067
2068     while (!QLIST_EMPTY(&map_client_list)) {
2069         client = QLIST_FIRST(&map_client_list);
2070         client->callback(client->opaque);
2071         cpu_unregister_map_client(client);
2072     }
2073 }
2074
2075 /* Map a physical memory region into a host virtual address.
2076  * May map a subset of the requested range, given by and returned in *plen.
2077  * May return NULL if resources needed to perform the mapping are exhausted.
2078  * Use only for reads OR writes - not for read-modify-write operations.
2079  * Use cpu_register_map_client() to know when retrying the map operation is
2080  * likely to succeed.
2081  */
2082 void *address_space_map(AddressSpace *as,
2083                         hwaddr addr,
2084                         hwaddr *plen,
2085                         bool is_write)
2086 {
2087     AddressSpaceDispatch *d = as->dispatch;
2088     hwaddr len = *plen;
2089     hwaddr todo = 0;
2090     int l;
2091     hwaddr page;
2092     MemoryRegionSection *section;
2093     ram_addr_t raddr = RAM_ADDR_MAX;
2094     ram_addr_t rlen;
2095     void *ret;
2096
2097     while (len > 0) {
2098         page = addr & TARGET_PAGE_MASK;
2099         l = (page + TARGET_PAGE_SIZE) - addr;
2100         if (l > len)
2101             l = len;
2102         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2103
2104         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2105             if (todo || bounce.buffer) {
2106                 break;
2107             }
2108             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2109             bounce.addr = addr;
2110             bounce.len = l;
2111             if (!is_write) {
2112                 address_space_read(as, addr, bounce.buffer, l);
2113             }
2114
2115             *plen = l;
2116             return bounce.buffer;
2117         }
2118         if (!todo) {
2119             raddr = memory_region_get_ram_addr(section->mr)
2120                 + memory_region_section_addr(section, addr);
2121         }
2122
2123         len -= l;
2124         addr += l;
2125         todo += l;
2126     }
2127     rlen = todo;
2128     ret = qemu_ram_ptr_length(raddr, &rlen);
2129     *plen = rlen;
2130     return ret;
2131 }
2132
2133 /* Unmaps a memory region previously mapped by address_space_map().
2134  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2135  * the amount of memory that was actually read or written by the caller.
2136  */
2137 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2138                          int is_write, hwaddr access_len)
2139 {
2140     if (buffer != bounce.buffer) {
2141         if (is_write) {
2142             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2143             while (access_len) {
2144                 unsigned l;
2145                 l = TARGET_PAGE_SIZE;
2146                 if (l > access_len)
2147                     l = access_len;
2148                 invalidate_and_set_dirty(addr1, l);
2149                 addr1 += l;
2150                 access_len -= l;
2151             }
2152         }
2153         if (xen_enabled()) {
2154             xen_invalidate_map_cache_entry(buffer);
2155         }
2156         return;
2157     }
2158     if (is_write) {
2159         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2160     }
2161     qemu_vfree(bounce.buffer);
2162     bounce.buffer = NULL;
2163     cpu_notify_map_clients();
2164 }
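
/*
 * Illustrative sketch (editor's addition): the usual zero-copy pattern built
 * on the map/unmap pair.  If the target is not RAM, the mapping may fall back
 * to the single bounce buffer above, or return NULL while that buffer is
 * busy; callers that must retry can register a callback with
 * cpu_register_map_client().  The guest address (gpa) is hypothetical:
 *
 *     hwaddr len = 4096;
 *     void *p = address_space_map(&address_space_memory, gpa, &len, true);
 *
 *     if (p) {
 *         memset(p, 0, len);                  only 'len' bytes were mapped
 *         address_space_unmap(&address_space_memory, p, len, true, len);
 *     }
 */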
2165
2166 void *cpu_physical_memory_map(hwaddr addr,
2167                               hwaddr *plen,
2168                               int is_write)
2169 {
2170     return address_space_map(&address_space_memory, addr, plen, is_write);
2171 }
2172
2173 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2174                                int is_write, hwaddr access_len)
2175 {
2176     address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2177 }
2178
2179 /* warning: addr must be aligned */
2180 static inline uint32_t ldl_phys_internal(hwaddr addr,
2181                                          enum device_endian endian)
2182 {
2183     uint8_t *ptr;
2184     uint32_t val;
2185     MemoryRegionSection *section;
2186
2187     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2188
2189     if (!(memory_region_is_ram(section->mr) ||
2190           memory_region_is_romd(section->mr))) {
2191         /* I/O case */
2192         addr = memory_region_section_addr(section, addr);
2193         val = io_mem_read(section->mr, addr, 4);
2194 #if defined(TARGET_WORDS_BIGENDIAN)
2195         if (endian == DEVICE_LITTLE_ENDIAN) {
2196             val = bswap32(val);
2197         }
2198 #else
2199         if (endian == DEVICE_BIG_ENDIAN) {
2200             val = bswap32(val);
2201         }
2202 #endif
2203     } else {
2204         /* RAM case */
2205         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2206                                 & TARGET_PAGE_MASK)
2207                                + memory_region_section_addr(section, addr));
2208         switch (endian) {
2209         case DEVICE_LITTLE_ENDIAN:
2210             val = ldl_le_p(ptr);
2211             break;
2212         case DEVICE_BIG_ENDIAN:
2213             val = ldl_be_p(ptr);
2214             break;
2215         default:
2216             val = ldl_p(ptr);
2217             break;
2218         }
2219     }
2220     return val;
2221 }
2222
2223 uint32_t ldl_phys(hwaddr addr)
2224 {
2225     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2226 }
2227
2228 uint32_t ldl_le_phys(hwaddr addr)
2229 {
2230     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2231 }
2232
2233 uint32_t ldl_be_phys(hwaddr addr)
2234 {
2235     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2236 }
2237
2238 /* warning: addr must be aligned */
2239 static inline uint64_t ldq_phys_internal(hwaddr addr,
2240                                          enum device_endian endian)
2241 {
2242     uint8_t *ptr;
2243     uint64_t val;
2244     MemoryRegionSection *section;
2245
2246     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2247
2248     if (!(memory_region_is_ram(section->mr) ||
2249           memory_region_is_romd(section->mr))) {
2250         /* I/O case */
2251         addr = memory_region_section_addr(section, addr);
2252
2253         /* XXX This is broken when device endian != cpu endian.
2254                Fix and add "endian" variable check */
2255 #ifdef TARGET_WORDS_BIGENDIAN
2256         val = io_mem_read(section->mr, addr, 4) << 32;
2257         val |= io_mem_read(section->mr, addr + 4, 4);
2258 #else
2259         val = io_mem_read(section->mr, addr, 4);
2260         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2261 #endif
2262     } else {
2263         /* RAM case */
2264         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2265                                 & TARGET_PAGE_MASK)
2266                                + memory_region_section_addr(section, addr));
2267         switch (endian) {
2268         case DEVICE_LITTLE_ENDIAN:
2269             val = ldq_le_p(ptr);
2270             break;
2271         case DEVICE_BIG_ENDIAN:
2272             val = ldq_be_p(ptr);
2273             break;
2274         default:
2275             val = ldq_p(ptr);
2276             break;
2277         }
2278     }
2279     return val;
2280 }
2281
2282 uint64_t ldq_phys(hwaddr addr)
2283 {
2284     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2285 }
2286
2287 uint64_t ldq_le_phys(hwaddr addr)
2288 {
2289     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2290 }
2291
2292 uint64_t ldq_be_phys(hwaddr addr)
2293 {
2294     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2295 }
2296
2297 /* XXX: optimize */
2298 uint32_t ldub_phys(hwaddr addr)
2299 {
2300     uint8_t val;
2301     cpu_physical_memory_read(addr, &val, 1);
2302     return val;
2303 }
2304
2305 /* warning: addr must be aligned */
2306 static inline uint32_t lduw_phys_internal(hwaddr addr,
2307                                           enum device_endian endian)
2308 {
2309     uint8_t *ptr;
2310     uint64_t val;
2311     MemoryRegionSection *section;
2312
2313     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2314
2315     if (!(memory_region_is_ram(section->mr) ||
2316           memory_region_is_romd(section->mr))) {
2317         /* I/O case */
2318         addr = memory_region_section_addr(section, addr);
2319         val = io_mem_read(section->mr, addr, 2);
2320 #if defined(TARGET_WORDS_BIGENDIAN)
2321         if (endian == DEVICE_LITTLE_ENDIAN) {
2322             val = bswap16(val);
2323         }
2324 #else
2325         if (endian == DEVICE_BIG_ENDIAN) {
2326             val = bswap16(val);
2327         }
2328 #endif
2329     } else {
2330         /* RAM case */
2331         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2332                                 & TARGET_PAGE_MASK)
2333                                + memory_region_section_addr(section, addr));
2334         switch (endian) {
2335         case DEVICE_LITTLE_ENDIAN:
2336             val = lduw_le_p(ptr);
2337             break;
2338         case DEVICE_BIG_ENDIAN:
2339             val = lduw_be_p(ptr);
2340             break;
2341         default:
2342             val = lduw_p(ptr);
2343             break;
2344         }
2345     }
2346     return val;
2347 }
2348
2349 uint32_t lduw_phys(hwaddr addr)
2350 {
2351     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2352 }
2353
2354 uint32_t lduw_le_phys(hwaddr addr)
2355 {
2356     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2357 }
2358
2359 uint32_t lduw_be_phys(hwaddr addr)
2360 {
2361     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2362 }
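
/*
 * Illustrative sketch (editor's addition): the fixed-endianness variants let
 * device models read guest data structures of a known byte order regardless
 * of host or target endianness.  The addresses and offsets are hypothetical:
 *
 *     uint16_t avail_idx = lduw_le_phys(ring_gpa + 2);
 *     uint32_t desc_len  = ldl_le_phys(desc_gpa + 8);
 */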
2363
2364 /* warning: addr must be aligned. The RAM page is not marked as dirty
2365    and the code inside is not invalidated. This is useful when the dirty
2366    bits are used to track modified PTEs */
2367 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2368 {
2369     uint8_t *ptr;
2370     MemoryRegionSection *section;
2371
2372     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2373
2374     if (!memory_region_is_ram(section->mr) || section->readonly) {
2375         addr = memory_region_section_addr(section, addr);
2376         if (memory_region_is_ram(section->mr)) {
2377             section = &phys_sections[phys_section_rom];
2378         }
2379         io_mem_write(section->mr, addr, val, 4);
2380     } else {
2381         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2382                                & TARGET_PAGE_MASK)
2383             + memory_region_section_addr(section, addr);
2384         ptr = qemu_get_ram_ptr(addr1);
2385         stl_p(ptr, val);
2386
2387         if (unlikely(in_migration)) {
2388             if (!cpu_physical_memory_is_dirty(addr1)) {
2389                 /* invalidate code */
2390                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2391                 /* set dirty bit */
2392                 cpu_physical_memory_set_dirty_flags(
2393                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2394             }
2395         }
2396     }
2397 }
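
/*
 * Illustrative sketch (editor's addition): target MMU emulation uses
 * stl_phys_notdirty() when it sets accessed/dirty bits in a guest page table
 * entry, so that the update is not itself reported by dirty memory tracking.
 * The address and flag value are hypothetical:
 *
 *     uint32_t pte = ldl_phys(pte_addr);
 *
 *     pte |= 0x20;                            hypothetical "accessed" bit
 *     stl_phys_notdirty(pte_addr, pte);
 */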
2398
2399 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2400 {
2401     uint8_t *ptr;
2402     MemoryRegionSection *section;
2403
2404     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2405
2406     if (!memory_region_is_ram(section->mr) || section->readonly) {
2407         addr = memory_region_section_addr(section, addr);
2408         if (memory_region_is_ram(section->mr)) {
2409             section = &phys_sections[phys_section_rom];
2410         }
2411 #ifdef TARGET_WORDS_BIGENDIAN
2412         io_mem_write(section->mr, addr, val >> 32, 4);
2413         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2414 #else
2415         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2416         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2417 #endif
2418     } else {
2419         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2420                                 & TARGET_PAGE_MASK)
2421                                + memory_region_section_addr(section, addr));
2422         stq_p(ptr, val);
2423     }
2424 }
2425
2426 /* warning: addr must be aligned */
2427 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2428                                      enum device_endian endian)
2429 {
2430     uint8_t *ptr;
2431     MemoryRegionSection *section;
2432
2433     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2434
2435     if (!memory_region_is_ram(section->mr) || section->readonly) {
2436         addr = memory_region_section_addr(section, addr);
2437         if (memory_region_is_ram(section->mr)) {
2438             section = &phys_sections[phys_section_rom];
2439         }
2440 #if defined(TARGET_WORDS_BIGENDIAN)
2441         if (endian == DEVICE_LITTLE_ENDIAN) {
2442             val = bswap32(val);
2443         }
2444 #else
2445         if (endian == DEVICE_BIG_ENDIAN) {
2446             val = bswap32(val);
2447         }
2448 #endif
2449         io_mem_write(section->mr, addr, val, 4);
2450     } else {
2451         unsigned long addr1;
2452         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2453             + memory_region_section_addr(section, addr);
2454         /* RAM case */
2455         ptr = qemu_get_ram_ptr(addr1);
2456         switch (endian) {
2457         case DEVICE_LITTLE_ENDIAN:
2458             stl_le_p(ptr, val);
2459             break;
2460         case DEVICE_BIG_ENDIAN:
2461             stl_be_p(ptr, val);
2462             break;
2463         default:
2464             stl_p(ptr, val);
2465             break;
2466         }
2467         invalidate_and_set_dirty(addr1, 4);
2468     }
2469 }
2470
2471 void stl_phys(hwaddr addr, uint32_t val)
2472 {
2473     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2474 }
2475
2476 void stl_le_phys(hwaddr addr, uint32_t val)
2477 {
2478     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2479 }
2480
2481 void stl_be_phys(hwaddr addr, uint32_t val)
2482 {
2483     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2484 }
2485
2486 /* XXX: optimize */
2487 void stb_phys(hwaddr addr, uint32_t val)
2488 {
2489     uint8_t v = val;
2490     cpu_physical_memory_write(addr, &v, 1);
2491 }
2492
2493 /* warning: addr must be aligned */
2494 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2495                                      enum device_endian endian)
2496 {
2497     uint8_t *ptr;
2498     MemoryRegionSection *section;
2499
2500     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2501
2502     if (!memory_region_is_ram(section->mr) || section->readonly) {
2503         addr = memory_region_section_addr(section, addr);
2504         if (memory_region_is_ram(section->mr)) {
2505             section = &phys_sections[phys_section_rom];
2506         }
2507 #if defined(TARGET_WORDS_BIGENDIAN)
2508         if (endian == DEVICE_LITTLE_ENDIAN) {
2509             val = bswap16(val);
2510         }
2511 #else
2512         if (endian == DEVICE_BIG_ENDIAN) {
2513             val = bswap16(val);
2514         }
2515 #endif
2516         io_mem_write(section->mr, addr, val, 2);
2517     } else {
2518         unsigned long addr1;
2519         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2520             + memory_region_section_addr(section, addr);
2521         /* RAM case */
2522         ptr = qemu_get_ram_ptr(addr1);
2523         switch (endian) {
2524         case DEVICE_LITTLE_ENDIAN:
2525             stw_le_p(ptr, val);
2526             break;
2527         case DEVICE_BIG_ENDIAN:
2528             stw_be_p(ptr, val);
2529             break;
2530         default:
2531             stw_p(ptr, val);
2532             break;
2533         }
2534         invalidate_and_set_dirty(addr1, 2);
2535     }
2536 }
2537
2538 void stw_phys(hwaddr addr, uint32_t val)
2539 {
2540     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2541 }
2542
2543 void stw_le_phys(hwaddr addr, uint32_t val)
2544 {
2545     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2546 }
2547
2548 void stw_be_phys(hwaddr addr, uint32_t val)
2549 {
2550     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2551 }
2552
2553 /* XXX: optimize */
2554 void stq_phys(hwaddr addr, uint64_t val)
2555 {
2556     val = tswap64(val);
2557     cpu_physical_memory_write(addr, &val, 8);
2558 }
2559
2560 void stq_le_phys(hwaddr addr, uint64_t val)
2561 {
2562     val = cpu_to_le64(val);
2563     cpu_physical_memory_write(addr, &val, 8);
2564 }
2565
2566 void stq_be_phys(hwaddr addr, uint64_t val)
2567 {
2568     val = cpu_to_be64(val);
2569     cpu_physical_memory_write(addr, &val, 8);
2570 }
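
/*
 * Illustrative sketch (editor's addition): the fixed-endianness stores mirror
 * the ld*_phys helpers above, e.g. when completing a little-endian descriptor
 * in guest memory.  The guest addresses and values are hypothetical:
 *
 *     stl_le_phys(desc_gpa + 8, xfer_len);
 *     stw_le_phys(status_gpa, 0x0001);
 *     stq_le_phys(tail_gpa, new_tail);
 */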
2571
2572 /* virtual memory access for debug (includes writing to ROM) */
2573 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2574                         uint8_t *buf, int len, int is_write)
2575 {
2576     int l;
2577     hwaddr phys_addr;
2578     target_ulong page;
2579
2580     while (len > 0) {
2581         page = addr & TARGET_PAGE_MASK;
2582         phys_addr = cpu_get_phys_page_debug(env, page);
2583         /* if no physical page mapped, return an error */
2584         if (phys_addr == -1)
2585             return -1;
2586         l = (page + TARGET_PAGE_SIZE) - addr;
2587         if (l > len)
2588             l = len;
2589         phys_addr += (addr & ~TARGET_PAGE_MASK);
2590         if (is_write)
2591             cpu_physical_memory_write_rom(phys_addr, buf, l);
2592         else
2593             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2594         len -= l;
2595         buf += l;
2596         addr += l;
2597     }
2598     return 0;
2599 }
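
/*
 * Illustrative sketch (editor's addition): debugger front ends such as the
 * gdbstub read guest virtual memory through this helper; the translation is
 * done by cpu_get_phys_page_debug() above.  'env' and 'pc' are hypothetical:
 *
 *     uint8_t insn[4];
 *
 *     if (cpu_memory_rw_debug(env, pc, insn, sizeof(insn), 0) < 0) {
 *         return;                             page is not mapped
 *     }
 */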
2600 #endif
2601
2602 #if !defined(CONFIG_USER_ONLY)
2603
2604 /*
2605  * A helper function for the _utterly broken_ virtio device model to find out if
2606  * it's running on a big endian machine. Don't do this at home kids!
2607  */
2608 bool virtio_is_big_endian(void);
2609 bool virtio_is_big_endian(void)
2610 {
2611 #if defined(TARGET_WORDS_BIGENDIAN)
2612     return true;
2613 #else
2614     return false;
2615 #endif
2616 }
2617
2618 #endif
2619
2620 #ifndef CONFIG_USER_ONLY
2621 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2622 {
2623     MemoryRegionSection *section;
2624
2625     section = phys_page_find(address_space_memory.dispatch,
2626                              phys_addr >> TARGET_PAGE_BITS);
2627
2628     return !(memory_region_is_ram(section->mr) ||
2629              memory_region_is_romd(section->mr));
2630 }
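
/*
 * Illustrative sketch (editor's addition): callers can use this predicate to
 * avoid touching device MMIO when walking guest physical memory.  'start',
 * 'buf' and 'size' are hypothetical:
 *
 *     if (!cpu_physical_memory_is_io(start)) {
 *         cpu_physical_memory_read(start, buf, size);
 *     }
 */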
2631 #endif