1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
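/*
 * A minimal usage sketch of the two routines above (values are hypothetical;
 * "sec" stands for an index previously returned by phys_section_add()):
 *
 *     hwaddr page = paddr >> TARGET_PAGE_BITS;
 *     phys_page_set(d, page, size >> TARGET_PAGE_BITS, sec);
 *     MemoryRegionSection *s = phys_page_find(d, page);
 *     // s == &phys_sections[sec] for any page inside the range,
 *     // s == &phys_sections[phys_section_unassigned] outside it.
 *
 * Each level of the tree consumes L2_BITS of the page index, so a lookup
 * walks at most P_L2_LEVELS nodes.
 */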
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUState *cpu = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     cpu->interrupt_request &= ~0x01;
231     tlb_flush(cpu->env_ptr, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUState),
244         VMSTATE_UINT32(interrupt_request, CPUState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #else
249 #define vmstate_cpu_common vmstate_dummy
250 #endif
251
252 CPUState *qemu_get_cpu(int index)
253 {
254     CPUArchState *env = first_cpu;
255     CPUState *cpu = NULL;
256
257     while (env) {
258         cpu = ENV_GET_CPU(env);
259         if (cpu->cpu_index == index) {
260             break;
261         }
262         env = env->next_cpu;
263     }
264
265     return env ? cpu : NULL;
266 }
267
268 void cpu_exec_init(CPUArchState *env)
269 {
270     CPUState *cpu = ENV_GET_CPU(env);
271     CPUClass *cc = CPU_GET_CLASS(cpu);
272     CPUArchState **penv;
273     int cpu_index;
274
275 #if defined(CONFIG_USER_ONLY)
276     cpu_list_lock();
277 #endif
278     env->next_cpu = NULL;
279     penv = &first_cpu;
280     cpu_index = 0;
281     while (*penv != NULL) {
282         penv = &(*penv)->next_cpu;
283         cpu_index++;
284     }
285     cpu->cpu_index = cpu_index;
286     cpu->numa_node = 0;
287     QTAILQ_INIT(&env->breakpoints);
288     QTAILQ_INIT(&env->watchpoints);
289 #ifndef CONFIG_USER_ONLY
290     cpu->thread_id = qemu_get_thread_id();
291 #endif
292     *penv = env;
293 #if defined(CONFIG_USER_ONLY)
294     cpu_list_unlock();
295 #endif
296     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, cpu);
297 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
298     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
299                     cpu_save, cpu_load, env);
300     assert(cc->vmsd == NULL);
301 #endif
302     if (cc->vmsd != NULL) {
303         vmstate_register(NULL, cpu_index, cc->vmsd, cpu);
304     }
305 }
306
307 #if defined(TARGET_HAS_ICE)
308 #if defined(CONFIG_USER_ONLY)
309 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
310 {
311     tb_invalidate_phys_page_range(pc, pc + 1, 0);
312 }
313 #else
314 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
315 {
316     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
317             (pc & ~TARGET_PAGE_MASK));
318 }
319 #endif
320 #endif /* TARGET_HAS_ICE */
321
322 #if defined(CONFIG_USER_ONLY)
323 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
324
325 {
326 }
327
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     return -ENOSYS;
332 }
333 #else
334 /* Add a watchpoint.  */
335 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
336                           int flags, CPUWatchpoint **watchpoint)
337 {
338     target_ulong len_mask = ~(len - 1);
339     CPUWatchpoint *wp;
340
341     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
342     if ((len & (len - 1)) || (addr & ~len_mask) ||
343             len == 0 || len > TARGET_PAGE_SIZE) {
344         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
345                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
346         return -EINVAL;
347     }
348     wp = g_malloc(sizeof(*wp));
349
350     wp->vaddr = addr;
351     wp->len_mask = len_mask;
352     wp->flags = flags;
353
354     /* keep all GDB-injected watchpoints in front */
355     if (flags & BP_GDB)
356         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
357     else
358         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
359
360     tlb_flush_page(env, addr);
361
362     if (watchpoint)
363         *watchpoint = wp;
364     return 0;
365 }
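/*
 * Illustrative only: a write watchpoint on one aligned 4-byte word could be
 * set up and torn down like this (the env pointer is assumed to come from
 * the caller):
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE, &wp) == 0) {
 *         ...
 *         cpu_watchpoint_remove_by_ref(env, wp);
 *     }
 *
 * len must be a power of two no larger than TARGET_PAGE_SIZE and addr must
 * be aligned to it, otherwise -EINVAL is returned.
 */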
366
367 /* Remove a specific watchpoint.  */
368 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
369                           int flags)
370 {
371     target_ulong len_mask = ~(len - 1);
372     CPUWatchpoint *wp;
373
374     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
375         if (addr == wp->vaddr && len_mask == wp->len_mask
376                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
377             cpu_watchpoint_remove_by_ref(env, wp);
378             return 0;
379         }
380     }
381     return -ENOENT;
382 }
383
384 /* Remove a specific watchpoint by reference.  */
385 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
386 {
387     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
388
389     tlb_flush_page(env, watchpoint->vaddr);
390
391     g_free(watchpoint);
392 }
393
394 /* Remove all matching watchpoints.  */
395 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
396 {
397     CPUWatchpoint *wp, *next;
398
399     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
400         if (wp->flags & mask)
401             cpu_watchpoint_remove_by_ref(env, wp);
402     }
403 }
404 #endif
405
406 /* Add a breakpoint.  */
407 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
408                           CPUBreakpoint **breakpoint)
409 {
410 #if defined(TARGET_HAS_ICE)
411     CPUBreakpoint *bp;
412
413     bp = g_malloc(sizeof(*bp));
414
415     bp->pc = pc;
416     bp->flags = flags;
417
418     /* keep all GDB-injected breakpoints in front */
419     if (flags & BP_GDB)
420         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
421     else
422         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
423
424     breakpoint_invalidate(env, pc);
425
426     if (breakpoint)
427         *breakpoint = bp;
428     return 0;
429 #else
430     return -ENOSYS;
431 #endif
432 }
433
434 /* Remove a specific breakpoint.  */
435 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
436 {
437 #if defined(TARGET_HAS_ICE)
438     CPUBreakpoint *bp;
439
440     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
441         if (bp->pc == pc && bp->flags == flags) {
442             cpu_breakpoint_remove_by_ref(env, bp);
443             return 0;
444         }
445     }
446     return -ENOENT;
447 #else
448     return -ENOSYS;
449 #endif
450 }
451
452 /* Remove a specific breakpoint by reference.  */
453 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
454 {
455 #if defined(TARGET_HAS_ICE)
456     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
457
458     breakpoint_invalidate(env, breakpoint->pc);
459
460     g_free(breakpoint);
461 #endif
462 }
463
464 /* Remove all matching breakpoints. */
465 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
466 {
467 #if defined(TARGET_HAS_ICE)
468     CPUBreakpoint *bp, *next;
469
470     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
471         if (bp->flags & mask)
472             cpu_breakpoint_remove_by_ref(env, bp);
473     }
474 #endif
475 }
476
477 /* enable or disable single step mode. EXCP_DEBUG is returned by the
478    CPU loop after each instruction */
479 void cpu_single_step(CPUArchState *env, int enabled)
480 {
481 #if defined(TARGET_HAS_ICE)
482     if (env->singlestep_enabled != enabled) {
483         env->singlestep_enabled = enabled;
484         if (kvm_enabled())
485             kvm_update_guest_debug(env, 0);
486         else {
487             /* must flush all the translated code to avoid inconsistencies */
488             /* XXX: only flush what is necessary */
489             tb_flush(env);
490         }
491     }
492 #endif
493 }
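/*
 * Sketch of gdbstub-style usage (SSTEP_ENABLE is defined elsewhere in the
 * tree; illustrative only):
 *
 *     cpu_single_step(env, SSTEP_ENABLE);   // next insn raises EXCP_DEBUG
 *     ...
 *     cpu_single_step(env, 0);              // back to normal execution
 */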
494
495 void cpu_exit(CPUArchState *env)
496 {
497     CPUState *cpu = ENV_GET_CPU(env);
498
499     cpu->exit_request = 1;
500     cpu->tcg_exit_req = 1;
501 }
502
503 void cpu_abort(CPUArchState *env, const char *fmt, ...)
504 {
505     va_list ap;
506     va_list ap2;
507
508     va_start(ap, fmt);
509     va_copy(ap2, ap);
510     fprintf(stderr, "qemu: fatal: ");
511     vfprintf(stderr, fmt, ap);
512     fprintf(stderr, "\n");
513     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
514     if (qemu_log_enabled()) {
515         qemu_log("qemu: fatal: ");
516         qemu_log_vprintf(fmt, ap2);
517         qemu_log("\n");
518         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
519         qemu_log_flush();
520         qemu_log_close();
521     }
522     va_end(ap2);
523     va_end(ap);
524 #if defined(CONFIG_USER_ONLY)
525     {
526         struct sigaction act;
527         sigfillset(&act.sa_mask);
528         act.sa_handler = SIG_DFL;
529         sigaction(SIGABRT, &act, NULL);
530     }
531 #endif
532     abort();
533 }
534
535 CPUArchState *cpu_copy(CPUArchState *env)
536 {
537     CPUArchState *new_env = cpu_init(env->cpu_model_str);
538     CPUArchState *next_cpu = new_env->next_cpu;
539 #if defined(TARGET_HAS_ICE)
540     CPUBreakpoint *bp;
541     CPUWatchpoint *wp;
542 #endif
543
544     memcpy(new_env, env, sizeof(CPUArchState));
545
546     /* Preserve chaining. */
547     new_env->next_cpu = next_cpu;
548
549     /* Clone all break/watchpoints.
550        Note: Once we support ptrace with hw-debug register access, make sure
551        BP_CPU break/watchpoints are handled correctly on clone. */
552     QTAILQ_INIT(&new_env->breakpoints);
553     QTAILQ_INIT(&new_env->watchpoints);
554 #if defined(TARGET_HAS_ICE)
555     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
556         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
557     }
558     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
559         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
560                               wp->flags, NULL);
561     }
562 #endif
563
564     return new_env;
565 }
566
567 #if !defined(CONFIG_USER_ONLY)
568 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
569                                       uintptr_t length)
570 {
571     uintptr_t start1;
572
573     /* we modify the TLB cache so that the dirty bit will be set again
574        when accessing the range */
575     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
576     /* Check that we don't span multiple blocks - this breaks the
577        address comparisons below.  */
578     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
579             != (end - 1) - start) {
580         abort();
581     }
582     cpu_tlb_reset_dirty_all(start1, length);
583
584 }
585
586 /* Note: start and end must be within the same ram block.  */
587 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
588                                      int dirty_flags)
589 {
590     uintptr_t length;
591
592     start &= TARGET_PAGE_MASK;
593     end = TARGET_PAGE_ALIGN(end);
594
595     length = end - start;
596     if (length == 0)
597         return;
598     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
599
600     if (tcg_enabled()) {
601         tlb_reset_dirty_range_all(start, end, length);
602     }
603 }
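/*
 * Typical consumer of the dirty bitmap (sketch only; the dirty-flag helpers
 * and VGA_DIRTY_FLAG are declared in the memory headers):
 *
 *     if (cpu_physical_memory_get_dirty(page_addr, TARGET_PAGE_SIZE,
 *                                       VGA_DIRTY_FLAG)) {
 *         // redraw this page of the framebuffer ...
 *     }
 *     cpu_physical_memory_reset_dirty(start, end, VGA_DIRTY_FLAG);
 */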
604
605 static int cpu_physical_memory_set_dirty_tracking(int enable)
606 {
607     int ret = 0;
608     in_migration = enable;
609     return ret;
610 }
611
612 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
613                                                    MemoryRegionSection *section,
614                                                    target_ulong vaddr,
615                                                    hwaddr paddr,
616                                                    int prot,
617                                                    target_ulong *address)
618 {
619     hwaddr iotlb;
620     CPUWatchpoint *wp;
621
622     if (memory_region_is_ram(section->mr)) {
623         /* Normal RAM.  */
624         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
625             + memory_region_section_addr(section, paddr);
626         if (!section->readonly) {
627             iotlb |= phys_section_notdirty;
628         } else {
629             iotlb |= phys_section_rom;
630         }
631     } else {
632         /* IO handlers are currently passed a physical address.
633            It would be nice to pass an offset from the base address
634            of that region.  This would avoid having to special case RAM,
635            and avoid full address decoding in every device.
636            We can't use the high bits of pd for this because
637            IO_MEM_ROMD uses these as a ram address.  */
638         iotlb = section - phys_sections;
639         iotlb += memory_region_section_addr(section, paddr);
640     }
641
642     /* Make accesses to pages with watchpoints go via the
643        watchpoint trap routines.  */
644     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
645         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
646             /* Avoid trapping reads of pages with a write breakpoint. */
647             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
648                 iotlb = phys_section_watch + paddr;
649                 *address |= TLB_MMIO;
650                 break;
651             }
652         }
653     }
654
655     return iotlb;
656 }
657 #endif /* !defined(CONFIG_USER_ONLY) */
658
659 #if !defined(CONFIG_USER_ONLY)
660
661 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
662 typedef struct subpage_t {
663     MemoryRegion iomem;
664     hwaddr base;
665     uint16_t sub_section[TARGET_PAGE_SIZE];
666 } subpage_t;
667
668 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
669                              uint16_t section);
670 static subpage_t *subpage_init(hwaddr base);
671 static void destroy_page_desc(uint16_t section_index)
672 {
673     MemoryRegionSection *section = &phys_sections[section_index];
674     MemoryRegion *mr = section->mr;
675
676     if (mr->subpage) {
677         subpage_t *subpage = container_of(mr, subpage_t, iomem);
678         memory_region_destroy(&subpage->iomem);
679         g_free(subpage);
680     }
681 }
682
683 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
684 {
685     unsigned i;
686     PhysPageEntry *p;
687
688     if (lp->ptr == PHYS_MAP_NODE_NIL) {
689         return;
690     }
691
692     p = phys_map_nodes[lp->ptr];
693     for (i = 0; i < L2_SIZE; ++i) {
694         if (!p[i].is_leaf) {
695             destroy_l2_mapping(&p[i], level - 1);
696         } else {
697             destroy_page_desc(p[i].ptr);
698         }
699     }
700     lp->is_leaf = 0;
701     lp->ptr = PHYS_MAP_NODE_NIL;
702 }
703
704 static void destroy_all_mappings(AddressSpaceDispatch *d)
705 {
706     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
707     phys_map_nodes_reset();
708 }
709
710 static uint16_t phys_section_add(MemoryRegionSection *section)
711 {
712     if (phys_sections_nb == phys_sections_nb_alloc) {
713         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
714         phys_sections = g_renew(MemoryRegionSection, phys_sections,
715                                 phys_sections_nb_alloc);
716     }
717     phys_sections[phys_sections_nb] = *section;
718     return phys_sections_nb++;
719 }
720
721 static void phys_sections_clear(void)
722 {
723     phys_sections_nb = 0;
724 }
725
726 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
727 {
728     subpage_t *subpage;
729     hwaddr base = section->offset_within_address_space
730         & TARGET_PAGE_MASK;
731     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
732     MemoryRegionSection subsection = {
733         .offset_within_address_space = base,
734         .size = TARGET_PAGE_SIZE,
735     };
736     hwaddr start, end;
737
738     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
739
740     if (!(existing->mr->subpage)) {
741         subpage = subpage_init(base);
742         subsection.mr = &subpage->iomem;
743         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
744                       phys_section_add(&subsection));
745     } else {
746         subpage = container_of(existing->mr, subpage_t, iomem);
747     }
748     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
749     end = start + section->size - 1;
750     subpage_register(subpage, start, end, phys_section_add(section));
751 }
752
753
754 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
755 {
756     hwaddr start_addr = section->offset_within_address_space;
757     ram_addr_t size = section->size;
758     hwaddr addr;
759     uint16_t section_index = phys_section_add(section);
760
761     assert(size);
762
763     addr = start_addr;
764     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
765                   section_index);
766 }
767
768 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
769 {
770     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
771     MemoryRegionSection now = *section, remain = *section;
772
773     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
774         || (now.size < TARGET_PAGE_SIZE)) {
775         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
776                        - now.offset_within_address_space,
777                        now.size);
778         register_subpage(d, &now);
779         remain.size -= now.size;
780         remain.offset_within_address_space += now.size;
781         remain.offset_within_region += now.size;
782     }
783     while (remain.size >= TARGET_PAGE_SIZE) {
784         now = remain;
785         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
786             now.size = TARGET_PAGE_SIZE;
787             register_subpage(d, &now);
788         } else {
789             now.size &= TARGET_PAGE_MASK;
790             register_multipage(d, &now);
791         }
792         remain.size -= now.size;
793         remain.offset_within_address_space += now.size;
794         remain.offset_within_region += now.size;
795     }
796     now = remain;
797     if (now.size) {
798         register_subpage(d, &now);
799     }
800 }
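/*
 * Worked example, assuming 4 KiB target pages and hypothetical values: a
 * section at guest-physical 0x1800 with size 0x3000 (offset_within_region
 * 0x800) is split by mem_add() into
 *
 *     [0x1800, 0x1fff]  unaligned head  -> register_subpage()
 *     [0x2000, 0x3fff]  two full pages  -> register_multipage()
 *     [0x4000, 0x47ff]  partial tail    -> register_subpage()
 *
 * so only the page-sized middle part gets a direct phys_map entry.
 */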
801
802 void qemu_flush_coalesced_mmio_buffer(void)
803 {
804     if (kvm_enabled())
805         kvm_flush_coalesced_mmio_buffer();
806 }
807
808 void qemu_mutex_lock_ramlist(void)
809 {
810     qemu_mutex_lock(&ram_list.mutex);
811 }
812
813 void qemu_mutex_unlock_ramlist(void)
814 {
815     qemu_mutex_unlock(&ram_list.mutex);
816 }
817
818 #if defined(__linux__) && !defined(TARGET_S390X)
819
820 #include <sys/vfs.h>
821
822 #define HUGETLBFS_MAGIC       0x958458f6
823
824 static long gethugepagesize(const char *path)
825 {
826     struct statfs fs;
827     int ret;
828
829     do {
830         ret = statfs(path, &fs);
831     } while (ret != 0 && errno == EINTR);
832
833     if (ret != 0) {
834         perror(path);
835         return 0;
836     }
837
838     if (fs.f_type != HUGETLBFS_MAGIC)
839         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
840
841     return fs.f_bsize;
842 }
843
844 static void *file_ram_alloc(RAMBlock *block,
845                             ram_addr_t memory,
846                             const char *path)
847 {
848     char *filename;
849     void *area;
850     int fd;
851 #ifdef MAP_POPULATE
852     int flags;
853 #endif
854     unsigned long hpagesize;
855
856     hpagesize = gethugepagesize(path);
857     if (!hpagesize) {
858         return NULL;
859     }
860
861     if (memory < hpagesize) {
862         return NULL;
863     }
864
865     if (kvm_enabled() && !kvm_has_sync_mmu()) {
866         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
867         return NULL;
868     }
869
870     filename = g_strdup_printf("%s/qemu_back_mem.XXXXXX", path);
871
872     fd = mkstemp(filename);
873     if (fd < 0) {
874         perror("unable to create backing store for hugepages");
875         g_free(filename);
876         return NULL;
877     }
878     unlink(filename);
879     g_free(filename);
880
881     memory = (memory+hpagesize-1) & ~(hpagesize-1);
882
883     /*
884      * ftruncate is not supported by hugetlbfs in older
885      * hosts, so don't bother bailing out on errors.
886      * If anything goes wrong with it under other filesystems,
887      * mmap will fail.
888      */
889     if (ftruncate(fd, memory))
890         perror("ftruncate");
891
892 #ifdef MAP_POPULATE
893     /* NB: MAP_POPULATE won't exhaustively allocate all physical pages when
894      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
895      * to sidestep this quirk.
896      */
897     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
898     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
899 #else
900     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
901 #endif
902     if (area == MAP_FAILED) {
903         perror("file_ram_alloc: can't mmap RAM pages");
904         close(fd);
905         return (NULL);
906     }
907     block->fd = fd;
908     return area;
909 }
910 #endif
911
912 static ram_addr_t find_ram_offset(ram_addr_t size)
913 {
914     RAMBlock *block, *next_block;
915     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
916
917     if (QTAILQ_EMPTY(&ram_list.blocks))
918         return 0;
919
920     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
921         ram_addr_t end, next = RAM_ADDR_MAX;
922
923         end = block->offset + block->length;
924
925         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
926             if (next_block->offset >= end) {
927                 next = MIN(next, next_block->offset);
928             }
929         }
930         if (next - end >= size && next - end < mingap) {
931             offset = end;
932             mingap = next - end;
933         }
934     }
935
936     if (offset == RAM_ADDR_MAX) {
937         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
938                 (uint64_t)size);
939         abort();
940     }
941
942     return offset;
943 }
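/*
 * Example with a hypothetical layout: given blocks at [0x0, 0x8000000) and
 * [0x10000000, 0x18000000), a request for 0x4000000 bytes returns
 * 0x8000000, because the 0x8000000-byte hole between the two blocks is the
 * smallest gap that still fits the request; this keeps the ram_addr_t space
 * densely packed.
 */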
944
945 ram_addr_t last_ram_offset(void)
946 {
947     RAMBlock *block;
948     ram_addr_t last = 0;
949
950     QTAILQ_FOREACH(block, &ram_list.blocks, next)
951         last = MAX(last, block->offset + block->length);
952
953     return last;
954 }
955
956 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
957 {
958     int ret;
959     QemuOpts *machine_opts;
960
961     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
962     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
963     if (machine_opts &&
964         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
965         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
966         if (ret) {
967             perror("qemu_madvise");
968             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
969                             "but dump_guest_core=off specified\n");
970         }
971     }
972 }
973
974 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
975 {
976     RAMBlock *new_block, *block;
977
978     new_block = NULL;
979     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
980         if (block->offset == addr) {
981             new_block = block;
982             break;
983         }
984     }
985     assert(new_block);
986     assert(!new_block->idstr[0]);
987
988     if (dev) {
989         char *id = qdev_get_dev_path(dev);
990         if (id) {
991             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
992             g_free(id);
993         }
994     }
995     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
996
997     /* This assumes the iothread lock is taken here too.  */
998     qemu_mutex_lock_ramlist();
999     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1000         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1001             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1002                     new_block->idstr);
1003             abort();
1004         }
1005     }
1006     qemu_mutex_unlock_ramlist();
1007 }
1008
1009 static int memory_try_enable_merging(void *addr, size_t len)
1010 {
1011     QemuOpts *opts;
1012
1013     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1014     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1015         /* disabled by the user */
1016         return 0;
1017     }
1018
1019     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1020 }
1021
1022 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1023                                    MemoryRegion *mr)
1024 {
1025     RAMBlock *block, *new_block;
1026
1027     size = TARGET_PAGE_ALIGN(size);
1028     new_block = g_malloc0(sizeof(*new_block));
1029
1030     /* This assumes the iothread lock is taken here too.  */
1031     qemu_mutex_lock_ramlist();
1032     new_block->mr = mr;
1033     new_block->offset = find_ram_offset(size);
1034     if (host) {
1035         new_block->host = host;
1036         new_block->flags |= RAM_PREALLOC_MASK;
1037     } else {
1038         if (mem_path) {
1039 #if defined (__linux__) && !defined(TARGET_S390X)
1040             new_block->host = file_ram_alloc(new_block, size, mem_path);
1041             if (!new_block->host) {
1042                 new_block->host = qemu_vmalloc(size);
1043                 memory_try_enable_merging(new_block->host, size);
1044             }
1045 #else
1046             fprintf(stderr, "-mem-path option unsupported\n");
1047             exit(1);
1048 #endif
1049         } else {
1050             if (xen_enabled()) {
1051                 xen_ram_alloc(new_block->offset, size, mr);
1052             } else if (kvm_enabled()) {
1053                 /* some s390/kvm configurations have special constraints */
1054                 new_block->host = kvm_vmalloc(size);
1055             } else {
1056                 new_block->host = qemu_vmalloc(size);
1057             }
1058             memory_try_enable_merging(new_block->host, size);
1059         }
1060     }
1061     new_block->length = size;
1062
1063     /* Keep the list sorted from biggest to smallest block.  */
1064     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1065         if (block->length < new_block->length) {
1066             break;
1067         }
1068     }
1069     if (block) {
1070         QTAILQ_INSERT_BEFORE(block, new_block, next);
1071     } else {
1072         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1073     }
1074     ram_list.mru_block = NULL;
1075
1076     ram_list.version++;
1077     qemu_mutex_unlock_ramlist();
1078
1079     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1080                                        last_ram_offset() >> TARGET_PAGE_BITS);
1081     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1082            0, size >> TARGET_PAGE_BITS);
1083     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1084
1085     qemu_ram_setup_dump(new_block->host, size);
1086     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1087
1088     if (kvm_enabled())
1089         kvm_setup_guest_memory(new_block->host, size);
1090
1091     return new_block->offset;
1092 }
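/*
 * Sketch of the typical caller (memory.c does the equivalent when a RAM
 * MemoryRegion is created; treat the field access as illustrative):
 *
 *     mr->ram_addr = qemu_ram_alloc(size, mr);
 *     void *host = qemu_get_ram_ptr(mr->ram_addr);
 *
 * The returned offset is an index into the global RAM space, not a guest
 * physical address; the guest-physical mapping is established separately
 * through the MemoryRegion API.
 */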
1093
1094 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1095 {
1096     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1097 }
1098
1099 void qemu_ram_free_from_ptr(ram_addr_t addr)
1100 {
1101     RAMBlock *block;
1102
1103     /* This assumes the iothread lock is taken here too.  */
1104     qemu_mutex_lock_ramlist();
1105     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1106         if (addr == block->offset) {
1107             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1108             ram_list.mru_block = NULL;
1109             ram_list.version++;
1110             g_free(block);
1111             break;
1112         }
1113     }
1114     qemu_mutex_unlock_ramlist();
1115 }
1116
1117 void qemu_ram_free(ram_addr_t addr)
1118 {
1119     RAMBlock *block;
1120
1121     /* This assumes the iothread lock is taken here too.  */
1122     qemu_mutex_lock_ramlist();
1123     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1124         if (addr == block->offset) {
1125             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1126             ram_list.mru_block = NULL;
1127             ram_list.version++;
1128             if (block->flags & RAM_PREALLOC_MASK) {
1129                 ;
1130             } else if (mem_path) {
1131 #if defined (__linux__) && !defined(TARGET_S390X)
1132                 if (block->fd) {
1133                     munmap(block->host, block->length);
1134                     close(block->fd);
1135                 } else {
1136                     qemu_vfree(block->host);
1137                 }
1138 #else
1139                 abort();
1140 #endif
1141             } else {
1142 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1143                 munmap(block->host, block->length);
1144 #else
1145                 if (xen_enabled()) {
1146                     xen_invalidate_map_cache_entry(block->host);
1147                 } else {
1148                     qemu_vfree(block->host);
1149                 }
1150 #endif
1151             }
1152             g_free(block);
1153             break;
1154         }
1155     }
1156     qemu_mutex_unlock_ramlist();
1157
1158 }
1159
1160 #ifndef _WIN32
1161 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1162 {
1163     RAMBlock *block;
1164     ram_addr_t offset;
1165     int flags;
1166     void *area, *vaddr;
1167
1168     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1169         offset = addr - block->offset;
1170         if (offset < block->length) {
1171             vaddr = block->host + offset;
1172             if (block->flags & RAM_PREALLOC_MASK) {
1173                 ;
1174             } else {
1175                 flags = MAP_FIXED;
1176                 munmap(vaddr, length);
1177                 if (mem_path) {
1178 #if defined(__linux__) && !defined(TARGET_S390X)
1179                     if (block->fd) {
1180 #ifdef MAP_POPULATE
1181                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1182                             MAP_PRIVATE;
1183 #else
1184                         flags |= MAP_PRIVATE;
1185 #endif
1186                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1187                                     flags, block->fd, offset);
1188                     } else {
1189                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1190                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1191                                     flags, -1, 0);
1192                     }
1193 #else
1194                     abort();
1195 #endif
1196                 } else {
1197 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1198                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1199                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1200                                 flags, -1, 0);
1201 #else
1202                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1203                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1204                                 flags, -1, 0);
1205 #endif
1206                 }
1207                 if (area != vaddr) {
1208                     fprintf(stderr, "Could not remap addr: "
1209                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1210                             length, addr);
1211                     exit(1);
1212                 }
1213                 memory_try_enable_merging(vaddr, length);
1214                 qemu_ram_setup_dump(vaddr, length);
1215             }
1216             return;
1217         }
1218     }
1219 }
1220 #endif /* !_WIN32 */
1221
1222 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1223    With the exception of the softmmu code in this file, this should
1224    only be used for local memory (e.g. video ram) that the device owns,
1225    and knows it isn't going to access beyond the end of the block.
1226
1227    It should not be used for general purpose DMA.
1228    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1229  */
1230 void *qemu_get_ram_ptr(ram_addr_t addr)
1231 {
1232     RAMBlock *block;
1233
1234     /* The list is protected by the iothread lock here.  */
1235     block = ram_list.mru_block;
1236     if (block && addr - block->offset < block->length) {
1237         goto found;
1238     }
1239     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1240         if (addr - block->offset < block->length) {
1241             goto found;
1242         }
1243     }
1244
1245     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1246     abort();
1247
1248 found:
1249     ram_list.mru_block = block;
1250     if (xen_enabled()) {
1251         /* We need to check if the requested address is in the RAM
1252          * because we don't want to map the entire memory in QEMU.
1253          * In that case just map until the end of the page.
1254          */
1255         if (block->offset == 0) {
1256             return xen_map_cache(addr, 0, 0);
1257         } else if (block->host == NULL) {
1258             block->host =
1259                 xen_map_cache(block->offset, block->length, 1);
1260         }
1261     }
1262     return block->host + (addr - block->offset);
1263 }
1264
1265 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1266  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1267  *
1268  * ??? Is this still necessary?
1269  */
1270 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1271 {
1272     RAMBlock *block;
1273
1274     /* The list is protected by the iothread lock here.  */
1275     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1276         if (addr - block->offset < block->length) {
1277             if (xen_enabled()) {
1278                 /* We need to check if the requested address is in the RAM
1279                  * because we don't want to map the entire memory in QEMU.
1280                  * In that case just map until the end of the page.
1281                  */
1282                 if (block->offset == 0) {
1283                     return xen_map_cache(addr, 0, 0);
1284                 } else if (block->host == NULL) {
1285                     block->host =
1286                         xen_map_cache(block->offset, block->length, 1);
1287                 }
1288             }
1289             return block->host + (addr - block->offset);
1290         }
1291     }
1292
1293     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1294     abort();
1295
1296     return NULL;
1297 }
1298
1299 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1300  * but takes a size argument */
1301 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1302 {
1303     if (*size == 0) {
1304         return NULL;
1305     }
1306     if (xen_enabled()) {
1307         return xen_map_cache(addr, *size, 1);
1308     } else {
1309         RAMBlock *block;
1310
1311         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1312             if (addr - block->offset < block->length) {
1313                 if (addr - block->offset + *size > block->length)
1314                     *size = block->length - addr + block->offset;
1315                 return block->host + (addr - block->offset);
1316             }
1317         }
1318
1319         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1320         abort();
1321     }
1322 }
1323
1324 void qemu_put_ram_ptr(void *addr)
1325 {
1326     trace_qemu_put_ram_ptr(addr);
1327 }
1328
1329 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1330 {
1331     RAMBlock *block;
1332     uint8_t *host = ptr;
1333
1334     if (xen_enabled()) {
1335         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1336         return 0;
1337     }
1338
1339     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1340         /* This case can happen when the block is not mapped. */
1341         if (block->host == NULL) {
1342             continue;
1343         }
1344         if (host - block->host < block->length) {
1345             *ram_addr = block->offset + (host - block->host);
1346             return 0;
1347         }
1348     }
1349
1350     return -1;
1351 }
1352
1353 /* Some of the softmmu routines need to translate from a host pointer
1354    (typically a TLB entry) back to a ram offset.  */
1355 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1356 {
1357     ram_addr_t ram_addr;
1358
1359     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1360         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1361         abort();
1362     }
1363     return ram_addr;
1364 }
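/*
 * For a non-Xen guest the two directions are exact inverses (illustrative):
 *
 *     void *host = qemu_get_ram_ptr(ram_addr);
 *     assert(qemu_ram_addr_from_host_nofail(host) == ram_addr);
 */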
1365
1366 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1367                                     unsigned size)
1368 {
1369 #ifdef DEBUG_UNASSIGNED
1370     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1371 #endif
1372 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1373     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1374 #endif
1375     return 0;
1376 }
1377
1378 static void unassigned_mem_write(void *opaque, hwaddr addr,
1379                                  uint64_t val, unsigned size)
1380 {
1381 #ifdef DEBUG_UNASSIGNED
1382     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1383 #endif
1384 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1385     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1386 #endif
1387 }
1388
1389 static const MemoryRegionOps unassigned_mem_ops = {
1390     .read = unassigned_mem_read,
1391     .write = unassigned_mem_write,
1392     .endianness = DEVICE_NATIVE_ENDIAN,
1393 };
1394
1395 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1396                                unsigned size)
1397 {
1398     abort();
1399 }
1400
1401 static void error_mem_write(void *opaque, hwaddr addr,
1402                             uint64_t value, unsigned size)
1403 {
1404     abort();
1405 }
1406
1407 static const MemoryRegionOps error_mem_ops = {
1408     .read = error_mem_read,
1409     .write = error_mem_write,
1410     .endianness = DEVICE_NATIVE_ENDIAN,
1411 };
1412
1413 static const MemoryRegionOps rom_mem_ops = {
1414     .read = error_mem_read,
1415     .write = unassigned_mem_write,
1416     .endianness = DEVICE_NATIVE_ENDIAN,
1417 };
1418
1419 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1420                                uint64_t val, unsigned size)
1421 {
1422     int dirty_flags;
1423     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1424     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1425 #if !defined(CONFIG_USER_ONLY)
1426         tb_invalidate_phys_page_fast(ram_addr, size);
1427         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1428 #endif
1429     }
1430     switch (size) {
1431     case 1:
1432         stb_p(qemu_get_ram_ptr(ram_addr), val);
1433         break;
1434     case 2:
1435         stw_p(qemu_get_ram_ptr(ram_addr), val);
1436         break;
1437     case 4:
1438         stl_p(qemu_get_ram_ptr(ram_addr), val);
1439         break;
1440     default:
1441         abort();
1442     }
1443     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1444     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1445     /* we remove the notdirty callback only if the code has been
1446        flushed */
1447     if (dirty_flags == 0xff)
1448         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1449 }
1450
1451 static const MemoryRegionOps notdirty_mem_ops = {
1452     .read = error_mem_read,
1453     .write = notdirty_mem_write,
1454     .endianness = DEVICE_NATIVE_ENDIAN,
1455 };
1456
1457 /* Generate a debug exception if a watchpoint has been hit.  */
1458 static void check_watchpoint(int offset, int len_mask, int flags)
1459 {
1460     CPUArchState *env = cpu_single_env;
1461     target_ulong pc, cs_base;
1462     target_ulong vaddr;
1463     CPUWatchpoint *wp;
1464     int cpu_flags;
1465
1466     if (env->watchpoint_hit) {
1467         /* We re-entered the check after replacing the TB. Now raise
1468          * the debug interrupt so that it will trigger after the
1469          * current instruction. */
1470         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1471         return;
1472     }
1473     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1474     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1475         if ((vaddr == (wp->vaddr & len_mask) ||
1476              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1477             wp->flags |= BP_WATCHPOINT_HIT;
1478             if (!env->watchpoint_hit) {
1479                 env->watchpoint_hit = wp;
1480                 tb_check_watchpoint(env);
1481                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1482                     env->exception_index = EXCP_DEBUG;
1483                     cpu_loop_exit(env);
1484                 } else {
1485                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1486                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1487                     cpu_resume_from_signal(env, NULL);
1488                 }
1489             }
1490         } else {
1491             wp->flags &= ~BP_WATCHPOINT_HIT;
1492         }
1493     }
1494 }
1495
1496 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1497    so these check for a hit then pass through to the normal out-of-line
1498    phys routines.  */
1499 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1500                                unsigned size)
1501 {
1502     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1503     switch (size) {
1504     case 1: return ldub_phys(addr);
1505     case 2: return lduw_phys(addr);
1506     case 4: return ldl_phys(addr);
1507     default: abort();
1508     }
1509 }
1510
1511 static void watch_mem_write(void *opaque, hwaddr addr,
1512                             uint64_t val, unsigned size)
1513 {
1514     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1515     switch (size) {
1516     case 1:
1517         stb_phys(addr, val);
1518         break;
1519     case 2:
1520         stw_phys(addr, val);
1521         break;
1522     case 4:
1523         stl_phys(addr, val);
1524         break;
1525     default: abort();
1526     }
1527 }
1528
1529 static const MemoryRegionOps watch_mem_ops = {
1530     .read = watch_mem_read,
1531     .write = watch_mem_write,
1532     .endianness = DEVICE_NATIVE_ENDIAN,
1533 };
1534
1535 static uint64_t subpage_read(void *opaque, hwaddr addr,
1536                              unsigned len)
1537 {
1538     subpage_t *mmio = opaque;
1539     unsigned int idx = SUBPAGE_IDX(addr);
1540     MemoryRegionSection *section;
1541 #if defined(DEBUG_SUBPAGE)
1542     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1543            mmio, len, addr, idx);
1544 #endif
1545
1546     section = &phys_sections[mmio->sub_section[idx]];
1547     addr += mmio->base;
1548     addr -= section->offset_within_address_space;
1549     addr += section->offset_within_region;
1550     return io_mem_read(section->mr, addr, len);
1551 }
1552
1553 static void subpage_write(void *opaque, hwaddr addr,
1554                           uint64_t value, unsigned len)
1555 {
1556     subpage_t *mmio = opaque;
1557     unsigned int idx = SUBPAGE_IDX(addr);
1558     MemoryRegionSection *section;
1559 #if defined(DEBUG_SUBPAGE)
1560     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1561            " idx %d value %"PRIx64"\n",
1562            __func__, mmio, len, addr, idx, value);
1563 #endif
1564
1565     section = &phys_sections[mmio->sub_section[idx]];
1566     addr += mmio->base;
1567     addr -= section->offset_within_address_space;
1568     addr += section->offset_within_region;
1569     io_mem_write(section->mr, addr, value, len);
1570 }
1571
1572 static const MemoryRegionOps subpage_ops = {
1573     .read = subpage_read,
1574     .write = subpage_write,
1575     .endianness = DEVICE_NATIVE_ENDIAN,
1576 };
1577
1578 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1579                                  unsigned size)
1580 {
1581     ram_addr_t raddr = addr;
1582     void *ptr = qemu_get_ram_ptr(raddr);
1583     switch (size) {
1584     case 1: return ldub_p(ptr);
1585     case 2: return lduw_p(ptr);
1586     case 4: return ldl_p(ptr);
1587     default: abort();
1588     }
1589 }
1590
1591 static void subpage_ram_write(void *opaque, hwaddr addr,
1592                               uint64_t value, unsigned size)
1593 {
1594     ram_addr_t raddr = addr;
1595     void *ptr = qemu_get_ram_ptr(raddr);
1596     switch (size) {
1597     case 1: return stb_p(ptr, value);
1598     case 2: return stw_p(ptr, value);
1599     case 4: return stl_p(ptr, value);
1600     default: abort();
1601     }
1602 }
1603
1604 static const MemoryRegionOps subpage_ram_ops = {
1605     .read = subpage_ram_read,
1606     .write = subpage_ram_write,
1607     .endianness = DEVICE_NATIVE_ENDIAN,
1608 };
1609
1610 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1611                              uint16_t section)
1612 {
1613     int idx, eidx;
1614
1615     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1616         return -1;
1617     idx = SUBPAGE_IDX(start);
1618     eidx = SUBPAGE_IDX(end);
1619 #if defined(DEBUG_SUBPAGE)
1620     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1621            __func__, mmio, start, end, idx, eidx, section);
1622 #endif
1623     if (memory_region_is_ram(phys_sections[section].mr)) {
1624         MemoryRegionSection new_section = phys_sections[section];
1625         new_section.mr = &io_mem_subpage_ram;
1626         section = phys_section_add(&new_section);
1627     }
1628     for (; idx <= eidx; idx++) {
1629         mmio->sub_section[idx] = section;
1630     }
1631
1632     return 0;
1633 }
1634
1635 static subpage_t *subpage_init(hwaddr base)
1636 {
1637     subpage_t *mmio;
1638
1639     mmio = g_malloc0(sizeof(subpage_t));
1640
1641     mmio->base = base;
1642     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1643                           "subpage", TARGET_PAGE_SIZE);
1644     mmio->iomem.subpage = true;
1645 #if defined(DEBUG_SUBPAGE)
1646     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1647            mmio, base, TARGET_PAGE_SIZE);
1648 #endif
1649     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1650
1651     return mmio;
1652 }
1653
1654 static uint16_t dummy_section(MemoryRegion *mr)
1655 {
1656     MemoryRegionSection section = {
1657         .mr = mr,
1658         .offset_within_address_space = 0,
1659         .offset_within_region = 0,
1660         .size = UINT64_MAX,
1661     };
1662
1663     return phys_section_add(&section);
1664 }
1665
1666 MemoryRegion *iotlb_to_region(hwaddr index)
1667 {
1668     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1669 }
1670
1671 static void io_mem_init(void)
1672 {
1673     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1674     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1675     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1676                           "unassigned", UINT64_MAX);
1677     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1678                           "notdirty", UINT64_MAX);
1679     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1680                           "subpage-ram", UINT64_MAX);
1681     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1682                           "watch", UINT64_MAX);
1683 }
1684
1685 static void mem_begin(MemoryListener *listener)
1686 {
1687     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1688
1689     destroy_all_mappings(d);
1690     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1691 }
1692
1693 static void core_begin(MemoryListener *listener)
1694 {
1695     phys_sections_clear();
1696     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1697     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1698     phys_section_rom = dummy_section(&io_mem_rom);
1699     phys_section_watch = dummy_section(&io_mem_watch);
1700 }
1701
1702 static void tcg_commit(MemoryListener *listener)
1703 {
1704     CPUArchState *env;
1705
1706     /* since each CPU stores ram addresses in its TLB cache, we must
1707        reset the modified entries */
1708     /* XXX: slow ! */
1709     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1710         tlb_flush(env, 1);
1711     }
1712 }
1713
1714 static void core_log_global_start(MemoryListener *listener)
1715 {
1716     cpu_physical_memory_set_dirty_tracking(1);
1717 }
1718
1719 static void core_log_global_stop(MemoryListener *listener)
1720 {
1721     cpu_physical_memory_set_dirty_tracking(0);
1722 }
1723
1724 static void io_region_add(MemoryListener *listener,
1725                           MemoryRegionSection *section)
1726 {
1727     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1728
1729     mrio->mr = section->mr;
1730     mrio->offset = section->offset_within_region;
1731     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1732                  section->offset_within_address_space, section->size);
1733     ioport_register(&mrio->iorange);
1734 }
1735
1736 static void io_region_del(MemoryListener *listener,
1737                           MemoryRegionSection *section)
1738 {
1739     isa_unassign_ioport(section->offset_within_address_space, section->size);
1740 }
1741
1742 static MemoryListener core_memory_listener = {
1743     .begin = core_begin,
1744     .log_global_start = core_log_global_start,
1745     .log_global_stop = core_log_global_stop,
1746     .priority = 1,
1747 };
1748
1749 static MemoryListener io_memory_listener = {
1750     .region_add = io_region_add,
1751     .region_del = io_region_del,
1752     .priority = 0,
1753 };
1754
1755 static MemoryListener tcg_memory_listener = {
1756     .commit = tcg_commit,
1757 };
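/*
 * Illustrative sketch (hypothetical, not wired up anywhere): an additional
 * MemoryListener can be declared alongside the ones above, filling in only
 * the callbacks of interest, and registered against address_space_memory
 * with memory_listener_register() just as memory_map_init() below does.
 * The callback signature mirrors io_region_add() above.
 */
static void example_region_add(MemoryListener *listener,
                               MemoryRegionSection *section)
{
    /* called for every section that appears in the filtered address space */
}

static __attribute__((unused)) MemoryListener example_memory_listener = {
    .region_add = example_region_add,
    .priority = 10,
};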
1758
1759 void address_space_init_dispatch(AddressSpace *as)
1760 {
1761     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1762
1763     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1764     d->listener = (MemoryListener) {
1765         .begin = mem_begin,
1766         .region_add = mem_add,
1767         .region_nop = mem_add,
1768         .priority = 0,
1769     };
1770     as->dispatch = d;
1771     memory_listener_register(&d->listener, as);
1772 }
1773
1774 void address_space_destroy_dispatch(AddressSpace *as)
1775 {
1776     AddressSpaceDispatch *d = as->dispatch;
1777
1778     memory_listener_unregister(&d->listener);
1779     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1780     g_free(d);
1781     as->dispatch = NULL;
1782 }
1783
1784 static void memory_map_init(void)
1785 {
1786     system_memory = g_malloc(sizeof(*system_memory));
1787     memory_region_init(system_memory, "system", INT64_MAX);
1788     address_space_init(&address_space_memory, system_memory);
1789     address_space_memory.name = "memory";
1790
1791     system_io = g_malloc(sizeof(*system_io));
1792     memory_region_init(system_io, "io", 65536);
1793     address_space_init(&address_space_io, system_io);
1794     address_space_io.name = "I/O";
1795
1796     memory_listener_register(&core_memory_listener, &address_space_memory);
1797     memory_listener_register(&io_memory_listener, &address_space_io);
1798     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1799
1800     dma_context_init(&dma_context_memory, &address_space_memory,
1801                      NULL, NULL, NULL);
1802 }
1803
1804 MemoryRegion *get_system_memory(void)
1805 {
1806     return system_memory;
1807 }
1808
1809 MemoryRegion *get_system_io(void)
1810 {
1811     return system_io;
1812 }
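/*
 * Illustrative sketch (hypothetical board code, not part of this file's
 * machinery): the usual consumer of get_system_memory() is board
 * initialization code, which allocates a RAM MemoryRegion and maps it at a
 * guest-physical offset.  The region name, size and offset are made up.
 */
static void __attribute__((unused)) example_map_board_ram(void)
{
    MemoryRegion *ram = g_malloc(sizeof(*ram));

    memory_region_init_ram(ram, "example.ram", 64 * 1024 * 1024);
    vmstate_register_ram_global(ram);
    memory_region_add_subregion(get_system_memory(), 0x0, ram);
}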
1813
1814 #endif /* !defined(CONFIG_USER_ONLY) */
1815
1816 /* physical memory access (slow version, mainly for debug) */
1817 #if defined(CONFIG_USER_ONLY)
1818 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1819                         uint8_t *buf, int len, int is_write)
1820 {
1821     int l, flags;
1822     target_ulong page;
1823     void * p;
1824
1825     while (len > 0) {
1826         page = addr & TARGET_PAGE_MASK;
1827         l = (page + TARGET_PAGE_SIZE) - addr;
1828         if (l > len)
1829             l = len;
1830         flags = page_get_flags(page);
1831         if (!(flags & PAGE_VALID))
1832             return -1;
1833         if (is_write) {
1834             if (!(flags & PAGE_WRITE))
1835                 return -1;
1836             /* XXX: this code should not depend on lock_user */
1837             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1838                 return -1;
1839             memcpy(p, buf, l);
1840             unlock_user(p, addr, l);
1841         } else {
1842             if (!(flags & PAGE_READ))
1843                 return -1;
1844             /* XXX: this code should not depend on lock_user */
1845             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1846                 return -1;
1847             memcpy(buf, p, l);
1848             unlock_user(p, addr, 0);
1849         }
1850         len -= l;
1851         buf += l;
1852         addr += l;
1853     }
1854     return 0;
1855 }
1856
1857 #else
1858
1859 static void invalidate_and_set_dirty(hwaddr addr,
1860                                      hwaddr length)
1861 {
1862     if (!cpu_physical_memory_is_dirty(addr)) {
1863         /* invalidate code */
1864         tb_invalidate_phys_page_range(addr, addr + length, 0);
1865         /* set dirty bit */
1866         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1867     }
1868     xen_modified_memory(addr, length);
1869 }
1870
1871 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1872                       int len, bool is_write)
1873 {
1874     AddressSpaceDispatch *d = as->dispatch;
1875     int l;
1876     uint8_t *ptr;
1877     uint32_t val;
1878     hwaddr page;
1879     MemoryRegionSection *section;
1880
1881     while (len > 0) {
1882         page = addr & TARGET_PAGE_MASK;
1883         l = (page + TARGET_PAGE_SIZE) - addr;
1884         if (l > len)
1885             l = len;
1886         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1887
1888         if (is_write) {
1889             if (!memory_region_is_ram(section->mr)) {
1890                 hwaddr addr1;
1891                 addr1 = memory_region_section_addr(section, addr);
1892                 /* XXX: could force cpu_single_env to NULL to avoid
1893                    potential bugs */
1894                 if (l >= 4 && ((addr1 & 3) == 0)) {
1895                     /* 32 bit write access */
1896                     val = ldl_p(buf);
1897                     io_mem_write(section->mr, addr1, val, 4);
1898                     l = 4;
1899                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1900                     /* 16 bit write access */
1901                     val = lduw_p(buf);
1902                     io_mem_write(section->mr, addr1, val, 2);
1903                     l = 2;
1904                 } else {
1905                     /* 8 bit write access */
1906                     val = ldub_p(buf);
1907                     io_mem_write(section->mr, addr1, val, 1);
1908                     l = 1;
1909                 }
1910             } else if (!section->readonly) {
1911                 ram_addr_t addr1;
1912                 addr1 = memory_region_get_ram_addr(section->mr)
1913                     + memory_region_section_addr(section, addr);
1914                 /* RAM case */
1915                 ptr = qemu_get_ram_ptr(addr1);
1916                 memcpy(ptr, buf, l);
1917                 invalidate_and_set_dirty(addr1, l);
1918                 qemu_put_ram_ptr(ptr);
1919             }
1920         } else {
1921             if (!(memory_region_is_ram(section->mr) ||
1922                   memory_region_is_romd(section->mr))) {
1923                 hwaddr addr1;
1924                 /* I/O case */
1925                 addr1 = memory_region_section_addr(section, addr);
1926                 if (l >= 4 && ((addr1 & 3) == 0)) {
1927                     /* 32 bit read access */
1928                     val = io_mem_read(section->mr, addr1, 4);
1929                     stl_p(buf, val);
1930                     l = 4;
1931                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1932                     /* 16 bit read access */
1933                     val = io_mem_read(section->mr, addr1, 2);
1934                     stw_p(buf, val);
1935                     l = 2;
1936                 } else {
1937                     /* 8 bit read access */
1938                     val = io_mem_read(section->mr, addr1, 1);
1939                     stb_p(buf, val);
1940                     l = 1;
1941                 }
1942             } else {
1943                 /* RAM case */
1944                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1945                                        + memory_region_section_addr(section,
1946                                                                     addr));
1947                 memcpy(buf, ptr, l);
1948                 qemu_put_ram_ptr(ptr);
1949             }
1950         }
1951         len -= l;
1952         buf += l;
1953         addr += l;
1954     }
1955 }
1956
1957 void address_space_write(AddressSpace *as, hwaddr addr,
1958                          const uint8_t *buf, int len)
1959 {
1960     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1961 }
1962
1963 /**
1964  * address_space_read: read from an address space.
1965  *
1966  * @as: #AddressSpace to be accessed
1967  * @addr: address within that address space
1968  * @buf: buffer into which the data is read; @len gives its length in bytes
1969  */
1970 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1971 {
1972     address_space_rw(as, addr, buf, len, false);
1973 }
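/*
 * Illustrative sketch (hypothetical, not called from anywhere): a typical
 * read-modify-write of a small guest-physical buffer using the helpers
 * above on the global system memory address space.  The address passed in
 * is arbitrary.
 */
static void __attribute__((unused)) example_address_space_copy(hwaddr addr)
{
    uint8_t buf[16];

    address_space_read(&address_space_memory, addr, buf, sizeof(buf));
    buf[0] ^= 0xff;                 /* flip the bits of the first byte */
    address_space_write(&address_space_memory, addr, buf, sizeof(buf));
}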
1974
1975
1976 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1977                             int len, int is_write)
1978 {
1979     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1980 }
1981
1982 /* used for ROM loading : can write in RAM and ROM */
1983 void cpu_physical_memory_write_rom(hwaddr addr,
1984                                    const uint8_t *buf, int len)
1985 {
1986     AddressSpaceDispatch *d = address_space_memory.dispatch;
1987     int l;
1988     uint8_t *ptr;
1989     hwaddr page;
1990     MemoryRegionSection *section;
1991
1992     while (len > 0) {
1993         page = addr & TARGET_PAGE_MASK;
1994         l = (page + TARGET_PAGE_SIZE) - addr;
1995         if (l > len)
1996             l = len;
1997         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1998
1999         if (!(memory_region_is_ram(section->mr) ||
2000               memory_region_is_romd(section->mr))) {
2001             /* do nothing */
2002         } else {
2003             unsigned long addr1;
2004             addr1 = memory_region_get_ram_addr(section->mr)
2005                 + memory_region_section_addr(section, addr);
2006             /* ROM/RAM case */
2007             ptr = qemu_get_ram_ptr(addr1);
2008             memcpy(ptr, buf, l);
2009             invalidate_and_set_dirty(addr1, l);
2010             qemu_put_ram_ptr(ptr);
2011         }
2012         len -= l;
2013         buf += l;
2014         addr += l;
2015     }
2016 }
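/*
 * Illustrative sketch (hypothetical, in the spirit of the ROM loaders in
 * hw/): copy a firmware blob to a fixed guest-physical address through the
 * ROM-capable writer above, so that read-only regions are populated too.
 * The load address is made up.
 */
static void __attribute__((unused)) example_load_blob(const uint8_t *blob,
                                                      int size)
{
    const hwaddr load_addr = 0xfffc0000;    /* hypothetical reset vector area */

    cpu_physical_memory_write_rom(load_addr, blob, size);
}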
2017
2018 typedef struct {
2019     void *buffer;
2020     hwaddr addr;
2021     hwaddr len;
2022 } BounceBuffer;
2023
2024 static BounceBuffer bounce;
2025
2026 typedef struct MapClient {
2027     void *opaque;
2028     void (*callback)(void *opaque);
2029     QLIST_ENTRY(MapClient) link;
2030 } MapClient;
2031
2032 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2033     = QLIST_HEAD_INITIALIZER(map_client_list);
2034
2035 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2036 {
2037     MapClient *client = g_malloc(sizeof(*client));
2038
2039     client->opaque = opaque;
2040     client->callback = callback;
2041     QLIST_INSERT_HEAD(&map_client_list, client, link);
2042     return client;
2043 }
2044
2045 static void cpu_unregister_map_client(void *_client)
2046 {
2047     MapClient *client = (MapClient *)_client;
2048
2049     QLIST_REMOVE(client, link);
2050     g_free(client);
2051 }
2052
2053 static void cpu_notify_map_clients(void)
2054 {
2055     MapClient *client;
2056
2057     while (!QLIST_EMPTY(&map_client_list)) {
2058         client = QLIST_FIRST(&map_client_list);
2059         client->callback(client->opaque);
2060         cpu_unregister_map_client(client);
2061     }
2062 }
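/*
 * Illustrative sketch of the retry protocol (hypothetical device code): if
 * the single bounce buffer is busy and a map attempt fails, register a
 * callback and retry the mapping when it fires.  'ExampleDev' and its
 * fields are made up.
 */
typedef struct ExampleDev {
    hwaddr dma_addr;
    hwaddr dma_len;
} ExampleDev;

static void __attribute__((unused)) example_try_dma(void *opaque)
{
    ExampleDev *dev = opaque;
    hwaddr len = dev->dma_len;
    void *ptr = cpu_physical_memory_map(dev->dma_addr, &len, 1);

    if (!ptr) {
        /* resources exhausted: retry once a mapping is released */
        cpu_register_map_client(dev, example_try_dma);
        return;
    }
    /* ... DMA up to 'len' bytes into 'ptr' ... */
    cpu_physical_memory_unmap(ptr, len, 1, len);
}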
2063
2064 /* Map a physical memory region into a host virtual address.
2065  * May map a subset of the requested range, given by and returned in *plen.
2066  * May return NULL if resources needed to perform the mapping are exhausted.
2067  * Use only for reads OR writes - not for read-modify-write operations.
2068  * Use cpu_register_map_client() to know when retrying the map operation is
2069  * likely to succeed.
2070  */
2071 void *address_space_map(AddressSpace *as,
2072                         hwaddr addr,
2073                         hwaddr *plen,
2074                         bool is_write)
2075 {
2076     AddressSpaceDispatch *d = as->dispatch;
2077     hwaddr len = *plen;
2078     hwaddr todo = 0;
2079     int l;
2080     hwaddr page;
2081     MemoryRegionSection *section;
2082     ram_addr_t raddr = RAM_ADDR_MAX;
2083     ram_addr_t rlen;
2084     void *ret;
2085
2086     while (len > 0) {
2087         page = addr & TARGET_PAGE_MASK;
2088         l = (page + TARGET_PAGE_SIZE) - addr;
2089         if (l > len)
2090             l = len;
2091         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2092
2093         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2094             if (todo || bounce.buffer) {
2095                 break;
2096             }
2097             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2098             bounce.addr = addr;
2099             bounce.len = l;
2100             if (!is_write) {
2101                 address_space_read(as, addr, bounce.buffer, l);
2102             }
2103
2104             *plen = l;
2105             return bounce.buffer;
2106         }
2107         if (!todo) {
2108             raddr = memory_region_get_ram_addr(section->mr)
2109                 + memory_region_section_addr(section, addr);
2110         }
2111
2112         len -= l;
2113         addr += l;
2114         todo += l;
2115     }
2116     rlen = todo;
2117     ret = qemu_ram_ptr_length(raddr, &rlen);
2118     *plen = rlen;
2119     return ret;
2120 }
2121
2122 /* Unmaps a memory region previously mapped by address_space_map().
2123  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2124  * the amount of memory that was actually read or written by the caller.
2125  */
2126 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2127                          int is_write, hwaddr access_len)
2128 {
2129     if (buffer != bounce.buffer) {
2130         if (is_write) {
2131             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2132             while (access_len) {
2133                 unsigned l;
2134                 l = TARGET_PAGE_SIZE;
2135                 if (l > access_len)
2136                     l = access_len;
2137                 invalidate_and_set_dirty(addr1, l);
2138                 addr1 += l;
2139                 access_len -= l;
2140             }
2141         }
2142         if (xen_enabled()) {
2143             xen_invalidate_map_cache_entry(buffer);
2144         }
2145         return;
2146     }
2147     if (is_write) {
2148         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2149     }
2150     qemu_vfree(bounce.buffer);
2151     bounce.buffer = NULL;
2152     cpu_notify_map_clients();
2153 }
2154
2155 void *cpu_physical_memory_map(hwaddr addr,
2156                               hwaddr *plen,
2157                               int is_write)
2158 {
2159     return address_space_map(&address_space_memory, addr, plen, is_write);
2160 }
2161
2162 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2163                                int is_write, hwaddr access_len)
2164 {
2165     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2166 }
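/*
 * Illustrative sketch (hypothetical caller): the usual zero-copy access
 * pattern around cpu_physical_memory_map()/unmap().  The mapped length may
 * come back smaller than requested, so callers loop until done.
 */
static void __attribute__((unused)) example_zero_fill(hwaddr addr, hwaddr size)
{
    while (size > 0) {
        hwaddr len = size;
        void *ptr = cpu_physical_memory_map(addr, &len, 1);

        if (!ptr) {
            break;          /* bounce buffer busy; a real caller would retry */
        }
        memset(ptr, 0, len);
        cpu_physical_memory_unmap(ptr, len, 1, len);
        addr += len;
        size -= len;
    }
}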
2167
2168 /* warning: addr must be aligned */
2169 static inline uint32_t ldl_phys_internal(hwaddr addr,
2170                                          enum device_endian endian)
2171 {
2172     uint8_t *ptr;
2173     uint32_t val;
2174     MemoryRegionSection *section;
2175
2176     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2177
2178     if (!(memory_region_is_ram(section->mr) ||
2179           memory_region_is_romd(section->mr))) {
2180         /* I/O case */
2181         addr = memory_region_section_addr(section, addr);
2182         val = io_mem_read(section->mr, addr, 4);
2183 #if defined(TARGET_WORDS_BIGENDIAN)
2184         if (endian == DEVICE_LITTLE_ENDIAN) {
2185             val = bswap32(val);
2186         }
2187 #else
2188         if (endian == DEVICE_BIG_ENDIAN) {
2189             val = bswap32(val);
2190         }
2191 #endif
2192     } else {
2193         /* RAM case */
2194         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2195                                 & TARGET_PAGE_MASK)
2196                                + memory_region_section_addr(section, addr));
2197         switch (endian) {
2198         case DEVICE_LITTLE_ENDIAN:
2199             val = ldl_le_p(ptr);
2200             break;
2201         case DEVICE_BIG_ENDIAN:
2202             val = ldl_be_p(ptr);
2203             break;
2204         default:
2205             val = ldl_p(ptr);
2206             break;
2207         }
2208     }
2209     return val;
2210 }
2211
2212 uint32_t ldl_phys(hwaddr addr)
2213 {
2214     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2215 }
2216
2217 uint32_t ldl_le_phys(hwaddr addr)
2218 {
2219     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2220 }
2221
2222 uint32_t ldl_be_phys(hwaddr addr)
2223 {
2224     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2225 }
2226
2227 /* warning: addr must be aligned */
2228 static inline uint64_t ldq_phys_internal(hwaddr addr,
2229                                          enum device_endian endian)
2230 {
2231     uint8_t *ptr;
2232     uint64_t val;
2233     MemoryRegionSection *section;
2234
2235     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2236
2237     if (!(memory_region_is_ram(section->mr) ||
2238           memory_region_is_romd(section->mr))) {
2239         /* I/O case */
2240         addr = memory_region_section_addr(section, addr);
2241
2242         /* XXX This is broken when device endian != cpu endian.
2243                Fix and add "endian" variable check */
2244 #ifdef TARGET_WORDS_BIGENDIAN
2245         val = io_mem_read(section->mr, addr, 4) << 32;
2246         val |= io_mem_read(section->mr, addr + 4, 4);
2247 #else
2248         val = io_mem_read(section->mr, addr, 4);
2249         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2250 #endif
2251     } else {
2252         /* RAM case */
2253         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2254                                 & TARGET_PAGE_MASK)
2255                                + memory_region_section_addr(section, addr));
2256         switch (endian) {
2257         case DEVICE_LITTLE_ENDIAN:
2258             val = ldq_le_p(ptr);
2259             break;
2260         case DEVICE_BIG_ENDIAN:
2261             val = ldq_be_p(ptr);
2262             break;
2263         default:
2264             val = ldq_p(ptr);
2265             break;
2266         }
2267     }
2268     return val;
2269 }
2270
2271 uint64_t ldq_phys(hwaddr addr)
2272 {
2273     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2274 }
2275
2276 uint64_t ldq_le_phys(hwaddr addr)
2277 {
2278     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2279 }
2280
2281 uint64_t ldq_be_phys(hwaddr addr)
2282 {
2283     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2284 }
2285
2286 /* XXX: optimize */
2287 uint32_t ldub_phys(hwaddr addr)
2288 {
2289     uint8_t val;
2290     cpu_physical_memory_read(addr, &val, 1);
2291     return val;
2292 }
2293
2294 /* warning: addr must be aligned */
2295 static inline uint32_t lduw_phys_internal(hwaddr addr,
2296                                           enum device_endian endian)
2297 {
2298     uint8_t *ptr;
2299     uint64_t val;
2300     MemoryRegionSection *section;
2301
2302     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2303
2304     if (!(memory_region_is_ram(section->mr) ||
2305           memory_region_is_romd(section->mr))) {
2306         /* I/O case */
2307         addr = memory_region_section_addr(section, addr);
2308         val = io_mem_read(section->mr, addr, 2);
2309 #if defined(TARGET_WORDS_BIGENDIAN)
2310         if (endian == DEVICE_LITTLE_ENDIAN) {
2311             val = bswap16(val);
2312         }
2313 #else
2314         if (endian == DEVICE_BIG_ENDIAN) {
2315             val = bswap16(val);
2316         }
2317 #endif
2318     } else {
2319         /* RAM case */
2320         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2321                                 & TARGET_PAGE_MASK)
2322                                + memory_region_section_addr(section, addr));
2323         switch (endian) {
2324         case DEVICE_LITTLE_ENDIAN:
2325             val = lduw_le_p(ptr);
2326             break;
2327         case DEVICE_BIG_ENDIAN:
2328             val = lduw_be_p(ptr);
2329             break;
2330         default:
2331             val = lduw_p(ptr);
2332             break;
2333         }
2334     }
2335     return val;
2336 }
2337
2338 uint32_t lduw_phys(hwaddr addr)
2339 {
2340     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2341 }
2342
2343 uint32_t lduw_le_phys(hwaddr addr)
2344 {
2345     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2346 }
2347
2348 uint32_t lduw_be_phys(hwaddr addr)
2349 {
2350     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2351 }
2352
2353 /* warning: addr must be aligned. The ram page is not marked as dirty
2354    and the code inside is not invalidated. It is useful if the dirty
2355    bits are used to track modified PTEs */
2356 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2357 {
2358     uint8_t *ptr;
2359     MemoryRegionSection *section;
2360
2361     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2362
2363     if (!memory_region_is_ram(section->mr) || section->readonly) {
2364         addr = memory_region_section_addr(section, addr);
2365         if (memory_region_is_ram(section->mr)) {
2366             section = &phys_sections[phys_section_rom];
2367         }
2368         io_mem_write(section->mr, addr, val, 4);
2369     } else {
2370         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2371                                & TARGET_PAGE_MASK)
2372             + memory_region_section_addr(section, addr);
2373         ptr = qemu_get_ram_ptr(addr1);
2374         stl_p(ptr, val);
2375
2376         if (unlikely(in_migration)) {
2377             if (!cpu_physical_memory_is_dirty(addr1)) {
2378                 /* invalidate code */
2379                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2380                 /* set dirty bit */
2381                 cpu_physical_memory_set_dirty_flags(
2382                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2383             }
2384         }
2385     }
2386 }
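/*
 * Illustrative sketch (hypothetical, in the spirit of the target-* MMU
 * helpers that are the real users of this function): set the "accessed"
 * and "dirty" bits of a 32-bit page table entry in guest memory without
 * marking the page containing the page table as dirty.  The bit positions
 * are made up.
 */
static void __attribute__((unused)) example_update_pte(hwaddr pte_addr,
                                                       bool is_write)
{
    uint32_t pte = ldl_phys(pte_addr);
    uint32_t new_pte = pte | (1u << 5);     /* hypothetical accessed bit */

    if (is_write) {
        new_pte |= 1u << 6;                 /* hypothetical dirty bit */
    }
    if (new_pte != pte) {
        stl_phys_notdirty(pte_addr, new_pte);
    }
}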
2387
2388 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2389 {
2390     uint8_t *ptr;
2391     MemoryRegionSection *section;
2392
2393     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2394
2395     if (!memory_region_is_ram(section->mr) || section->readonly) {
2396         addr = memory_region_section_addr(section, addr);
2397         if (memory_region_is_ram(section->mr)) {
2398             section = &phys_sections[phys_section_rom];
2399         }
2400 #ifdef TARGET_WORDS_BIGENDIAN
2401         io_mem_write(section->mr, addr, val >> 32, 4);
2402         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2403 #else
2404         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2405         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2406 #endif
2407     } else {
2408         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2409                                 & TARGET_PAGE_MASK)
2410                                + memory_region_section_addr(section, addr));
2411         stq_p(ptr, val);
2412     }
2413 }
2414
2415 /* warning: addr must be aligned */
2416 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2417                                      enum device_endian endian)
2418 {
2419     uint8_t *ptr;
2420     MemoryRegionSection *section;
2421
2422     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2423
2424     if (!memory_region_is_ram(section->mr) || section->readonly) {
2425         addr = memory_region_section_addr(section, addr);
2426         if (memory_region_is_ram(section->mr)) {
2427             section = &phys_sections[phys_section_rom];
2428         }
2429 #if defined(TARGET_WORDS_BIGENDIAN)
2430         if (endian == DEVICE_LITTLE_ENDIAN) {
2431             val = bswap32(val);
2432         }
2433 #else
2434         if (endian == DEVICE_BIG_ENDIAN) {
2435             val = bswap32(val);
2436         }
2437 #endif
2438         io_mem_write(section->mr, addr, val, 4);
2439     } else {
2440         unsigned long addr1;
2441         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2442             + memory_region_section_addr(section, addr);
2443         /* RAM case */
2444         ptr = qemu_get_ram_ptr(addr1);
2445         switch (endian) {
2446         case DEVICE_LITTLE_ENDIAN:
2447             stl_le_p(ptr, val);
2448             break;
2449         case DEVICE_BIG_ENDIAN:
2450             stl_be_p(ptr, val);
2451             break;
2452         default:
2453             stl_p(ptr, val);
2454             break;
2455         }
2456         invalidate_and_set_dirty(addr1, 4);
2457     }
2458 }
2459
2460 void stl_phys(hwaddr addr, uint32_t val)
2461 {
2462     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2463 }
2464
2465 void stl_le_phys(hwaddr addr, uint32_t val)
2466 {
2467     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2468 }
2469
2470 void stl_be_phys(hwaddr addr, uint32_t val)
2471 {
2472     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2473 }
2474
2475 /* XXX: optimize */
2476 void stb_phys(hwaddr addr, uint32_t val)
2477 {
2478     uint8_t v = val;
2479     cpu_physical_memory_write(addr, &v, 1);
2480 }
2481
2482 /* warning: addr must be aligned */
2483 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2484                                      enum device_endian endian)
2485 {
2486     uint8_t *ptr;
2487     MemoryRegionSection *section;
2488
2489     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2490
2491     if (!memory_region_is_ram(section->mr) || section->readonly) {
2492         addr = memory_region_section_addr(section, addr);
2493         if (memory_region_is_ram(section->mr)) {
2494             section = &phys_sections[phys_section_rom];
2495         }
2496 #if defined(TARGET_WORDS_BIGENDIAN)
2497         if (endian == DEVICE_LITTLE_ENDIAN) {
2498             val = bswap16(val);
2499         }
2500 #else
2501         if (endian == DEVICE_BIG_ENDIAN) {
2502             val = bswap16(val);
2503         }
2504 #endif
2505         io_mem_write(section->mr, addr, val, 2);
2506     } else {
2507         unsigned long addr1;
2508         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2509             + memory_region_section_addr(section, addr);
2510         /* RAM case */
2511         ptr = qemu_get_ram_ptr(addr1);
2512         switch (endian) {
2513         case DEVICE_LITTLE_ENDIAN:
2514             stw_le_p(ptr, val);
2515             break;
2516         case DEVICE_BIG_ENDIAN:
2517             stw_be_p(ptr, val);
2518             break;
2519         default:
2520             stw_p(ptr, val);
2521             break;
2522         }
2523         invalidate_and_set_dirty(addr1, 2);
2524     }
2525 }
2526
2527 void stw_phys(hwaddr addr, uint32_t val)
2528 {
2529     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2530 }
2531
2532 void stw_le_phys(hwaddr addr, uint32_t val)
2533 {
2534     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2535 }
2536
2537 void stw_be_phys(hwaddr addr, uint32_t val)
2538 {
2539     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2540 }
2541
2542 /* XXX: optimize */
2543 void stq_phys(hwaddr addr, uint64_t val)
2544 {
2545     val = tswap64(val);
2546     cpu_physical_memory_write(addr, &val, 8);
2547 }
2548
2549 void stq_le_phys(hwaddr addr, uint64_t val)
2550 {
2551     val = cpu_to_le64(val);
2552     cpu_physical_memory_write(addr, &val, 8);
2553 }
2554
2555 void stq_be_phys(hwaddr addr, uint64_t val)
2556 {
2557     val = cpu_to_be64(val);
2558     cpu_physical_memory_write(addr, &val, 8);
2559 }
2560
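/*
 * Illustrative sketch (hypothetical): code poking a device whose registers
 * are defined as little-endian uses the fixed-endian accessors above so the
 * result does not depend on the target's native byte order.  The register
 * address and bit layout are made up.
 */
static uint32_t __attribute__((unused)) example_toggle_reg(hwaddr reg_addr)
{
    uint32_t ctrl = ldl_le_phys(reg_addr);

    stl_le_phys(reg_addr, ctrl | 1u);   /* set a hypothetical enable bit */
    return lduw_le_phys(reg_addr);      /* low 16 bits of the same register */
}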
2561 /* virtual memory access for debug (includes writing to ROM) */
2562 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2563                         uint8_t *buf, int len, int is_write)
2564 {
2565     int l;
2566     hwaddr phys_addr;
2567     target_ulong page;
2568
2569     while (len > 0) {
2570         page = addr & TARGET_PAGE_MASK;
2571         phys_addr = cpu_get_phys_page_debug(env, page);
2572         /* if no physical page mapped, return an error */
2573         if (phys_addr == -1)
2574             return -1;
2575         l = (page + TARGET_PAGE_SIZE) - addr;
2576         if (l > len)
2577             l = len;
2578         phys_addr += (addr & ~TARGET_PAGE_MASK);
2579         if (is_write)
2580             cpu_physical_memory_write_rom(phys_addr, buf, l);
2581         else
2582             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2583         len -= l;
2584         buf += l;
2585         addr += l;
2586     }
2587     return 0;
2588 }
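/*
 * Illustrative sketch (hypothetical debugger glue, similar in spirit to the
 * gdbstub and monitor users of cpu_memory_rw_debug()): read a 32-bit value
 * from a guest virtual address, returning -1 if the page is unmapped.
 */
static int __attribute__((unused)) example_peek_u32(CPUArchState *env,
                                                    target_ulong vaddr,
                                                    uint32_t *value)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return -1;
    }
    *value = ldl_p(buf);    /* interpret the bytes in target byte order */
    return 0;
}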
2589 #endif
2590
2591 #if !defined(CONFIG_USER_ONLY)
2592
2593 /*
2594  * A helper function for the _utterly broken_ virtio device model to find out if
2595  * it's running on a big endian machine. Don't do this at home kids!
2596  */
2597 bool virtio_is_big_endian(void);
2598 bool virtio_is_big_endian(void)
2599 {
2600 #if defined(TARGET_WORDS_BIGENDIAN)
2601     return true;
2602 #else
2603     return false;
2604 #endif
2605 }
2606
2607 #endif
2608
2609 #ifndef CONFIG_USER_ONLY
2610 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2611 {
2612     MemoryRegionSection *section;
2613
2614     section = phys_page_find(address_space_memory.dispatch,
2615                              phys_addr >> TARGET_PAGE_BITS);
2616
2617     return !(memory_region_is_ram(section->mr) ||
2618              memory_region_is_romd(section->mr));
2619 }
2620 #endif