[qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include "qemu.h"
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
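/*
 * Note on the two helpers above: the dispatch structure is a radix tree with
 * P_L2_LEVELS levels of L2_SIZE entries each.  phys_page_set() consumes the
 * page index L2_BITS bits at a time from the most significant level down,
 * allocating intermediate nodes on demand and collapsing a run that covers a
 * whole aligned subtree into a single leaf at that level.  phys_page_find()
 * walks the same levels and falls back to phys_section_unassigned as soon as
 * it hits PHYS_MAP_NODE_NIL, so lookups never fail outright.
 */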
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUArchState *env = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     env->interrupt_request &= ~0x01;
231     tlb_flush(env, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUArchState),
244         VMSTATE_UINT32(interrupt_request, CPUArchState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #endif
249
250 CPUState *qemu_get_cpu(int index)
251 {
252     CPUArchState *env = first_cpu;
253     CPUState *cpu = NULL;
254
255     while (env) {
256         cpu = ENV_GET_CPU(env);
257         if (cpu->cpu_index == index) {
258             return cpu;
259         }
260         env = env->next_cpu;
261     }
262
263     return NULL;
264 }
265
266 void cpu_exec_init(CPUArchState *env)
267 {
268     CPUState *cpu = ENV_GET_CPU(env);
269     CPUArchState **penv;
270     int cpu_index;
271
272 #if defined(CONFIG_USER_ONLY)
273     cpu_list_lock();
274 #endif
275     env->next_cpu = NULL;
276     penv = &first_cpu;
277     cpu_index = 0;
278     while (*penv != NULL) {
279         penv = &(*penv)->next_cpu;
280         cpu_index++;
281     }
282     cpu->cpu_index = cpu_index;
283     cpu->numa_node = 0;
284     QTAILQ_INIT(&env->breakpoints);
285     QTAILQ_INIT(&env->watchpoints);
286 #ifndef CONFIG_USER_ONLY
287     cpu->thread_id = qemu_get_thread_id();
288 #endif
289     *penv = env;
290 #if defined(CONFIG_USER_ONLY)
291     cpu_list_unlock();
292 #endif
293 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
294     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
295     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
296                     cpu_save, cpu_load, env);
297 #endif
298 }
299
300 #if defined(TARGET_HAS_ICE)
301 #if defined(CONFIG_USER_ONLY)
302 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
303 {
304     tb_invalidate_phys_page_range(pc, pc + 1, 0);
305 }
306 #else
307 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
308 {
309     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
310             (pc & ~TARGET_PAGE_MASK));
311 }
312 #endif
313 #endif /* TARGET_HAS_ICE */
314
315 #if defined(CONFIG_USER_ONLY)
316 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
317
318 {
319 }
320
321 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
322                           int flags, CPUWatchpoint **watchpoint)
323 {
324     return -ENOSYS;
325 }
326 #else
327 /* Add a watchpoint.  */
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     target_ulong len_mask = ~(len - 1);
332     CPUWatchpoint *wp;
333
334     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
335     if ((len & (len - 1)) || (addr & ~len_mask) ||
336             len == 0 || len > TARGET_PAGE_SIZE) {
337         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
338                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
339         return -EINVAL;
340     }
341     wp = g_malloc(sizeof(*wp));
342
343     wp->vaddr = addr;
344     wp->len_mask = len_mask;
345     wp->flags = flags;
346
347     /* keep all GDB-injected watchpoints in front */
348     if (flags & BP_GDB)
349         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
350     else
351         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
352
353     tlb_flush_page(env, addr);
354
355     if (watchpoint)
356         *watchpoint = wp;
357     return 0;
358 }
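/*
 * Usage sketch (illustrative only, not a call site in this file): a debugger
 * front end such as the gdbstub would register a 4-byte write watchpoint
 * roughly like this; len must be a power of two no larger than
 * TARGET_PAGE_SIZE and addr must be aligned to it, otherwise -EINVAL comes
 * back:
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(env, addr, 4,
 *                               BP_MEM_WRITE | BP_GDB, &wp) < 0) {
 *         ... reject the request: invalid length or alignment ...
 *     }
 */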
359
360 /* Remove a specific watchpoint.  */
361 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
362                           int flags)
363 {
364     target_ulong len_mask = ~(len - 1);
365     CPUWatchpoint *wp;
366
367     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
368         if (addr == wp->vaddr && len_mask == wp->len_mask
369                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
370             cpu_watchpoint_remove_by_ref(env, wp);
371             return 0;
372         }
373     }
374     return -ENOENT;
375 }
376
377 /* Remove a specific watchpoint by reference.  */
378 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
379 {
380     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
381
382     tlb_flush_page(env, watchpoint->vaddr);
383
384     g_free(watchpoint);
385 }
386
387 /* Remove all matching watchpoints.  */
388 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
389 {
390     CPUWatchpoint *wp, *next;
391
392     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
393         if (wp->flags & mask)
394             cpu_watchpoint_remove_by_ref(env, wp);
395     }
396 }
397 #endif
398
399 /* Add a breakpoint.  */
400 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
401                           CPUBreakpoint **breakpoint)
402 {
403 #if defined(TARGET_HAS_ICE)
404     CPUBreakpoint *bp;
405
406     bp = g_malloc(sizeof(*bp));
407
408     bp->pc = pc;
409     bp->flags = flags;
410
411     /* keep all GDB-injected breakpoints in front */
412     if (flags & BP_GDB)
413         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
414     else
415         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
416
417     breakpoint_invalidate(env, pc);
418
419     if (breakpoint)
420         *breakpoint = bp;
421     return 0;
422 #else
423     return -ENOSYS;
424 #endif
425 }
426
427 /* Remove a specific breakpoint.  */
428 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
429 {
430 #if defined(TARGET_HAS_ICE)
431     CPUBreakpoint *bp;
432
433     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
434         if (bp->pc == pc && bp->flags == flags) {
435             cpu_breakpoint_remove_by_ref(env, bp);
436             return 0;
437         }
438     }
439     return -ENOENT;
440 #else
441     return -ENOSYS;
442 #endif
443 }
444
445 /* Remove a specific breakpoint by reference.  */
446 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
447 {
448 #if defined(TARGET_HAS_ICE)
449     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
450
451     breakpoint_invalidate(env, breakpoint->pc);
452
453     g_free(breakpoint);
454 #endif
455 }
456
457 /* Remove all matching breakpoints. */
458 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
459 {
460 #if defined(TARGET_HAS_ICE)
461     CPUBreakpoint *bp, *next;
462
463     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
464         if (bp->flags & mask)
465             cpu_breakpoint_remove_by_ref(env, bp);
466     }
467 #endif
468 }
469
470 /* enable or disable single step mode. EXCP_DEBUG is returned by the
471    CPU loop after each instruction */
472 void cpu_single_step(CPUArchState *env, int enabled)
473 {
474 #if defined(TARGET_HAS_ICE)
475     if (env->singlestep_enabled != enabled) {
476         env->singlestep_enabled = enabled;
477         if (kvm_enabled())
478             kvm_update_guest_debug(env, 0);
479         else {
480             /* must flush all the translated code to avoid inconsistencies */
481             /* XXX: only flush what is necessary */
482             tb_flush(env);
483         }
484     }
485 #endif
486 }
487
488 void cpu_reset_interrupt(CPUArchState *env, int mask)
489 {
490     env->interrupt_request &= ~mask;
491 }
492
493 void cpu_exit(CPUArchState *env)
494 {
495     env->exit_request = 1;
496     cpu_unlink_tb(env);
497 }
498
499 void cpu_abort(CPUArchState *env, const char *fmt, ...)
500 {
501     va_list ap;
502     va_list ap2;
503
504     va_start(ap, fmt);
505     va_copy(ap2, ap);
506     fprintf(stderr, "qemu: fatal: ");
507     vfprintf(stderr, fmt, ap);
508     fprintf(stderr, "\n");
509     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
510     if (qemu_log_enabled()) {
511         qemu_log("qemu: fatal: ");
512         qemu_log_vprintf(fmt, ap2);
513         qemu_log("\n");
514         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
515         qemu_log_flush();
516         qemu_log_close();
517     }
518     va_end(ap2);
519     va_end(ap);
520 #if defined(CONFIG_USER_ONLY)
521     {
522         struct sigaction act;
523         sigfillset(&act.sa_mask);
524         act.sa_handler = SIG_DFL;
525         sigaction(SIGABRT, &act, NULL);
526     }
527 #endif
528     abort();
529 }
530
531 CPUArchState *cpu_copy(CPUArchState *env)
532 {
533     CPUArchState *new_env = cpu_init(env->cpu_model_str);
534     CPUArchState *next_cpu = new_env->next_cpu;
535 #if defined(TARGET_HAS_ICE)
536     CPUBreakpoint *bp;
537     CPUWatchpoint *wp;
538 #endif
539
540     memcpy(new_env, env, sizeof(CPUArchState));
541
542     /* Preserve chaining. */
543     new_env->next_cpu = next_cpu;
544
545     /* Clone all break/watchpoints.
546        Note: Once we support ptrace with hw-debug register access, make sure
547        BP_CPU break/watchpoints are handled correctly on clone. */
548     QTAILQ_INIT(&new_env->breakpoints);
549     QTAILQ_INIT(&new_env->watchpoints);
550 #if defined(TARGET_HAS_ICE)
551     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
552         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
553     }
554     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
555         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
556                               wp->flags, NULL);
557     }
558 #endif
559
560     return new_env;
561 }
562
563 #if !defined(CONFIG_USER_ONLY)
564 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
565                                       uintptr_t length)
566 {
567     uintptr_t start1;
568
569     /* we modify the TLB cache so that the dirty bit will be set again
570        when accessing the range */
571     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
572     /* Check that we don't span multiple blocks - this breaks the
573        address comparisons below.  */
574     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
575             != (end - 1) - start) {
576         abort();
577     }
578     cpu_tlb_reset_dirty_all(start1, length);
579
580 }
581
582 /* Note: start and end must be within the same ram block.  */
583 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
584                                      int dirty_flags)
585 {
586     uintptr_t length;
587
588     start &= TARGET_PAGE_MASK;
589     end = TARGET_PAGE_ALIGN(end);
590
591     length = end - start;
592     if (length == 0)
593         return;
594     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
595
596     if (tcg_enabled()) {
597         tlb_reset_dirty_range_all(start, end, length);
598     }
599 }
600
601 static int cpu_physical_memory_set_dirty_tracking(int enable)
602 {
603     int ret = 0;
604     in_migration = enable;
605     return ret;
606 }
607
608 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
609                                                    MemoryRegionSection *section,
610                                                    target_ulong vaddr,
611                                                    hwaddr paddr,
612                                                    int prot,
613                                                    target_ulong *address)
614 {
615     hwaddr iotlb;
616     CPUWatchpoint *wp;
617
618     if (memory_region_is_ram(section->mr)) {
619         /* Normal RAM.  */
620         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
621             + memory_region_section_addr(section, paddr);
622         if (!section->readonly) {
623             iotlb |= phys_section_notdirty;
624         } else {
625             iotlb |= phys_section_rom;
626         }
627     } else {
628         /* IO handlers are currently passed a physical address.
629            It would be nice to pass an offset from the base address
630            of that region.  This would avoid having to special case RAM,
631            and avoid full address decoding in every device.
632            We can't use the high bits of pd for this because
633            IO_MEM_ROMD uses these as a ram address.  */
634         iotlb = section - phys_sections;
635         iotlb += memory_region_section_addr(section, paddr);
636     }
637
638     /* Make accesses to pages with watchpoints go via the
639        watchpoint trap routines.  */
640     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
641         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
642             /* Avoid trapping reads of pages with a write breakpoint. */
643             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
644                 iotlb = phys_section_watch + paddr;
645                 *address |= TLB_MMIO;
646                 break;
647             }
648         }
649     }
650
651     return iotlb;
652 }
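/*
 * The iotlb value built above is overloaded: for RAM-backed sections it is a
 * ram_addr tagged with phys_section_notdirty or phys_section_rom, while for
 * MMIO (and watchpoints) it encodes which phys_sections entry should handle
 * the access, which is what iotlb_to_region() below decodes for the softmmu
 * slow path.
 */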
653 #endif /* !defined(CONFIG_USER_ONLY) */
654
655 #if !defined(CONFIG_USER_ONLY)
656
657 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
658 typedef struct subpage_t {
659     MemoryRegion iomem;
660     hwaddr base;
661     uint16_t sub_section[TARGET_PAGE_SIZE];
662 } subpage_t;
663
664 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
665                              uint16_t section);
666 static subpage_t *subpage_init(hwaddr base);
667 static void destroy_page_desc(uint16_t section_index)
668 {
669     MemoryRegionSection *section = &phys_sections[section_index];
670     MemoryRegion *mr = section->mr;
671
672     if (mr->subpage) {
673         subpage_t *subpage = container_of(mr, subpage_t, iomem);
674         memory_region_destroy(&subpage->iomem);
675         g_free(subpage);
676     }
677 }
678
679 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
680 {
681     unsigned i;
682     PhysPageEntry *p;
683
684     if (lp->ptr == PHYS_MAP_NODE_NIL) {
685         return;
686     }
687
688     p = phys_map_nodes[lp->ptr];
689     for (i = 0; i < L2_SIZE; ++i) {
690         if (!p[i].is_leaf) {
691             destroy_l2_mapping(&p[i], level - 1);
692         } else {
693             destroy_page_desc(p[i].ptr);
694         }
695     }
696     lp->is_leaf = 0;
697     lp->ptr = PHYS_MAP_NODE_NIL;
698 }
699
700 static void destroy_all_mappings(AddressSpaceDispatch *d)
701 {
702     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
703     phys_map_nodes_reset();
704 }
705
706 static uint16_t phys_section_add(MemoryRegionSection *section)
707 {
708     if (phys_sections_nb == phys_sections_nb_alloc) {
709         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
710         phys_sections = g_renew(MemoryRegionSection, phys_sections,
711                                 phys_sections_nb_alloc);
712     }
713     phys_sections[phys_sections_nb] = *section;
714     return phys_sections_nb++;
715 }
716
717 static void phys_sections_clear(void)
718 {
719     phys_sections_nb = 0;
720 }
721
722 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
723 {
724     subpage_t *subpage;
725     hwaddr base = section->offset_within_address_space
726         & TARGET_PAGE_MASK;
727     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
728     MemoryRegionSection subsection = {
729         .offset_within_address_space = base,
730         .size = TARGET_PAGE_SIZE,
731     };
732     hwaddr start, end;
733
734     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
735
736     if (!(existing->mr->subpage)) {
737         subpage = subpage_init(base);
738         subsection.mr = &subpage->iomem;
739         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
740                       phys_section_add(&subsection));
741     } else {
742         subpage = container_of(existing->mr, subpage_t, iomem);
743     }
744     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
745     end = start + section->size - 1;
746     subpage_register(subpage, start, end, phys_section_add(section));
747 }
748
749
750 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
751 {
752     hwaddr start_addr = section->offset_within_address_space;
753     ram_addr_t size = section->size;
754     hwaddr addr;
755     uint16_t section_index = phys_section_add(section);
756
757     assert(size);
758
759     addr = start_addr;
760     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
761                   section_index);
762 }
763
764 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
765 {
766     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
767     MemoryRegionSection now = *section, remain = *section;
768
769     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
770         || (now.size < TARGET_PAGE_SIZE)) {
771         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
772                        - now.offset_within_address_space,
773                        now.size);
774         register_subpage(d, &now);
775         remain.size -= now.size;
776         remain.offset_within_address_space += now.size;
777         remain.offset_within_region += now.size;
778     }
779     while (remain.size >= TARGET_PAGE_SIZE) {
780         now = remain;
781         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
782             now.size = TARGET_PAGE_SIZE;
783             register_subpage(d, &now);
784         } else {
785             now.size &= TARGET_PAGE_MASK;
786             register_multipage(d, &now);
787         }
788         remain.size -= now.size;
789         remain.offset_within_address_space += now.size;
790         remain.offset_within_region += now.size;
791     }
792     now = remain;
793     if (now.size) {
794         register_subpage(d, &now);
795     }
796 }
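/*
 * mem_add() splits an incoming section into three parts: an unaligned head
 * and tail that are registered as subpages (so several regions can share a
 * single target page), and the page-aligned middle, which is registered in
 * one go as a multipage run pointing at a single phys_sections entry.
 */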
797
798 void qemu_flush_coalesced_mmio_buffer(void)
799 {
800     if (kvm_enabled())
801         kvm_flush_coalesced_mmio_buffer();
802 }
803
804 void qemu_mutex_lock_ramlist(void)
805 {
806     qemu_mutex_lock(&ram_list.mutex);
807 }
808
809 void qemu_mutex_unlock_ramlist(void)
810 {
811     qemu_mutex_unlock(&ram_list.mutex);
812 }
813
814 #if defined(__linux__) && !defined(TARGET_S390X)
815
816 #include <sys/vfs.h>
817
818 #define HUGETLBFS_MAGIC       0x958458f6
819
820 static long gethugepagesize(const char *path)
821 {
822     struct statfs fs;
823     int ret;
824
825     do {
826         ret = statfs(path, &fs);
827     } while (ret != 0 && errno == EINTR);
828
829     if (ret != 0) {
830         perror(path);
831         return 0;
832     }
833
834     if (fs.f_type != HUGETLBFS_MAGIC)
835         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
836
837     return fs.f_bsize;
838 }
839
840 static void *file_ram_alloc(RAMBlock *block,
841                             ram_addr_t memory,
842                             const char *path)
843 {
844     char *filename;
845     void *area;
846     int fd;
847 #ifdef MAP_POPULATE
848     int flags;
849 #endif
850     unsigned long hpagesize;
851
852     hpagesize = gethugepagesize(path);
853     if (!hpagesize) {
854         return NULL;
855     }
856
857     if (memory < hpagesize) {
858         return NULL;
859     }
860
861     if (kvm_enabled() && !kvm_has_sync_mmu()) {
862         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
863         return NULL;
864     }
865
866     if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
867         return NULL;
868     }
869
870     fd = mkstemp(filename);
871     if (fd < 0) {
872         perror("unable to create backing store for hugepages");
873         free(filename);
874         return NULL;
875     }
876     unlink(filename);
877     free(filename);
878
879     memory = (memory+hpagesize-1) & ~(hpagesize-1);
880
881     /*
882      * ftruncate is not supported by hugetlbfs in older
883      * hosts, so don't bother bailing out on errors.
884      * If anything goes wrong with it under other filesystems,
885      * mmap will fail.
886      */
887     if (ftruncate(fd, memory))
888         perror("ftruncate");
889
890 #ifdef MAP_POPULATE
891     /* NB: MAP_POPULATE won't reliably allocate all physical pages when
892      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
893      * to sidestep this quirk.
894      */
895     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
896     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
897 #else
898     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
899 #endif
900     if (area == MAP_FAILED) {
901         perror("file_ram_alloc: can't mmap RAM pages");
902         close(fd);
903         return (NULL);
904     }
905     block->fd = fd;
906     return area;
907 }
908 #endif
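/*
 * file_ram_alloc() above backs guest RAM with a file on a hugetlbfs mount
 * when the user passes -mem-path.  A typical invocation (the mount point is
 * only an example) looks like:
 *
 *     qemu-system-x86_64 -m 1024 -mem-path /dev/hugepages
 *
 * If the given path is not on hugetlbfs, gethugepagesize() warns and falls
 * back to the filesystem's block size.
 */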
909
910 static ram_addr_t find_ram_offset(ram_addr_t size)
911 {
912     RAMBlock *block, *next_block;
913     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
914
915     if (QTAILQ_EMPTY(&ram_list.blocks))
916         return 0;
917
918     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
919         ram_addr_t end, next = RAM_ADDR_MAX;
920
921         end = block->offset + block->length;
922
923         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
924             if (next_block->offset >= end) {
925                 next = MIN(next, next_block->offset);
926             }
927         }
928         if (next - end >= size && next - end < mingap) {
929             offset = end;
930             mingap = next - end;
931         }
932     }
933
934     if (offset == RAM_ADDR_MAX) {
935         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
936                 (uint64_t)size);
937         abort();
938     }
939
940     return offset;
941 }
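/*
 * find_ram_offset() is a best-fit search: for every block it computes the
 * gap up to the closest block above it and keeps the smallest gap that still
 * fits the request (offset 0 if the list is empty).  The quadratic scan is
 * fine in practice because RAM blocks are few and allocations are rare.
 */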
942
943 ram_addr_t last_ram_offset(void)
944 {
945     RAMBlock *block;
946     ram_addr_t last = 0;
947
948     QTAILQ_FOREACH(block, &ram_list.blocks, next)
949         last = MAX(last, block->offset + block->length);
950
951     return last;
952 }
953
954 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
955 {
956     int ret;
957     QemuOpts *machine_opts;
958
959     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
960     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
961     if (machine_opts &&
962         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
963         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
964         if (ret) {
965             perror("qemu_madvise");
966             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
967                             "but dump_guest_core=off specified\n");
968         }
969     }
970 }
971
972 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
973 {
974     RAMBlock *new_block, *block;
975
976     new_block = NULL;
977     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
978         if (block->offset == addr) {
979             new_block = block;
980             break;
981         }
982     }
983     assert(new_block);
984     assert(!new_block->idstr[0]);
985
986     if (dev) {
987         char *id = qdev_get_dev_path(dev);
988         if (id) {
989             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
990             g_free(id);
991         }
992     }
993     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
994
995     /* This assumes the iothread lock is taken here too.  */
996     qemu_mutex_lock_ramlist();
997     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
998         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
999             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1000                     new_block->idstr);
1001             abort();
1002         }
1003     }
1004     qemu_mutex_unlock_ramlist();
1005 }
1006
1007 static int memory_try_enable_merging(void *addr, size_t len)
1008 {
1009     QemuOpts *opts;
1010
1011     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1012     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1013         /* disabled by the user */
1014         return 0;
1015     }
1016
1017     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1018 }
1019
1020 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1021                                    MemoryRegion *mr)
1022 {
1023     RAMBlock *block, *new_block;
1024
1025     size = TARGET_PAGE_ALIGN(size);
1026     new_block = g_malloc0(sizeof(*new_block));
1027
1028     /* This assumes the iothread lock is taken here too.  */
1029     qemu_mutex_lock_ramlist();
1030     new_block->mr = mr;
1031     new_block->offset = find_ram_offset(size);
1032     if (host) {
1033         new_block->host = host;
1034         new_block->flags |= RAM_PREALLOC_MASK;
1035     } else {
1036         if (mem_path) {
1037 #if defined (__linux__) && !defined(TARGET_S390X)
1038             new_block->host = file_ram_alloc(new_block, size, mem_path);
1039             if (!new_block->host) {
1040                 new_block->host = qemu_vmalloc(size);
1041                 memory_try_enable_merging(new_block->host, size);
1042             }
1043 #else
1044             fprintf(stderr, "-mem-path option unsupported\n");
1045             exit(1);
1046 #endif
1047         } else {
1048             if (xen_enabled()) {
1049                 xen_ram_alloc(new_block->offset, size, mr);
1050             } else if (kvm_enabled()) {
1051                 /* some s390/kvm configurations have special constraints */
1052                 new_block->host = kvm_vmalloc(size);
1053             } else {
1054                 new_block->host = qemu_vmalloc(size);
1055             }
1056             memory_try_enable_merging(new_block->host, size);
1057         }
1058     }
1059     new_block->length = size;
1060
1061     /* Keep the list sorted from biggest to smallest block.  */
1062     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1063         if (block->length < new_block->length) {
1064             break;
1065         }
1066     }
1067     if (block) {
1068         QTAILQ_INSERT_BEFORE(block, new_block, next);
1069     } else {
1070         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1071     }
1072     ram_list.mru_block = NULL;
1073
1074     ram_list.version++;
1075     qemu_mutex_unlock_ramlist();
1076
1077     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1078                                        last_ram_offset() >> TARGET_PAGE_BITS);
1079     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1080            0, size >> TARGET_PAGE_BITS);
1081     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1082
1083     qemu_ram_setup_dump(new_block->host, size);
1084     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1085
1086     if (kvm_enabled())
1087         kvm_setup_guest_memory(new_block->host, size);
1088
1089     return new_block->offset;
1090 }
1091
1092 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1093 {
1094     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1095 }
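/*
 * Board and device code does not normally call these two allocators
 * directly; the usual entry points are the memory API helpers
 * memory_region_init_ram() and memory_region_init_ram_ptr() (in memory.c,
 * at least in this version of the tree), which end up here and hand back a
 * ram_addr_t that later feeds qemu_get_ram_ptr() below.
 */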
1096
1097 void qemu_ram_free_from_ptr(ram_addr_t addr)
1098 {
1099     RAMBlock *block;
1100
1101     /* This assumes the iothread lock is taken here too.  */
1102     qemu_mutex_lock_ramlist();
1103     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1104         if (addr == block->offset) {
1105             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1106             ram_list.mru_block = NULL;
1107             ram_list.version++;
1108             g_free(block);
1109             break;
1110         }
1111     }
1112     qemu_mutex_unlock_ramlist();
1113 }
1114
1115 void qemu_ram_free(ram_addr_t addr)
1116 {
1117     RAMBlock *block;
1118
1119     /* This assumes the iothread lock is taken here too.  */
1120     qemu_mutex_lock_ramlist();
1121     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1122         if (addr == block->offset) {
1123             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1124             ram_list.mru_block = NULL;
1125             ram_list.version++;
1126             if (block->flags & RAM_PREALLOC_MASK) {
1127                 ;
1128             } else if (mem_path) {
1129 #if defined (__linux__) && !defined(TARGET_S390X)
1130                 if (block->fd) {
1131                     munmap(block->host, block->length);
1132                     close(block->fd);
1133                 } else {
1134                     qemu_vfree(block->host);
1135                 }
1136 #else
1137                 abort();
1138 #endif
1139             } else {
1140 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1141                 munmap(block->host, block->length);
1142 #else
1143                 if (xen_enabled()) {
1144                     xen_invalidate_map_cache_entry(block->host);
1145                 } else {
1146                     qemu_vfree(block->host);
1147                 }
1148 #endif
1149             }
1150             g_free(block);
1151             break;
1152         }
1153     }
1154     qemu_mutex_unlock_ramlist();
1155
1156 }
1157
1158 #ifndef _WIN32
1159 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1160 {
1161     RAMBlock *block;
1162     ram_addr_t offset;
1163     int flags;
1164     void *area, *vaddr;
1165
1166     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1167         offset = addr - block->offset;
1168         if (offset < block->length) {
1169             vaddr = block->host + offset;
1170             if (block->flags & RAM_PREALLOC_MASK) {
1171                 ;
1172             } else {
1173                 flags = MAP_FIXED;
1174                 munmap(vaddr, length);
1175                 if (mem_path) {
1176 #if defined(__linux__) && !defined(TARGET_S390X)
1177                     if (block->fd) {
1178 #ifdef MAP_POPULATE
1179                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1180                             MAP_PRIVATE;
1181 #else
1182                         flags |= MAP_PRIVATE;
1183 #endif
1184                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1185                                     flags, block->fd, offset);
1186                     } else {
1187                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1188                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1189                                     flags, -1, 0);
1190                     }
1191 #else
1192                     abort();
1193 #endif
1194                 } else {
1195 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1196                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1197                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1198                                 flags, -1, 0);
1199 #else
1200                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1201                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1202                                 flags, -1, 0);
1203 #endif
1204                 }
1205                 if (area != vaddr) {
1206                     fprintf(stderr, "Could not remap addr: "
1207                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1208                             length, addr);
1209                     exit(1);
1210                 }
1211                 memory_try_enable_merging(vaddr, length);
1212                 qemu_ram_setup_dump(vaddr, length);
1213             }
1214             return;
1215         }
1216     }
1217 }
1218 #endif /* !_WIN32 */
1219
1220 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1221    With the exception of the softmmu code in this file, this should
1222    only be used for local memory (e.g. video ram) that the device owns,
1223    and knows it isn't going to access beyond the end of the block.
1224
1225    It should not be used for general purpose DMA.
1226    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1227  */
1228 void *qemu_get_ram_ptr(ram_addr_t addr)
1229 {
1230     RAMBlock *block;
1231
1232     /* The list is protected by the iothread lock here.  */
1233     block = ram_list.mru_block;
1234     if (block && addr - block->offset < block->length) {
1235         goto found;
1236     }
1237     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1238         if (addr - block->offset < block->length) {
1239             goto found;
1240         }
1241     }
1242
1243     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1244     abort();
1245
1246 found:
1247     ram_list.mru_block = block;
1248     if (xen_enabled()) {
1249         /* We need to check if the requested address is in the RAM
1250          * because we don't want to map the entire memory in QEMU.
1251          * In that case just map until the end of the page.
1252          */
1253         if (block->offset == 0) {
1254             return xen_map_cache(addr, 0, 0);
1255         } else if (block->host == NULL) {
1256             block->host =
1257                 xen_map_cache(block->offset, block->length, 1);
1258         }
1259     }
1260     return block->host + (addr - block->offset);
1261 }
1262
1263 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1264  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1265  *
1266  * ??? Is this still necessary?
1267  */
1268 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1269 {
1270     RAMBlock *block;
1271
1272     /* The list is protected by the iothread lock here.  */
1273     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1274         if (addr - block->offset < block->length) {
1275             if (xen_enabled()) {
1276                 /* We need to check if the requested address is in the RAM
1277                  * because we don't want to map the entire memory in QEMU.
1278                  * In that case just map until the end of the page.
1279                  */
1280                 if (block->offset == 0) {
1281                     return xen_map_cache(addr, 0, 0);
1282                 } else if (block->host == NULL) {
1283                     block->host =
1284                         xen_map_cache(block->offset, block->length, 1);
1285                 }
1286             }
1287             return block->host + (addr - block->offset);
1288         }
1289     }
1290
1291     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1292     abort();
1293
1294     return NULL;
1295 }
1296
1297 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1298  * but takes a size argument */
1299 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1300 {
1301     if (*size == 0) {
1302         return NULL;
1303     }
1304     if (xen_enabled()) {
1305         return xen_map_cache(addr, *size, 1);
1306     } else {
1307         RAMBlock *block;
1308
1309         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1310             if (addr - block->offset < block->length) {
1311                 if (addr - block->offset + *size > block->length)
1312                     *size = block->length - addr + block->offset;
1313                 return block->host + (addr - block->offset);
1314             }
1315         }
1316
1317         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1318         abort();
1319     }
1320 }
1321
1322 void qemu_put_ram_ptr(void *addr)
1323 {
1324     trace_qemu_put_ram_ptr(addr);
1325 }
1326
1327 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1328 {
1329     RAMBlock *block;
1330     uint8_t *host = ptr;
1331
1332     if (xen_enabled()) {
1333         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1334         return 0;
1335     }
1336
1337     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1338         /* This can happen when the block is not mapped. */
1339         if (block->host == NULL) {
1340             continue;
1341         }
1342         if (host - block->host < block->length) {
1343             *ram_addr = block->offset + (host - block->host);
1344             return 0;
1345         }
1346     }
1347
1348     return -1;
1349 }
1350
1351 /* Some of the softmmu routines need to translate from a host pointer
1352    (typically a TLB entry) back to a ram offset.  */
1353 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1354 {
1355     ram_addr_t ram_addr;
1356
1357     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1358         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1359         abort();
1360     }
1361     return ram_addr;
1362 }
1363
1364 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1365                                     unsigned size)
1366 {
1367 #ifdef DEBUG_UNASSIGNED
1368     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1369 #endif
1370 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1371     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1372 #endif
1373     return 0;
1374 }
1375
1376 static void unassigned_mem_write(void *opaque, hwaddr addr,
1377                                  uint64_t val, unsigned size)
1378 {
1379 #ifdef DEBUG_UNASSIGNED
1380     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1381 #endif
1382 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1383     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1384 #endif
1385 }
1386
1387 static const MemoryRegionOps unassigned_mem_ops = {
1388     .read = unassigned_mem_read,
1389     .write = unassigned_mem_write,
1390     .endianness = DEVICE_NATIVE_ENDIAN,
1391 };
1392
1393 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1394                                unsigned size)
1395 {
1396     abort();
1397 }
1398
1399 static void error_mem_write(void *opaque, hwaddr addr,
1400                             uint64_t value, unsigned size)
1401 {
1402     abort();
1403 }
1404
1405 static const MemoryRegionOps error_mem_ops = {
1406     .read = error_mem_read,
1407     .write = error_mem_write,
1408     .endianness = DEVICE_NATIVE_ENDIAN,
1409 };
1410
1411 static const MemoryRegionOps rom_mem_ops = {
1412     .read = error_mem_read,
1413     .write = unassigned_mem_write,
1414     .endianness = DEVICE_NATIVE_ENDIAN,
1415 };
1416
1417 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1418                                uint64_t val, unsigned size)
1419 {
1420     int dirty_flags;
1421     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1422     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1423 #if !defined(CONFIG_USER_ONLY)
1424         tb_invalidate_phys_page_fast(ram_addr, size);
1425         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1426 #endif
1427     }
1428     switch (size) {
1429     case 1:
1430         stb_p(qemu_get_ram_ptr(ram_addr), val);
1431         break;
1432     case 2:
1433         stw_p(qemu_get_ram_ptr(ram_addr), val);
1434         break;
1435     case 4:
1436         stl_p(qemu_get_ram_ptr(ram_addr), val);
1437         break;
1438     default:
1439         abort();
1440     }
1441     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1442     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1443     /* we remove the notdirty callback only if the code has been
1444        flushed */
1445     if (dirty_flags == 0xff)
1446         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1447 }
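/*
 * notdirty_mem_write() keeps translated code coherent: the TLB code routes
 * writes to RAM pages whose dirty flags are not all set through this region,
 * so the handler can invalidate any translated blocks covering the address,
 * perform the store, update the dirty flags, and switch the page back to a
 * direct RAM mapping once all flags (including CODE_DIRTY_FLAG) are set.
 */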
1448
1449 static const MemoryRegionOps notdirty_mem_ops = {
1450     .read = error_mem_read,
1451     .write = notdirty_mem_write,
1452     .endianness = DEVICE_NATIVE_ENDIAN,
1453 };
1454
1455 /* Generate a debug exception if a watchpoint has been hit.  */
1456 static void check_watchpoint(int offset, int len_mask, int flags)
1457 {
1458     CPUArchState *env = cpu_single_env;
1459     target_ulong pc, cs_base;
1460     target_ulong vaddr;
1461     CPUWatchpoint *wp;
1462     int cpu_flags;
1463
1464     if (env->watchpoint_hit) {
1465         /* We re-entered the check after replacing the TB. Now raise
1466          * the debug interrupt so that it will trigger after the
1467          * current instruction. */
1468         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1469         return;
1470     }
1471     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1472     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1473         if ((vaddr == (wp->vaddr & len_mask) ||
1474              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1475             wp->flags |= BP_WATCHPOINT_HIT;
1476             if (!env->watchpoint_hit) {
1477                 env->watchpoint_hit = wp;
1478                 tb_check_watchpoint(env);
1479                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1480                     env->exception_index = EXCP_DEBUG;
1481                     cpu_loop_exit(env);
1482                 } else {
1483                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1484                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1485                     cpu_resume_from_signal(env, NULL);
1486                 }
1487             }
1488         } else {
1489             wp->flags &= ~BP_WATCHPOINT_HIT;
1490         }
1491     }
1492 }
1493
1494 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1495    so these check for a hit then pass through to the normal out-of-line
1496    phys routines.  */
1497 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1498                                unsigned size)
1499 {
1500     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1501     switch (size) {
1502     case 1: return ldub_phys(addr);
1503     case 2: return lduw_phys(addr);
1504     case 4: return ldl_phys(addr);
1505     default: abort();
1506     }
1507 }
1508
1509 static void watch_mem_write(void *opaque, hwaddr addr,
1510                             uint64_t val, unsigned size)
1511 {
1512     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1513     switch (size) {
1514     case 1:
1515         stb_phys(addr, val);
1516         break;
1517     case 2:
1518         stw_phys(addr, val);
1519         break;
1520     case 4:
1521         stl_phys(addr, val);
1522         break;
1523     default: abort();
1524     }
1525 }
1526
1527 static const MemoryRegionOps watch_mem_ops = {
1528     .read = watch_mem_read,
1529     .write = watch_mem_write,
1530     .endianness = DEVICE_NATIVE_ENDIAN,
1531 };
1532
1533 static uint64_t subpage_read(void *opaque, hwaddr addr,
1534                              unsigned len)
1535 {
1536     subpage_t *mmio = opaque;
1537     unsigned int idx = SUBPAGE_IDX(addr);
1538     MemoryRegionSection *section;
1539 #if defined(DEBUG_SUBPAGE)
1540     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1541            mmio, len, addr, idx);
1542 #endif
1543
1544     section = &phys_sections[mmio->sub_section[idx]];
1545     addr += mmio->base;
1546     addr -= section->offset_within_address_space;
1547     addr += section->offset_within_region;
1548     return io_mem_read(section->mr, addr, len);
1549 }
1550
1551 static void subpage_write(void *opaque, hwaddr addr,
1552                           uint64_t value, unsigned len)
1553 {
1554     subpage_t *mmio = opaque;
1555     unsigned int idx = SUBPAGE_IDX(addr);
1556     MemoryRegionSection *section;
1557 #if defined(DEBUG_SUBPAGE)
1558     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1559            " idx %d value %"PRIx64"\n",
1560            __func__, mmio, len, addr, idx, value);
1561 #endif
1562
1563     section = &phys_sections[mmio->sub_section[idx]];
1564     addr += mmio->base;
1565     addr -= section->offset_within_address_space;
1566     addr += section->offset_within_region;
1567     io_mem_write(section->mr, addr, value, len);
1568 }
1569
1570 static const MemoryRegionOps subpage_ops = {
1571     .read = subpage_read,
1572     .write = subpage_write,
1573     .endianness = DEVICE_NATIVE_ENDIAN,
1574 };
1575
1576 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1577                                  unsigned size)
1578 {
1579     ram_addr_t raddr = addr;
1580     void *ptr = qemu_get_ram_ptr(raddr);
1581     switch (size) {
1582     case 1: return ldub_p(ptr);
1583     case 2: return lduw_p(ptr);
1584     case 4: return ldl_p(ptr);
1585     default: abort();
1586     }
1587 }
1588
1589 static void subpage_ram_write(void *opaque, hwaddr addr,
1590                               uint64_t value, unsigned size)
1591 {
1592     ram_addr_t raddr = addr;
1593     void *ptr = qemu_get_ram_ptr(raddr);
1594     switch (size) {
1595     case 1: return stb_p(ptr, value);
1596     case 2: return stw_p(ptr, value);
1597     case 4: return stl_p(ptr, value);
1598     default: abort();
1599     }
1600 }
1601
1602 static const MemoryRegionOps subpage_ram_ops = {
1603     .read = subpage_ram_read,
1604     .write = subpage_ram_write,
1605     .endianness = DEVICE_NATIVE_ENDIAN,
1606 };
1607
1608 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1609                              uint16_t section)
1610 {
1611     int idx, eidx;
1612
1613     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1614         return -1;
1615     idx = SUBPAGE_IDX(start);
1616     eidx = SUBPAGE_IDX(end);
1617 #if defined(DEBUG_SUBPAGE)
1618     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1619            __func__, mmio, start, end, idx, eidx, section);
1620 #endif
1621     if (memory_region_is_ram(phys_sections[section].mr)) {
1622         MemoryRegionSection new_section = phys_sections[section];
1623         new_section.mr = &io_mem_subpage_ram;
1624         section = phys_section_add(&new_section);
1625     }
1626     for (; idx <= eidx; idx++) {
1627         mmio->sub_section[idx] = section;
1628     }
1629
1630     return 0;
1631 }
1632
1633 static subpage_t *subpage_init(hwaddr base)
1634 {
1635     subpage_t *mmio;
1636
1637     mmio = g_malloc0(sizeof(subpage_t));
1638
1639     mmio->base = base;
1640     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1641                           "subpage", TARGET_PAGE_SIZE);
1642     mmio->iomem.subpage = true;
1643 #if defined(DEBUG_SUBPAGE)
1644     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1645            mmio, base, TARGET_PAGE_SIZE);
1646 #endif
1647     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1648
1649     return mmio;
1650 }
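/*
 * Subpages handle the case where a single target page is covered by more
 * than one memory region: sub_section[] maps every byte offset within the
 * page to a phys_sections index, subpage_register() fills a range of those
 * slots, and subpage_read()/subpage_write() forward each access to the
 * owning region after rebasing the address.  RAM-backed sections are
 * redirected through io_mem_subpage_ram so that byte-granular dispatch
 * still ends up as a plain host memory access.
 */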
1651
1652 static uint16_t dummy_section(MemoryRegion *mr)
1653 {
1654     MemoryRegionSection section = {
1655         .mr = mr,
1656         .offset_within_address_space = 0,
1657         .offset_within_region = 0,
1658         .size = UINT64_MAX,
1659     };
1660
1661     return phys_section_add(&section);
1662 }
1663
1664 MemoryRegion *iotlb_to_region(hwaddr index)
1665 {
1666     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1667 }
1668
1669 static void io_mem_init(void)
1670 {
1671     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1672     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1673     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1674                           "unassigned", UINT64_MAX);
1675     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1676                           "notdirty", UINT64_MAX);
1677     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1678                           "subpage-ram", UINT64_MAX);
1679     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1680                           "watch", UINT64_MAX);
1681 }
1682
1683 static void mem_begin(MemoryListener *listener)
1684 {
1685     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1686
1687     destroy_all_mappings(d);
1688     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1689 }
1690
1691 static void core_begin(MemoryListener *listener)
1692 {
1693     phys_sections_clear();
1694     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1695     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1696     phys_section_rom = dummy_section(&io_mem_rom);
1697     phys_section_watch = dummy_section(&io_mem_watch);
1698 }
1699
1700 static void tcg_commit(MemoryListener *listener)
1701 {
1702     CPUArchState *env;
1703
1704     /* since each CPU stores ram addresses in its TLB cache, we must
1705        reset the modified entries */
1706     /* XXX: slow ! */
1707     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1708         tlb_flush(env, 1);
1709     }
1710 }
1711
1712 static void core_log_global_start(MemoryListener *listener)
1713 {
1714     cpu_physical_memory_set_dirty_tracking(1);
1715 }
1716
1717 static void core_log_global_stop(MemoryListener *listener)
1718 {
1719     cpu_physical_memory_set_dirty_tracking(0);
1720 }
1721
1722 static void io_region_add(MemoryListener *listener,
1723                           MemoryRegionSection *section)
1724 {
1725     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1726
1727     mrio->mr = section->mr;
1728     mrio->offset = section->offset_within_region;
1729     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1730                  section->offset_within_address_space, section->size);
1731     ioport_register(&mrio->iorange);
1732 }
1733
1734 static void io_region_del(MemoryListener *listener,
1735                           MemoryRegionSection *section)
1736 {
1737     isa_unassign_ioport(section->offset_within_address_space, section->size);
1738 }
1739
1740 static MemoryListener core_memory_listener = {
1741     .begin = core_begin,
1742     .log_global_start = core_log_global_start,
1743     .log_global_stop = core_log_global_stop,
1744     .priority = 1,
1745 };
1746
1747 static MemoryListener io_memory_listener = {
1748     .region_add = io_region_add,
1749     .region_del = io_region_del,
1750     .priority = 0,
1751 };
1752
1753 static MemoryListener tcg_memory_listener = {
1754     .commit = tcg_commit,
1755 };
1756
1757 void address_space_init_dispatch(AddressSpace *as)
1758 {
1759     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1760
1761     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1762     d->listener = (MemoryListener) {
1763         .begin = mem_begin,
1764         .region_add = mem_add,
1765         .region_nop = mem_add,
1766         .priority = 0,
1767     };
1768     as->dispatch = d;
1769     memory_listener_register(&d->listener, as);
1770 }
1771
1772 void address_space_destroy_dispatch(AddressSpace *as)
1773 {
1774     AddressSpaceDispatch *d = as->dispatch;
1775
1776     memory_listener_unregister(&d->listener);
1777     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1778     g_free(d);
1779     as->dispatch = NULL;
1780 }
1781
1782 static void memory_map_init(void)
1783 {
1784     system_memory = g_malloc(sizeof(*system_memory));
1785     memory_region_init(system_memory, "system", INT64_MAX);
1786     address_space_init(&address_space_memory, system_memory);
1787     address_space_memory.name = "memory";
1788
1789     system_io = g_malloc(sizeof(*system_io));
1790     memory_region_init(system_io, "io", 65536);
1791     address_space_init(&address_space_io, system_io);
1792     address_space_io.name = "I/O";
1793
1794     memory_listener_register(&core_memory_listener, &address_space_memory);
1795     memory_listener_register(&io_memory_listener, &address_space_io);
1796     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1797
1798     dma_context_init(&dma_context_memory, &address_space_memory,
1799                      NULL, NULL, NULL);
1800 }
1801
1802 MemoryRegion *get_system_memory(void)
1803 {
1804     return system_memory;
1805 }
1806
1807 MemoryRegion *get_system_io(void)
1808 {
1809     return system_io;
1810 }
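
/*
 * Typical use (illustrative sketch): board code obtains the root region with
 * get_system_memory() and maps its RAM into it.  "ram" and "ram_size" are
 * hypothetical names, not taken from this file.
 *
 *     MemoryRegion *sysmem = get_system_memory();
 *     MemoryRegion *ram = g_new(MemoryRegion, 1);
 *
 *     memory_region_init_ram(ram, "board.ram", ram_size);
 *     vmstate_register_ram_global(ram);
 *     memory_region_add_subregion(sysmem, 0, ram);
 */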
1811
1812 #endif /* !defined(CONFIG_USER_ONLY) */
1813
1814 /* physical memory access (slow version, mainly for debug) */
1815 #if defined(CONFIG_USER_ONLY)
1816 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1817                         uint8_t *buf, int len, int is_write)
1818 {
1819     int l, flags;
1820     target_ulong page;
1821     void * p;
1822
1823     while (len > 0) {
1824         page = addr & TARGET_PAGE_MASK;
1825         l = (page + TARGET_PAGE_SIZE) - addr;
1826         if (l > len)
1827             l = len;
1828         flags = page_get_flags(page);
1829         if (!(flags & PAGE_VALID))
1830             return -1;
1831         if (is_write) {
1832             if (!(flags & PAGE_WRITE))
1833                 return -1;
1834             /* XXX: this code should not depend on lock_user */
1835             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1836                 return -1;
1837             memcpy(p, buf, l);
1838             unlock_user(p, addr, l);
1839         } else {
1840             if (!(flags & PAGE_READ))
1841                 return -1;
1842             /* XXX: this code should not depend on lock_user */
1843             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1844                 return -1;
1845             memcpy(buf, p, l);
1846             unlock_user(p, addr, 0);
1847         }
1848         len -= l;
1849         buf += l;
1850         addr += l;
1851     }
1852     return 0;
1853 }
1854
1855 #else
1856
1857 static void invalidate_and_set_dirty(hwaddr addr,
1858                                      hwaddr length)
1859 {
1860     if (!cpu_physical_memory_is_dirty(addr)) {
1861         /* invalidate code */
1862         tb_invalidate_phys_page_range(addr, addr + length, 0);
1863         /* set dirty bit */
1864         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1865     }
1866     xen_modified_memory(addr, length);
1867 }
1868
1869 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1870                       int len, bool is_write)
1871 {
1872     AddressSpaceDispatch *d = as->dispatch;
1873     int l;
1874     uint8_t *ptr;
1875     uint32_t val;
1876     hwaddr page;
1877     MemoryRegionSection *section;
1878
1879     while (len > 0) {
1880         page = addr & TARGET_PAGE_MASK;
1881         l = (page + TARGET_PAGE_SIZE) - addr;
1882         if (l > len)
1883             l = len;
1884         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1885
1886         if (is_write) {
1887             if (!memory_region_is_ram(section->mr)) {
1888                 hwaddr addr1;
1889                 addr1 = memory_region_section_addr(section, addr);
1890                 /* XXX: could force cpu_single_env to NULL to avoid
1891                    potential bugs */
1892                 if (l >= 4 && ((addr1 & 3) == 0)) {
1893                     /* 32 bit write access */
1894                     val = ldl_p(buf);
1895                     io_mem_write(section->mr, addr1, val, 4);
1896                     l = 4;
1897                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1898                     /* 16 bit write access */
1899                     val = lduw_p(buf);
1900                     io_mem_write(section->mr, addr1, val, 2);
1901                     l = 2;
1902                 } else {
1903                     /* 8 bit write access */
1904                     val = ldub_p(buf);
1905                     io_mem_write(section->mr, addr1, val, 1);
1906                     l = 1;
1907                 }
1908             } else if (!section->readonly) {
1909                 ram_addr_t addr1;
1910                 addr1 = memory_region_get_ram_addr(section->mr)
1911                     + memory_region_section_addr(section, addr);
1912                 /* RAM case */
1913                 ptr = qemu_get_ram_ptr(addr1);
1914                 memcpy(ptr, buf, l);
1915                 invalidate_and_set_dirty(addr1, l);
1916                 qemu_put_ram_ptr(ptr);
1917             }
1918         } else {
1919             if (!(memory_region_is_ram(section->mr) ||
1920                   memory_region_is_romd(section->mr))) {
1921                 hwaddr addr1;
1922                 /* I/O case */
1923                 addr1 = memory_region_section_addr(section, addr);
1924                 if (l >= 4 && ((addr1 & 3) == 0)) {
1925                     /* 32 bit read access */
1926                     val = io_mem_read(section->mr, addr1, 4);
1927                     stl_p(buf, val);
1928                     l = 4;
1929                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1930                     /* 16 bit read access */
1931                     val = io_mem_read(section->mr, addr1, 2);
1932                     stw_p(buf, val);
1933                     l = 2;
1934                 } else {
1935                     /* 8 bit read access */
1936                     val = io_mem_read(section->mr, addr1, 1);
1937                     stb_p(buf, val);
1938                     l = 1;
1939                 }
1940             } else {
1941                 /* RAM case */
1942                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1943                                        + memory_region_section_addr(section,
1944                                                                     addr));
1945                 memcpy(buf, ptr, l);
1946                 qemu_put_ram_ptr(ptr);
1947             }
1948         }
1949         len -= l;
1950         buf += l;
1951         addr += l;
1952     }
1953 }
1954
1955 void address_space_write(AddressSpace *as, hwaddr addr,
1956                          const uint8_t *buf, int len)
1957 {
1958     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1959 }
1960
1961 /**
1962  * address_space_read: read from an address space.
1963  *
1964  * @as: #AddressSpace to be accessed
1965  * @addr: address within that address space
1966  * @buf: buffer with the data transferred
 * @len: length of the data transferred, in bytes
1967  */
1968 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1969 {
1970     address_space_rw(as, addr, buf, len, false);
1971 }
1972
1973
1974 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1975                             int len, int is_write)
1976 {
1977     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1978 }
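
/*
 * Typical use (illustrative sketch): a device model reading and rewriting a
 * small control block at a guest physical address.  "cmd_gpa" is a
 * hypothetical address; cpu_physical_memory_read()/write() are the
 * convenience wrappers around cpu_physical_memory_rw().
 *
 *     uint8_t cmd[16];
 *
 *     cpu_physical_memory_read(cmd_gpa, cmd, sizeof(cmd));
 *     cmd[0] |= 0x1;
 *     cpu_physical_memory_write(cmd_gpa, cmd, sizeof(cmd));
 */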
1979
1980 /* used for ROM loading: can write in RAM and ROM */
1981 void cpu_physical_memory_write_rom(hwaddr addr,
1982                                    const uint8_t *buf, int len)
1983 {
1984     AddressSpaceDispatch *d = address_space_memory.dispatch;
1985     int l;
1986     uint8_t *ptr;
1987     hwaddr page;
1988     MemoryRegionSection *section;
1989
1990     while (len > 0) {
1991         page = addr & TARGET_PAGE_MASK;
1992         l = (page + TARGET_PAGE_SIZE) - addr;
1993         if (l > len)
1994             l = len;
1995         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1996
1997         if (!(memory_region_is_ram(section->mr) ||
1998               memory_region_is_romd(section->mr))) {
1999             /* do nothing */
2000         } else {
2001             unsigned long addr1;
2002             addr1 = memory_region_get_ram_addr(section->mr)
2003                 + memory_region_section_addr(section, addr);
2004             /* ROM/RAM case */
2005             ptr = qemu_get_ram_ptr(addr1);
2006             memcpy(ptr, buf, l);
2007             invalidate_and_set_dirty(addr1, l);
2008             qemu_put_ram_ptr(ptr);
2009         }
2010         len -= l;
2011         buf += l;
2012         addr += l;
2013     }
2014 }
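
/*
 * Typical use (illustrative sketch): copying a firmware blob into a
 * ROM-backed region from board setup code; "blob", "blob_size" and
 * "FW_BASE" are hypothetical.
 *
 *     cpu_physical_memory_write_rom(FW_BASE, blob, blob_size);
 */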
2015
2016 typedef struct {
2017     void *buffer;
2018     hwaddr addr;
2019     hwaddr len;
2020 } BounceBuffer;
2021
2022 static BounceBuffer bounce;
2023
2024 typedef struct MapClient {
2025     void *opaque;
2026     void (*callback)(void *opaque);
2027     QLIST_ENTRY(MapClient) link;
2028 } MapClient;
2029
2030 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2031     = QLIST_HEAD_INITIALIZER(map_client_list);
2032
2033 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2034 {
2035     MapClient *client = g_malloc(sizeof(*client));
2036
2037     client->opaque = opaque;
2038     client->callback = callback;
2039     QLIST_INSERT_HEAD(&map_client_list, client, link);
2040     return client;
2041 }
2042
2043 static void cpu_unregister_map_client(void *_client)
2044 {
2045     MapClient *client = (MapClient *)_client;
2046
2047     QLIST_REMOVE(client, link);
2048     g_free(client);
2049 }
2050
2051 static void cpu_notify_map_clients(void)
2052 {
2053     MapClient *client;
2054
2055     while (!QLIST_EMPTY(&map_client_list)) {
2056         client = QLIST_FIRST(&map_client_list);
2057         client->callback(client->opaque);
2058         cpu_unregister_map_client(client);
2059     }
2060 }
2061
2062 /* Map a physical memory region into a host virtual address.
2063  * May map a subset of the requested range, given by and returned in *plen.
2064  * May return NULL if resources needed to perform the mapping are exhausted.
2065  * Use only for reads OR writes - not for read-modify-write operations.
2066  * Use cpu_register_map_client() to know when retrying the map operation is
2067  * likely to succeed.
2068  */
2069 void *address_space_map(AddressSpace *as,
2070                         hwaddr addr,
2071                         hwaddr *plen,
2072                         bool is_write)
2073 {
2074     AddressSpaceDispatch *d = as->dispatch;
2075     hwaddr len = *plen;
2076     hwaddr todo = 0;
2077     int l;
2078     hwaddr page;
2079     MemoryRegionSection *section;
2080     ram_addr_t raddr = RAM_ADDR_MAX;
2081     ram_addr_t rlen;
2082     void *ret;
2083
2084     while (len > 0) {
2085         page = addr & TARGET_PAGE_MASK;
2086         l = (page + TARGET_PAGE_SIZE) - addr;
2087         if (l > len)
2088             l = len;
2089         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2090
2091         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2092             if (todo || bounce.buffer) {
2093                 break;
2094             }
2095             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2096             bounce.addr = addr;
2097             bounce.len = l;
2098             if (!is_write) {
2099                 address_space_read(as, addr, bounce.buffer, l);
2100             }
2101
2102             *plen = l;
2103             return bounce.buffer;
2104         }
2105         if (!todo) {
2106             raddr = memory_region_get_ram_addr(section->mr)
2107                 + memory_region_section_addr(section, addr);
2108         }
2109
2110         len -= l;
2111         addr += l;
2112         todo += l;
2113     }
2114     rlen = todo;
2115     ret = qemu_ram_ptr_length(raddr, &rlen);
2116     *plen = rlen;
2117     return ret;
2118 }
2119
2120 /* Unmaps a memory region previously mapped by address_space_map().
2121  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2122  * the amount of memory that was actually read or written by the caller.
2123  */
2124 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2125                          int is_write, hwaddr access_len)
2126 {
2127     if (buffer != bounce.buffer) {
2128         if (is_write) {
2129             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2130             while (access_len) {
2131                 unsigned l;
2132                 l = TARGET_PAGE_SIZE;
2133                 if (l > access_len)
2134                     l = access_len;
2135                 invalidate_and_set_dirty(addr1, l);
2136                 addr1 += l;
2137                 access_len -= l;
2138             }
2139         }
2140         if (xen_enabled()) {
2141             xen_invalidate_map_cache_entry(buffer);
2142         }
2143         return;
2144     }
2145     if (is_write) {
2146         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2147     }
2148     qemu_vfree(bounce.buffer);
2149     bounce.buffer = NULL;
2150     cpu_notify_map_clients();
2151 }
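
/*
 * Illustrative sketch of the map/unmap contract documented above: map for
 * writing, fill the returned host pointer, then unmap with the number of
 * bytes actually written.  When NULL is returned (for example while the
 * single bounce buffer is in use), a caller can register a callback with
 * cpu_register_map_client() and retry once it fires.  "gpa" and "size" are
 * hypothetical.
 *
 *     hwaddr plen = size;
 *     void *host = address_space_map(&address_space_memory, gpa, &plen, true);
 *
 *     if (host) {
 *         memset(host, 0, plen);
 *         address_space_unmap(&address_space_memory, host, plen, true, plen);
 *     }
 */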
2152
2153 void *cpu_physical_memory_map(hwaddr addr,
2154                               hwaddr *plen,
2155                               int is_write)
2156 {
2157     return address_space_map(&address_space_memory, addr, plen, is_write);
2158 }
2159
2160 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2161                                int is_write, hwaddr access_len)
2162 {
2163     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2164 }
2165
2166 /* warning: addr must be aligned */
2167 static inline uint32_t ldl_phys_internal(hwaddr addr,
2168                                          enum device_endian endian)
2169 {
2170     uint8_t *ptr;
2171     uint32_t val;
2172     MemoryRegionSection *section;
2173
2174     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2175
2176     if (!(memory_region_is_ram(section->mr) ||
2177           memory_region_is_romd(section->mr))) {
2178         /* I/O case */
2179         addr = memory_region_section_addr(section, addr);
2180         val = io_mem_read(section->mr, addr, 4);
2181 #if defined(TARGET_WORDS_BIGENDIAN)
2182         if (endian == DEVICE_LITTLE_ENDIAN) {
2183             val = bswap32(val);
2184         }
2185 #else
2186         if (endian == DEVICE_BIG_ENDIAN) {
2187             val = bswap32(val);
2188         }
2189 #endif
2190     } else {
2191         /* RAM case */
2192         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2193                                 & TARGET_PAGE_MASK)
2194                                + memory_region_section_addr(section, addr));
2195         switch (endian) {
2196         case DEVICE_LITTLE_ENDIAN:
2197             val = ldl_le_p(ptr);
2198             break;
2199         case DEVICE_BIG_ENDIAN:
2200             val = ldl_be_p(ptr);
2201             break;
2202         default:
2203             val = ldl_p(ptr);
2204             break;
2205         }
2206     }
2207     return val;
2208 }
2209
2210 uint32_t ldl_phys(hwaddr addr)
2211 {
2212     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2213 }
2214
2215 uint32_t ldl_le_phys(hwaddr addr)
2216 {
2217     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2218 }
2219
2220 uint32_t ldl_be_phys(hwaddr addr)
2221 {
2222     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2223 }
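
/*
 * Illustrative sketch: a device whose descriptors live in guest memory in
 * little-endian format reads and updates a 32-bit field with the
 * explicit-endian helpers ("desc_gpa" is hypothetical; stl_le_phys() is
 * defined further below).
 *
 *     uint32_t flags = ldl_le_phys(desc_gpa);
 *     stl_le_phys(desc_gpa, flags | 0x1);
 */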
2224
2225 /* warning: addr must be aligned */
2226 static inline uint64_t ldq_phys_internal(hwaddr addr,
2227                                          enum device_endian endian)
2228 {
2229     uint8_t *ptr;
2230     uint64_t val;
2231     MemoryRegionSection *section;
2232
2233     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2234
2235     if (!(memory_region_is_ram(section->mr) ||
2236           memory_region_is_romd(section->mr))) {
2237         /* I/O case */
2238         addr = memory_region_section_addr(section, addr);
2239
2240         /* XXX This is broken when device endian != cpu endian.
2241                Fix and add "endian" variable check */
2242 #ifdef TARGET_WORDS_BIGENDIAN
2243         val = io_mem_read(section->mr, addr, 4) << 32;
2244         val |= io_mem_read(section->mr, addr + 4, 4);
2245 #else
2246         val = io_mem_read(section->mr, addr, 4);
2247         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2248 #endif
2249     } else {
2250         /* RAM case */
2251         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2252                                 & TARGET_PAGE_MASK)
2253                                + memory_region_section_addr(section, addr));
2254         switch (endian) {
2255         case DEVICE_LITTLE_ENDIAN:
2256             val = ldq_le_p(ptr);
2257             break;
2258         case DEVICE_BIG_ENDIAN:
2259             val = ldq_be_p(ptr);
2260             break;
2261         default:
2262             val = ldq_p(ptr);
2263             break;
2264         }
2265     }
2266     return val;
2267 }
2268
2269 uint64_t ldq_phys(hwaddr addr)
2270 {
2271     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2272 }
2273
2274 uint64_t ldq_le_phys(hwaddr addr)
2275 {
2276     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2277 }
2278
2279 uint64_t ldq_be_phys(hwaddr addr)
2280 {
2281     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2282 }
2283
2284 /* XXX: optimize */
2285 uint32_t ldub_phys(hwaddr addr)
2286 {
2287     uint8_t val;
2288     cpu_physical_memory_read(addr, &val, 1);
2289     return val;
2290 }
2291
2292 /* warning: addr must be aligned */
2293 static inline uint32_t lduw_phys_internal(hwaddr addr,
2294                                           enum device_endian endian)
2295 {
2296     uint8_t *ptr;
2297     uint32_t val;
2298     MemoryRegionSection *section;
2299
2300     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2301
2302     if (!(memory_region_is_ram(section->mr) ||
2303           memory_region_is_romd(section->mr))) {
2304         /* I/O case */
2305         addr = memory_region_section_addr(section, addr);
2306         val = io_mem_read(section->mr, addr, 2);
2307 #if defined(TARGET_WORDS_BIGENDIAN)
2308         if (endian == DEVICE_LITTLE_ENDIAN) {
2309             val = bswap16(val);
2310         }
2311 #else
2312         if (endian == DEVICE_BIG_ENDIAN) {
2313             val = bswap16(val);
2314         }
2315 #endif
2316     } else {
2317         /* RAM case */
2318         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2319                                 & TARGET_PAGE_MASK)
2320                                + memory_region_section_addr(section, addr));
2321         switch (endian) {
2322         case DEVICE_LITTLE_ENDIAN:
2323             val = lduw_le_p(ptr);
2324             break;
2325         case DEVICE_BIG_ENDIAN:
2326             val = lduw_be_p(ptr);
2327             break;
2328         default:
2329             val = lduw_p(ptr);
2330             break;
2331         }
2332     }
2333     return val;
2334 }
2335
2336 uint32_t lduw_phys(hwaddr addr)
2337 {
2338     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2339 }
2340
2341 uint32_t lduw_le_phys(hwaddr addr)
2342 {
2343     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2344 }
2345
2346 uint32_t lduw_be_phys(hwaddr addr)
2347 {
2348     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2349 }
2350
2351 /* warning: addr must be aligned. The ram page is not marked as dirty
2352    and the code inside is not invalidated. It is useful if the dirty
2353    bits are used to track modified PTEs */
2354 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2355 {
2356     uint8_t *ptr;
2357     MemoryRegionSection *section;
2358
2359     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2360
2361     if (!memory_region_is_ram(section->mr) || section->readonly) {
2362         addr = memory_region_section_addr(section, addr);
2363         if (memory_region_is_ram(section->mr)) {
2364             section = &phys_sections[phys_section_rom];
2365         }
2366         io_mem_write(section->mr, addr, val, 4);
2367     } else {
2368         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2369                                & TARGET_PAGE_MASK)
2370             + memory_region_section_addr(section, addr);
2371         ptr = qemu_get_ram_ptr(addr1);
2372         stl_p(ptr, val);
2373
2374         if (unlikely(in_migration)) {
2375             if (!cpu_physical_memory_is_dirty(addr1)) {
2376                 /* invalidate code */
2377                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2378                 /* set dirty bit */
2379                 cpu_physical_memory_set_dirty_flags(
2380                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2381             }
2382         }
2383     }
2384 }
2385
2386 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2387 {
2388     uint8_t *ptr;
2389     MemoryRegionSection *section;
2390
2391     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2392
2393     if (!memory_region_is_ram(section->mr) || section->readonly) {
2394         addr = memory_region_section_addr(section, addr);
2395         if (memory_region_is_ram(section->mr)) {
2396             section = &phys_sections[phys_section_rom];
2397         }
2398 #ifdef TARGET_WORDS_BIGENDIAN
2399         io_mem_write(section->mr, addr, val >> 32, 4);
2400         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2401 #else
2402         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2403         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2404 #endif
2405     } else {
2406         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2407                                 & TARGET_PAGE_MASK)
2408                                + memory_region_section_addr(section, addr));
2409         stq_p(ptr, val);
2410     }
2411 }
2412
2413 /* warning: addr must be aligned */
2414 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2415                                      enum device_endian endian)
2416 {
2417     uint8_t *ptr;
2418     MemoryRegionSection *section;
2419
2420     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2421
2422     if (!memory_region_is_ram(section->mr) || section->readonly) {
2423         addr = memory_region_section_addr(section, addr);
2424         if (memory_region_is_ram(section->mr)) {
2425             section = &phys_sections[phys_section_rom];
2426         }
2427 #if defined(TARGET_WORDS_BIGENDIAN)
2428         if (endian == DEVICE_LITTLE_ENDIAN) {
2429             val = bswap32(val);
2430         }
2431 #else
2432         if (endian == DEVICE_BIG_ENDIAN) {
2433             val = bswap32(val);
2434         }
2435 #endif
2436         io_mem_write(section->mr, addr, val, 4);
2437     } else {
2438         unsigned long addr1;
2439         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2440             + memory_region_section_addr(section, addr);
2441         /* RAM case */
2442         ptr = qemu_get_ram_ptr(addr1);
2443         switch (endian) {
2444         case DEVICE_LITTLE_ENDIAN:
2445             stl_le_p(ptr, val);
2446             break;
2447         case DEVICE_BIG_ENDIAN:
2448             stl_be_p(ptr, val);
2449             break;
2450         default:
2451             stl_p(ptr, val);
2452             break;
2453         }
2454         invalidate_and_set_dirty(addr1, 4);
2455     }
2456 }
2457
2458 void stl_phys(hwaddr addr, uint32_t val)
2459 {
2460     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2461 }
2462
2463 void stl_le_phys(hwaddr addr, uint32_t val)
2464 {
2465     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2466 }
2467
2468 void stl_be_phys(hwaddr addr, uint32_t val)
2469 {
2470     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2471 }
2472
2473 /* XXX: optimize */
2474 void stb_phys(hwaddr addr, uint32_t val)
2475 {
2476     uint8_t v = val;
2477     cpu_physical_memory_write(addr, &v, 1);
2478 }
2479
2480 /* warning: addr must be aligned */
2481 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2482                                      enum device_endian endian)
2483 {
2484     uint8_t *ptr;
2485     MemoryRegionSection *section;
2486
2487     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2488
2489     if (!memory_region_is_ram(section->mr) || section->readonly) {
2490         addr = memory_region_section_addr(section, addr);
2491         if (memory_region_is_ram(section->mr)) {
2492             section = &phys_sections[phys_section_rom];
2493         }
2494 #if defined(TARGET_WORDS_BIGENDIAN)
2495         if (endian == DEVICE_LITTLE_ENDIAN) {
2496             val = bswap16(val);
2497         }
2498 #else
2499         if (endian == DEVICE_BIG_ENDIAN) {
2500             val = bswap16(val);
2501         }
2502 #endif
2503         io_mem_write(section->mr, addr, val, 2);
2504     } else {
2505         unsigned long addr1;
2506         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2507             + memory_region_section_addr(section, addr);
2508         /* RAM case */
2509         ptr = qemu_get_ram_ptr(addr1);
2510         switch (endian) {
2511         case DEVICE_LITTLE_ENDIAN:
2512             stw_le_p(ptr, val);
2513             break;
2514         case DEVICE_BIG_ENDIAN:
2515             stw_be_p(ptr, val);
2516             break;
2517         default:
2518             stw_p(ptr, val);
2519             break;
2520         }
2521         invalidate_and_set_dirty(addr1, 2);
2522     }
2523 }
2524
2525 void stw_phys(hwaddr addr, uint32_t val)
2526 {
2527     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2528 }
2529
2530 void stw_le_phys(hwaddr addr, uint32_t val)
2531 {
2532     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2533 }
2534
2535 void stw_be_phys(hwaddr addr, uint32_t val)
2536 {
2537     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2538 }
2539
2540 /* XXX: optimize */
2541 void stq_phys(hwaddr addr, uint64_t val)
2542 {
2543     val = tswap64(val);
2544     cpu_physical_memory_write(addr, &val, 8);
2545 }
2546
2547 void stq_le_phys(hwaddr addr, uint64_t val)
2548 {
2549     val = cpu_to_le64(val);
2550     cpu_physical_memory_write(addr, &val, 8);
2551 }
2552
2553 void stq_be_phys(hwaddr addr, uint64_t val)
2554 {
2555     val = cpu_to_be64(val);
2556     cpu_physical_memory_write(addr, &val, 8);
2557 }
2558
2559 /* virtual memory access for debug (includes writing to ROM) */
2560 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2561                         uint8_t *buf, int len, int is_write)
2562 {
2563     int l;
2564     hwaddr phys_addr;
2565     target_ulong page;
2566
2567     while (len > 0) {
2568         page = addr & TARGET_PAGE_MASK;
2569         phys_addr = cpu_get_phys_page_debug(env, page);
2570         /* if no physical page mapped, return an error */
2571         if (phys_addr == -1)
2572             return -1;
2573         l = (page + TARGET_PAGE_SIZE) - addr;
2574         if (l > len)
2575             l = len;
2576         phys_addr += (addr & ~TARGET_PAGE_MASK);
2577         if (is_write)
2578             cpu_physical_memory_write_rom(phys_addr, buf, l);
2579         else
2580             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2581         len -= l;
2582         buf += l;
2583         addr += l;
2584     }
2585     return 0;
2586 }
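
/*
 * Illustrative sketch: this is the path used by debuggers such as the
 * gdbstub to read guest memory by virtual address; each page is translated
 * with cpu_get_phys_page_debug() as above ("vaddr" is hypothetical).
 *
 *     uint8_t word[4];
 *
 *     if (cpu_memory_rw_debug(env, vaddr, word, sizeof(word), 0) < 0) {
 *         // no valid mapping at vaddr
 *     }
 */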
2587 #endif
2588
2589 #if !defined(CONFIG_USER_ONLY)
2590
2591 /*
2592  * A helper function for the _utterly broken_ virtio device model to find out if
2593  * it's running on a big endian machine. Don't do this at home kids!
2594  */
2595 bool virtio_is_big_endian(void);
2596 bool virtio_is_big_endian(void)
2597 {
2598 #if defined(TARGET_WORDS_BIGENDIAN)
2599     return true;
2600 #else
2601     return false;
2602 #endif
2603 }
2604
2605 #endif
2606
2607 #ifndef CONFIG_USER_ONLY
2608 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2609 {
2610     MemoryRegionSection *section;
2611
2612     section = phys_page_find(address_space_memory.dispatch,
2613                              phys_addr >> TARGET_PAGE_BITS);
2614
2615     return !(memory_region_is_ram(section->mr) ||
2616              memory_region_is_romd(section->mr));
2617 }
2618 #endif