[qemu.git] / exec.c (blob at commit "exec: make -mem-path filenames deterministic")
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
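/*
 * The physical page map is a radix tree whose interior nodes are arrays of
 * L2_SIZE PhysPageEntry slots.  Nodes live in the growable phys_map_nodes
 * pool above and are referenced by small integer index rather than by
 * pointer; PHYS_MAP_NODE_NIL is the reserved "no node allocated yet" value.
 */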
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
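/*
 * Look up the MemoryRegionSection covering a physical page index by walking
 * the multi-level map built by phys_page_set(); pages that were never
 * registered resolve to the "unassigned" section rather than to NULL, e.g.
 * (illustrative use only):
 *
 *     MemoryRegionSection *s = phys_page_find(d, paddr >> TARGET_PAGE_BITS);
 *     if (memory_region_is_ram(s->mr)) {
 *         ...
 *     }
 */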
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUArchState *env = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     env->interrupt_request &= ~0x01;
231     tlb_flush(env, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUArchState),
244         VMSTATE_UINT32(interrupt_request, CPUArchState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #endif
249
250 CPUState *qemu_get_cpu(int index)
251 {
252     CPUArchState *env = first_cpu;
253     CPUState *cpu = NULL;
254
255     while (env) {
256         cpu = ENV_GET_CPU(env);
257         if (cpu->cpu_index == index) {
258             break;
259         }
260         env = env->next_cpu;
261     }
262
263     return cpu;
264 }
265
266 void cpu_exec_init(CPUArchState *env)
267 {
268     CPUState *cpu = ENV_GET_CPU(env);
269     CPUArchState **penv;
270     int cpu_index;
271
272 #if defined(CONFIG_USER_ONLY)
273     cpu_list_lock();
274 #endif
275     env->next_cpu = NULL;
276     penv = &first_cpu;
277     cpu_index = 0;
278     while (*penv != NULL) {
279         penv = &(*penv)->next_cpu;
280         cpu_index++;
281     }
282     cpu->cpu_index = cpu_index;
283     cpu->numa_node = 0;
284     QTAILQ_INIT(&env->breakpoints);
285     QTAILQ_INIT(&env->watchpoints);
286 #ifndef CONFIG_USER_ONLY
287     cpu->thread_id = qemu_get_thread_id();
288 #endif
289     *penv = env;
290 #if defined(CONFIG_USER_ONLY)
291     cpu_list_unlock();
292 #endif
293 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
294     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
295     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
296                     cpu_save, cpu_load, env);
297 #endif
298 }
299
300 #if defined(TARGET_HAS_ICE)
301 #if defined(CONFIG_USER_ONLY)
302 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
303 {
304     tb_invalidate_phys_page_range(pc, pc + 1, 0);
305 }
306 #else
307 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
308 {
309     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
310             (pc & ~TARGET_PAGE_MASK));
311 }
312 #endif
313 #endif /* TARGET_HAS_ICE */
314
315 #if defined(CONFIG_USER_ONLY)
316 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
317
318 {
319 }
320
321 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
322                           int flags, CPUWatchpoint **watchpoint)
323 {
324     return -ENOSYS;
325 }
326 #else
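/*
 * Watchpoint API notes, summarizing the checks below: the length must be a
 * power of two no larger than TARGET_PAGE_SIZE and the address must be
 * aligned to it.  A caller might do roughly (illustrative only):
 *
 *     CPUWatchpoint *wp;
 *     if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp)) {
 *         ... handle -EINVAL (unaligned or oversized watchpoint) ...
 *     }
 */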
327 /* Add a watchpoint.  */
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     target_ulong len_mask = ~(len - 1);
332     CPUWatchpoint *wp;
333
334     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
335     if ((len & (len - 1)) || (addr & ~len_mask) ||
336             len == 0 || len > TARGET_PAGE_SIZE) {
337         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
338                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
339         return -EINVAL;
340     }
341     wp = g_malloc(sizeof(*wp));
342
343     wp->vaddr = addr;
344     wp->len_mask = len_mask;
345     wp->flags = flags;
346
347     /* keep all GDB-injected watchpoints in front */
348     if (flags & BP_GDB)
349         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
350     else
351         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
352
353     tlb_flush_page(env, addr);
354
355     if (watchpoint)
356         *watchpoint = wp;
357     return 0;
358 }
359
360 /* Remove a specific watchpoint.  */
361 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
362                           int flags)
363 {
364     target_ulong len_mask = ~(len - 1);
365     CPUWatchpoint *wp;
366
367     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
368         if (addr == wp->vaddr && len_mask == wp->len_mask
369                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
370             cpu_watchpoint_remove_by_ref(env, wp);
371             return 0;
372         }
373     }
374     return -ENOENT;
375 }
376
377 /* Remove a specific watchpoint by reference.  */
378 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
379 {
380     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
381
382     tlb_flush_page(env, watchpoint->vaddr);
383
384     g_free(watchpoint);
385 }
386
387 /* Remove all matching watchpoints.  */
388 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
389 {
390     CPUWatchpoint *wp, *next;
391
392     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
393         if (wp->flags & mask)
394             cpu_watchpoint_remove_by_ref(env, wp);
395     }
396 }
397 #endif
398
399 /* Add a breakpoint.  */
400 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
401                           CPUBreakpoint **breakpoint)
402 {
403 #if defined(TARGET_HAS_ICE)
404     CPUBreakpoint *bp;
405
406     bp = g_malloc(sizeof(*bp));
407
408     bp->pc = pc;
409     bp->flags = flags;
410
411     /* keep all GDB-injected breakpoints in front */
412     if (flags & BP_GDB)
413         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
414     else
415         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
416
417     breakpoint_invalidate(env, pc);
418
419     if (breakpoint)
420         *breakpoint = bp;
421     return 0;
422 #else
423     return -ENOSYS;
424 #endif
425 }
426
427 /* Remove a specific breakpoint.  */
428 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
429 {
430 #if defined(TARGET_HAS_ICE)
431     CPUBreakpoint *bp;
432
433     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
434         if (bp->pc == pc && bp->flags == flags) {
435             cpu_breakpoint_remove_by_ref(env, bp);
436             return 0;
437         }
438     }
439     return -ENOENT;
440 #else
441     return -ENOSYS;
442 #endif
443 }
444
445 /* Remove a specific breakpoint by reference.  */
446 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
447 {
448 #if defined(TARGET_HAS_ICE)
449     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
450
451     breakpoint_invalidate(env, breakpoint->pc);
452
453     g_free(breakpoint);
454 #endif
455 }
456
457 /* Remove all matching breakpoints. */
458 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
459 {
460 #if defined(TARGET_HAS_ICE)
461     CPUBreakpoint *bp, *next;
462
463     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
464         if (bp->flags & mask)
465             cpu_breakpoint_remove_by_ref(env, bp);
466     }
467 #endif
468 }
469
470 /* enable or disable single step mode. EXCP_DEBUG is returned by the
471    CPU loop after each instruction */
472 void cpu_single_step(CPUArchState *env, int enabled)
473 {
474 #if defined(TARGET_HAS_ICE)
475     if (env->singlestep_enabled != enabled) {
476         env->singlestep_enabled = enabled;
477         if (kvm_enabled())
478             kvm_update_guest_debug(env, 0);
479         else {
480             /* must flush all the translated code to avoid inconsistencies */
481             /* XXX: only flush what is necessary */
482             tb_flush(env);
483         }
484     }
485 #endif
486 }
487
488 void cpu_reset_interrupt(CPUArchState *env, int mask)
489 {
490     env->interrupt_request &= ~mask;
491 }
492
493 void cpu_exit(CPUArchState *env)
494 {
495     CPUState *cpu = ENV_GET_CPU(env);
496
497     cpu->exit_request = 1;
498     cpu->tcg_exit_req = 1;
499 }
500
501 void cpu_abort(CPUArchState *env, const char *fmt, ...)
502 {
503     va_list ap;
504     va_list ap2;
505
506     va_start(ap, fmt);
507     va_copy(ap2, ap);
508     fprintf(stderr, "qemu: fatal: ");
509     vfprintf(stderr, fmt, ap);
510     fprintf(stderr, "\n");
511     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
512     if (qemu_log_enabled()) {
513         qemu_log("qemu: fatal: ");
514         qemu_log_vprintf(fmt, ap2);
515         qemu_log("\n");
516         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
517         qemu_log_flush();
518         qemu_log_close();
519     }
520     va_end(ap2);
521     va_end(ap);
522 #if defined(CONFIG_USER_ONLY)
523     {
524         struct sigaction act;
525         sigfillset(&act.sa_mask);
526         act.sa_handler = SIG_DFL;
527         sigaction(SIGABRT, &act, NULL);
528     }
529 #endif
530     abort();
531 }
532
533 CPUArchState *cpu_copy(CPUArchState *env)
534 {
535     CPUArchState *new_env = cpu_init(env->cpu_model_str);
536     CPUArchState *next_cpu = new_env->next_cpu;
537 #if defined(TARGET_HAS_ICE)
538     CPUBreakpoint *bp;
539     CPUWatchpoint *wp;
540 #endif
541
542     memcpy(new_env, env, sizeof(CPUArchState));
543
544     /* Preserve chaining. */
545     new_env->next_cpu = next_cpu;
546
547     /* Clone all break/watchpoints.
548        Note: Once we support ptrace with hw-debug register access, make sure
549        BP_CPU break/watchpoints are handled correctly on clone. */
550     QTAILQ_INIT(&new_env->breakpoints);
551     QTAILQ_INIT(&new_env->watchpoints);
552 #if defined(TARGET_HAS_ICE)
553     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
554         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
555     }
556     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
557         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
558                               wp->flags, NULL);
559     }
560 #endif
561
562     return new_env;
563 }
564
565 #if !defined(CONFIG_USER_ONLY)
566 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
567                                       uintptr_t length)
568 {
569     uintptr_t start1;
570
571     /* we modify the TLB cache so that the dirty bit will be set again
572        when accessing the range */
573     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
574     /* Check that we don't span multiple blocks - this breaks the
575        address comparisons below.  */
576     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
577             != (end - 1) - start) {
578         abort();
579     }
580     cpu_tlb_reset_dirty_all(start1, length);
581
582 }
583
584 /* Note: start and end must be within the same ram block.  */
585 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
586                                      int dirty_flags)
587 {
588     uintptr_t length;
589
590     start &= TARGET_PAGE_MASK;
591     end = TARGET_PAGE_ALIGN(end);
592
593     length = end - start;
594     if (length == 0)
595         return;
596     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
597
598     if (tcg_enabled()) {
599         tlb_reset_dirty_range_all(start, end, length);
600     }
601 }
602
603 static int cpu_physical_memory_set_dirty_tracking(int enable)
604 {
605     int ret = 0;
606     in_migration = enable;
607     return ret;
608 }
609
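/*
 * Compute the value stored in the IOTLB for one guest page.  For RAM-backed
 * pages this is the page's ram_addr_t OR'ed with a special section index
 * (notdirty or rom) so that writes can be trapped; for MMIO pages it is the
 * index of the MemoryRegionSection plus the offset within it, which
 * iotlb_to_region() later turns back into a MemoryRegion.  Pages carrying a
 * watchpoint are redirected to the watch section and marked TLB_MMIO.
 */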
610 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
611                                                    MemoryRegionSection *section,
612                                                    target_ulong vaddr,
613                                                    hwaddr paddr,
614                                                    int prot,
615                                                    target_ulong *address)
616 {
617     hwaddr iotlb;
618     CPUWatchpoint *wp;
619
620     if (memory_region_is_ram(section->mr)) {
621         /* Normal RAM.  */
622         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
623             + memory_region_section_addr(section, paddr);
624         if (!section->readonly) {
625             iotlb |= phys_section_notdirty;
626         } else {
627             iotlb |= phys_section_rom;
628         }
629     } else {
630         /* IO handlers are currently passed a physical address.
631            It would be nice to pass an offset from the base address
632            of that region.  This would avoid having to special case RAM,
633            and avoid full address decoding in every device.
634            We can't use the high bits of pd for this because
635            IO_MEM_ROMD uses these as a ram address.  */
636         iotlb = section - phys_sections;
637         iotlb += memory_region_section_addr(section, paddr);
638     }
639
640     /* Make accesses to pages with watchpoints go via the
641        watchpoint trap routines.  */
642     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
643         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
644             /* Avoid trapping reads of pages with a write breakpoint. */
645             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
646                 iotlb = phys_section_watch + paddr;
647                 *address |= TLB_MMIO;
648                 break;
649             }
650         }
651     }
652
653     return iotlb;
654 }
655 #endif /* !defined(CONFIG_USER_ONLY) */
656
657 #if !defined(CONFIG_USER_ONLY)
658
659 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
660 typedef struct subpage_t {
661     MemoryRegion iomem;
662     hwaddr base;
663     uint16_t sub_section[TARGET_PAGE_SIZE];
664 } subpage_t;
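/*
 * A subpage splits a single guest page between several MemoryRegionSections:
 * sub_section[] holds one phys_sections index per byte offset inside the
 * page, so subpage_read()/subpage_write() below can route accesses at byte
 * granularity.
 */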
665
666 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
667                              uint16_t section);
668 static subpage_t *subpage_init(hwaddr base);
669 static void destroy_page_desc(uint16_t section_index)
670 {
671     MemoryRegionSection *section = &phys_sections[section_index];
672     MemoryRegion *mr = section->mr;
673
674     if (mr->subpage) {
675         subpage_t *subpage = container_of(mr, subpage_t, iomem);
676         memory_region_destroy(&subpage->iomem);
677         g_free(subpage);
678     }
679 }
680
681 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
682 {
683     unsigned i;
684     PhysPageEntry *p;
685
686     if (lp->ptr == PHYS_MAP_NODE_NIL) {
687         return;
688     }
689
690     p = phys_map_nodes[lp->ptr];
691     for (i = 0; i < L2_SIZE; ++i) {
692         if (!p[i].is_leaf) {
693             destroy_l2_mapping(&p[i], level - 1);
694         } else {
695             destroy_page_desc(p[i].ptr);
696         }
697     }
698     lp->is_leaf = 0;
699     lp->ptr = PHYS_MAP_NODE_NIL;
700 }
701
702 static void destroy_all_mappings(AddressSpaceDispatch *d)
703 {
704     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
705     phys_map_nodes_reset();
706 }
707
708 static uint16_t phys_section_add(MemoryRegionSection *section)
709 {
710     if (phys_sections_nb == phys_sections_nb_alloc) {
711         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
712         phys_sections = g_renew(MemoryRegionSection, phys_sections,
713                                 phys_sections_nb_alloc);
714     }
715     phys_sections[phys_sections_nb] = *section;
716     return phys_sections_nb++;
717 }
718
719 static void phys_sections_clear(void)
720 {
721     phys_sections_nb = 0;
722 }
723
724 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
725 {
726     subpage_t *subpage;
727     hwaddr base = section->offset_within_address_space
728         & TARGET_PAGE_MASK;
729     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
730     MemoryRegionSection subsection = {
731         .offset_within_address_space = base,
732         .size = TARGET_PAGE_SIZE,
733     };
734     hwaddr start, end;
735
736     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
737
738     if (!(existing->mr->subpage)) {
739         subpage = subpage_init(base);
740         subsection.mr = &subpage->iomem;
741         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
742                       phys_section_add(&subsection));
743     } else {
744         subpage = container_of(existing->mr, subpage_t, iomem);
745     }
746     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
747     end = start + section->size - 1;
748     subpage_register(subpage, start, end, phys_section_add(section));
749 }
750
751
752 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
753 {
754     hwaddr start_addr = section->offset_within_address_space;
755     ram_addr_t size = section->size;
756     hwaddr addr;
757     uint16_t section_index = phys_section_add(section);
758
759     assert(size);
760
761     addr = start_addr;
762     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
763                   section_index);
764 }
765
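/*
 * mem_add() breaks an incoming MemoryRegionSection into up to three pieces:
 * an unaligned head and tail are registered at sub-page granularity, while
 * page-sized pieces whose offset within the region is page-aligned are
 * registered whole via register_multipage().
 */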
766 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
767 {
768     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
769     MemoryRegionSection now = *section, remain = *section;
770
771     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
772         || (now.size < TARGET_PAGE_SIZE)) {
773         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
774                        - now.offset_within_address_space,
775                        now.size);
776         register_subpage(d, &now);
777         remain.size -= now.size;
778         remain.offset_within_address_space += now.size;
779         remain.offset_within_region += now.size;
780     }
781     while (remain.size >= TARGET_PAGE_SIZE) {
782         now = remain;
783         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
784             now.size = TARGET_PAGE_SIZE;
785             register_subpage(d, &now);
786         } else {
787             now.size &= TARGET_PAGE_MASK;
788             register_multipage(d, &now);
789         }
790         remain.size -= now.size;
791         remain.offset_within_address_space += now.size;
792         remain.offset_within_region += now.size;
793     }
794     now = remain;
795     if (now.size) {
796         register_subpage(d, &now);
797     }
798 }
799
800 void qemu_flush_coalesced_mmio_buffer(void)
801 {
802     if (kvm_enabled())
803         kvm_flush_coalesced_mmio_buffer();
804 }
805
806 void qemu_mutex_lock_ramlist(void)
807 {
808     qemu_mutex_lock(&ram_list.mutex);
809 }
810
811 void qemu_mutex_unlock_ramlist(void)
812 {
813     qemu_mutex_unlock(&ram_list.mutex);
814 }
815
816 #if defined(__linux__) && !defined(TARGET_S390X)
817
818 #include <sys/vfs.h>
819
820 #define HUGETLBFS_MAGIC       0x958458f6
821
822 static long gethugepagesize(const char *path)
823 {
824     struct statfs fs;
825     int ret;
826
827     do {
828         ret = statfs(path, &fs);
829     } while (ret != 0 && errno == EINTR);
830
831     if (ret != 0) {
832         perror(path);
833         return 0;
834     }
835
836     if (fs.f_type != HUGETLBFS_MAGIC)
837         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
838
839     return fs.f_bsize;
840 }
841
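/*
 * Back a RAM block with a file under -mem-path (typically hugetlbfs).  The
 * file name embeds the sanitized memory region name, is created with
 * mkstemp() and unlinked right away, the size is rounded up to the huge page
 * size, and the region is mmap'ed (MAP_POPULATE | MAP_SHARED when
 * -mem-prealloc is in use, MAP_PRIVATE otherwise).
 */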
842 static void *file_ram_alloc(RAMBlock *block,
843                             ram_addr_t memory,
844                             const char *path)
845 {
846     char *filename;
847     char *sanitized_name;
848     char *c;
849     void *area;
850     int fd;
851 #ifdef MAP_POPULATE
852     int flags;
853 #endif
854     unsigned long hpagesize;
855
856     hpagesize = gethugepagesize(path);
857     if (!hpagesize) {
858         return NULL;
859     }
860
861     if (memory < hpagesize) {
862         return NULL;
863     }
864
865     if (kvm_enabled() && !kvm_has_sync_mmu()) {
866         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
867         return NULL;
868     }
869
870     /* Make name safe to use with mkstemp by replacing '/' with '_'. */
871     sanitized_name = g_strdup(block->mr->name);
872     for (c = sanitized_name; *c != '\0'; c++) {
873         if (*c == '/')
874             *c = '_';
875     }
876
877     filename = g_strdup_printf("%s/qemu_back_mem.%s.XXXXXX", path,
878                                sanitized_name);
879     g_free(sanitized_name);
880
881     fd = mkstemp(filename);
882     if (fd < 0) {
883         perror("unable to create backing store for hugepages");
884         g_free(filename);
885         return NULL;
886     }
887     unlink(filename);
888     g_free(filename);
889
890     memory = (memory+hpagesize-1) & ~(hpagesize-1);
891
892     /*
893      * ftruncate is not supported by hugetlbfs in older
894      * hosts, so don't bother bailing out on errors.
895      * If anything goes wrong with it under other filesystems,
896      * mmap will fail.
897      */
898     if (ftruncate(fd, memory))
899         perror("ftruncate");
900
901 #ifdef MAP_POPULATE
902     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages when
903      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
904      * to sidestep this quirk.
905      */
906     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
907     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
908 #else
909     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
910 #endif
911     if (area == MAP_FAILED) {
912         perror("file_ram_alloc: can't mmap RAM pages");
913         close(fd);
914         return (NULL);
915     }
916     block->fd = fd;
917     return area;
918 }
919 #endif
920
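/*
 * Best-fit search for a hole in the ram_addr_t space: for every existing
 * block, find the closest block that starts after it and remember the
 * smallest gap that still fits the requested size.
 */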
921 static ram_addr_t find_ram_offset(ram_addr_t size)
922 {
923     RAMBlock *block, *next_block;
924     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
925
926     if (QTAILQ_EMPTY(&ram_list.blocks))
927         return 0;
928
929     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
930         ram_addr_t end, next = RAM_ADDR_MAX;
931
932         end = block->offset + block->length;
933
934         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
935             if (next_block->offset >= end) {
936                 next = MIN(next, next_block->offset);
937             }
938         }
939         if (next - end >= size && next - end < mingap) {
940             offset = end;
941             mingap = next - end;
942         }
943     }
944
945     if (offset == RAM_ADDR_MAX) {
946         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
947                 (uint64_t)size);
948         abort();
949     }
950
951     return offset;
952 }
953
954 ram_addr_t last_ram_offset(void)
955 {
956     RAMBlock *block;
957     ram_addr_t last = 0;
958
959     QTAILQ_FOREACH(block, &ram_list.blocks, next)
960         last = MAX(last, block->offset + block->length);
961
962     return last;
963 }
964
965 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
966 {
967     int ret;
968     QemuOpts *machine_opts;
969
970     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
971     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
972     if (machine_opts &&
973         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
974         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
975         if (ret) {
976             perror("qemu_madvise");
977             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
978                             "but dump_guest_core=off specified\n");
979         }
980     }
981 }
982
983 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
984 {
985     RAMBlock *new_block, *block;
986
987     new_block = NULL;
988     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
989         if (block->offset == addr) {
990             new_block = block;
991             break;
992         }
993     }
994     assert(new_block);
995     assert(!new_block->idstr[0]);
996
997     if (dev) {
998         char *id = qdev_get_dev_path(dev);
999         if (id) {
1000             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
1001             g_free(id);
1002         }
1003     }
1004     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
1005
1006     /* This assumes the iothread lock is taken here too.  */
1007     qemu_mutex_lock_ramlist();
1008     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1009         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1010             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1011                     new_block->idstr);
1012             abort();
1013         }
1014     }
1015     qemu_mutex_unlock_ramlist();
1016 }
1017
1018 static int memory_try_enable_merging(void *addr, size_t len)
1019 {
1020     QemuOpts *opts;
1021
1022     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1023     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1024         /* disabled by the user */
1025         return 0;
1026     }
1027
1028     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1029 }
1030
1031 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1032                                    MemoryRegion *mr)
1033 {
1034     RAMBlock *block, *new_block;
1035
1036     size = TARGET_PAGE_ALIGN(size);
1037     new_block = g_malloc0(sizeof(*new_block));
1038
1039     /* This assumes the iothread lock is taken here too.  */
1040     qemu_mutex_lock_ramlist();
1041     new_block->mr = mr;
1042     new_block->offset = find_ram_offset(size);
1043     if (host) {
1044         new_block->host = host;
1045         new_block->flags |= RAM_PREALLOC_MASK;
1046     } else {
1047         if (mem_path) {
1048 #if defined (__linux__) && !defined(TARGET_S390X)
1049             new_block->host = file_ram_alloc(new_block, size, mem_path);
1050             if (!new_block->host) {
1051                 new_block->host = qemu_vmalloc(size);
1052                 memory_try_enable_merging(new_block->host, size);
1053             }
1054 #else
1055             fprintf(stderr, "-mem-path option unsupported\n");
1056             exit(1);
1057 #endif
1058         } else {
1059             if (xen_enabled()) {
1060                 xen_ram_alloc(new_block->offset, size, mr);
1061             } else if (kvm_enabled()) {
1062                 /* some s390/kvm configurations have special constraints */
1063                 new_block->host = kvm_vmalloc(size);
1064             } else {
1065                 new_block->host = qemu_vmalloc(size);
1066             }
1067             memory_try_enable_merging(new_block->host, size);
1068         }
1069     }
1070     new_block->length = size;
1071
1072     /* Keep the list sorted from biggest to smallest block.  */
1073     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1074         if (block->length < new_block->length) {
1075             break;
1076         }
1077     }
1078     if (block) {
1079         QTAILQ_INSERT_BEFORE(block, new_block, next);
1080     } else {
1081         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1082     }
1083     ram_list.mru_block = NULL;
1084
1085     ram_list.version++;
1086     qemu_mutex_unlock_ramlist();
1087
1088     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1089                                        last_ram_offset() >> TARGET_PAGE_BITS);
1090     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1091            0, size >> TARGET_PAGE_BITS);
1092     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1093
1094     qemu_ram_setup_dump(new_block->host, size);
1095     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1096
1097     if (kvm_enabled())
1098         kvm_setup_guest_memory(new_block->host, size);
1099
1100     return new_block->offset;
1101 }
1102
1103 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1104 {
1105     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1106 }
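/*
 * Device code does not usually call qemu_ram_alloc() directly; it normally
 * goes through memory_region_init_ram(), which allocates the block and binds
 * it to a MemoryRegion, roughly (names here are illustrative):
 *
 *     memory_region_init_ram(&s->vram, "vga.vram", vram_size);
 *     vmstate_register_ram(&s->vram, dev);
 */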
1107
1108 void qemu_ram_free_from_ptr(ram_addr_t addr)
1109 {
1110     RAMBlock *block;
1111
1112     /* This assumes the iothread lock is taken here too.  */
1113     qemu_mutex_lock_ramlist();
1114     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1115         if (addr == block->offset) {
1116             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1117             ram_list.mru_block = NULL;
1118             ram_list.version++;
1119             g_free(block);
1120             break;
1121         }
1122     }
1123     qemu_mutex_unlock_ramlist();
1124 }
1125
1126 void qemu_ram_free(ram_addr_t addr)
1127 {
1128     RAMBlock *block;
1129
1130     /* This assumes the iothread lock is taken here too.  */
1131     qemu_mutex_lock_ramlist();
1132     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1133         if (addr == block->offset) {
1134             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1135             ram_list.mru_block = NULL;
1136             ram_list.version++;
1137             if (block->flags & RAM_PREALLOC_MASK) {
1138                 ;
1139             } else if (mem_path) {
1140 #if defined (__linux__) && !defined(TARGET_S390X)
1141                 if (block->fd) {
1142                     munmap(block->host, block->length);
1143                     close(block->fd);
1144                 } else {
1145                     qemu_vfree(block->host);
1146                 }
1147 #else
1148                 abort();
1149 #endif
1150             } else {
1151 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1152                 munmap(block->host, block->length);
1153 #else
1154                 if (xen_enabled()) {
1155                     xen_invalidate_map_cache_entry(block->host);
1156                 } else {
1157                     qemu_vfree(block->host);
1158                 }
1159 #endif
1160             }
1161             g_free(block);
1162             break;
1163         }
1164     }
1165     qemu_mutex_unlock_ramlist();
1166
1167 }
1168
1169 #ifndef _WIN32
1170 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1171 {
1172     RAMBlock *block;
1173     ram_addr_t offset;
1174     int flags;
1175     void *area, *vaddr;
1176
1177     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1178         offset = addr - block->offset;
1179         if (offset < block->length) {
1180             vaddr = block->host + offset;
1181             if (block->flags & RAM_PREALLOC_MASK) {
1182                 ;
1183             } else {
1184                 flags = MAP_FIXED;
1185                 munmap(vaddr, length);
1186                 if (mem_path) {
1187 #if defined(__linux__) && !defined(TARGET_S390X)
1188                     if (block->fd) {
1189 #ifdef MAP_POPULATE
1190                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1191                             MAP_PRIVATE;
1192 #else
1193                         flags |= MAP_PRIVATE;
1194 #endif
1195                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1196                                     flags, block->fd, offset);
1197                     } else {
1198                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1199                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1200                                     flags, -1, 0);
1201                     }
1202 #else
1203                     abort();
1204 #endif
1205                 } else {
1206 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1207                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1208                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1209                                 flags, -1, 0);
1210 #else
1211                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1212                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1213                                 flags, -1, 0);
1214 #endif
1215                 }
1216                 if (area != vaddr) {
1217                     fprintf(stderr, "Could not remap addr: "
1218                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1219                             length, addr);
1220                     exit(1);
1221                 }
1222                 memory_try_enable_merging(vaddr, length);
1223                 qemu_ram_setup_dump(vaddr, length);
1224             }
1225             return;
1226         }
1227     }
1228 }
1229 #endif /* !_WIN32 */
1230
1231 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1232    With the exception of the softmmu code in this file, this should
1233    only be used for local memory (e.g. video ram) that the device owns,
1234    and knows it isn't going to access beyond the end of the block.
1235
1236    It should not be used for general purpose DMA.
1237    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1238  */
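/* For general-purpose access a caller would instead do something like
 * (illustrative only):
 *
 *     uint8_t buf[4];
 *     cpu_physical_memory_rw(paddr, buf, sizeof(buf), 0);    (0 = read)
 */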
1239 void *qemu_get_ram_ptr(ram_addr_t addr)
1240 {
1241     RAMBlock *block;
1242
1243     /* The list is protected by the iothread lock here.  */
1244     block = ram_list.mru_block;
1245     if (block && addr - block->offset < block->length) {
1246         goto found;
1247     }
1248     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1249         if (addr - block->offset < block->length) {
1250             goto found;
1251         }
1252     }
1253
1254     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1255     abort();
1256
1257 found:
1258     ram_list.mru_block = block;
1259     if (xen_enabled()) {
1260         /* We need to check if the requested address is in the RAM
1261          * because we don't want to map the entire memory in QEMU.
1262          * In that case just map until the end of the page.
1263          */
1264         if (block->offset == 0) {
1265             return xen_map_cache(addr, 0, 0);
1266         } else if (block->host == NULL) {
1267             block->host =
1268                 xen_map_cache(block->offset, block->length, 1);
1269         }
1270     }
1271     return block->host + (addr - block->offset);
1272 }
1273
1274 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1275  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1276  *
1277  * ??? Is this still necessary?
1278  */
1279 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1280 {
1281     RAMBlock *block;
1282
1283     /* The list is protected by the iothread lock here.  */
1284     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1285         if (addr - block->offset < block->length) {
1286             if (xen_enabled()) {
1287                 /* We need to check if the requested address is in the RAM
1288                  * because we don't want to map the entire memory in QEMU.
1289                  * In that case just map until the end of the page.
1290                  */
1291                 if (block->offset == 0) {
1292                     return xen_map_cache(addr, 0, 0);
1293                 } else if (block->host == NULL) {
1294                     block->host =
1295                         xen_map_cache(block->offset, block->length, 1);
1296                 }
1297             }
1298             return block->host + (addr - block->offset);
1299         }
1300     }
1301
1302     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1303     abort();
1304
1305     return NULL;
1306 }
1307
1308 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1309  * but takes a size argument */
1310 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1311 {
1312     if (*size == 0) {
1313         return NULL;
1314     }
1315     if (xen_enabled()) {
1316         return xen_map_cache(addr, *size, 1);
1317     } else {
1318         RAMBlock *block;
1319
1320         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1321             if (addr - block->offset < block->length) {
1322                 if (addr - block->offset + *size > block->length)
1323                     *size = block->length - addr + block->offset;
1324                 return block->host + (addr - block->offset);
1325             }
1326         }
1327
1328         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1329         abort();
1330     }
1331 }
1332
1333 void qemu_put_ram_ptr(void *addr)
1334 {
1335     trace_qemu_put_ram_ptr(addr);
1336 }
1337
1338 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1339 {
1340     RAMBlock *block;
1341     uint8_t *host = ptr;
1342
1343     if (xen_enabled()) {
1344         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1345         return 0;
1346     }
1347
1348     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1349         /* This case happens when the block is not mapped. */
1350         if (block->host == NULL) {
1351             continue;
1352         }
1353         if (host - block->host < block->length) {
1354             *ram_addr = block->offset + (host - block->host);
1355             return 0;
1356         }
1357     }
1358
1359     return -1;
1360 }
1361
1362 /* Some of the softmmu routines need to translate from a host pointer
1363    (typically a TLB entry) back to a ram offset.  */
1364 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1365 {
1366     ram_addr_t ram_addr;
1367
1368     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1369         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1370         abort();
1371     }
1372     return ram_addr;
1373 }
1374
1375 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1376                                     unsigned size)
1377 {
1378 #ifdef DEBUG_UNASSIGNED
1379     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1380 #endif
1381 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1382     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1383 #endif
1384     return 0;
1385 }
1386
1387 static void unassigned_mem_write(void *opaque, hwaddr addr,
1388                                  uint64_t val, unsigned size)
1389 {
1390 #ifdef DEBUG_UNASSIGNED
1391     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1392 #endif
1393 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1394     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1395 #endif
1396 }
1397
1398 static const MemoryRegionOps unassigned_mem_ops = {
1399     .read = unassigned_mem_read,
1400     .write = unassigned_mem_write,
1401     .endianness = DEVICE_NATIVE_ENDIAN,
1402 };
1403
1404 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1405                                unsigned size)
1406 {
1407     abort();
1408 }
1409
1410 static void error_mem_write(void *opaque, hwaddr addr,
1411                             uint64_t value, unsigned size)
1412 {
1413     abort();
1414 }
1415
1416 static const MemoryRegionOps error_mem_ops = {
1417     .read = error_mem_read,
1418     .write = error_mem_write,
1419     .endianness = DEVICE_NATIVE_ENDIAN,
1420 };
1421
1422 static const MemoryRegionOps rom_mem_ops = {
1423     .read = error_mem_read,
1424     .write = unassigned_mem_write,
1425     .endianness = DEVICE_NATIVE_ENDIAN,
1426 };
1427
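/*
 * Writes to RAM pages whose CODE_DIRTY_FLAG is clear (pages that may contain
 * translated code) are routed here instead of going straight to memory: the
 * write first invalidates any TBs covering the page, then stores the value
 * and updates the dirty bitmap, and finally flips the TLB entry back to a
 * plain RAM mapping once all dirty flags are set again.
 */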
1428 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1429                                uint64_t val, unsigned size)
1430 {
1431     int dirty_flags;
1432     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1433     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1434 #if !defined(CONFIG_USER_ONLY)
1435         tb_invalidate_phys_page_fast(ram_addr, size);
1436         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1437 #endif
1438     }
1439     switch (size) {
1440     case 1:
1441         stb_p(qemu_get_ram_ptr(ram_addr), val);
1442         break;
1443     case 2:
1444         stw_p(qemu_get_ram_ptr(ram_addr), val);
1445         break;
1446     case 4:
1447         stl_p(qemu_get_ram_ptr(ram_addr), val);
1448         break;
1449     default:
1450         abort();
1451     }
1452     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1453     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1454     /* we remove the notdirty callback only if the code has been
1455        flushed */
1456     if (dirty_flags == 0xff)
1457         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1458 }
1459
1460 static const MemoryRegionOps notdirty_mem_ops = {
1461     .read = error_mem_read,
1462     .write = notdirty_mem_write,
1463     .endianness = DEVICE_NATIVE_ENDIAN,
1464 };
1465
1466 /* Generate a debug exception if a watchpoint has been hit.  */
1467 static void check_watchpoint(int offset, int len_mask, int flags)
1468 {
1469     CPUArchState *env = cpu_single_env;
1470     target_ulong pc, cs_base;
1471     target_ulong vaddr;
1472     CPUWatchpoint *wp;
1473     int cpu_flags;
1474
1475     if (env->watchpoint_hit) {
1476         /* We re-entered the check after replacing the TB. Now raise
1477          * the debug interrupt so that it will trigger after the
1478          * current instruction. */
1479         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1480         return;
1481     }
1482     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1483     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1484         if ((vaddr == (wp->vaddr & len_mask) ||
1485              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1486             wp->flags |= BP_WATCHPOINT_HIT;
1487             if (!env->watchpoint_hit) {
1488                 env->watchpoint_hit = wp;
1489                 tb_check_watchpoint(env);
1490                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1491                     env->exception_index = EXCP_DEBUG;
1492                     cpu_loop_exit(env);
1493                 } else {
1494                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1495                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1496                     cpu_resume_from_signal(env, NULL);
1497                 }
1498             }
1499         } else {
1500             wp->flags &= ~BP_WATCHPOINT_HIT;
1501         }
1502     }
1503 }
1504
1505 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1506    so these check for a hit then pass through to the normal out-of-line
1507    phys routines.  */
1508 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1509                                unsigned size)
1510 {
1511     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1512     switch (size) {
1513     case 1: return ldub_phys(addr);
1514     case 2: return lduw_phys(addr);
1515     case 4: return ldl_phys(addr);
1516     default: abort();
1517     }
1518 }
1519
1520 static void watch_mem_write(void *opaque, hwaddr addr,
1521                             uint64_t val, unsigned size)
1522 {
1523     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1524     switch (size) {
1525     case 1:
1526         stb_phys(addr, val);
1527         break;
1528     case 2:
1529         stw_phys(addr, val);
1530         break;
1531     case 4:
1532         stl_phys(addr, val);
1533         break;
1534     default: abort();
1535     }
1536 }
1537
1538 static const MemoryRegionOps watch_mem_ops = {
1539     .read = watch_mem_read,
1540     .write = watch_mem_write,
1541     .endianness = DEVICE_NATIVE_ENDIAN,
1542 };
1543
1544 static uint64_t subpage_read(void *opaque, hwaddr addr,
1545                              unsigned len)
1546 {
1547     subpage_t *mmio = opaque;
1548     unsigned int idx = SUBPAGE_IDX(addr);
1549     MemoryRegionSection *section;
1550 #if defined(DEBUG_SUBPAGE)
1551     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1552            mmio, len, addr, idx);
1553 #endif
1554
1555     section = &phys_sections[mmio->sub_section[idx]];
1556     addr += mmio->base;
1557     addr -= section->offset_within_address_space;
1558     addr += section->offset_within_region;
1559     return io_mem_read(section->mr, addr, len);
1560 }
1561
1562 static void subpage_write(void *opaque, hwaddr addr,
1563                           uint64_t value, unsigned len)
1564 {
1565     subpage_t *mmio = opaque;
1566     unsigned int idx = SUBPAGE_IDX(addr);
1567     MemoryRegionSection *section;
1568 #if defined(DEBUG_SUBPAGE)
1569     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1570            " idx %d value %"PRIx64"\n",
1571            __func__, mmio, len, addr, idx, value);
1572 #endif
1573
1574     section = &phys_sections[mmio->sub_section[idx]];
1575     addr += mmio->base;
1576     addr -= section->offset_within_address_space;
1577     addr += section->offset_within_region;
1578     io_mem_write(section->mr, addr, value, len);
1579 }
1580
1581 static const MemoryRegionOps subpage_ops = {
1582     .read = subpage_read,
1583     .write = subpage_write,
1584     .endianness = DEVICE_NATIVE_ENDIAN,
1585 };
1586
1587 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1588                                  unsigned size)
1589 {
1590     ram_addr_t raddr = addr;
1591     void *ptr = qemu_get_ram_ptr(raddr);
1592     switch (size) {
1593     case 1: return ldub_p(ptr);
1594     case 2: return lduw_p(ptr);
1595     case 4: return ldl_p(ptr);
1596     default: abort();
1597     }
1598 }
1599
1600 static void subpage_ram_write(void *opaque, hwaddr addr,
1601                               uint64_t value, unsigned size)
1602 {
1603     ram_addr_t raddr = addr;
1604     void *ptr = qemu_get_ram_ptr(raddr);
1605     switch (size) {
1606     case 1: return stb_p(ptr, value);
1607     case 2: return stw_p(ptr, value);
1608     case 4: return stl_p(ptr, value);
1609     default: abort();
1610     }
1611 }
1612
1613 static const MemoryRegionOps subpage_ram_ops = {
1614     .read = subpage_ram_read,
1615     .write = subpage_ram_write,
1616     .endianness = DEVICE_NATIVE_ENDIAN,
1617 };
1618
1619 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1620                              uint16_t section)
1621 {
1622     int idx, eidx;
1623
1624     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1625         return -1;
1626     idx = SUBPAGE_IDX(start);
1627     eidx = SUBPAGE_IDX(end);
1628 #if defined(DEBUG_SUBPAGE)
1629     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n", __func__,
1630            mmio, start, end, idx, eidx, section);
1631 #endif
1632     if (memory_region_is_ram(phys_sections[section].mr)) {
1633         MemoryRegionSection new_section = phys_sections[section];
1634         new_section.mr = &io_mem_subpage_ram;
1635         section = phys_section_add(&new_section);
1636     }
1637     for (; idx <= eidx; idx++) {
1638         mmio->sub_section[idx] = section;
1639     }
1640
1641     return 0;
1642 }
1643
1644 static subpage_t *subpage_init(hwaddr base)
1645 {
1646     subpage_t *mmio;
1647
1648     mmio = g_malloc0(sizeof(subpage_t));
1649
1650     mmio->base = base;
1651     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1652                           "subpage", TARGET_PAGE_SIZE);
1653     mmio->iomem.subpage = true;
1654 #if defined(DEBUG_SUBPAGE)
1655     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1656            mmio, base, TARGET_PAGE_SIZE);
1657 #endif
1658     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1659
1660     return mmio;
1661 }
1662
1663 static uint16_t dummy_section(MemoryRegion *mr)
1664 {
1665     MemoryRegionSection section = {
1666         .mr = mr,
1667         .offset_within_address_space = 0,
1668         .offset_within_region = 0,
1669         .size = UINT64_MAX,
1670     };
1671
1672     return phys_section_add(&section);
1673 }
1674
1675 MemoryRegion *iotlb_to_region(hwaddr index)
1676 {
1677     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1678 }
1679
1680 static void io_mem_init(void)
1681 {
1682     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1683     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1684     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1685                           "unassigned", UINT64_MAX);
1686     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1687                           "notdirty", UINT64_MAX);
1688     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1689                           "subpage-ram", UINT64_MAX);
1690     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1691                           "watch", UINT64_MAX);
1692 }
1693
1694 static void mem_begin(MemoryListener *listener)
1695 {
1696     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1697
1698     destroy_all_mappings(d);
1699     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1700 }
1701
1702 static void core_begin(MemoryListener *listener)
1703 {
1704     phys_sections_clear();
1705     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1706     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1707     phys_section_rom = dummy_section(&io_mem_rom);
1708     phys_section_watch = dummy_section(&io_mem_watch);
1709 }
1710
1711 static void tcg_commit(MemoryListener *listener)
1712 {
1713     CPUArchState *env;
1714
1715     /* since each CPU stores ram addresses in its TLB cache, we must
1716        reset the modified entries */
1717     /* XXX: slow ! */
1718     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1719         tlb_flush(env, 1);
1720     }
1721 }
1722
1723 static void core_log_global_start(MemoryListener *listener)
1724 {
1725     cpu_physical_memory_set_dirty_tracking(1);
1726 }
1727
1728 static void core_log_global_stop(MemoryListener *listener)
1729 {
1730     cpu_physical_memory_set_dirty_tracking(0);
1731 }
1732
1733 static void io_region_add(MemoryListener *listener,
1734                           MemoryRegionSection *section)
1735 {
1736     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1737
1738     mrio->mr = section->mr;
1739     mrio->offset = section->offset_within_region;
1740     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1741                  section->offset_within_address_space, section->size);
1742     ioport_register(&mrio->iorange);
1743 }
1744
1745 static void io_region_del(MemoryListener *listener,
1746                           MemoryRegionSection *section)
1747 {
1748     isa_unassign_ioport(section->offset_within_address_space, section->size);
1749 }
1750
1751 static MemoryListener core_memory_listener = {
1752     .begin = core_begin,
1753     .log_global_start = core_log_global_start,
1754     .log_global_stop = core_log_global_stop,
1755     .priority = 1,
1756 };
1757
1758 static MemoryListener io_memory_listener = {
1759     .region_add = io_region_add,
1760     .region_del = io_region_del,
1761     .priority = 0,
1762 };
1763
1764 static MemoryListener tcg_memory_listener = {
1765     .commit = tcg_commit,
1766 };
1767
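/* Allocate the dispatch state for @as: an initially empty phys_map tree plus
 * a listener whose mem_begin()/mem_add() callbacks rebuild the tree whenever
 * the memory topology of this address space changes. */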
1768 void address_space_init_dispatch(AddressSpace *as)
1769 {
1770     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1771
1772     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1773     d->listener = (MemoryListener) {
1774         .begin = mem_begin,
1775         .region_add = mem_add,
1776         .region_nop = mem_add,
1777         .priority = 0,
1778     };
1779     as->dispatch = d;
1780     memory_listener_register(&d->listener, as);
1781 }
1782
1783 void address_space_destroy_dispatch(AddressSpace *as)
1784 {
1785     AddressSpaceDispatch *d = as->dispatch;
1786
1787     memory_listener_unregister(&d->listener);
1788     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1789     g_free(d);
1790     as->dispatch = NULL;
1791 }
1792
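/* Create the global "system" memory container and the 64 KiB "io" region,
 * wrap them in address_space_memory / address_space_io, and attach the core,
 * I/O and TCG listeners plus the default DMA context. */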
1793 static void memory_map_init(void)
1794 {
1795     system_memory = g_malloc(sizeof(*system_memory));
1796     memory_region_init(system_memory, "system", INT64_MAX);
1797     address_space_init(&address_space_memory, system_memory);
1798     address_space_memory.name = "memory";
1799
1800     system_io = g_malloc(sizeof(*system_io));
1801     memory_region_init(system_io, "io", 65536);
1802     address_space_init(&address_space_io, system_io);
1803     address_space_io.name = "I/O";
1804
1805     memory_listener_register(&core_memory_listener, &address_space_memory);
1806     memory_listener_register(&io_memory_listener, &address_space_io);
1807     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1808
1809     dma_context_init(&dma_context_memory, &address_space_memory,
1810                      NULL, NULL, NULL);
1811 }
1812
1813 MemoryRegion *get_system_memory(void)
1814 {
1815     return system_memory;
1816 }
1817
1818 MemoryRegion *get_system_io(void)
1819 {
1820     return system_io;
1821 }
1822
1823 #endif /* !defined(CONFIG_USER_ONLY) */
1824
1825 /* physical memory access (slow version, mainly for debug) */
1826 #if defined(CONFIG_USER_ONLY)
1827 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1828                         uint8_t *buf, int len, int is_write)
1829 {
1830     int l, flags;
1831     target_ulong page;
1832     void * p;
1833
1834     while (len > 0) {
1835         page = addr & TARGET_PAGE_MASK;
1836         l = (page + TARGET_PAGE_SIZE) - addr;
1837         if (l > len)
1838             l = len;
1839         flags = page_get_flags(page);
1840         if (!(flags & PAGE_VALID))
1841             return -1;
1842         if (is_write) {
1843             if (!(flags & PAGE_WRITE))
1844                 return -1;
1845             /* XXX: this code should not depend on lock_user */
1846             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1847                 return -1;
1848             memcpy(p, buf, l);
1849             unlock_user(p, addr, l);
1850         } else {
1851             if (!(flags & PAGE_READ))
1852                 return -1;
1853             /* XXX: this code should not depend on lock_user */
1854             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1855                 return -1;
1856             memcpy(buf, p, l);
1857             unlock_user(p, addr, 0);
1858         }
1859         len -= l;
1860         buf += l;
1861         addr += l;
1862     }
1863     return 0;
1864 }
1865
1866 #else
1867
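/* After the host has written to guest RAM: invalidate any translated code in
 * the range, set the dirty flags (all except CODE_DIRTY_FLAG) and notify Xen
 * of the modification. */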
1868 static void invalidate_and_set_dirty(hwaddr addr,
1869                                      hwaddr length)
1870 {
1871     if (!cpu_physical_memory_is_dirty(addr)) {
1872         /* invalidate code */
1873         tb_invalidate_phys_page_range(addr, addr + length, 0);
1874         /* set dirty bit */
1875         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1876     }
1877     xen_modified_memory(addr, length);
1878 }
1879
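/* Copy @len bytes between @buf and the guest address space, splitting the
 * access at page boundaries.  RAM pages are accessed with memcpy() (writes to
 * read-only sections are dropped); everything else goes through
 * io_mem_read()/io_mem_write() in the widest naturally aligned chunks
 * available (4, 2 or 1 bytes). */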
1880 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1881                       int len, bool is_write)
1882 {
1883     AddressSpaceDispatch *d = as->dispatch;
1884     int l;
1885     uint8_t *ptr;
1886     uint32_t val;
1887     hwaddr page;
1888     MemoryRegionSection *section;
1889
1890     while (len > 0) {
1891         page = addr & TARGET_PAGE_MASK;
1892         l = (page + TARGET_PAGE_SIZE) - addr;
1893         if (l > len)
1894             l = len;
1895         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1896
1897         if (is_write) {
1898             if (!memory_region_is_ram(section->mr)) {
1899                 hwaddr addr1;
1900                 addr1 = memory_region_section_addr(section, addr);
1901                 /* XXX: could force cpu_single_env to NULL to avoid
1902                    potential bugs */
1903                 if (l >= 4 && ((addr1 & 3) == 0)) {
1904                     /* 32 bit write access */
1905                     val = ldl_p(buf);
1906                     io_mem_write(section->mr, addr1, val, 4);
1907                     l = 4;
1908                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1909                     /* 16 bit write access */
1910                     val = lduw_p(buf);
1911                     io_mem_write(section->mr, addr1, val, 2);
1912                     l = 2;
1913                 } else {
1914                     /* 8 bit write access */
1915                     val = ldub_p(buf);
1916                     io_mem_write(section->mr, addr1, val, 1);
1917                     l = 1;
1918                 }
1919             } else if (!section->readonly) {
1920                 ram_addr_t addr1;
1921                 addr1 = memory_region_get_ram_addr(section->mr)
1922                     + memory_region_section_addr(section, addr);
1923                 /* RAM case */
1924                 ptr = qemu_get_ram_ptr(addr1);
1925                 memcpy(ptr, buf, l);
1926                 invalidate_and_set_dirty(addr1, l);
1927                 qemu_put_ram_ptr(ptr);
1928             }
1929         } else {
1930             if (!(memory_region_is_ram(section->mr) ||
1931                   memory_region_is_romd(section->mr))) {
1932                 hwaddr addr1;
1933                 /* I/O case */
1934                 addr1 = memory_region_section_addr(section, addr);
1935                 if (l >= 4 && ((addr1 & 3) == 0)) {
1936                     /* 32 bit read access */
1937                     val = io_mem_read(section->mr, addr1, 4);
1938                     stl_p(buf, val);
1939                     l = 4;
1940                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1941                     /* 16 bit read access */
1942                     val = io_mem_read(section->mr, addr1, 2);
1943                     stw_p(buf, val);
1944                     l = 2;
1945                 } else {
1946                     /* 8 bit read access */
1947                     val = io_mem_read(section->mr, addr1, 1);
1948                     stb_p(buf, val);
1949                     l = 1;
1950                 }
1951             } else {
1952                 /* RAM case */
1953                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1954                                        + memory_region_section_addr(section,
1955                                                                     addr));
1956                 memcpy(buf, ptr, l);
1957                 qemu_put_ram_ptr(ptr);
1958             }
1959         }
1960         len -= l;
1961         buf += l;
1962         addr += l;
1963     }
1964 }
1965
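/**
 * address_space_write: write to address space.
 *
 * @as: #AddressSpace to be accessed
 * @addr: address within that address space
 * @buf: buffer with the data transferred
 * @len: length of the transfer, in bytes
 */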
1966 void address_space_write(AddressSpace *as, hwaddr addr,
1967                          const uint8_t *buf, int len)
1968 {
1969     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1970 }
1971
1972 /**
1973  * address_space_read: read from an address space.
1974  *
1975  * @as: #AddressSpace to be accessed
1976  * @addr: address within that address space
1977  * @buf: buffer with the data transferred
 * @len: length of the transfer, in bytes
1978  */
1979 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1980 {
1981     address_space_rw(as, addr, buf, len, false);
1982 }
1983
1984
1985 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1986                             int len, int is_write)
1987 {
1988     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1989 }
1990
1991 /* Used for ROM loading: can write to both RAM and ROM. */
1992 void cpu_physical_memory_write_rom(hwaddr addr,
1993                                    const uint8_t *buf, int len)
1994 {
1995     AddressSpaceDispatch *d = address_space_memory.dispatch;
1996     int l;
1997     uint8_t *ptr;
1998     hwaddr page;
1999     MemoryRegionSection *section;
2000
2001     while (len > 0) {
2002         page = addr & TARGET_PAGE_MASK;
2003         l = (page + TARGET_PAGE_SIZE) - addr;
2004         if (l > len)
2005             l = len;
2006         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2007
2008         if (!(memory_region_is_ram(section->mr) ||
2009               memory_region_is_romd(section->mr))) {
2010             /* do nothing */
2011         } else {
2012             unsigned long addr1;
2013             addr1 = memory_region_get_ram_addr(section->mr)
2014                 + memory_region_section_addr(section, addr);
2015             /* ROM/RAM case */
2016             ptr = qemu_get_ram_ptr(addr1);
2017             memcpy(ptr, buf, l);
2018             invalidate_and_set_dirty(addr1, l);
2019             qemu_put_ram_ptr(ptr);
2020         }
2021         len -= l;
2022         buf += l;
2023         addr += l;
2024     }
2025 }
2026
2027 typedef struct {
2028     void *buffer;
2029     hwaddr addr;
2030     hwaddr len;
2031 } BounceBuffer;
2032
2033 static BounceBuffer bounce;
2034
2035 typedef struct MapClient {
2036     void *opaque;
2037     void (*callback)(void *opaque);
2038     QLIST_ENTRY(MapClient) link;
2039 } MapClient;
2040
2041 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2042     = QLIST_HEAD_INITIALIZER(map_client_list);
2043
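/* Register @callback to be invoked (with @opaque) the next time the bounce
 * buffer is released, i.e. when retrying a failed address_space_map() is
 * likely to succeed.  The returned handle is freed automatically once the
 * callback has run. */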
2044 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2045 {
2046     MapClient *client = g_malloc(sizeof(*client));
2047
2048     client->opaque = opaque;
2049     client->callback = callback;
2050     QLIST_INSERT_HEAD(&map_client_list, client, link);
2051     return client;
2052 }
2053
2054 static void cpu_unregister_map_client(void *_client)
2055 {
2056     MapClient *client = (MapClient *)_client;
2057
2058     QLIST_REMOVE(client, link);
2059     g_free(client);
2060 }
2061
2062 static void cpu_notify_map_clients(void)
2063 {
2064     MapClient *client;
2065
2066     while (!QLIST_EMPTY(&map_client_list)) {
2067         client = QLIST_FIRST(&map_client_list);
2068         client->callback(client->opaque);
2069         cpu_unregister_map_client(client);
2070     }
2071 }
2072
2073 /* Map a physical memory region into a host virtual address.
2074  * May map a subset of the requested range, given by and returned in *plen.
2075  * May return NULL if resources needed to perform the mapping are exhausted.
2076  * Use only for reads OR writes - not for read-modify-write operations.
2077  * Use cpu_register_map_client() to know when retrying the map operation is
2078  * likely to succeed.
2079  */
2080 void *address_space_map(AddressSpace *as,
2081                         hwaddr addr,
2082                         hwaddr *plen,
2083                         bool is_write)
2084 {
2085     AddressSpaceDispatch *d = as->dispatch;
2086     hwaddr len = *plen;
2087     hwaddr todo = 0;
2088     int l;
2089     hwaddr page;
2090     MemoryRegionSection *section;
2091     ram_addr_t raddr = RAM_ADDR_MAX;
2092     ram_addr_t rlen;
2093     void *ret;
2094
2095     while (len > 0) {
2096         page = addr & TARGET_PAGE_MASK;
2097         l = (page + TARGET_PAGE_SIZE) - addr;
2098         if (l > len)
2099             l = len;
2100         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2101
2102         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2103             if (todo || bounce.buffer) {
2104                 break;
2105             }
2106             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2107             bounce.addr = addr;
2108             bounce.len = l;
2109             if (!is_write) {
2110                 address_space_read(as, addr, bounce.buffer, l);
2111             }
2112
2113             *plen = l;
2114             return bounce.buffer;
2115         }
2116         if (!todo) {
2117             raddr = memory_region_get_ram_addr(section->mr)
2118                 + memory_region_section_addr(section, addr);
2119         }
2120
2121         len -= l;
2122         addr += l;
2123         todo += l;
2124     }
2125     rlen = todo;
2126     ret = qemu_ram_ptr_length(raddr, &rlen);
2127     *plen = rlen;
2128     return ret;
2129 }
2130
2131 /* Unmaps a memory region previously mapped by address_space_map().
2132  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2133  * the amount of memory that was actually read or written by the caller.
2134  */
2135 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2136                          int is_write, hwaddr access_len)
2137 {
2138     if (buffer != bounce.buffer) {
2139         if (is_write) {
2140             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2141             while (access_len) {
2142                 unsigned l;
2143                 l = TARGET_PAGE_SIZE;
2144                 if (l > access_len)
2145                     l = access_len;
2146                 invalidate_and_set_dirty(addr1, l);
2147                 addr1 += l;
2148                 access_len -= l;
2149             }
2150         }
2151         if (xen_enabled()) {
2152             xen_invalidate_map_cache_entry(buffer);
2153         }
2154         return;
2155     }
2156     if (is_write) {
2157         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2158     }
2159     qemu_vfree(bounce.buffer);
2160     bounce.buffer = NULL;
2161     cpu_notify_map_clients();
2162 }
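
/*
 * Illustrative usage sketch (not part of the build): the typical
 * map / use / unmap pattern for a DMA-style transfer into guest memory.
 * The names gpa, size and fill_buffer() below are hypothetical.
 *
 *     hwaddr len = size;
 *     void *host = address_space_map(&address_space_memory, gpa, &len, true);
 *     if (!host) {
 *         // mapping resources exhausted; retry from a
 *         // cpu_register_map_client() callback
 *     } else {
 *         // note: len may have been reduced to less than size
 *         fill_buffer(host, len);
 *         address_space_unmap(&address_space_memory, host, len, true, len);
 *     }
 */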
2163
2164 void *cpu_physical_memory_map(hwaddr addr,
2165                               hwaddr *plen,
2166                               int is_write)
2167 {
2168     return address_space_map(&address_space_memory, addr, plen, is_write);
2169 }
2170
2171 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2172                                int is_write, hwaddr access_len)
2173 {
2174     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2175 }
2176
2177 /* warning: addr must be aligned */
2178 static inline uint32_t ldl_phys_internal(hwaddr addr,
2179                                          enum device_endian endian)
2180 {
2181     uint8_t *ptr;
2182     uint32_t val;
2183     MemoryRegionSection *section;
2184
2185     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2186
2187     if (!(memory_region_is_ram(section->mr) ||
2188           memory_region_is_romd(section->mr))) {
2189         /* I/O case */
2190         addr = memory_region_section_addr(section, addr);
2191         val = io_mem_read(section->mr, addr, 4);
2192 #if defined(TARGET_WORDS_BIGENDIAN)
2193         if (endian == DEVICE_LITTLE_ENDIAN) {
2194             val = bswap32(val);
2195         }
2196 #else
2197         if (endian == DEVICE_BIG_ENDIAN) {
2198             val = bswap32(val);
2199         }
2200 #endif
2201     } else {
2202         /* RAM case */
2203         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2204                                 & TARGET_PAGE_MASK)
2205                                + memory_region_section_addr(section, addr));
2206         switch (endian) {
2207         case DEVICE_LITTLE_ENDIAN:
2208             val = ldl_le_p(ptr);
2209             break;
2210         case DEVICE_BIG_ENDIAN:
2211             val = ldl_be_p(ptr);
2212             break;
2213         default:
2214             val = ldl_p(ptr);
2215             break;
2216         }
2217     }
2218     return val;
2219 }
2220
2221 uint32_t ldl_phys(hwaddr addr)
2222 {
2223     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2224 }
2225
2226 uint32_t ldl_le_phys(hwaddr addr)
2227 {
2228     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2229 }
2230
2231 uint32_t ldl_be_phys(hwaddr addr)
2232 {
2233     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2234 }
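
/*
 * Illustrative sketch (not part of the build): updating a 32-bit
 * little-endian device register at a hypothetical guest-physical address
 * REG_ADDR with the endian-aware helpers above and stl_le_phys() below.
 *
 *     uint32_t ctrl = ldl_le_phys(REG_ADDR);
 *     ctrl |= 0x1;                    // hypothetical enable bit
 *     stl_le_phys(REG_ADDR, ctrl);
 */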
2235
2236 /* warning: addr must be aligned */
2237 static inline uint64_t ldq_phys_internal(hwaddr addr,
2238                                          enum device_endian endian)
2239 {
2240     uint8_t *ptr;
2241     uint64_t val;
2242     MemoryRegionSection *section;
2243
2244     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2245
2246     if (!(memory_region_is_ram(section->mr) ||
2247           memory_region_is_romd(section->mr))) {
2248         /* I/O case */
2249         addr = memory_region_section_addr(section, addr);
2250
2251         /* XXX: This is broken when the device endianness differs from the
2252                CPU endianness.  Fix this and honour the "endian" argument. */
2253 #ifdef TARGET_WORDS_BIGENDIAN
2254         val = io_mem_read(section->mr, addr, 4) << 32;
2255         val |= io_mem_read(section->mr, addr + 4, 4);
2256 #else
2257         val = io_mem_read(section->mr, addr, 4);
2258         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2259 #endif
2260     } else {
2261         /* RAM case */
2262         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2263                                 & TARGET_PAGE_MASK)
2264                                + memory_region_section_addr(section, addr));
2265         switch (endian) {
2266         case DEVICE_LITTLE_ENDIAN:
2267             val = ldq_le_p(ptr);
2268             break;
2269         case DEVICE_BIG_ENDIAN:
2270             val = ldq_be_p(ptr);
2271             break;
2272         default:
2273             val = ldq_p(ptr);
2274             break;
2275         }
2276     }
2277     return val;
2278 }
2279
2280 uint64_t ldq_phys(hwaddr addr)
2281 {
2282     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2283 }
2284
2285 uint64_t ldq_le_phys(hwaddr addr)
2286 {
2287     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2288 }
2289
2290 uint64_t ldq_be_phys(hwaddr addr)
2291 {
2292     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2293 }
2294
2295 /* XXX: optimize */
2296 uint32_t ldub_phys(hwaddr addr)
2297 {
2298     uint8_t val;
2299     cpu_physical_memory_read(addr, &val, 1);
2300     return val;
2301 }
2302
2303 /* warning: addr must be aligned */
2304 static inline uint32_t lduw_phys_internal(hwaddr addr,
2305                                           enum device_endian endian)
2306 {
2307     uint8_t *ptr;
2308     uint64_t val;
2309     MemoryRegionSection *section;
2310
2311     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2312
2313     if (!(memory_region_is_ram(section->mr) ||
2314           memory_region_is_romd(section->mr))) {
2315         /* I/O case */
2316         addr = memory_region_section_addr(section, addr);
2317         val = io_mem_read(section->mr, addr, 2);
2318 #if defined(TARGET_WORDS_BIGENDIAN)
2319         if (endian == DEVICE_LITTLE_ENDIAN) {
2320             val = bswap16(val);
2321         }
2322 #else
2323         if (endian == DEVICE_BIG_ENDIAN) {
2324             val = bswap16(val);
2325         }
2326 #endif
2327     } else {
2328         /* RAM case */
2329         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2330                                 & TARGET_PAGE_MASK)
2331                                + memory_region_section_addr(section, addr));
2332         switch (endian) {
2333         case DEVICE_LITTLE_ENDIAN:
2334             val = lduw_le_p(ptr);
2335             break;
2336         case DEVICE_BIG_ENDIAN:
2337             val = lduw_be_p(ptr);
2338             break;
2339         default:
2340             val = lduw_p(ptr);
2341             break;
2342         }
2343     }
2344     return val;
2345 }
2346
2347 uint32_t lduw_phys(hwaddr addr)
2348 {
2349     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2350 }
2351
2352 uint32_t lduw_le_phys(hwaddr addr)
2353 {
2354     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2355 }
2356
2357 uint32_t lduw_be_phys(hwaddr addr)
2358 {
2359     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2360 }
2361
2362 /* warning: addr must be aligned.  The RAM page is not marked as dirty
2363    and the code inside it is not invalidated.  This is useful when the
2364    dirty bits are used to track modified PTEs. */
2365 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2366 {
2367     uint8_t *ptr;
2368     MemoryRegionSection *section;
2369
2370     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2371
2372     if (!memory_region_is_ram(section->mr) || section->readonly) {
2373         addr = memory_region_section_addr(section, addr);
2374         if (memory_region_is_ram(section->mr)) {
2375             section = &phys_sections[phys_section_rom];
2376         }
2377         io_mem_write(section->mr, addr, val, 4);
2378     } else {
2379         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2380                                & TARGET_PAGE_MASK)
2381             + memory_region_section_addr(section, addr);
2382         ptr = qemu_get_ram_ptr(addr1);
2383         stl_p(ptr, val);
2384
2385         if (unlikely(in_migration)) {
2386             if (!cpu_physical_memory_is_dirty(addr1)) {
2387                 /* invalidate code */
2388                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2389                 /* set dirty bit */
2390                 cpu_physical_memory_set_dirty_flags(
2391                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2392             }
2393         }
2394     }
2395 }
2396
2397 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2398 {
2399     uint8_t *ptr;
2400     MemoryRegionSection *section;
2401
2402     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2403
2404     if (!memory_region_is_ram(section->mr) || section->readonly) {
2405         addr = memory_region_section_addr(section, addr);
2406         if (memory_region_is_ram(section->mr)) {
2407             section = &phys_sections[phys_section_rom];
2408         }
2409 #ifdef TARGET_WORDS_BIGENDIAN
2410         io_mem_write(section->mr, addr, val >> 32, 4);
2411         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2412 #else
2413         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2414         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2415 #endif
2416     } else {
2417         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2418                                 & TARGET_PAGE_MASK)
2419                                + memory_region_section_addr(section, addr));
2420         stq_p(ptr, val);
2421     }
2422 }
2423
2424 /* warning: addr must be aligned */
2425 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2426                                      enum device_endian endian)
2427 {
2428     uint8_t *ptr;
2429     MemoryRegionSection *section;
2430
2431     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2432
2433     if (!memory_region_is_ram(section->mr) || section->readonly) {
2434         addr = memory_region_section_addr(section, addr);
2435         if (memory_region_is_ram(section->mr)) {
2436             section = &phys_sections[phys_section_rom];
2437         }
2438 #if defined(TARGET_WORDS_BIGENDIAN)
2439         if (endian == DEVICE_LITTLE_ENDIAN) {
2440             val = bswap32(val);
2441         }
2442 #else
2443         if (endian == DEVICE_BIG_ENDIAN) {
2444             val = bswap32(val);
2445         }
2446 #endif
2447         io_mem_write(section->mr, addr, val, 4);
2448     } else {
2449         unsigned long addr1;
2450         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2451             + memory_region_section_addr(section, addr);
2452         /* RAM case */
2453         ptr = qemu_get_ram_ptr(addr1);
2454         switch (endian) {
2455         case DEVICE_LITTLE_ENDIAN:
2456             stl_le_p(ptr, val);
2457             break;
2458         case DEVICE_BIG_ENDIAN:
2459             stl_be_p(ptr, val);
2460             break;
2461         default:
2462             stl_p(ptr, val);
2463             break;
2464         }
2465         invalidate_and_set_dirty(addr1, 4);
2466     }
2467 }
2468
2469 void stl_phys(hwaddr addr, uint32_t val)
2470 {
2471     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2472 }
2473
2474 void stl_le_phys(hwaddr addr, uint32_t val)
2475 {
2476     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2477 }
2478
2479 void stl_be_phys(hwaddr addr, uint32_t val)
2480 {
2481     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2482 }
2483
2484 /* XXX: optimize */
2485 void stb_phys(hwaddr addr, uint32_t val)
2486 {
2487     uint8_t v = val;
2488     cpu_physical_memory_write(addr, &v, 1);
2489 }
2490
2491 /* warning: addr must be aligned */
2492 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2493                                      enum device_endian endian)
2494 {
2495     uint8_t *ptr;
2496     MemoryRegionSection *section;
2497
2498     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2499
2500     if (!memory_region_is_ram(section->mr) || section->readonly) {
2501         addr = memory_region_section_addr(section, addr);
2502         if (memory_region_is_ram(section->mr)) {
2503             section = &phys_sections[phys_section_rom];
2504         }
2505 #if defined(TARGET_WORDS_BIGENDIAN)
2506         if (endian == DEVICE_LITTLE_ENDIAN) {
2507             val = bswap16(val);
2508         }
2509 #else
2510         if (endian == DEVICE_BIG_ENDIAN) {
2511             val = bswap16(val);
2512         }
2513 #endif
2514         io_mem_write(section->mr, addr, val, 2);
2515     } else {
2516         unsigned long addr1;
2517         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2518             + memory_region_section_addr(section, addr);
2519         /* RAM case */
2520         ptr = qemu_get_ram_ptr(addr1);
2521         switch (endian) {
2522         case DEVICE_LITTLE_ENDIAN:
2523             stw_le_p(ptr, val);
2524             break;
2525         case DEVICE_BIG_ENDIAN:
2526             stw_be_p(ptr, val);
2527             break;
2528         default:
2529             stw_p(ptr, val);
2530             break;
2531         }
2532         invalidate_and_set_dirty(addr1, 2);
2533     }
2534 }
2535
2536 void stw_phys(hwaddr addr, uint32_t val)
2537 {
2538     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2539 }
2540
2541 void stw_le_phys(hwaddr addr, uint32_t val)
2542 {
2543     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2544 }
2545
2546 void stw_be_phys(hwaddr addr, uint32_t val)
2547 {
2548     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2549 }
2550
2551 /* XXX: optimize */
2552 void stq_phys(hwaddr addr, uint64_t val)
2553 {
2554     val = tswap64(val);
2555     cpu_physical_memory_write(addr, &val, 8);
2556 }
2557
2558 void stq_le_phys(hwaddr addr, uint64_t val)
2559 {
2560     val = cpu_to_le64(val);
2561     cpu_physical_memory_write(addr, &val, 8);
2562 }
2563
2564 void stq_be_phys(hwaddr addr, uint64_t val)
2565 {
2566     val = cpu_to_be64(val);
2567     cpu_physical_memory_write(addr, &val, 8);
2568 }
2569
2570 /* virtual memory access for debug (includes writing to ROM) */
2571 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2572                         uint8_t *buf, int len, int is_write)
2573 {
2574     int l;
2575     hwaddr phys_addr;
2576     target_ulong page;
2577
2578     while (len > 0) {
2579         page = addr & TARGET_PAGE_MASK;
2580         phys_addr = cpu_get_phys_page_debug(env, page);
2581         /* if no physical page mapped, return an error */
2582         if (phys_addr == -1)
2583             return -1;
2584         l = (page + TARGET_PAGE_SIZE) - addr;
2585         if (l > len)
2586             l = len;
2587         phys_addr += (addr & ~TARGET_PAGE_MASK);
2588         if (is_write)
2589             cpu_physical_memory_write_rom(phys_addr, buf, l);
2590         else
2591             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2592         len -= l;
2593         buf += l;
2594         addr += l;
2595     }
2596     return 0;
2597 }
2598 #endif
2599
2600 #if !defined(CONFIG_USER_ONLY)
2601
2602 /*
2603  * A helper function for the _utterly broken_ virtio device model to find out
2604  * whether it's running on a big-endian machine. Don't do this at home, kids!
2605  */
2606 bool virtio_is_big_endian(void);
2607 bool virtio_is_big_endian(void)
2608 {
2609 #if defined(TARGET_WORDS_BIGENDIAN)
2610     return true;
2611 #else
2612     return false;
2613 #endif
2614 }
2615
2616 #endif
2617
2618 #ifndef CONFIG_USER_ONLY
2619 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2620 {
2621     MemoryRegionSection *section;
2622
2623     section = phys_page_find(address_space_memory.dispatch,
2624                              phys_addr >> TARGET_PAGE_BITS);
2625
2626     return !(memory_region_is_ram(section->mr) ||
2627              memory_region_is_romd(section->mr));
2628 }
2629 #endif