[qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
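
/*
 * Illustrative sketch (not part of the original file): how the radix tree
 * above resolves a physical address.  phys_page_find() consumes L2_BITS of
 * the page index per level over P_L2_LEVELS levels and falls back to
 * phys_section_unassigned for unmapped ranges.  The dispatch pointer and
 * address here are hypothetical.
 */
static inline MemoryRegionSection *example_lookup_section(AddressSpaceDispatch *d,
                                                          hwaddr addr)
{
    return phys_page_find(d, addr >> TARGET_PAGE_BITS);
}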
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUArchState *env = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     env->interrupt_request &= ~0x01;
231     tlb_flush(env, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUArchState),
244         VMSTATE_UINT32(interrupt_request, CPUArchState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #endif
249
250 CPUState *qemu_get_cpu(int index)
251 {
252     CPUArchState *env = first_cpu;
253     CPUState *cpu = NULL;
254
255     while (env) {
256         cpu = ENV_GET_CPU(env);
257         if (cpu->cpu_index == index) {
258             break;
259         }
260         env = env->next_cpu;
261     }
262
263     return cpu;
264 }
265
266 void cpu_exec_init(CPUArchState *env)
267 {
268     CPUState *cpu = ENV_GET_CPU(env);
269     CPUArchState **penv;
270     int cpu_index;
271
272 #if defined(CONFIG_USER_ONLY)
273     cpu_list_lock();
274 #endif
275     env->next_cpu = NULL;
276     penv = &first_cpu;
277     cpu_index = 0;
278     while (*penv != NULL) {
279         penv = &(*penv)->next_cpu;
280         cpu_index++;
281     }
282     cpu->cpu_index = cpu_index;
283     cpu->numa_node = 0;
284     QTAILQ_INIT(&env->breakpoints);
285     QTAILQ_INIT(&env->watchpoints);
286 #ifndef CONFIG_USER_ONLY
287     cpu->thread_id = qemu_get_thread_id();
288 #endif
289     *penv = env;
290 #if defined(CONFIG_USER_ONLY)
291     cpu_list_unlock();
292 #endif
293 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
294     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
295     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
296                     cpu_save, cpu_load, env);
297 #endif
298 }
299
300 #if defined(TARGET_HAS_ICE)
301 #if defined(CONFIG_USER_ONLY)
302 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
303 {
304     tb_invalidate_phys_page_range(pc, pc + 1, 0);
305 }
306 #else
307 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
308 {
309     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
310             (pc & ~TARGET_PAGE_MASK));
311 }
312 #endif
313 #endif /* TARGET_HAS_ICE */
314
315 #if defined(CONFIG_USER_ONLY)
316 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
317
318 {
319 }
320
321 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
322                           int flags, CPUWatchpoint **watchpoint)
323 {
324     return -ENOSYS;
325 }
326 #else
327 /* Add a watchpoint.  */
328 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
329                           int flags, CPUWatchpoint **watchpoint)
330 {
331     target_ulong len_mask = ~(len - 1);
332     CPUWatchpoint *wp;
333
334     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
335     if ((len & (len - 1)) || (addr & ~len_mask) ||
336             len == 0 || len > TARGET_PAGE_SIZE) {
337         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
338                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
339         return -EINVAL;
340     }
341     wp = g_malloc(sizeof(*wp));
342
343     wp->vaddr = addr;
344     wp->len_mask = len_mask;
345     wp->flags = flags;
346
347     /* keep all GDB-injected watchpoints in front */
348     if (flags & BP_GDB)
349         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
350     else
351         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
352
353     tlb_flush_page(env, addr);
354
355     if (watchpoint)
356         *watchpoint = wp;
357     return 0;
358 }
359
360 /* Remove a specific watchpoint.  */
361 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
362                           int flags)
363 {
364     target_ulong len_mask = ~(len - 1);
365     CPUWatchpoint *wp;
366
367     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
368         if (addr == wp->vaddr && len_mask == wp->len_mask
369                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
370             cpu_watchpoint_remove_by_ref(env, wp);
371             return 0;
372         }
373     }
374     return -ENOENT;
375 }
376
377 /* Remove a specific watchpoint by reference.  */
378 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
379 {
380     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
381
382     tlb_flush_page(env, watchpoint->vaddr);
383
384     g_free(watchpoint);
385 }
386
387 /* Remove all matching watchpoints.  */
388 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
389 {
390     CPUWatchpoint *wp, *next;
391
392     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
393         if (wp->flags & mask)
394             cpu_watchpoint_remove_by_ref(env, wp);
395     }
396 }
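
/*
 * Illustrative sketch (not part of the original file): how a debug stub
 * might drive the watchpoint API above.  The CPU state and address are
 * hypothetical; the length must be a power of two and the address must be
 * aligned to it, as checked in cpu_watchpoint_insert().
 */
static inline int example_set_and_clear_write_watchpoint(CPUArchState *env,
                                                         target_ulong addr)
{
    CPUWatchpoint *wp;
    int ret;

    ret = cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp);
    if (ret == 0) {
        cpu_watchpoint_remove_by_ref(env, wp);
    }
    return ret;
}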
397 #endif
398
399 /* Add a breakpoint.  */
400 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
401                           CPUBreakpoint **breakpoint)
402 {
403 #if defined(TARGET_HAS_ICE)
404     CPUBreakpoint *bp;
405
406     bp = g_malloc(sizeof(*bp));
407
408     bp->pc = pc;
409     bp->flags = flags;
410
411     /* keep all GDB-injected breakpoints in front */
412     if (flags & BP_GDB)
413         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
414     else
415         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
416
417     breakpoint_invalidate(env, pc);
418
419     if (breakpoint)
420         *breakpoint = bp;
421     return 0;
422 #else
423     return -ENOSYS;
424 #endif
425 }
426
427 /* Remove a specific breakpoint.  */
428 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
429 {
430 #if defined(TARGET_HAS_ICE)
431     CPUBreakpoint *bp;
432
433     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
434         if (bp->pc == pc && bp->flags == flags) {
435             cpu_breakpoint_remove_by_ref(env, bp);
436             return 0;
437         }
438     }
439     return -ENOENT;
440 #else
441     return -ENOSYS;
442 #endif
443 }
444
445 /* Remove a specific breakpoint by reference.  */
446 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
447 {
448 #if defined(TARGET_HAS_ICE)
449     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
450
451     breakpoint_invalidate(env, breakpoint->pc);
452
453     g_free(breakpoint);
454 #endif
455 }
456
457 /* Remove all matching breakpoints. */
458 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
459 {
460 #if defined(TARGET_HAS_ICE)
461     CPUBreakpoint *bp, *next;
462
463     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
464         if (bp->flags & mask)
465             cpu_breakpoint_remove_by_ref(env, bp);
466     }
467 #endif
468 }
469
470 /* enable or disable single step mode. EXCP_DEBUG is returned by the
471    CPU loop after each instruction */
472 void cpu_single_step(CPUArchState *env, int enabled)
473 {
474 #if defined(TARGET_HAS_ICE)
475     if (env->singlestep_enabled != enabled) {
476         env->singlestep_enabled = enabled;
477         if (kvm_enabled())
478             kvm_update_guest_debug(env, 0);
479         else {
480             /* must flush all the translated code to avoid inconsistencies */
481             /* XXX: only flush what is necessary */
482             tb_flush(env);
483         }
484     }
485 #endif
486 }
487
488 void cpu_reset_interrupt(CPUArchState *env, int mask)
489 {
490     env->interrupt_request &= ~mask;
491 }
492
493 void cpu_exit(CPUArchState *env)
494 {
495     env->exit_request = 1;
496     cpu_unlink_tb(env);
497 }
498
499 void cpu_abort(CPUArchState *env, const char *fmt, ...)
500 {
501     va_list ap;
502     va_list ap2;
503
504     va_start(ap, fmt);
505     va_copy(ap2, ap);
506     fprintf(stderr, "qemu: fatal: ");
507     vfprintf(stderr, fmt, ap);
508     fprintf(stderr, "\n");
509     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
510     if (qemu_log_enabled()) {
511         qemu_log("qemu: fatal: ");
512         qemu_log_vprintf(fmt, ap2);
513         qemu_log("\n");
514         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
515         qemu_log_flush();
516         qemu_log_close();
517     }
518     va_end(ap2);
519     va_end(ap);
520 #if defined(CONFIG_USER_ONLY)
521     {
522         struct sigaction act;
523         sigfillset(&act.sa_mask);
524         act.sa_handler = SIG_DFL;
525         sigaction(SIGABRT, &act, NULL);
526     }
527 #endif
528     abort();
529 }
530
531 CPUArchState *cpu_copy(CPUArchState *env)
532 {
533     CPUArchState *new_env = cpu_init(env->cpu_model_str);
534     CPUArchState *next_cpu = new_env->next_cpu;
535 #if defined(TARGET_HAS_ICE)
536     CPUBreakpoint *bp;
537     CPUWatchpoint *wp;
538 #endif
539
540     memcpy(new_env, env, sizeof(CPUArchState));
541
542     /* Preserve chaining. */
543     new_env->next_cpu = next_cpu;
544
545     /* Clone all break/watchpoints.
546        Note: Once we support ptrace with hw-debug register access, make sure
547        BP_CPU break/watchpoints are handled correctly on clone. */
548     QTAILQ_INIT(&env->breakpoints);
549     QTAILQ_INIT(&env->watchpoints);
550 #if defined(TARGET_HAS_ICE)
551     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
552         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
553     }
554     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
555         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
556                               wp->flags, NULL);
557     }
558 #endif
559
560     return new_env;
561 }
562
563 #if !defined(CONFIG_USER_ONLY)
564 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
565                                       uintptr_t length)
566 {
567     uintptr_t start1;
568
569     /* we modify the TLB cache so that the dirty bit will be set again
570        when accessing the range */
571     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
572     /* Check that we don't span multiple blocks - this breaks the
573        address comparisons below.  */
574     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
575             != (end - 1) - start) {
576         abort();
577     }
578     cpu_tlb_reset_dirty_all(start1, length);
579
580 }
581
582 /* Note: start and end must be within the same ram block.  */
583 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
584                                      int dirty_flags)
585 {
586     uintptr_t length;
587
588     start &= TARGET_PAGE_MASK;
589     end = TARGET_PAGE_ALIGN(end);
590
591     length = end - start;
592     if (length == 0)
593         return;
594     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
595
596     if (tcg_enabled()) {
597         tlb_reset_dirty_range_all(start, end, length);
598     }
599 }
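
/*
 * Illustrative sketch (not part of the original file): how a display device
 * might consume the dirty bitmap maintained above, using the VGA dirty flag.
 * The framebuffer page offset is hypothetical.
 */
static inline bool example_page_needs_redraw(ram_addr_t fb_page)
{
    if (!cpu_physical_memory_get_dirty(fb_page, TARGET_PAGE_SIZE,
                                       VGA_DIRTY_FLAG)) {
        return false;
    }
    /* Re-arm dirty tracking for this page once it has been repainted. */
    cpu_physical_memory_reset_dirty(fb_page, fb_page + TARGET_PAGE_SIZE,
                                    VGA_DIRTY_FLAG);
    return true;
}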
600
601 static int cpu_physical_memory_set_dirty_tracking(int enable)
602 {
603     int ret = 0;
604     in_migration = enable;
605     return ret;
606 }
607
608 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
609                                                    MemoryRegionSection *section,
610                                                    target_ulong vaddr,
611                                                    hwaddr paddr,
612                                                    int prot,
613                                                    target_ulong *address)
614 {
615     hwaddr iotlb;
616     CPUWatchpoint *wp;
617
618     if (memory_region_is_ram(section->mr)) {
619         /* Normal RAM.  */
620         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
621             + memory_region_section_addr(section, paddr);
622         if (!section->readonly) {
623             iotlb |= phys_section_notdirty;
624         } else {
625             iotlb |= phys_section_rom;
626         }
627     } else {
628         /* IO handlers are currently passed a physical address.
629            It would be nice to pass an offset from the base address
630            of that region.  This would avoid having to special case RAM,
631            and avoid full address decoding in every device.
632            We can't use the high bits of pd for this because
633            IO_MEM_ROMD uses these as a ram address.  */
634         iotlb = section - phys_sections;
635         iotlb += memory_region_section_addr(section, paddr);
636     }
637
638     /* Make accesses to pages with watchpoints go via the
639        watchpoint trap routines.  */
640     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
641         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
642             /* Avoid trapping reads of pages with a write breakpoint. */
643             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
644                 iotlb = phys_section_watch + paddr;
645                 *address |= TLB_MMIO;
646                 break;
647             }
648         }
649     }
650
651     return iotlb;
652 }
653 #endif /* !defined(CONFIG_USER_ONLY) */
654
655 #if !defined(CONFIG_USER_ONLY)
656
657 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
658 typedef struct subpage_t {
659     MemoryRegion iomem;
660     hwaddr base;
661     uint16_t sub_section[TARGET_PAGE_SIZE];
662 } subpage_t;
663
664 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
665                              uint16_t section);
666 static subpage_t *subpage_init(hwaddr base);
667 static void destroy_page_desc(uint16_t section_index)
668 {
669     MemoryRegionSection *section = &phys_sections[section_index];
670     MemoryRegion *mr = section->mr;
671
672     if (mr->subpage) {
673         subpage_t *subpage = container_of(mr, subpage_t, iomem);
674         memory_region_destroy(&subpage->iomem);
675         g_free(subpage);
676     }
677 }
678
679 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
680 {
681     unsigned i;
682     PhysPageEntry *p;
683
684     if (lp->ptr == PHYS_MAP_NODE_NIL) {
685         return;
686     }
687
688     p = phys_map_nodes[lp->ptr];
689     for (i = 0; i < L2_SIZE; ++i) {
690         if (!p[i].is_leaf) {
691             destroy_l2_mapping(&p[i], level - 1);
692         } else {
693             destroy_page_desc(p[i].ptr);
694         }
695     }
696     lp->is_leaf = 0;
697     lp->ptr = PHYS_MAP_NODE_NIL;
698 }
699
700 static void destroy_all_mappings(AddressSpaceDispatch *d)
701 {
702     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
703     phys_map_nodes_reset();
704 }
705
706 static uint16_t phys_section_add(MemoryRegionSection *section)
707 {
708     if (phys_sections_nb == phys_sections_nb_alloc) {
709         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
710         phys_sections = g_renew(MemoryRegionSection, phys_sections,
711                                 phys_sections_nb_alloc);
712     }
713     phys_sections[phys_sections_nb] = *section;
714     return phys_sections_nb++;
715 }
716
717 static void phys_sections_clear(void)
718 {
719     phys_sections_nb = 0;
720 }
721
722 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
723 {
724     subpage_t *subpage;
725     hwaddr base = section->offset_within_address_space
726         & TARGET_PAGE_MASK;
727     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
728     MemoryRegionSection subsection = {
729         .offset_within_address_space = base,
730         .size = TARGET_PAGE_SIZE,
731     };
732     hwaddr start, end;
733
734     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
735
736     if (!(existing->mr->subpage)) {
737         subpage = subpage_init(base);
738         subsection.mr = &subpage->iomem;
739         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
740                       phys_section_add(&subsection));
741     } else {
742         subpage = container_of(existing->mr, subpage_t, iomem);
743     }
744     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
745     end = start + section->size - 1;
746     subpage_register(subpage, start, end, phys_section_add(section));
747 }
748
749
750 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
751 {
752     hwaddr start_addr = section->offset_within_address_space;
753     ram_addr_t size = section->size;
754     hwaddr addr;
755     uint16_t section_index = phys_section_add(section);
756
757     assert(size);
758
759     addr = start_addr;
760     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
761                   section_index);
762 }
763
764 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
765 {
766     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
767     MemoryRegionSection now = *section, remain = *section;
768
769     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
770         || (now.size < TARGET_PAGE_SIZE)) {
771         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
772                        - now.offset_within_address_space,
773                        now.size);
774         register_subpage(d, &now);
775         remain.size -= now.size;
776         remain.offset_within_address_space += now.size;
777         remain.offset_within_region += now.size;
778     }
779     while (remain.size >= TARGET_PAGE_SIZE) {
780         now = remain;
781         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
782             now.size = TARGET_PAGE_SIZE;
783             register_subpage(d, &now);
784         } else {
785             now.size &= TARGET_PAGE_MASK;
786             register_multipage(d, &now);
787         }
788         remain.size -= now.size;
789         remain.offset_within_address_space += now.size;
790         remain.offset_within_region += now.size;
791     }
792     now = remain;
793     if (now.size) {
794         register_subpage(d, &now);
795     }
796 }
797
798 void qemu_flush_coalesced_mmio_buffer(void)
799 {
800     if (kvm_enabled())
801         kvm_flush_coalesced_mmio_buffer();
802 }
803
804 void qemu_mutex_lock_ramlist(void)
805 {
806     qemu_mutex_lock(&ram_list.mutex);
807 }
808
809 void qemu_mutex_unlock_ramlist(void)
810 {
811     qemu_mutex_unlock(&ram_list.mutex);
812 }
813
814 #if defined(__linux__) && !defined(TARGET_S390X)
815
816 #include <sys/vfs.h>
817
818 #define HUGETLBFS_MAGIC       0x958458f6
819
820 static long gethugepagesize(const char *path)
821 {
822     struct statfs fs;
823     int ret;
824
825     do {
826         ret = statfs(path, &fs);
827     } while (ret != 0 && errno == EINTR);
828
829     if (ret != 0) {
830         perror(path);
831         return 0;
832     }
833
834     if (fs.f_type != HUGETLBFS_MAGIC)
835         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
836
837     return fs.f_bsize;
838 }
839
840 static void *file_ram_alloc(RAMBlock *block,
841                             ram_addr_t memory,
842                             const char *path)
843 {
844     char *filename;
845     void *area;
846     int fd;
847 #ifdef MAP_POPULATE
848     int flags;
849 #endif
850     unsigned long hpagesize;
851
852     hpagesize = gethugepagesize(path);
853     if (!hpagesize) {
854         return NULL;
855     }
856
857     if (memory < hpagesize) {
858         return NULL;
859     }
860
861     if (kvm_enabled() && !kvm_has_sync_mmu()) {
862         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
863         return NULL;
864     }
865
866     filename = g_strdup_printf("%s/qemu_back_mem.XXXXXX", path);
867
868     fd = mkstemp(filename);
869     if (fd < 0) {
870         perror("unable to create backing store for hugepages");
871         g_free(filename);
872         return NULL;
873     }
874     unlink(filename);
875     g_free(filename);
876
877     memory = (memory+hpagesize-1) & ~(hpagesize-1);
878
879     /*
880      * ftruncate is not supported by hugetlbfs in older
881      * hosts, so don't bother bailing out on errors.
882      * If anything goes wrong with it under other filesystems,
883      * mmap will fail.
884      */
885     if (ftruncate(fd, memory))
886         perror("ftruncate");
887
888 #ifdef MAP_POPULATE
889     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
890      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
891      * to sidestep this quirk.
892      */
893     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
894     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
895 #else
896     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
897 #endif
898     if (area == MAP_FAILED) {
899         perror("file_ram_alloc: can't mmap RAM pages");
900         close(fd);
901         return (NULL);
902     }
903     block->fd = fd;
904     return area;
905 }
906 #endif
907
908 static ram_addr_t find_ram_offset(ram_addr_t size)
909 {
910     RAMBlock *block, *next_block;
911     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
912
913     if (QTAILQ_EMPTY(&ram_list.blocks))
914         return 0;
915
916     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
917         ram_addr_t end, next = RAM_ADDR_MAX;
918
919         end = block->offset + block->length;
920
921         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
922             if (next_block->offset >= end) {
923                 next = MIN(next, next_block->offset);
924             }
925         }
926         if (next - end >= size && next - end < mingap) {
927             offset = end;
928             mingap = next - end;
929         }
930     }
931
932     if (offset == RAM_ADDR_MAX) {
933         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
934                 (uint64_t)size);
935         abort();
936     }
937
938     return offset;
939 }
940
941 ram_addr_t last_ram_offset(void)
942 {
943     RAMBlock *block;
944     ram_addr_t last = 0;
945
946     QTAILQ_FOREACH(block, &ram_list.blocks, next)
947         last = MAX(last, block->offset + block->length);
948
949     return last;
950 }
951
952 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
953 {
954     int ret;
955     QemuOpts *machine_opts;
956
957     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
958     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
959     if (machine_opts &&
960         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
961         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
962         if (ret) {
963             perror("qemu_madvise");
964             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
965                             "but dump_guest_core=off specified\n");
966         }
967     }
968 }
969
970 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
971 {
972     RAMBlock *new_block, *block;
973
974     new_block = NULL;
975     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
976         if (block->offset == addr) {
977             new_block = block;
978             break;
979         }
980     }
981     assert(new_block);
982     assert(!new_block->idstr[0]);
983
984     if (dev) {
985         char *id = qdev_get_dev_path(dev);
986         if (id) {
987             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
988             g_free(id);
989         }
990     }
991     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
992
993     /* This assumes the iothread lock is taken here too.  */
994     qemu_mutex_lock_ramlist();
995     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
996         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
997             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
998                     new_block->idstr);
999             abort();
1000         }
1001     }
1002     qemu_mutex_unlock_ramlist();
1003 }
1004
1005 static int memory_try_enable_merging(void *addr, size_t len)
1006 {
1007     QemuOpts *opts;
1008
1009     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1010     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1011         /* disabled by the user */
1012         return 0;
1013     }
1014
1015     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1016 }
1017
1018 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1019                                    MemoryRegion *mr)
1020 {
1021     RAMBlock *block, *new_block;
1022
1023     size = TARGET_PAGE_ALIGN(size);
1024     new_block = g_malloc0(sizeof(*new_block));
1025
1026     /* This assumes the iothread lock is taken here too.  */
1027     qemu_mutex_lock_ramlist();
1028     new_block->mr = mr;
1029     new_block->offset = find_ram_offset(size);
1030     if (host) {
1031         new_block->host = host;
1032         new_block->flags |= RAM_PREALLOC_MASK;
1033     } else {
1034         if (mem_path) {
1035 #if defined (__linux__) && !defined(TARGET_S390X)
1036             new_block->host = file_ram_alloc(new_block, size, mem_path);
1037             if (!new_block->host) {
1038                 new_block->host = qemu_vmalloc(size);
1039                 memory_try_enable_merging(new_block->host, size);
1040             }
1041 #else
1042             fprintf(stderr, "-mem-path option unsupported\n");
1043             exit(1);
1044 #endif
1045         } else {
1046             if (xen_enabled()) {
1047                 xen_ram_alloc(new_block->offset, size, mr);
1048             } else if (kvm_enabled()) {
1049                 /* some s390/kvm configurations have special constraints */
1050                 new_block->host = kvm_vmalloc(size);
1051             } else {
1052                 new_block->host = qemu_vmalloc(size);
1053             }
1054             memory_try_enable_merging(new_block->host, size);
1055         }
1056     }
1057     new_block->length = size;
1058
1059     /* Keep the list sorted from biggest to smallest block.  */
1060     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1061         if (block->length < new_block->length) {
1062             break;
1063         }
1064     }
1065     if (block) {
1066         QTAILQ_INSERT_BEFORE(block, new_block, next);
1067     } else {
1068         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1069     }
1070     ram_list.mru_block = NULL;
1071
1072     ram_list.version++;
1073     qemu_mutex_unlock_ramlist();
1074
1075     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1076                                        last_ram_offset() >> TARGET_PAGE_BITS);
1077     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1078            0, size >> TARGET_PAGE_BITS);
1079     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1080
1081     qemu_ram_setup_dump(new_block->host, size);
1082     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1083
1084     if (kvm_enabled())
1085         kvm_setup_guest_memory(new_block->host, size);
1086
1087     return new_block->offset;
1088 }
1089
1090 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1091 {
1092     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1093 }
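
/*
 * Illustrative sketch (not part of the original file): device models do not
 * normally call qemu_ram_alloc() directly; they go through the memory API,
 * which ends up here.  The region pointer, name and size are hypothetical.
 * For caller-provided host memory, memory_region_init_ram_ptr() takes the
 * qemu_ram_alloc_from_ptr() path instead.
 */
static inline void example_init_vram(MemoryRegion *vram)
{
    memory_region_init_ram(vram, "example.vram", 8 * 1024 * 1024);
}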
1094
1095 void qemu_ram_free_from_ptr(ram_addr_t addr)
1096 {
1097     RAMBlock *block;
1098
1099     /* This assumes the iothread lock is taken here too.  */
1100     qemu_mutex_lock_ramlist();
1101     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1102         if (addr == block->offset) {
1103             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1104             ram_list.mru_block = NULL;
1105             ram_list.version++;
1106             g_free(block);
1107             break;
1108         }
1109     }
1110     qemu_mutex_unlock_ramlist();
1111 }
1112
1113 void qemu_ram_free(ram_addr_t addr)
1114 {
1115     RAMBlock *block;
1116
1117     /* This assumes the iothread lock is taken here too.  */
1118     qemu_mutex_lock_ramlist();
1119     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1120         if (addr == block->offset) {
1121             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1122             ram_list.mru_block = NULL;
1123             ram_list.version++;
1124             if (block->flags & RAM_PREALLOC_MASK) {
1125                 ;
1126             } else if (mem_path) {
1127 #if defined (__linux__) && !defined(TARGET_S390X)
1128                 if (block->fd) {
1129                     munmap(block->host, block->length);
1130                     close(block->fd);
1131                 } else {
1132                     qemu_vfree(block->host);
1133                 }
1134 #else
1135                 abort();
1136 #endif
1137             } else {
1138 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1139                 munmap(block->host, block->length);
1140 #else
1141                 if (xen_enabled()) {
1142                     xen_invalidate_map_cache_entry(block->host);
1143                 } else {
1144                     qemu_vfree(block->host);
1145                 }
1146 #endif
1147             }
1148             g_free(block);
1149             break;
1150         }
1151     }
1152     qemu_mutex_unlock_ramlist();
1153
1154 }
1155
1156 #ifndef _WIN32
1157 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1158 {
1159     RAMBlock *block;
1160     ram_addr_t offset;
1161     int flags;
1162     void *area, *vaddr;
1163
1164     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1165         offset = addr - block->offset;
1166         if (offset < block->length) {
1167             vaddr = block->host + offset;
1168             if (block->flags & RAM_PREALLOC_MASK) {
1169                 ;
1170             } else {
1171                 flags = MAP_FIXED;
1172                 munmap(vaddr, length);
1173                 if (mem_path) {
1174 #if defined(__linux__) && !defined(TARGET_S390X)
1175                     if (block->fd) {
1176 #ifdef MAP_POPULATE
1177                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1178                             MAP_PRIVATE;
1179 #else
1180                         flags |= MAP_PRIVATE;
1181 #endif
1182                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1183                                     flags, block->fd, offset);
1184                     } else {
1185                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1186                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1187                                     flags, -1, 0);
1188                     }
1189 #else
1190                     abort();
1191 #endif
1192                 } else {
1193 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1194                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1195                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1196                                 flags, -1, 0);
1197 #else
1198                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1199                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1200                                 flags, -1, 0);
1201 #endif
1202                 }
1203                 if (area != vaddr) {
1204                     fprintf(stderr, "Could not remap addr: "
1205                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1206                             length, addr);
1207                     exit(1);
1208                 }
1209                 memory_try_enable_merging(vaddr, length);
1210                 qemu_ram_setup_dump(vaddr, length);
1211             }
1212             return;
1213         }
1214     }
1215 }
1216 #endif /* !_WIN32 */
1217
1218 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1219    With the exception of the softmmu code in this file, this should
1220    only be used for local memory (e.g. video ram) that the device owns,
1221    and knows it isn't going to access beyond the end of the block.
1222
1223    It should not be used for general purpose DMA.
1224    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1225  */
1226 void *qemu_get_ram_ptr(ram_addr_t addr)
1227 {
1228     RAMBlock *block;
1229
1230     /* The list is protected by the iothread lock here.  */
1231     block = ram_list.mru_block;
1232     if (block && addr - block->offset < block->length) {
1233         goto found;
1234     }
1235     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1236         if (addr - block->offset < block->length) {
1237             goto found;
1238         }
1239     }
1240
1241     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1242     abort();
1243
1244 found:
1245     ram_list.mru_block = block;
1246     if (xen_enabled()) {
1247         /* We need to check if the requested address is in the RAM
1248          * because we don't want to map the entire memory in QEMU.
1249          * In that case just map until the end of the page.
1250          */
1251         if (block->offset == 0) {
1252             return xen_map_cache(addr, 0, 0);
1253         } else if (block->host == NULL) {
1254             block->host =
1255                 xen_map_cache(block->offset, block->length, 1);
1256         }
1257     }
1258     return block->host + (addr - block->offset);
1259 }
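
/*
 * Illustrative sketch (not part of the original file): per the comment
 * above, general purpose DMA should go through the memory API rather than
 * qemu_get_ram_ptr().  The address and buffer here are hypothetical.
 */
static inline void example_dma_write(hwaddr addr, const uint8_t *buf, int len)
{
    /* Honours MMIO dispatch and dirty tracking, unlike a raw host pointer. */
    cpu_physical_memory_write(addr, buf, len);
}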
1260
1261 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1262  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1263  *
1264  * ??? Is this still necessary?
1265  */
1266 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1267 {
1268     RAMBlock *block;
1269
1270     /* The list is protected by the iothread lock here.  */
1271     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1272         if (addr - block->offset < block->length) {
1273             if (xen_enabled()) {
1274                 /* We need to check if the requested address is in the RAM
1275                  * because we don't want to map the entire memory in QEMU.
1276                  * In that case just map until the end of the page.
1277                  */
1278                 if (block->offset == 0) {
1279                     return xen_map_cache(addr, 0, 0);
1280                 } else if (block->host == NULL) {
1281                     block->host =
1282                         xen_map_cache(block->offset, block->length, 1);
1283                 }
1284             }
1285             return block->host + (addr - block->offset);
1286         }
1287     }
1288
1289     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1290     abort();
1291
1292     return NULL;
1293 }
1294
1295 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1296  * but takes a size argument */
1297 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1298 {
1299     if (*size == 0) {
1300         return NULL;
1301     }
1302     if (xen_enabled()) {
1303         return xen_map_cache(addr, *size, 1);
1304     } else {
1305         RAMBlock *block;
1306
1307         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1308             if (addr - block->offset < block->length) {
1309                 if (addr - block->offset + *size > block->length)
1310                     *size = block->length - addr + block->offset;
1311                 return block->host + (addr - block->offset);
1312             }
1313         }
1314
1315         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1316         abort();
1317     }
1318 }
1319
1320 void qemu_put_ram_ptr(void *addr)
1321 {
1322     trace_qemu_put_ram_ptr(addr);
1323 }
1324
1325 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1326 {
1327     RAMBlock *block;
1328     uint8_t *host = ptr;
1329
1330     if (xen_enabled()) {
1331         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1332         return 0;
1333     }
1334
1335     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1336         /* This case happens when the block is not mapped. */
1337         if (block->host == NULL) {
1338             continue;
1339         }
1340         if (host - block->host < block->length) {
1341             *ram_addr = block->offset + (host - block->host);
1342             return 0;
1343         }
1344     }
1345
1346     return -1;
1347 }
1348
1349 /* Some of the softmmu routines need to translate from a host pointer
1350    (typically a TLB entry) back to a ram offset.  */
1351 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1352 {
1353     ram_addr_t ram_addr;
1354
1355     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1356         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1357         abort();
1358     }
1359     return ram_addr;
1360 }
1361
1362 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1363                                     unsigned size)
1364 {
1365 #ifdef DEBUG_UNASSIGNED
1366     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1367 #endif
1368 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1369     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1370 #endif
1371     return 0;
1372 }
1373
1374 static void unassigned_mem_write(void *opaque, hwaddr addr,
1375                                  uint64_t val, unsigned size)
1376 {
1377 #ifdef DEBUG_UNASSIGNED
1378     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1379 #endif
1380 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1381     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1382 #endif
1383 }
1384
1385 static const MemoryRegionOps unassigned_mem_ops = {
1386     .read = unassigned_mem_read,
1387     .write = unassigned_mem_write,
1388     .endianness = DEVICE_NATIVE_ENDIAN,
1389 };
1390
1391 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1392                                unsigned size)
1393 {
1394     abort();
1395 }
1396
1397 static void error_mem_write(void *opaque, hwaddr addr,
1398                             uint64_t value, unsigned size)
1399 {
1400     abort();
1401 }
1402
1403 static const MemoryRegionOps error_mem_ops = {
1404     .read = error_mem_read,
1405     .write = error_mem_write,
1406     .endianness = DEVICE_NATIVE_ENDIAN,
1407 };
1408
1409 static const MemoryRegionOps rom_mem_ops = {
1410     .read = error_mem_read,
1411     .write = unassigned_mem_write,
1412     .endianness = DEVICE_NATIVE_ENDIAN,
1413 };
1414
1415 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1416                                uint64_t val, unsigned size)
1417 {
1418     int dirty_flags;
1419     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1420     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1421 #if !defined(CONFIG_USER_ONLY)
1422         tb_invalidate_phys_page_fast(ram_addr, size);
1423         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1424 #endif
1425     }
1426     switch (size) {
1427     case 1:
1428         stb_p(qemu_get_ram_ptr(ram_addr), val);
1429         break;
1430     case 2:
1431         stw_p(qemu_get_ram_ptr(ram_addr), val);
1432         break;
1433     case 4:
1434         stl_p(qemu_get_ram_ptr(ram_addr), val);
1435         break;
1436     default:
1437         abort();
1438     }
1439     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1440     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1441     /* we remove the notdirty callback only if the code has been
1442        flushed */
1443     if (dirty_flags == 0xff)
1444         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1445 }
1446
1447 static const MemoryRegionOps notdirty_mem_ops = {
1448     .read = error_mem_read,
1449     .write = notdirty_mem_write,
1450     .endianness = DEVICE_NATIVE_ENDIAN,
1451 };
1452
1453 /* Generate a debug exception if a watchpoint has been hit.  */
1454 static void check_watchpoint(int offset, int len_mask, int flags)
1455 {
1456     CPUArchState *env = cpu_single_env;
1457     target_ulong pc, cs_base;
1458     target_ulong vaddr;
1459     CPUWatchpoint *wp;
1460     int cpu_flags;
1461
1462     if (env->watchpoint_hit) {
1463         /* We re-entered the check after replacing the TB. Now raise
1464          * the debug interrupt so that it will trigger after the
1465          * current instruction. */
1466         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1467         return;
1468     }
1469     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1470     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1471         if ((vaddr == (wp->vaddr & len_mask) ||
1472              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1473             wp->flags |= BP_WATCHPOINT_HIT;
1474             if (!env->watchpoint_hit) {
1475                 env->watchpoint_hit = wp;
1476                 tb_check_watchpoint(env);
1477                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1478                     env->exception_index = EXCP_DEBUG;
1479                     cpu_loop_exit(env);
1480                 } else {
1481                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1482                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1483                     cpu_resume_from_signal(env, NULL);
1484                 }
1485             }
1486         } else {
1487             wp->flags &= ~BP_WATCHPOINT_HIT;
1488         }
1489     }
1490 }
1491
1492 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1493    so these check for a hit then pass through to the normal out-of-line
1494    phys routines.  */
1495 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1496                                unsigned size)
1497 {
1498     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1499     switch (size) {
1500     case 1: return ldub_phys(addr);
1501     case 2: return lduw_phys(addr);
1502     case 4: return ldl_phys(addr);
1503     default: abort();
1504     }
1505 }
1506
1507 static void watch_mem_write(void *opaque, hwaddr addr,
1508                             uint64_t val, unsigned size)
1509 {
1510     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1511     switch (size) {
1512     case 1:
1513         stb_phys(addr, val);
1514         break;
1515     case 2:
1516         stw_phys(addr, val);
1517         break;
1518     case 4:
1519         stl_phys(addr, val);
1520         break;
1521     default: abort();
1522     }
1523 }
1524
1525 static const MemoryRegionOps watch_mem_ops = {
1526     .read = watch_mem_read,
1527     .write = watch_mem_write,
1528     .endianness = DEVICE_NATIVE_ENDIAN,
1529 };
1530
1531 static uint64_t subpage_read(void *opaque, hwaddr addr,
1532                              unsigned len)
1533 {
1534     subpage_t *mmio = opaque;
1535     unsigned int idx = SUBPAGE_IDX(addr);
1536     MemoryRegionSection *section;
1537 #if defined(DEBUG_SUBPAGE)
1538     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1539            mmio, len, addr, idx);
1540 #endif
1541
1542     section = &phys_sections[mmio->sub_section[idx]];
1543     addr += mmio->base;
1544     addr -= section->offset_within_address_space;
1545     addr += section->offset_within_region;
1546     return io_mem_read(section->mr, addr, len);
1547 }
1548
1549 static void subpage_write(void *opaque, hwaddr addr,
1550                           uint64_t value, unsigned len)
1551 {
1552     subpage_t *mmio = opaque;
1553     unsigned int idx = SUBPAGE_IDX(addr);
1554     MemoryRegionSection *section;
1555 #if defined(DEBUG_SUBPAGE)
1556     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1557            " idx %d value %"PRIx64"\n",
1558            __func__, mmio, len, addr, idx, value);
1559 #endif
1560
1561     section = &phys_sections[mmio->sub_section[idx]];
1562     addr += mmio->base;
1563     addr -= section->offset_within_address_space;
1564     addr += section->offset_within_region;
1565     io_mem_write(section->mr, addr, value, len);
1566 }
1567
1568 static const MemoryRegionOps subpage_ops = {
1569     .read = subpage_read,
1570     .write = subpage_write,
1571     .endianness = DEVICE_NATIVE_ENDIAN,
1572 };
1573
1574 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1575                                  unsigned size)
1576 {
1577     ram_addr_t raddr = addr;
1578     void *ptr = qemu_get_ram_ptr(raddr);
1579     switch (size) {
1580     case 1: return ldub_p(ptr);
1581     case 2: return lduw_p(ptr);
1582     case 4: return ldl_p(ptr);
1583     default: abort();
1584     }
1585 }
1586
1587 static void subpage_ram_write(void *opaque, hwaddr addr,
1588                               uint64_t value, unsigned size)
1589 {
1590     ram_addr_t raddr = addr;
1591     void *ptr = qemu_get_ram_ptr(raddr);
1592     switch (size) {
1593     case 1: return stb_p(ptr, value);
1594     case 2: return stw_p(ptr, value);
1595     case 4: return stl_p(ptr, value);
1596     default: abort();
1597     }
1598 }
1599
1600 static const MemoryRegionOps subpage_ram_ops = {
1601     .read = subpage_ram_read,
1602     .write = subpage_ram_write,
1603     .endianness = DEVICE_NATIVE_ENDIAN,
1604 };
1605
1606 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1607                              uint16_t section)
1608 {
1609     int idx, eidx;
1610
1611     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1612         return -1;
1613     idx = SUBPAGE_IDX(start);
1614     eidx = SUBPAGE_IDX(end);
1615 #if defined(DEBUG_SUBPAGE)
1616     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1617            __func__, mmio, start, end, idx, eidx, section);
1618 #endif
1619     if (memory_region_is_ram(phys_sections[section].mr)) {
1620         MemoryRegionSection new_section = phys_sections[section];
1621         new_section.mr = &io_mem_subpage_ram;
1622         section = phys_section_add(&new_section);
1623     }
1624     for (; idx <= eidx; idx++) {
1625         mmio->sub_section[idx] = section;
1626     }
1627
1628     return 0;
1629 }
1630
1631 static subpage_t *subpage_init(hwaddr base)
1632 {
1633     subpage_t *mmio;
1634
1635     mmio = g_malloc0(sizeof(subpage_t));
1636
1637     mmio->base = base;
1638     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1639                           "subpage", TARGET_PAGE_SIZE);
1640     mmio->iomem.subpage = true;
1641 #if defined(DEBUG_SUBPAGE)
1642     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1643            mmio, base, TARGET_PAGE_SIZE);
1644 #endif
1645     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1646
1647     return mmio;
1648 }
1649
1650 static uint16_t dummy_section(MemoryRegion *mr)
1651 {
1652     MemoryRegionSection section = {
1653         .mr = mr,
1654         .offset_within_address_space = 0,
1655         .offset_within_region = 0,
1656         .size = UINT64_MAX,
1657     };
1658
1659     return phys_section_add(&section);
1660 }
1661
1662 MemoryRegion *iotlb_to_region(hwaddr index)
1663 {
1664     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1665 }
1666
1667 static void io_mem_init(void)
1668 {
1669     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1670     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1671     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1672                           "unassigned", UINT64_MAX);
1673     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1674                           "notdirty", UINT64_MAX);
1675     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1676                           "subpage-ram", UINT64_MAX);
1677     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1678                           "watch", UINT64_MAX);
1679 }
1680
1681 static void mem_begin(MemoryListener *listener)
1682 {
1683     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1684
1685     destroy_all_mappings(d);
1686     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1687 }
1688
1689 static void core_begin(MemoryListener *listener)
1690 {
1691     phys_sections_clear();
1692     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1693     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1694     phys_section_rom = dummy_section(&io_mem_rom);
1695     phys_section_watch = dummy_section(&io_mem_watch);
1696 }
1697
1698 static void tcg_commit(MemoryListener *listener)
1699 {
1700     CPUArchState *env;
1701
1702     /* since each CPU stores ram addresses in its TLB cache, we must
1703        reset the modified entries */
1704     /* XXX: slow ! */
1705     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1706         tlb_flush(env, 1);
1707     }
1708 }
1709
1710 static void core_log_global_start(MemoryListener *listener)
1711 {
1712     cpu_physical_memory_set_dirty_tracking(1);
1713 }
1714
1715 static void core_log_global_stop(MemoryListener *listener)
1716 {
1717     cpu_physical_memory_set_dirty_tracking(0);
1718 }
1719
1720 static void io_region_add(MemoryListener *listener,
1721                           MemoryRegionSection *section)
1722 {
1723     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1724
1725     mrio->mr = section->mr;
1726     mrio->offset = section->offset_within_region;
1727     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1728                  section->offset_within_address_space, section->size);
1729     ioport_register(&mrio->iorange);
1730 }
1731
1732 static void io_region_del(MemoryListener *listener,
1733                           MemoryRegionSection *section)
1734 {
1735     isa_unassign_ioport(section->offset_within_address_space, section->size);
1736 }
1737
1738 static MemoryListener core_memory_listener = {
1739     .begin = core_begin,
1740     .log_global_start = core_log_global_start,
1741     .log_global_stop = core_log_global_stop,
1742     .priority = 1,
1743 };
1744
1745 static MemoryListener io_memory_listener = {
1746     .region_add = io_region_add,
1747     .region_del = io_region_del,
1748     .priority = 0,
1749 };
1750
1751 static MemoryListener tcg_memory_listener = {
1752     .commit = tcg_commit,
1753 };
1754
1755 void address_space_init_dispatch(AddressSpace *as)
1756 {
1757     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1758
1759     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1760     d->listener = (MemoryListener) {
1761         .begin = mem_begin,
1762         .region_add = mem_add,
1763         .region_nop = mem_add,
1764         .priority = 0,
1765     };
1766     as->dispatch = d;
1767     memory_listener_register(&d->listener, as);
1768 }
1769
1770 void address_space_destroy_dispatch(AddressSpace *as)
1771 {
1772     AddressSpaceDispatch *d = as->dispatch;
1773
1774     memory_listener_unregister(&d->listener);
1775     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1776     g_free(d);
1777     as->dispatch = NULL;
1778 }
1779
1780 static void memory_map_init(void)
1781 {
1782     system_memory = g_malloc(sizeof(*system_memory));
1783     memory_region_init(system_memory, "system", INT64_MAX);
1784     address_space_init(&address_space_memory, system_memory);
1785     address_space_memory.name = "memory";
1786
1787     system_io = g_malloc(sizeof(*system_io));
1788     memory_region_init(system_io, "io", 65536);
1789     address_space_init(&address_space_io, system_io);
1790     address_space_io.name = "I/O";
1791
1792     memory_listener_register(&core_memory_listener, &address_space_memory);
1793     memory_listener_register(&io_memory_listener, &address_space_io);
1794     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1795
1796     dma_context_init(&dma_context_memory, &address_space_memory,
1797                      NULL, NULL, NULL);
1798 }
1799
1800 MemoryRegion *get_system_memory(void)
1801 {
1802     return system_memory;
1803 }
1804
1805 MemoryRegion *get_system_io(void)
1806 {
1807     return system_io;
1808 }
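
/*
 * Illustrative sketch (not part of this file's code paths): how a board model
 * elsewhere in the tree might hang a RAM region off the root region returned
 * by get_system_memory().  memory_region_init_ram() and
 * memory_region_add_subregion() are the MemoryRegion helpers declared in
 * exec/memory.h, not in this file; the region name, size and 0x10000000 base
 * address are made up for the example.
 */
static void example_map_board_ram(void)
{
    static MemoryRegion board_ram;

    /* Back 64MB of guest-physical space at 0x10000000 with host RAM. */
    memory_region_init_ram(&board_ram, "example.ram", 64 * 1024 * 1024);
    memory_region_add_subregion(get_system_memory(), 0x10000000, &board_ram);
}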
1809
1810 #endif /* !defined(CONFIG_USER_ONLY) */
1811
1812 /* physical memory access (slow version, mainly for debug) */
1813 #if defined(CONFIG_USER_ONLY)
1814 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1815                         uint8_t *buf, int len, int is_write)
1816 {
1817     int l, flags;
1818     target_ulong page;
1819     void * p;
1820
1821     while (len > 0) {
1822         page = addr & TARGET_PAGE_MASK;
1823         l = (page + TARGET_PAGE_SIZE) - addr;
1824         if (l > len)
1825             l = len;
1826         flags = page_get_flags(page);
1827         if (!(flags & PAGE_VALID))
1828             return -1;
1829         if (is_write) {
1830             if (!(flags & PAGE_WRITE))
1831                 return -1;
1832             /* XXX: this code should not depend on lock_user */
1833             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1834                 return -1;
1835             memcpy(p, buf, l);
1836             unlock_user(p, addr, l);
1837         } else {
1838             if (!(flags & PAGE_READ))
1839                 return -1;
1840             /* XXX: this code should not depend on lock_user */
1841             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1842                 return -1;
1843             memcpy(buf, p, l);
1844             unlock_user(p, addr, 0);
1845         }
1846         len -= l;
1847         buf += l;
1848         addr += l;
1849     }
1850     return 0;
1851 }
1852
1853 #else
1854
1855 static void invalidate_and_set_dirty(hwaddr addr,
1856                                      hwaddr length)
1857 {
1858     if (!cpu_physical_memory_is_dirty(addr)) {
1859         /* invalidate code */
1860         tb_invalidate_phys_page_range(addr, addr + length, 0);
1861         /* set dirty bit */
1862         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1863     }
1864     xen_modified_memory(addr, length);
1865 }
1866
1867 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1868                       int len, bool is_write)
1869 {
1870     AddressSpaceDispatch *d = as->dispatch;
1871     int l;
1872     uint8_t *ptr;
1873     uint32_t val;
1874     hwaddr page;
1875     MemoryRegionSection *section;
1876
1877     while (len > 0) {
1878         page = addr & TARGET_PAGE_MASK;
1879         l = (page + TARGET_PAGE_SIZE) - addr;
1880         if (l > len)
1881             l = len;
1882         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1883
1884         if (is_write) {
1885             if (!memory_region_is_ram(section->mr)) {
1886                 hwaddr addr1;
1887                 addr1 = memory_region_section_addr(section, addr);
1888                 /* XXX: could force cpu_single_env to NULL to avoid
1889                    potential bugs */
1890                 if (l >= 4 && ((addr1 & 3) == 0)) {
1891                     /* 32 bit write access */
1892                     val = ldl_p(buf);
1893                     io_mem_write(section->mr, addr1, val, 4);
1894                     l = 4;
1895                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1896                     /* 16 bit write access */
1897                     val = lduw_p(buf);
1898                     io_mem_write(section->mr, addr1, val, 2);
1899                     l = 2;
1900                 } else {
1901                     /* 8 bit write access */
1902                     val = ldub_p(buf);
1903                     io_mem_write(section->mr, addr1, val, 1);
1904                     l = 1;
1905                 }
1906             } else if (!section->readonly) {
1907                 ram_addr_t addr1;
1908                 addr1 = memory_region_get_ram_addr(section->mr)
1909                     + memory_region_section_addr(section, addr);
1910                 /* RAM case */
1911                 ptr = qemu_get_ram_ptr(addr1);
1912                 memcpy(ptr, buf, l);
1913                 invalidate_and_set_dirty(addr1, l);
1914                 qemu_put_ram_ptr(ptr);
1915             }
1916         } else {
1917             if (!(memory_region_is_ram(section->mr) ||
1918                   memory_region_is_romd(section->mr))) {
1919                 hwaddr addr1;
1920                 /* I/O case */
1921                 addr1 = memory_region_section_addr(section, addr);
1922                 if (l >= 4 && ((addr1 & 3) == 0)) {
1923                     /* 32 bit read access */
1924                     val = io_mem_read(section->mr, addr1, 4);
1925                     stl_p(buf, val);
1926                     l = 4;
1927                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1928                     /* 16 bit read access */
1929                     val = io_mem_read(section->mr, addr1, 2);
1930                     stw_p(buf, val);
1931                     l = 2;
1932                 } else {
1933                     /* 8 bit read access */
1934                     val = io_mem_read(section->mr, addr1, 1);
1935                     stb_p(buf, val);
1936                     l = 1;
1937                 }
1938             } else {
1939                 /* RAM case */
1940                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1941                                        + memory_region_section_addr(section,
1942                                                                     addr));
1943                 memcpy(buf, ptr, l);
1944                 qemu_put_ram_ptr(ptr);
1945             }
1946         }
1947         len -= l;
1948         buf += l;
1949         addr += l;
1950     }
1951 }
1952
1953 void address_space_write(AddressSpace *as, hwaddr addr,
1954                          const uint8_t *buf, int len)
1955 {
1956     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1957 }
1958
1959 /**
1960  * address_space_read: read from an address space.
1961  *
1962  * @as: #AddressSpace to be accessed
1963  * @addr: address within that address space
1964  * @buf: buffer into which @len bytes of data are read
1965  */
1966 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1967 {
1968     address_space_rw(as, addr, buf, len, false);
1969 }
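
/*
 * Illustrative sketch (not part of this file's code paths): round-tripping a
 * 32-bit value through guest memory with address_space_write() and
 * address_space_read().  The address is caller-chosen and the helper name is
 * invented for the example.
 */
static bool example_roundtrip_u32(hwaddr addr, uint32_t val)
{
    uint8_t out[4], in[4];

    stl_p(out, val);                       /* encode in target byte order */
    address_space_write(&address_space_memory, addr, out, sizeof(out));
    address_space_read(&address_space_memory, addr, in, sizeof(in));
    return ldl_p(in) == val;               /* true when addr is plain RAM */
}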
1970
1971
1972 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1973                             int len, int is_write)
1974 {
1975     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1976 }
1977
1978 /* used for ROM loading : can write in RAM and ROM */
1979 void cpu_physical_memory_write_rom(hwaddr addr,
1980                                    const uint8_t *buf, int len)
1981 {
1982     AddressSpaceDispatch *d = address_space_memory.dispatch;
1983     int l;
1984     uint8_t *ptr;
1985     hwaddr page;
1986     MemoryRegionSection *section;
1987
1988     while (len > 0) {
1989         page = addr & TARGET_PAGE_MASK;
1990         l = (page + TARGET_PAGE_SIZE) - addr;
1991         if (l > len)
1992             l = len;
1993         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1994
1995         if (!(memory_region_is_ram(section->mr) ||
1996               memory_region_is_romd(section->mr))) {
1997             /* do nothing */
1998         } else {
1999             unsigned long addr1;
2000             addr1 = memory_region_get_ram_addr(section->mr)
2001                 + memory_region_section_addr(section, addr);
2002             /* ROM/RAM case */
2003             ptr = qemu_get_ram_ptr(addr1);
2004             memcpy(ptr, buf, l);
2005             invalidate_and_set_dirty(addr1, l);
2006             qemu_put_ram_ptr(ptr);
2007         }
2008         len -= l;
2009         buf += l;
2010         addr += l;
2011     }
2012 }
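
/*
 * Illustrative sketch (not part of this file's code paths): a firmware loader
 * uses cpu_physical_memory_write_rom() so that the blob lands in memory even
 * when the destination region is read-only for the guest, a case the normal
 * write path above silently skips.  The base address and helper name are
 * placeholders.
 */
static void example_install_firmware(const uint8_t *blob, int size)
{
    const hwaddr firmware_base = 0xfffc0000;    /* arbitrary example address */

    cpu_physical_memory_write_rom(firmware_base, blob, size);
}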
2013
2014 typedef struct {
2015     void *buffer;
2016     hwaddr addr;
2017     hwaddr len;
2018 } BounceBuffer;
2019
2020 static BounceBuffer bounce;
2021
2022 typedef struct MapClient {
2023     void *opaque;
2024     void (*callback)(void *opaque);
2025     QLIST_ENTRY(MapClient) link;
2026 } MapClient;
2027
2028 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2029     = QLIST_HEAD_INITIALIZER(map_client_list);
2030
2031 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2032 {
2033     MapClient *client = g_malloc(sizeof(*client));
2034
2035     client->opaque = opaque;
2036     client->callback = callback;
2037     QLIST_INSERT_HEAD(&map_client_list, client, link);
2038     return client;
2039 }
2040
2041 static void cpu_unregister_map_client(void *_client)
2042 {
2043     MapClient *client = (MapClient *)_client;
2044
2045     QLIST_REMOVE(client, link);
2046     g_free(client);
2047 }
2048
2049 static void cpu_notify_map_clients(void)
2050 {
2051     MapClient *client;
2052
2053     while (!QLIST_EMPTY(&map_client_list)) {
2054         client = QLIST_FIRST(&map_client_list);
2055         client->callback(client->opaque);
2056         cpu_unregister_map_client(client);
2057     }
2058 }
2059
2060 /* Map a physical memory region into a host virtual address.
2061  * May map a subset of the requested range, given by and returned in *plen.
2062  * May return NULL if resources needed to perform the mapping are exhausted.
2063  * Use only for reads OR writes - not for read-modify-write operations.
2064  * Use cpu_register_map_client() to know when retrying the map operation is
2065  * likely to succeed.
2066  */
2067 void *address_space_map(AddressSpace *as,
2068                         hwaddr addr,
2069                         hwaddr *plen,
2070                         bool is_write)
2071 {
2072     AddressSpaceDispatch *d = as->dispatch;
2073     hwaddr len = *plen;
2074     hwaddr todo = 0;
2075     int l;
2076     hwaddr page;
2077     MemoryRegionSection *section;
2078     ram_addr_t raddr = RAM_ADDR_MAX;
2079     ram_addr_t rlen;
2080     void *ret;
2081
2082     while (len > 0) {
2083         page = addr & TARGET_PAGE_MASK;
2084         l = (page + TARGET_PAGE_SIZE) - addr;
2085         if (l > len)
2086             l = len;
2087         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2088
2089         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2090             if (todo || bounce.buffer) {
2091                 break;
2092             }
2093             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2094             bounce.addr = addr;
2095             bounce.len = l;
2096             if (!is_write) {
2097                 address_space_read(as, addr, bounce.buffer, l);
2098             }
2099
2100             *plen = l;
2101             return bounce.buffer;
2102         }
2103         if (!todo) {
2104             raddr = memory_region_get_ram_addr(section->mr)
2105                 + memory_region_section_addr(section, addr);
2106         }
2107
2108         len -= l;
2109         addr += l;
2110         todo += l;
2111     }
2112     rlen = todo;
2113     ret = qemu_ram_ptr_length(raddr, &rlen);
2114     *plen = rlen;
2115     return ret;
2116 }
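
/*
 * Illustrative sketch (not part of this file's code paths): a device model
 * that wants a direct host pointer first tries address_space_map(); if that
 * fails because mapping resources (the single bounce buffer) are exhausted,
 * it queues itself with cpu_register_map_client() and retries once
 * cpu_notify_map_clients() runs.  'ExampleDev', example_dev_start() and
 * example_dev_kick() are invented names.
 */
typedef struct ExampleDev {
    hwaddr buf_addr;
    hwaddr buf_len;
} ExampleDev;

static void example_dev_kick(void *opaque);

static void example_dev_start(ExampleDev *dev)
{
    hwaddr plen = dev->buf_len;
    void *p = address_space_map(&address_space_memory, dev->buf_addr,
                                &plen, false);

    if (!p) {
        /* Mapping resources exhausted: retry from the map-client callback. */
        cpu_register_map_client(dev, example_dev_kick);
        return;
    }
    /* ... consume p[0 .. plen), which may be shorter than requested ... */
    address_space_unmap(&address_space_memory, p, plen, false, plen);
}

static void example_dev_kick(void *opaque)
{
    /* cpu_notify_map_clients() unregisters the client after calling it,
     * so the callback simply retries the transfer. */
    example_dev_start(opaque);
}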
2117
2118 /* Unmaps a memory region previously mapped by address_space_map().
2119  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2120  * the amount of memory that was actually read or written by the caller.
2121  */
2122 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2123                          int is_write, hwaddr access_len)
2124 {
2125     if (buffer != bounce.buffer) {
2126         if (is_write) {
2127             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2128             while (access_len) {
2129                 unsigned l;
2130                 l = TARGET_PAGE_SIZE;
2131                 if (l > access_len)
2132                     l = access_len;
2133                 invalidate_and_set_dirty(addr1, l);
2134                 addr1 += l;
2135                 access_len -= l;
2136             }
2137         }
2138         if (xen_enabled()) {
2139             xen_invalidate_map_cache_entry(buffer);
2140         }
2141         return;
2142     }
2143     if (is_write) {
2144         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2145     }
2146     qemu_vfree(bounce.buffer);
2147     bounce.buffer = NULL;
2148     cpu_notify_map_clients();
2149 }
2150
2151 void *cpu_physical_memory_map(hwaddr addr,
2152                               hwaddr *plen,
2153                               int is_write)
2154 {
2155     return address_space_map(&address_space_memory, addr, plen, is_write);
2156 }
2157
2158 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2159                                int is_write, hwaddr access_len)
2160 {
2161     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2162 }
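
/*
 * Illustrative sketch (not part of this file's code paths): the usual
 * map/access/unmap pattern for bulk DMA, falling back to
 * cpu_physical_memory_rw() when the mapping comes back shorter than requested
 * or fails.  The helper name and fill pattern are invented.
 */
static void example_dma_fill(hwaddr addr, uint8_t pattern, hwaddr size)
{
    hwaddr plen = size;
    void *host = cpu_physical_memory_map(addr, &plen, 1 /* is_write */);

    if (host && plen == size) {
        memset(host, pattern, size);
        /* access_len tells unmap how much to invalidate and mark dirty. */
        cpu_physical_memory_unmap(host, plen, 1, size);
    } else {
        hwaddr i;
        uint8_t byte = pattern;

        if (host) {
            cpu_physical_memory_unmap(host, plen, 1, 0);
        }
        for (i = 0; i < size; i++) {
            cpu_physical_memory_rw(addr + i, &byte, 1, 1);
        }
    }
}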
2163
2164 /* warning: addr must be aligned */
2165 static inline uint32_t ldl_phys_internal(hwaddr addr,
2166                                          enum device_endian endian)
2167 {
2168     uint8_t *ptr;
2169     uint32_t val;
2170     MemoryRegionSection *section;
2171
2172     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2173
2174     if (!(memory_region_is_ram(section->mr) ||
2175           memory_region_is_romd(section->mr))) {
2176         /* I/O case */
2177         addr = memory_region_section_addr(section, addr);
2178         val = io_mem_read(section->mr, addr, 4);
2179 #if defined(TARGET_WORDS_BIGENDIAN)
2180         if (endian == DEVICE_LITTLE_ENDIAN) {
2181             val = bswap32(val);
2182         }
2183 #else
2184         if (endian == DEVICE_BIG_ENDIAN) {
2185             val = bswap32(val);
2186         }
2187 #endif
2188     } else {
2189         /* RAM case */
2190         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2191                                 & TARGET_PAGE_MASK)
2192                                + memory_region_section_addr(section, addr));
2193         switch (endian) {
2194         case DEVICE_LITTLE_ENDIAN:
2195             val = ldl_le_p(ptr);
2196             break;
2197         case DEVICE_BIG_ENDIAN:
2198             val = ldl_be_p(ptr);
2199             break;
2200         default:
2201             val = ldl_p(ptr);
2202             break;
2203         }
2204     }
2205     return val;
2206 }
2207
2208 uint32_t ldl_phys(hwaddr addr)
2209 {
2210     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2211 }
2212
2213 uint32_t ldl_le_phys(hwaddr addr)
2214 {
2215     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2216 }
2217
2218 uint32_t ldl_be_phys(hwaddr addr)
2219 {
2220     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2221 }
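
/*
 * Illustrative sketch (not part of this file's code paths): a device model
 * pulling a 32-bit little-endian field out of a descriptor that lives in
 * guest RAM.  The descriptor layout and the offset of the length field are
 * invented for the example.
 */
static uint32_t example_read_desc_len(hwaddr desc_addr)
{
    /* ldl_le_phys() byteswaps as needed, whatever the host/target order. */
    return ldl_le_phys(desc_addr + 4);
}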
2222
2223 /* warning: addr must be aligned */
2224 static inline uint64_t ldq_phys_internal(hwaddr addr,
2225                                          enum device_endian endian)
2226 {
2227     uint8_t *ptr;
2228     uint64_t val;
2229     MemoryRegionSection *section;
2230
2231     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2232
2233     if (!(memory_region_is_ram(section->mr) ||
2234           memory_region_is_romd(section->mr))) {
2235         /* I/O case */
2236         addr = memory_region_section_addr(section, addr);
2237
2238         /* XXX This is broken when device endian != cpu endian.
2239                Fix and add "endian" variable check */
2240 #ifdef TARGET_WORDS_BIGENDIAN
2241         val = io_mem_read(section->mr, addr, 4) << 32;
2242         val |= io_mem_read(section->mr, addr + 4, 4);
2243 #else
2244         val = io_mem_read(section->mr, addr, 4);
2245         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2246 #endif
2247     } else {
2248         /* RAM case */
2249         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2250                                 & TARGET_PAGE_MASK)
2251                                + memory_region_section_addr(section, addr));
2252         switch (endian) {
2253         case DEVICE_LITTLE_ENDIAN:
2254             val = ldq_le_p(ptr);
2255             break;
2256         case DEVICE_BIG_ENDIAN:
2257             val = ldq_be_p(ptr);
2258             break;
2259         default:
2260             val = ldq_p(ptr);
2261             break;
2262         }
2263     }
2264     return val;
2265 }
2266
2267 uint64_t ldq_phys(hwaddr addr)
2268 {
2269     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2270 }
2271
2272 uint64_t ldq_le_phys(hwaddr addr)
2273 {
2274     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2275 }
2276
2277 uint64_t ldq_be_phys(hwaddr addr)
2278 {
2279     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2280 }
2281
2282 /* XXX: optimize */
2283 uint32_t ldub_phys(hwaddr addr)
2284 {
2285     uint8_t val;
2286     cpu_physical_memory_read(addr, &val, 1);
2287     return val;
2288 }
2289
2290 /* warning: addr must be aligned */
2291 static inline uint32_t lduw_phys_internal(hwaddr addr,
2292                                           enum device_endian endian)
2293 {
2294     uint8_t *ptr;
2295     uint64_t val;
2296     MemoryRegionSection *section;
2297
2298     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2299
2300     if (!(memory_region_is_ram(section->mr) ||
2301           memory_region_is_romd(section->mr))) {
2302         /* I/O case */
2303         addr = memory_region_section_addr(section, addr);
2304         val = io_mem_read(section->mr, addr, 2);
2305 #if defined(TARGET_WORDS_BIGENDIAN)
2306         if (endian == DEVICE_LITTLE_ENDIAN) {
2307             val = bswap16(val);
2308         }
2309 #else
2310         if (endian == DEVICE_BIG_ENDIAN) {
2311             val = bswap16(val);
2312         }
2313 #endif
2314     } else {
2315         /* RAM case */
2316         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2317                                 & TARGET_PAGE_MASK)
2318                                + memory_region_section_addr(section, addr));
2319         switch (endian) {
2320         case DEVICE_LITTLE_ENDIAN:
2321             val = lduw_le_p(ptr);
2322             break;
2323         case DEVICE_BIG_ENDIAN:
2324             val = lduw_be_p(ptr);
2325             break;
2326         default:
2327             val = lduw_p(ptr);
2328             break;
2329         }
2330     }
2331     return val;
2332 }
2333
2334 uint32_t lduw_phys(hwaddr addr)
2335 {
2336     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2337 }
2338
2339 uint32_t lduw_le_phys(hwaddr addr)
2340 {
2341     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2342 }
2343
2344 uint32_t lduw_be_phys(hwaddr addr)
2345 {
2346     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2347 }
2348
2349 /* warning: addr must be aligned. The ram page is not marked as dirty
2350    and the code inside is not invalidated. It is useful if the dirty
2351    bits are used to track modified PTEs */
2352 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2353 {
2354     uint8_t *ptr;
2355     MemoryRegionSection *section;
2356
2357     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2358
2359     if (!memory_region_is_ram(section->mr) || section->readonly) {
2360         addr = memory_region_section_addr(section, addr);
2361         if (memory_region_is_ram(section->mr)) {
2362             section = &phys_sections[phys_section_rom];
2363         }
2364         io_mem_write(section->mr, addr, val, 4);
2365     } else {
2366         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2367                                & TARGET_PAGE_MASK)
2368             + memory_region_section_addr(section, addr);
2369         ptr = qemu_get_ram_ptr(addr1);
2370         stl_p(ptr, val);
2371
2372         if (unlikely(in_migration)) {
2373             if (!cpu_physical_memory_is_dirty(addr1)) {
2374                 /* invalidate code */
2375                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2376                 /* set dirty bit */
2377                 cpu_physical_memory_set_dirty_flags(
2378                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2379             }
2380         }
2381     }
2382 }
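
/*
 * Illustrative sketch (not part of this file's code paths): the notdirty
 * store is for MMU helpers that rewrite a guest page-table entry (for
 * example, to set an accessed bit) while the dirty bitmap itself is being
 * used to detect PTE changes, so the write must not mark the page dirty.
 * The PTE layout and the 0x20 "accessed" bit are invented.
 */
static void example_set_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    pte |= 0x20;                        /* made-up "accessed" bit */
    stl_phys_notdirty(pte_addr, pte);
}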
2383
2384 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2385 {
2386     uint8_t *ptr;
2387     MemoryRegionSection *section;
2388
2389     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2390
2391     if (!memory_region_is_ram(section->mr) || section->readonly) {
2392         addr = memory_region_section_addr(section, addr);
2393         if (memory_region_is_ram(section->mr)) {
2394             section = &phys_sections[phys_section_rom];
2395         }
2396 #ifdef TARGET_WORDS_BIGENDIAN
2397         io_mem_write(section->mr, addr, val >> 32, 4);
2398         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2399 #else
2400         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2401         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2402 #endif
2403     } else {
2404         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2405                                 & TARGET_PAGE_MASK)
2406                                + memory_region_section_addr(section, addr));
2407         stq_p(ptr, val);
2408     }
2409 }
2410
2411 /* warning: addr must be aligned */
2412 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2413                                      enum device_endian endian)
2414 {
2415     uint8_t *ptr;
2416     MemoryRegionSection *section;
2417
2418     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2419
2420     if (!memory_region_is_ram(section->mr) || section->readonly) {
2421         addr = memory_region_section_addr(section, addr);
2422         if (memory_region_is_ram(section->mr)) {
2423             section = &phys_sections[phys_section_rom];
2424         }
2425 #if defined(TARGET_WORDS_BIGENDIAN)
2426         if (endian == DEVICE_LITTLE_ENDIAN) {
2427             val = bswap32(val);
2428         }
2429 #else
2430         if (endian == DEVICE_BIG_ENDIAN) {
2431             val = bswap32(val);
2432         }
2433 #endif
2434         io_mem_write(section->mr, addr, val, 4);
2435     } else {
2436         unsigned long addr1;
2437         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2438             + memory_region_section_addr(section, addr);
2439         /* RAM case */
2440         ptr = qemu_get_ram_ptr(addr1);
2441         switch (endian) {
2442         case DEVICE_LITTLE_ENDIAN:
2443             stl_le_p(ptr, val);
2444             break;
2445         case DEVICE_BIG_ENDIAN:
2446             stl_be_p(ptr, val);
2447             break;
2448         default:
2449             stl_p(ptr, val);
2450             break;
2451         }
2452         invalidate_and_set_dirty(addr1, 4);
2453     }
2454 }
2455
2456 void stl_phys(hwaddr addr, uint32_t val)
2457 {
2458     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2459 }
2460
2461 void stl_le_phys(hwaddr addr, uint32_t val)
2462 {
2463     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2464 }
2465
2466 void stl_be_phys(hwaddr addr, uint32_t val)
2467 {
2468     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2469 }
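
/*
 * Illustrative sketch (not part of this file's code paths): poking a 32-bit
 * doorbell register that the emulated hardware defines as little-endian.
 * The register address is a placeholder.
 */
static void example_ring_doorbell(hwaddr doorbell_reg)
{
    /* stl_le_phys() stores little-endian regardless of host/target order. */
    stl_le_phys(doorbell_reg, 1);
}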
2470
2471 /* XXX: optimize */
2472 void stb_phys(hwaddr addr, uint32_t val)
2473 {
2474     uint8_t v = val;
2475     cpu_physical_memory_write(addr, &v, 1);
2476 }
2477
2478 /* warning: addr must be aligned */
2479 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2480                                      enum device_endian endian)
2481 {
2482     uint8_t *ptr;
2483     MemoryRegionSection *section;
2484
2485     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2486
2487     if (!memory_region_is_ram(section->mr) || section->readonly) {
2488         addr = memory_region_section_addr(section, addr);
2489         if (memory_region_is_ram(section->mr)) {
2490             section = &phys_sections[phys_section_rom];
2491         }
2492 #if defined(TARGET_WORDS_BIGENDIAN)
2493         if (endian == DEVICE_LITTLE_ENDIAN) {
2494             val = bswap16(val);
2495         }
2496 #else
2497         if (endian == DEVICE_BIG_ENDIAN) {
2498             val = bswap16(val);
2499         }
2500 #endif
2501         io_mem_write(section->mr, addr, val, 2);
2502     } else {
2503         unsigned long addr1;
2504         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2505             + memory_region_section_addr(section, addr);
2506         /* RAM case */
2507         ptr = qemu_get_ram_ptr(addr1);
2508         switch (endian) {
2509         case DEVICE_LITTLE_ENDIAN:
2510             stw_le_p(ptr, val);
2511             break;
2512         case DEVICE_BIG_ENDIAN:
2513             stw_be_p(ptr, val);
2514             break;
2515         default:
2516             stw_p(ptr, val);
2517             break;
2518         }
2519         invalidate_and_set_dirty(addr1, 2);
2520     }
2521 }
2522
2523 void stw_phys(hwaddr addr, uint32_t val)
2524 {
2525     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2526 }
2527
2528 void stw_le_phys(hwaddr addr, uint32_t val)
2529 {
2530     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2531 }
2532
2533 void stw_be_phys(hwaddr addr, uint32_t val)
2534 {
2535     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2536 }
2537
2538 /* XXX: optimize */
2539 void stq_phys(hwaddr addr, uint64_t val)
2540 {
2541     val = tswap64(val);
2542     cpu_physical_memory_write(addr, &val, 8);
2543 }
2544
2545 void stq_le_phys(hwaddr addr, uint64_t val)
2546 {
2547     val = cpu_to_le64(val);
2548     cpu_physical_memory_write(addr, &val, 8);
2549 }
2550
2551 void stq_be_phys(hwaddr addr, uint64_t val)
2552 {
2553     val = cpu_to_be64(val);
2554     cpu_physical_memory_write(addr, &val, 8);
2555 }
2556
2557 /* virtual memory access for debug (includes writing to ROM) */
2558 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2559                         uint8_t *buf, int len, int is_write)
2560 {
2561     int l;
2562     hwaddr phys_addr;
2563     target_ulong page;
2564
2565     while (len > 0) {
2566         page = addr & TARGET_PAGE_MASK;
2567         phys_addr = cpu_get_phys_page_debug(env, page);
2568         /* if no physical page mapped, return an error */
2569         if (phys_addr == -1)
2570             return -1;
2571         l = (page + TARGET_PAGE_SIZE) - addr;
2572         if (l > len)
2573             l = len;
2574         phys_addr += (addr & ~TARGET_PAGE_MASK);
2575         if (is_write)
2576             cpu_physical_memory_write_rom(phys_addr, buf, l);
2577         else
2578             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2579         len -= l;
2580         buf += l;
2581         addr += l;
2582     }
2583     return 0;
2584 }
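
/*
 * Illustrative sketch (not part of this file's code paths): how a debugger
 * front end (the gdbstub, a monitor command) reads a guest-virtual word with
 * cpu_memory_rw_debug(), which resolves the address via
 * cpu_get_phys_page_debug() without raising guest faults.  The helper name is
 * invented.
 */
static bool example_peek_guest_u32(CPUArchState *env, target_ulong vaddr,
                                   uint32_t *out)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return false;                   /* virtual address not mapped */
    }
    *out = ldl_p(buf);                  /* interpret in target byte order */
    return true;
}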
2585 #endif
2586
2587 #if !defined(CONFIG_USER_ONLY)
2588
2589 /*
2590  * A helper function for the _utterly broken_ virtio device model to find out if
2591  * it's running on a big endian machine. Don't do this at home kids!
2592  */
2593 bool virtio_is_big_endian(void);
2594 bool virtio_is_big_endian(void)
2595 {
2596 #if defined(TARGET_WORDS_BIGENDIAN)
2597     return true;
2598 #else
2599     return false;
2600 #endif
2601 }
2602
2603 #endif
2604
2605 #ifndef CONFIG_USER_ONLY
2606 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2607 {
2608     MemoryRegionSection *section;
2609
2610     section = phys_page_find(address_space_memory.dispatch,
2611                              phys_addr >> TARGET_PAGE_BITS);
2612
2613     return !(memory_region_is_ram(section->mr) ||
2614              memory_region_is_romd(section->mr));
2615 }
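
/*
 * Illustrative sketch (not part of this file's code paths): a memory dumper
 * can use cpu_physical_memory_is_io() to skip MMIO pages whose reads may have
 * side effects.  Assumes a page-aligned [start, end) range and a destination
 * buffer large enough for it; the helper name is invented.
 */
static void example_dump_ram_only(hwaddr start, hwaddr end, uint8_t *dst)
{
    hwaddr a;

    for (a = start; a < end; a += TARGET_PAGE_SIZE, dst += TARGET_PAGE_SIZE) {
        if (cpu_physical_memory_is_io(a)) {
            memset(dst, 0, TARGET_PAGE_SIZE);   /* don't touch device regs */
        } else {
            cpu_physical_memory_read(a, dst, TARGET_PAGE_SIZE);
        }
    }
}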
2616 #endif