[qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount = 0;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
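/*
 * The phys_map above is a sparse P_L2_LEVELS-deep radix tree: every interior
 * node is an array of L2_SIZE PhysPageEntry slots taken from phys_map_nodes.
 * phys_page_set_level() walks top-down and, whenever an aligned run of
 * "step = 1 << (level * L2_BITS)" pages fits entirely inside the remaining
 * range, records the leaf at that level instead of descending further, so a
 * large aligned region costs a single entry.
 *
 * Illustrative example (the real L2_BITS value is target-dependent): with
 * L2_BITS == 10 and three levels, page index 0x0012345 selects slot
 * (0x0012345 >> 20) & 0x3ff at the top level, (0x0012345 >> 10) & 0x3ff at
 * the middle level and 0x0012345 & 0x3ff at the bottom.
 */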
184
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
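/*
 * Lookup mirrors registration: the walk starts at d->phys_map and follows
 * one level per loop iteration.  Hitting PHYS_MAP_NODE_NIL on the way down
 * means the page was never registered, so the function falls back to the
 * phys_section_unassigned entry; callers therefore always receive a valid
 * MemoryRegionSection pointer.
 */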
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     qemu_mutex_init(&ram_list.mutex);
217     memory_map_init();
218     io_mem_init();
219 #endif
220 }
221
222 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
223
224 static int cpu_common_post_load(void *opaque, int version_id)
225 {
226     CPUArchState *env = opaque;
227
228     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
229        version_id is increased. */
230     env->interrupt_request &= ~0x01;
231     tlb_flush(env, 1);
232
233     return 0;
234 }
235
236 static const VMStateDescription vmstate_cpu_common = {
237     .name = "cpu_common",
238     .version_id = 1,
239     .minimum_version_id = 1,
240     .minimum_version_id_old = 1,
241     .post_load = cpu_common_post_load,
242     .fields      = (VMStateField []) {
243         VMSTATE_UINT32(halted, CPUArchState),
244         VMSTATE_UINT32(interrupt_request, CPUArchState),
245         VMSTATE_END_OF_LIST()
246     }
247 };
248 #endif
249
250 CPUArchState *qemu_get_cpu(int cpu)
251 {
252     CPUArchState *env = first_cpu;
253
254     while (env) {
255         if (env->cpu_index == cpu)
256             break;
257         env = env->next_cpu;
258     }
259
260     return env;
261 }
262
263 void cpu_exec_init(CPUArchState *env)
264 {
265 #ifndef CONFIG_USER_ONLY
266     CPUState *cpu = ENV_GET_CPU(env);
267 #endif
268     CPUArchState **penv;
269     int cpu_index;
270
271 #if defined(CONFIG_USER_ONLY)
272     cpu_list_lock();
273 #endif
274     env->next_cpu = NULL;
275     penv = &first_cpu;
276     cpu_index = 0;
277     while (*penv != NULL) {
278         penv = &(*penv)->next_cpu;
279         cpu_index++;
280     }
281     env->cpu_index = cpu_index;
282     env->numa_node = 0;
283     QTAILQ_INIT(&env->breakpoints);
284     QTAILQ_INIT(&env->watchpoints);
285 #ifndef CONFIG_USER_ONLY
286     cpu->thread_id = qemu_get_thread_id();
287 #endif
288     *penv = env;
289 #if defined(CONFIG_USER_ONLY)
290     cpu_list_unlock();
291 #endif
292 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
293     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
294     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
295                     cpu_save, cpu_load, env);
296 #endif
297 }
298
299 #if defined(TARGET_HAS_ICE)
300 #if defined(CONFIG_USER_ONLY)
301 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
302 {
303     tb_invalidate_phys_page_range(pc, pc + 1, 0);
304 }
305 #else
306 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
307 {
308     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
309             (pc & ~TARGET_PAGE_MASK));
310 }
311 #endif
312 #endif /* TARGET_HAS_ICE */
313
314 #if defined(CONFIG_USER_ONLY)
315 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
316
317 {
318 }
319
320 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
321                           int flags, CPUWatchpoint **watchpoint)
322 {
323     return -ENOSYS;
324 }
325 #else
326 /* Add a watchpoint.  */
327 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
328                           int flags, CPUWatchpoint **watchpoint)
329 {
330     target_ulong len_mask = ~(len - 1);
331     CPUWatchpoint *wp;
332
333     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
334     if ((len & (len - 1)) || (addr & ~len_mask) ||
335             len == 0 || len > TARGET_PAGE_SIZE) {
336         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
337                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
338         return -EINVAL;
339     }
340     wp = g_malloc(sizeof(*wp));
341
342     wp->vaddr = addr;
343     wp->len_mask = len_mask;
344     wp->flags = flags;
345
346     /* keep all GDB-injected watchpoints in front */
347     if (flags & BP_GDB)
348         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
349     else
350         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
351
352     tlb_flush_page(env, addr);
353
354     if (watchpoint)
355         *watchpoint = wp;
356     return 0;
357 }
358
359 /* Remove a specific watchpoint.  */
360 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
361                           int flags)
362 {
363     target_ulong len_mask = ~(len - 1);
364     CPUWatchpoint *wp;
365
366     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
367         if (addr == wp->vaddr && len_mask == wp->len_mask
368                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
369             cpu_watchpoint_remove_by_ref(env, wp);
370             return 0;
371         }
372     }
373     return -ENOENT;
374 }
375
376 /* Remove a specific watchpoint by reference.  */
377 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
378 {
379     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
380
381     tlb_flush_page(env, watchpoint->vaddr);
382
383     g_free(watchpoint);
384 }
385
386 /* Remove all matching watchpoints.  */
387 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
388 {
389     CPUWatchpoint *wp, *next;
390
391     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
392         if (wp->flags & mask)
393             cpu_watchpoint_remove_by_ref(env, wp);
394     }
395 }
396 #endif
397
398 /* Add a breakpoint.  */
399 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
400                           CPUBreakpoint **breakpoint)
401 {
402 #if defined(TARGET_HAS_ICE)
403     CPUBreakpoint *bp;
404
405     bp = g_malloc(sizeof(*bp));
406
407     bp->pc = pc;
408     bp->flags = flags;
409
410     /* keep all GDB-injected breakpoints in front */
411     if (flags & BP_GDB)
412         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
413     else
414         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
415
416     breakpoint_invalidate(env, pc);
417
418     if (breakpoint)
419         *breakpoint = bp;
420     return 0;
421 #else
422     return -ENOSYS;
423 #endif
424 }
425
426 /* Remove a specific breakpoint.  */
427 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
428 {
429 #if defined(TARGET_HAS_ICE)
430     CPUBreakpoint *bp;
431
432     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
433         if (bp->pc == pc && bp->flags == flags) {
434             cpu_breakpoint_remove_by_ref(env, bp);
435             return 0;
436         }
437     }
438     return -ENOENT;
439 #else
440     return -ENOSYS;
441 #endif
442 }
443
444 /* Remove a specific breakpoint by reference.  */
445 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
446 {
447 #if defined(TARGET_HAS_ICE)
448     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
449
450     breakpoint_invalidate(env, breakpoint->pc);
451
452     g_free(breakpoint);
453 #endif
454 }
455
456 /* Remove all matching breakpoints. */
457 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
458 {
459 #if defined(TARGET_HAS_ICE)
460     CPUBreakpoint *bp, *next;
461
462     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
463         if (bp->flags & mask)
464             cpu_breakpoint_remove_by_ref(env, bp);
465     }
466 #endif
467 }
468
469 /* enable or disable single step mode. EXCP_DEBUG is returned by the
470    CPU loop after each instruction */
471 void cpu_single_step(CPUArchState *env, int enabled)
472 {
473 #if defined(TARGET_HAS_ICE)
474     if (env->singlestep_enabled != enabled) {
475         env->singlestep_enabled = enabled;
476         if (kvm_enabled())
477             kvm_update_guest_debug(env, 0);
478         else {
479             /* must flush all the translated code to avoid inconsistencies */
480             /* XXX: only flush what is necessary */
481             tb_flush(env);
482         }
483     }
484 #endif
485 }
486
487 void cpu_reset_interrupt(CPUArchState *env, int mask)
488 {
489     env->interrupt_request &= ~mask;
490 }
491
492 void cpu_exit(CPUArchState *env)
493 {
494     env->exit_request = 1;
495     cpu_unlink_tb(env);
496 }
497
498 void cpu_abort(CPUArchState *env, const char *fmt, ...)
499 {
500     va_list ap;
501     va_list ap2;
502
503     va_start(ap, fmt);
504     va_copy(ap2, ap);
505     fprintf(stderr, "qemu: fatal: ");
506     vfprintf(stderr, fmt, ap);
507     fprintf(stderr, "\n");
508     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
509     if (qemu_log_enabled()) {
510         qemu_log("qemu: fatal: ");
511         qemu_log_vprintf(fmt, ap2);
512         qemu_log("\n");
513         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
514         qemu_log_flush();
515         qemu_log_close();
516     }
517     va_end(ap2);
518     va_end(ap);
519 #if defined(CONFIG_USER_ONLY)
520     {
521         struct sigaction act;
522         sigfillset(&act.sa_mask);
523         act.sa_handler = SIG_DFL;
524         sigaction(SIGABRT, &act, NULL);
525     }
526 #endif
527     abort();
528 }
529
530 CPUArchState *cpu_copy(CPUArchState *env)
531 {
532     CPUArchState *new_env = cpu_init(env->cpu_model_str);
533     CPUArchState *next_cpu = new_env->next_cpu;
534     int cpu_index = new_env->cpu_index;
535 #if defined(TARGET_HAS_ICE)
536     CPUBreakpoint *bp;
537     CPUWatchpoint *wp;
538 #endif
539
540     memcpy(new_env, env, sizeof(CPUArchState));
541
542     /* Preserve chaining and index. */
543     new_env->next_cpu = next_cpu;
544     new_env->cpu_index = cpu_index;
545
546     /* Clone all break/watchpoints.
547        Note: Once we support ptrace with hw-debug register access, make sure
548        BP_CPU break/watchpoints are handled correctly on clone. */
549     QTAILQ_INIT(&env->breakpoints);
550     QTAILQ_INIT(&env->watchpoints);
551 #if defined(TARGET_HAS_ICE)
552     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
553         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
554     }
555     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
556         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
557                               wp->flags, NULL);
558     }
559 #endif
560
561     return new_env;
562 }
563
564 #if !defined(CONFIG_USER_ONLY)
565 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
566                                       uintptr_t length)
567 {
568     uintptr_t start1;
569
570     /* we modify the TLB cache so that the dirty bit will be set again
571        when accessing the range */
572     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
573     /* Check that we don't span multiple blocks - this breaks the
574        address comparisons below.  */
575     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
576             != (end - 1) - start) {
577         abort();
578     }
579     cpu_tlb_reset_dirty_all(start1, length);
580
581 }
582
583 /* Note: start and end must be within the same ram block.  */
584 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
585                                      int dirty_flags)
586 {
587     uintptr_t length;
588
589     start &= TARGET_PAGE_MASK;
590     end = TARGET_PAGE_ALIGN(end);
591
592     length = end - start;
593     if (length == 0)
594         return;
595     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
596
597     if (tcg_enabled()) {
598         tlb_reset_dirty_range_all(start, end, length);
599     }
600 }
601
602 static int cpu_physical_memory_set_dirty_tracking(int enable)
603 {
604     int ret = 0;
605     in_migration = enable;
606     return ret;
607 }
608
609 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
610                                                    MemoryRegionSection *section,
611                                                    target_ulong vaddr,
612                                                    hwaddr paddr,
613                                                    int prot,
614                                                    target_ulong *address)
615 {
616     hwaddr iotlb;
617     CPUWatchpoint *wp;
618
619     if (memory_region_is_ram(section->mr)) {
620         /* Normal RAM.  */
621         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
622             + memory_region_section_addr(section, paddr);
623         if (!section->readonly) {
624             iotlb |= phys_section_notdirty;
625         } else {
626             iotlb |= phys_section_rom;
627         }
628     } else {
629         /* IO handlers are currently passed a physical address.
630            It would be nice to pass an offset from the base address
631            of that region.  This would avoid having to special case RAM,
632            and avoid full address decoding in every device.
633            We can't use the high bits of pd for this because
634            IO_MEM_ROMD uses these as a ram address.  */
635         iotlb = section - phys_sections;
636         iotlb += memory_region_section_addr(section, paddr);
637     }
638
639     /* Make accesses to pages with watchpoints go via the
640        watchpoint trap routines.  */
641     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
642         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
643             /* Avoid trapping reads of pages with a write breakpoint. */
644             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
645                 iotlb = phys_section_watch + paddr;
646                 *address |= TLB_MMIO;
647                 break;
648             }
649         }
650     }
651
652     return iotlb;
653 }
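/*
 * Sketch of the iotlb encoding produced above (the low bits below the page
 * size hold a phys_sections index, see iotlb_to_region()):
 *   - writable RAM:  page-aligned ram_addr | phys_section_notdirty, so that
 *     stores are funnelled through notdirty_mem_write for dirty tracking;
 *   - read-only RAM: page-aligned ram_addr | phys_section_rom;
 *   - MMIO:          the phys_sections index of the section, combined with
 *     the offset of the access within the region.
 * Pages carrying a watchpoint additionally get TLB_MMIO, which forces the
 * relevant accesses through the watch_mem_* handlers.
 */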
654 #endif /* !defined(CONFIG_USER_ONLY) */
655
656 #if !defined(CONFIG_USER_ONLY)
657
658 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
659 typedef struct subpage_t {
660     MemoryRegion iomem;
661     hwaddr base;
662     uint16_t sub_section[TARGET_PAGE_SIZE];
663 } subpage_t;
664
665 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
666                              uint16_t section);
667 static subpage_t *subpage_init(hwaddr base);
668 static void destroy_page_desc(uint16_t section_index)
669 {
670     MemoryRegionSection *section = &phys_sections[section_index];
671     MemoryRegion *mr = section->mr;
672
673     if (mr->subpage) {
674         subpage_t *subpage = container_of(mr, subpage_t, iomem);
675         memory_region_destroy(&subpage->iomem);
676         g_free(subpage);
677     }
678 }
679
680 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
681 {
682     unsigned i;
683     PhysPageEntry *p;
684
685     if (lp->ptr == PHYS_MAP_NODE_NIL) {
686         return;
687     }
688
689     p = phys_map_nodes[lp->ptr];
690     for (i = 0; i < L2_SIZE; ++i) {
691         if (!p[i].is_leaf) {
692             destroy_l2_mapping(&p[i], level - 1);
693         } else {
694             destroy_page_desc(p[i].ptr);
695         }
696     }
697     lp->is_leaf = 0;
698     lp->ptr = PHYS_MAP_NODE_NIL;
699 }
700
701 static void destroy_all_mappings(AddressSpaceDispatch *d)
702 {
703     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
704     phys_map_nodes_reset();
705 }
706
707 static uint16_t phys_section_add(MemoryRegionSection *section)
708 {
709     if (phys_sections_nb == phys_sections_nb_alloc) {
710         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
711         phys_sections = g_renew(MemoryRegionSection, phys_sections,
712                                 phys_sections_nb_alloc);
713     }
714     phys_sections[phys_sections_nb] = *section;
715     return phys_sections_nb++;
716 }
717
718 static void phys_sections_clear(void)
719 {
720     phys_sections_nb = 0;
721 }
722
723 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
724 {
725     subpage_t *subpage;
726     hwaddr base = section->offset_within_address_space
727         & TARGET_PAGE_MASK;
728     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
729     MemoryRegionSection subsection = {
730         .offset_within_address_space = base,
731         .size = TARGET_PAGE_SIZE,
732     };
733     hwaddr start, end;
734
735     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
736
737     if (!(existing->mr->subpage)) {
738         subpage = subpage_init(base);
739         subsection.mr = &subpage->iomem;
740         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
741                       phys_section_add(&subsection));
742     } else {
743         subpage = container_of(existing->mr, subpage_t, iomem);
744     }
745     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
746     end = start + section->size - 1;
747     subpage_register(subpage, start, end, phys_section_add(section));
748 }
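/*
 * Subpages exist because the dispatch tree works at TARGET_PAGE_SIZE
 * granularity: when a section starts or ends in the middle of a page (or is
 * smaller than a page), that page is backed by a subpage_t whose
 * sub_section[] array maps every byte offset inside the page to a
 * phys_sections index, letting several small regions share one page.
 */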
749
750
751 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
752 {
753     hwaddr start_addr = section->offset_within_address_space;
754     ram_addr_t size = section->size;
755     hwaddr addr;
756     uint16_t section_index = phys_section_add(section);
757
758     assert(size);
759
760     addr = start_addr;
761     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
762                   section_index);
763 }
764
765 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
766 {
767     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
768     MemoryRegionSection now = *section, remain = *section;
769
770     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
771         || (now.size < TARGET_PAGE_SIZE)) {
772         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
773                        - now.offset_within_address_space,
774                        now.size);
775         register_subpage(d, &now);
776         remain.size -= now.size;
777         remain.offset_within_address_space += now.size;
778         remain.offset_within_region += now.size;
779     }
780     while (remain.size >= TARGET_PAGE_SIZE) {
781         now = remain;
782         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
783             now.size = TARGET_PAGE_SIZE;
784             register_subpage(d, &now);
785         } else {
786             now.size &= TARGET_PAGE_MASK;
787             register_multipage(d, &now);
788         }
789         remain.size -= now.size;
790         remain.offset_within_address_space += now.size;
791         remain.offset_within_region += now.size;
792     }
793     now = remain;
794     if (now.size) {
795         register_subpage(d, &now);
796     }
797 }
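/*
 * mem_add() splits every incoming section into at most three parts: an
 * unaligned head registered as a subpage, a middle run of whole pages
 * registered via register_multipage() (provided offset_within_region is page
 * aligned at that point), and an unaligned tail registered as a subpage.
 * Illustrative example, assuming 4 KiB target pages and a region offset with
 * the same in-page alignment as the address: a section covering
 * [0x12800, 0x15800) becomes a subpage for [0x12800, 0x13000), a multipage
 * mapping for [0x13000, 0x15000) and a subpage for [0x15000, 0x15800).
 */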
798
799 void qemu_flush_coalesced_mmio_buffer(void)
800 {
801     if (kvm_enabled())
802         kvm_flush_coalesced_mmio_buffer();
803 }
804
805 void qemu_mutex_lock_ramlist(void)
806 {
807     qemu_mutex_lock(&ram_list.mutex);
808 }
809
810 void qemu_mutex_unlock_ramlist(void)
811 {
812     qemu_mutex_unlock(&ram_list.mutex);
813 }
814
815 #if defined(__linux__) && !defined(TARGET_S390X)
816
817 #include <sys/vfs.h>
818
819 #define HUGETLBFS_MAGIC       0x958458f6
820
821 static long gethugepagesize(const char *path)
822 {
823     struct statfs fs;
824     int ret;
825
826     do {
827         ret = statfs(path, &fs);
828     } while (ret != 0 && errno == EINTR);
829
830     if (ret != 0) {
831         perror(path);
832         return 0;
833     }
834
835     if (fs.f_type != HUGETLBFS_MAGIC)
836         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
837
838     return fs.f_bsize;
839 }
840
841 static void *file_ram_alloc(RAMBlock *block,
842                             ram_addr_t memory,
843                             const char *path)
844 {
845     char *filename;
846     void *area;
847     int fd;
848 #ifdef MAP_POPULATE
849     int flags;
850 #endif
851     unsigned long hpagesize;
852
853     hpagesize = gethugepagesize(path);
854     if (!hpagesize) {
855         return NULL;
856     }
857
858     if (memory < hpagesize) {
859         return NULL;
860     }
861
862     if (kvm_enabled() && !kvm_has_sync_mmu()) {
863         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
864         return NULL;
865     }
866
867     if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
868         return NULL;
869     }
870
871     fd = mkstemp(filename);
872     if (fd < 0) {
873         perror("unable to create backing store for hugepages");
874         free(filename);
875         return NULL;
876     }
877     unlink(filename);
878     free(filename);
879
880     memory = (memory+hpagesize-1) & ~(hpagesize-1);
881
882     /*
883      * ftruncate is not supported by hugetlbfs in older
884      * hosts, so don't bother bailing out on errors.
885      * If anything goes wrong with it under other filesystems,
886      * mmap will fail.
887      */
888     if (ftruncate(fd, memory))
889         perror("ftruncate");
890
891 #ifdef MAP_POPULATE
892     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
893      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
894      * to sidestep this quirk.
895      */
896     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
897     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
898 #else
899     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
900 #endif
901     if (area == MAP_FAILED) {
902         perror("file_ram_alloc: can't mmap RAM pages");
903         close(fd);
904         return (NULL);
905     }
906     block->fd = fd;
907     return area;
908 }
909 #endif
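/*
 * Summary of the -mem-path path above: the requested size is rounded up to
 * the huge page size of the mount, a temporary file is created there with
 * mkstemp() and immediately unlinked, and the file is mmap'ed; MAP_POPULATE
 * is only combined with MAP_SHARED when -mem-prealloc was given.  On any
 * failure the function returns NULL and the caller falls back to a normal
 * allocation.
 */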
910
911 static ram_addr_t find_ram_offset(ram_addr_t size)
912 {
913     RAMBlock *block, *next_block;
914     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
915
916     if (QTAILQ_EMPTY(&ram_list.blocks))
917         return 0;
918
919     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
920         ram_addr_t end, next = RAM_ADDR_MAX;
921
922         end = block->offset + block->length;
923
924         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
925             if (next_block->offset >= end) {
926                 next = MIN(next, next_block->offset);
927             }
928         }
929         if (next - end >= size && next - end < mingap) {
930             offset = end;
931             mingap = next - end;
932         }
933     }
934
935     if (offset == RAM_ADDR_MAX) {
936         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
937                 (uint64_t)size);
938         abort();
939     }
940
941     return offset;
942 }
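/*
 * find_ram_offset() is a best-fit search: for each existing block it computes
 * the gap up to the nearest block that starts after it and picks the smallest
 * gap that can still hold the new block, which helps keep the ram_addr_t
 * space compact when blocks are freed and reallocated.
 */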
943
944 ram_addr_t last_ram_offset(void)
945 {
946     RAMBlock *block;
947     ram_addr_t last = 0;
948
949     QTAILQ_FOREACH(block, &ram_list.blocks, next)
950         last = MAX(last, block->offset + block->length);
951
952     return last;
953 }
954
955 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
956 {
957     int ret;
958     QemuOpts *machine_opts;
959
960     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
961     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
962     if (machine_opts &&
963         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
964         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
965         if (ret) {
966             perror("qemu_madvise");
967             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
968                             "but dump_guest_core=off specified\n");
969         }
970     }
971 }
972
973 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
974 {
975     RAMBlock *new_block, *block;
976
977     new_block = NULL;
978     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
979         if (block->offset == addr) {
980             new_block = block;
981             break;
982         }
983     }
984     assert(new_block);
985     assert(!new_block->idstr[0]);
986
987     if (dev) {
988         char *id = qdev_get_dev_path(dev);
989         if (id) {
990             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
991             g_free(id);
992         }
993     }
994     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
995
996     /* This assumes the iothread lock is taken here too.  */
997     qemu_mutex_lock_ramlist();
998     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
999         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
1000             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
1001                     new_block->idstr);
1002             abort();
1003         }
1004     }
1005     qemu_mutex_unlock_ramlist();
1006 }
1007
1008 static int memory_try_enable_merging(void *addr, size_t len)
1009 {
1010     QemuOpts *opts;
1011
1012     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
1013     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1014         /* disabled by the user */
1015         return 0;
1016     }
1017
1018     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1019 }
1020
1021 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1022                                    MemoryRegion *mr)
1023 {
1024     RAMBlock *block, *new_block;
1025
1026     size = TARGET_PAGE_ALIGN(size);
1027     new_block = g_malloc0(sizeof(*new_block));
1028
1029     /* This assumes the iothread lock is taken here too.  */
1030     qemu_mutex_lock_ramlist();
1031     new_block->mr = mr;
1032     new_block->offset = find_ram_offset(size);
1033     if (host) {
1034         new_block->host = host;
1035         new_block->flags |= RAM_PREALLOC_MASK;
1036     } else {
1037         if (mem_path) {
1038 #if defined (__linux__) && !defined(TARGET_S390X)
1039             new_block->host = file_ram_alloc(new_block, size, mem_path);
1040             if (!new_block->host) {
1041                 new_block->host = qemu_vmalloc(size);
1042                 memory_try_enable_merging(new_block->host, size);
1043             }
1044 #else
1045             fprintf(stderr, "-mem-path option unsupported\n");
1046             exit(1);
1047 #endif
1048         } else {
1049             if (xen_enabled()) {
1050                 xen_ram_alloc(new_block->offset, size, mr);
1051             } else if (kvm_enabled()) {
1052                 /* some s390/kvm configurations have special constraints */
1053                 new_block->host = kvm_vmalloc(size);
1054             } else {
1055                 new_block->host = qemu_vmalloc(size);
1056             }
1057             memory_try_enable_merging(new_block->host, size);
1058         }
1059     }
1060     new_block->length = size;
1061
1062     /* Keep the list sorted from biggest to smallest block.  */
1063     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1064         if (block->length < new_block->length) {
1065             break;
1066         }
1067     }
1068     if (block) {
1069         QTAILQ_INSERT_BEFORE(block, new_block, next);
1070     } else {
1071         QTAILQ_INSERT_TAIL(&ram_list.blocks, new_block, next);
1072     }
1073     ram_list.mru_block = NULL;
1074
1075     ram_list.version++;
1076     qemu_mutex_unlock_ramlist();
1077
1078     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1079                                        last_ram_offset() >> TARGET_PAGE_BITS);
1080     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1081            0, size >> TARGET_PAGE_BITS);
1082     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1083
1084     qemu_ram_setup_dump(new_block->host, size);
1085     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1086
1087     if (kvm_enabled())
1088         kvm_setup_guest_memory(new_block->host, size);
1089
1090     return new_block->offset;
1091 }
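/*
 * Note on the ordering above: the block list is kept sorted from largest to
 * smallest, presumably so that the linear scans in qemu_get_ram_ptr() and
 * friends hit the big (and most frequently accessed) blocks first; the
 * mru_block cache is invalidated because the list just changed.  Newly added
 * memory starts with all dirty bits set (0xff), so migration and the display
 * code treat it as modified.
 */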
1092
1093 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1094 {
1095     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1096 }
1097
1098 void qemu_ram_free_from_ptr(ram_addr_t addr)
1099 {
1100     RAMBlock *block;
1101
1102     /* This assumes the iothread lock is taken here too.  */
1103     qemu_mutex_lock_ramlist();
1104     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1105         if (addr == block->offset) {
1106             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1107             ram_list.mru_block = NULL;
1108             ram_list.version++;
1109             g_free(block);
1110             break;
1111         }
1112     }
1113     qemu_mutex_unlock_ramlist();
1114 }
1115
1116 void qemu_ram_free(ram_addr_t addr)
1117 {
1118     RAMBlock *block;
1119
1120     /* This assumes the iothread lock is taken here too.  */
1121     qemu_mutex_lock_ramlist();
1122     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1123         if (addr == block->offset) {
1124             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1125             ram_list.mru_block = NULL;
1126             ram_list.version++;
1127             if (block->flags & RAM_PREALLOC_MASK) {
1128                 ;
1129             } else if (mem_path) {
1130 #if defined (__linux__) && !defined(TARGET_S390X)
1131                 if (block->fd) {
1132                     munmap(block->host, block->length);
1133                     close(block->fd);
1134                 } else {
1135                     qemu_vfree(block->host);
1136                 }
1137 #else
1138                 abort();
1139 #endif
1140             } else {
1141 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1142                 munmap(block->host, block->length);
1143 #else
1144                 if (xen_enabled()) {
1145                     xen_invalidate_map_cache_entry(block->host);
1146                 } else {
1147                     qemu_vfree(block->host);
1148                 }
1149 #endif
1150             }
1151             g_free(block);
1152             break;
1153         }
1154     }
1155     qemu_mutex_unlock_ramlist();
1156
1157 }
1158
1159 #ifndef _WIN32
1160 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1161 {
1162     RAMBlock *block;
1163     ram_addr_t offset;
1164     int flags;
1165     void *area, *vaddr;
1166
1167     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1168         offset = addr - block->offset;
1169         if (offset < block->length) {
1170             vaddr = block->host + offset;
1171             if (block->flags & RAM_PREALLOC_MASK) {
1172                 ;
1173             } else {
1174                 flags = MAP_FIXED;
1175                 munmap(vaddr, length);
1176                 if (mem_path) {
1177 #if defined(__linux__) && !defined(TARGET_S390X)
1178                     if (block->fd) {
1179 #ifdef MAP_POPULATE
1180                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1181                             MAP_PRIVATE;
1182 #else
1183                         flags |= MAP_PRIVATE;
1184 #endif
1185                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1186                                     flags, block->fd, offset);
1187                     } else {
1188                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1189                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1190                                     flags, -1, 0);
1191                     }
1192 #else
1193                     abort();
1194 #endif
1195                 } else {
1196 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1197                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1198                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1199                                 flags, -1, 0);
1200 #else
1201                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1202                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1203                                 flags, -1, 0);
1204 #endif
1205                 }
1206                 if (area != vaddr) {
1207                     fprintf(stderr, "Could not remap addr: "
1208                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1209                             length, addr);
1210                     exit(1);
1211                 }
1212                 memory_try_enable_merging(vaddr, length);
1213                 qemu_ram_setup_dump(vaddr, length);
1214             }
1215             return;
1216         }
1217     }
1218 }
1219 #endif /* !_WIN32 */
1220
1221 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1222    With the exception of the softmmu code in this file, this should
1223    only be used for local memory (e.g. video ram) that the device owns,
1224    and knows it isn't going to access beyond the end of the block.
1225
1226    It should not be used for general purpose DMA.
1227    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1228  */
1229 void *qemu_get_ram_ptr(ram_addr_t addr)
1230 {
1231     RAMBlock *block;
1232
1233     /* The list is protected by the iothread lock here.  */
1234     block = ram_list.mru_block;
1235     if (block && addr - block->offset < block->length) {
1236         goto found;
1237     }
1238     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1239         if (addr - block->offset < block->length) {
1240             goto found;
1241         }
1242     }
1243
1244     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1245     abort();
1246
1247 found:
1248     ram_list.mru_block = block;
1249     if (xen_enabled()) {
1250         /* We need to check if the requested address is in the RAM
1251          * because we don't want to map the entire memory in QEMU.
1252          * In that case just map until the end of the page.
1253          */
1254         if (block->offset == 0) {
1255             return xen_map_cache(addr, 0, 0);
1256         } else if (block->host == NULL) {
1257             block->host =
1258                 xen_map_cache(block->offset, block->length, 1);
1259         }
1260     }
1261     return block->host + (addr - block->offset);
1262 }
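/*
 * The lookup above is a linear scan over ram_list.blocks with a one-entry
 * MRU cache (ram_list.mru_block).  Under Xen the host mapping may not exist
 * yet and is created lazily through xen_map_cache(), either per page for the
 * main RAM block (offset 0) or for the whole block otherwise.
 */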
1263
1264 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1265  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1266  *
1267  * ??? Is this still necessary?
1268  */
1269 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1270 {
1271     RAMBlock *block;
1272
1273     /* The list is protected by the iothread lock here.  */
1274     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1275         if (addr - block->offset < block->length) {
1276             if (xen_enabled()) {
1277                 /* We need to check if the requested address is in the RAM
1278                  * because we don't want to map the entire memory in QEMU.
1279                  * In that case just map until the end of the page.
1280                  */
1281                 if (block->offset == 0) {
1282                     return xen_map_cache(addr, 0, 0);
1283                 } else if (block->host == NULL) {
1284                     block->host =
1285                         xen_map_cache(block->offset, block->length, 1);
1286                 }
1287             }
1288             return block->host + (addr - block->offset);
1289         }
1290     }
1291
1292     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1293     abort();
1294
1295     return NULL;
1296 }
1297
1298 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1299  * but takes a size argument */
1300 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1301 {
1302     if (*size == 0) {
1303         return NULL;
1304     }
1305     if (xen_enabled()) {
1306         return xen_map_cache(addr, *size, 1);
1307     } else {
1308         RAMBlock *block;
1309
1310         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1311             if (addr - block->offset < block->length) {
1312                 if (addr - block->offset + *size > block->length)
1313                     *size = block->length - addr + block->offset;
1314                 return block->host + (addr - block->offset);
1315             }
1316         }
1317
1318         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1319         abort();
1320     }
1321 }
1322
1323 void qemu_put_ram_ptr(void *addr)
1324 {
1325     trace_qemu_put_ram_ptr(addr);
1326 }
1327
1328 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1329 {
1330     RAMBlock *block;
1331     uint8_t *host = ptr;
1332
1333     if (xen_enabled()) {
1334         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1335         return 0;
1336     }
1337
1338     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1339         /* This case happens when the block is not mapped. */
1340         if (block->host == NULL) {
1341             continue;
1342         }
1343         if (host - block->host < block->length) {
1344             *ram_addr = block->offset + (host - block->host);
1345             return 0;
1346         }
1347     }
1348
1349     return -1;
1350 }
1351
1352 /* Some of the softmmu routines need to translate from a host pointer
1353    (typically a TLB entry) back to a ram offset.  */
1354 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1355 {
1356     ram_addr_t ram_addr;
1357
1358     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1359         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1360         abort();
1361     }
1362     return ram_addr;
1363 }
1364
1365 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1366                                     unsigned size)
1367 {
1368 #ifdef DEBUG_UNASSIGNED
1369     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1370 #endif
1371 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1372     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1373 #endif
1374     return 0;
1375 }
1376
1377 static void unassigned_mem_write(void *opaque, hwaddr addr,
1378                                  uint64_t val, unsigned size)
1379 {
1380 #ifdef DEBUG_UNASSIGNED
1381     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1382 #endif
1383 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1384     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1385 #endif
1386 }
1387
1388 static const MemoryRegionOps unassigned_mem_ops = {
1389     .read = unassigned_mem_read,
1390     .write = unassigned_mem_write,
1391     .endianness = DEVICE_NATIVE_ENDIAN,
1392 };
1393
1394 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1395                                unsigned size)
1396 {
1397     abort();
1398 }
1399
1400 static void error_mem_write(void *opaque, hwaddr addr,
1401                             uint64_t value, unsigned size)
1402 {
1403     abort();
1404 }
1405
1406 static const MemoryRegionOps error_mem_ops = {
1407     .read = error_mem_read,
1408     .write = error_mem_write,
1409     .endianness = DEVICE_NATIVE_ENDIAN,
1410 };
1411
1412 static const MemoryRegionOps rom_mem_ops = {
1413     .read = error_mem_read,
1414     .write = unassigned_mem_write,
1415     .endianness = DEVICE_NATIVE_ENDIAN,
1416 };
1417
1418 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1419                                uint64_t val, unsigned size)
1420 {
1421     int dirty_flags;
1422     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1423     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1424 #if !defined(CONFIG_USER_ONLY)
1425         tb_invalidate_phys_page_fast(ram_addr, size);
1426         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1427 #endif
1428     }
1429     switch (size) {
1430     case 1:
1431         stb_p(qemu_get_ram_ptr(ram_addr), val);
1432         break;
1433     case 2:
1434         stw_p(qemu_get_ram_ptr(ram_addr), val);
1435         break;
1436     case 4:
1437         stl_p(qemu_get_ram_ptr(ram_addr), val);
1438         break;
1439     default:
1440         abort();
1441     }
1442     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1443     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1444     /* we remove the notdirty callback only if the code has been
1445        flushed */
1446     if (dirty_flags == 0xff)
1447         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1448 }
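/*
 * notdirty_mem_write() is the slow path for stores to pages that still
 * contain translated code: it invalidates the TBs on the page, performs the
 * store through the RAM host pointer, and sets every dirty flag except
 * CODE_DIRTY_FLAG.  Only once the flags read back as 0xff (i.e. no translated
 * code is left on the page) is the TLB entry switched back to the fast path
 * via tlb_set_dirty().
 */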
1449
1450 static const MemoryRegionOps notdirty_mem_ops = {
1451     .read = error_mem_read,
1452     .write = notdirty_mem_write,
1453     .endianness = DEVICE_NATIVE_ENDIAN,
1454 };
1455
1456 /* Generate a debug exception if a watchpoint has been hit.  */
1457 static void check_watchpoint(int offset, int len_mask, int flags)
1458 {
1459     CPUArchState *env = cpu_single_env;
1460     target_ulong pc, cs_base;
1461     target_ulong vaddr;
1462     CPUWatchpoint *wp;
1463     int cpu_flags;
1464
1465     if (env->watchpoint_hit) {
1466         /* We re-entered the check after replacing the TB. Now raise
1467          * the debug interrupt so that it will trigger after the
1468          * current instruction. */
1469         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1470         return;
1471     }
1472     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1473     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1474         if ((vaddr == (wp->vaddr & len_mask) ||
1475              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1476             wp->flags |= BP_WATCHPOINT_HIT;
1477             if (!env->watchpoint_hit) {
1478                 env->watchpoint_hit = wp;
1479                 tb_check_watchpoint(env);
1480                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1481                     env->exception_index = EXCP_DEBUG;
1482                     cpu_loop_exit(env);
1483                 } else {
1484                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1485                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1486                     cpu_resume_from_signal(env, NULL);
1487                 }
1488             }
1489         } else {
1490             wp->flags &= ~BP_WATCHPOINT_HIT;
1491         }
1492     }
1493 }
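/*
 * Watchpoints rely on the TLB_MMIO trick set up in
 * memory_region_section_get_iotlb(): accesses to a page with a watchpoint
 * land in watch_mem_read/write, which call check_watchpoint() first.  On a
 * hit the CPU either exits immediately with EXCP_DEBUG (BP_STOP_BEFORE_ACCESS)
 * or re-generates the current TB as a single-instruction TB and restarts it,
 * so the debug exception is raised right after the access completes.
 */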
1494
1495 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1496    so these check for a hit then pass through to the normal out-of-line
1497    phys routines.  */
1498 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1499                                unsigned size)
1500 {
1501     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1502     switch (size) {
1503     case 1: return ldub_phys(addr);
1504     case 2: return lduw_phys(addr);
1505     case 4: return ldl_phys(addr);
1506     default: abort();
1507     }
1508 }
1509
1510 static void watch_mem_write(void *opaque, hwaddr addr,
1511                             uint64_t val, unsigned size)
1512 {
1513     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1514     switch (size) {
1515     case 1:
1516         stb_phys(addr, val);
1517         break;
1518     case 2:
1519         stw_phys(addr, val);
1520         break;
1521     case 4:
1522         stl_phys(addr, val);
1523         break;
1524     default: abort();
1525     }
1526 }
1527
1528 static const MemoryRegionOps watch_mem_ops = {
1529     .read = watch_mem_read,
1530     .write = watch_mem_write,
1531     .endianness = DEVICE_NATIVE_ENDIAN,
1532 };
1533
1534 static uint64_t subpage_read(void *opaque, hwaddr addr,
1535                              unsigned len)
1536 {
1537     subpage_t *mmio = opaque;
1538     unsigned int idx = SUBPAGE_IDX(addr);
1539     MemoryRegionSection *section;
1540 #if defined(DEBUG_SUBPAGE)
1541     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1542            mmio, len, addr, idx);
1543 #endif
1544
1545     section = &phys_sections[mmio->sub_section[idx]];
1546     addr += mmio->base;
1547     addr -= section->offset_within_address_space;
1548     addr += section->offset_within_region;
1549     return io_mem_read(section->mr, addr, len);
1550 }
1551
1552 static void subpage_write(void *opaque, hwaddr addr,
1553                           uint64_t value, unsigned len)
1554 {
1555     subpage_t *mmio = opaque;
1556     unsigned int idx = SUBPAGE_IDX(addr);
1557     MemoryRegionSection *section;
1558 #if defined(DEBUG_SUBPAGE)
1559     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1560            " idx %d value %"PRIx64"\n",
1561            __func__, mmio, len, addr, idx, value);
1562 #endif
1563
1564     section = &phys_sections[mmio->sub_section[idx]];
1565     addr += mmio->base;
1566     addr -= section->offset_within_address_space;
1567     addr += section->offset_within_region;
1568     io_mem_write(section->mr, addr, value, len);
1569 }
1570
1571 static const MemoryRegionOps subpage_ops = {
1572     .read = subpage_read,
1573     .write = subpage_write,
1574     .endianness = DEVICE_NATIVE_ENDIAN,
1575 };
1576
1577 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1578                                  unsigned size)
1579 {
1580     ram_addr_t raddr = addr;
1581     void *ptr = qemu_get_ram_ptr(raddr);
1582     switch (size) {
1583     case 1: return ldub_p(ptr);
1584     case 2: return lduw_p(ptr);
1585     case 4: return ldl_p(ptr);
1586     default: abort();
1587     }
1588 }
1589
1590 static void subpage_ram_write(void *opaque, hwaddr addr,
1591                               uint64_t value, unsigned size)
1592 {
1593     ram_addr_t raddr = addr;
1594     void *ptr = qemu_get_ram_ptr(raddr);
1595     switch (size) {
1596     case 1: return stb_p(ptr, value);
1597     case 2: return stw_p(ptr, value);
1598     case 4: return stl_p(ptr, value);
1599     default: abort();
1600     }
1601 }
1602
1603 static const MemoryRegionOps subpage_ram_ops = {
1604     .read = subpage_ram_read,
1605     .write = subpage_ram_write,
1606     .endianness = DEVICE_NATIVE_ENDIAN,
1607 };
1608
1609 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1610                              uint16_t section)
1611 {
1612     int idx, eidx;
1613
1614     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1615         return -1;
1616     idx = SUBPAGE_IDX(start);
1617     eidx = SUBPAGE_IDX(end);
1618 #if defined(DEBUG_SUBPAGE)
1619     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1620            __func__, mmio, start, end, idx, eidx, section);
1621 #endif
1622     if (memory_region_is_ram(phys_sections[section].mr)) {
1623         MemoryRegionSection new_section = phys_sections[section];
1624         new_section.mr = &io_mem_subpage_ram;
1625         section = phys_section_add(&new_section);
1626     }
1627     for (; idx <= eidx; idx++) {
1628         mmio->sub_section[idx] = section;
1629     }
1630
1631     return 0;
1632 }
1633
1634 static subpage_t *subpage_init(hwaddr base)
1635 {
1636     subpage_t *mmio;
1637
1638     mmio = g_malloc0(sizeof(subpage_t));
1639
1640     mmio->base = base;
1641     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1642                           "subpage", TARGET_PAGE_SIZE);
1643     mmio->iomem.subpage = true;
1644 #if defined(DEBUG_SUBPAGE)
1645     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1646            mmio, base, TARGET_PAGE_SIZE);
1647 #endif
1648     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1649
1650     return mmio;
1651 }
1652
1653 static uint16_t dummy_section(MemoryRegion *mr)
1654 {
1655     MemoryRegionSection section = {
1656         .mr = mr,
1657         .offset_within_address_space = 0,
1658         .offset_within_region = 0,
1659         .size = UINT64_MAX,
1660     };
1661
1662     return phys_section_add(&section);
1663 }
1664
1665 MemoryRegion *iotlb_to_region(hwaddr index)
1666 {
1667     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1668 }
1669
1670 static void io_mem_init(void)
1671 {
1672     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1673     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1674     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1675                           "unassigned", UINT64_MAX);
1676     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1677                           "notdirty", UINT64_MAX);
1678     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1679                           "subpage-ram", UINT64_MAX);
1680     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1681                           "watch", UINT64_MAX);
1682 }
1683
1684 static void mem_begin(MemoryListener *listener)
1685 {
1686     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1687
1688     destroy_all_mappings(d);
1689     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1690 }
1691
1692 static void core_begin(MemoryListener *listener)
1693 {
1694     phys_sections_clear();
1695     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1696     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1697     phys_section_rom = dummy_section(&io_mem_rom);
1698     phys_section_watch = dummy_section(&io_mem_watch);
1699 }
1700
1701 static void tcg_commit(MemoryListener *listener)
1702 {
1703     CPUArchState *env;
1704
1705     /* since each CPU stores ram addresses in its TLB cache, we must
1706        reset the modified entries */
1707     /* XXX: slow ! */
1708     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1709         tlb_flush(env, 1);
1710     }
1711 }
1712
1713 static void core_log_global_start(MemoryListener *listener)
1714 {
1715     cpu_physical_memory_set_dirty_tracking(1);
1716 }
1717
1718 static void core_log_global_stop(MemoryListener *listener)
1719 {
1720     cpu_physical_memory_set_dirty_tracking(0);
1721 }
1722
1723 static void io_region_add(MemoryListener *listener,
1724                           MemoryRegionSection *section)
1725 {
1726     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1727
1728     mrio->mr = section->mr;
1729     mrio->offset = section->offset_within_region;
1730     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1731                  section->offset_within_address_space, section->size);
1732     ioport_register(&mrio->iorange);
1733 }
1734
1735 static void io_region_del(MemoryListener *listener,
1736                           MemoryRegionSection *section)
1737 {
1738     isa_unassign_ioport(section->offset_within_address_space, section->size);
1739 }
1740
1741 static MemoryListener core_memory_listener = {
1742     .begin = core_begin,
1743     .log_global_start = core_log_global_start,
1744     .log_global_stop = core_log_global_stop,
1745     .priority = 1,
1746 };
1747
1748 static MemoryListener io_memory_listener = {
1749     .region_add = io_region_add,
1750     .region_del = io_region_del,
1751     .priority = 0,
1752 };
1753
1754 static MemoryListener tcg_memory_listener = {
1755     .commit = tcg_commit,
1756 };
1757
1758 void address_space_init_dispatch(AddressSpace *as)
1759 {
1760     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1761
1762     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1763     d->listener = (MemoryListener) {
1764         .begin = mem_begin,
1765         .region_add = mem_add,
1766         .region_nop = mem_add,
1767         .priority = 0,
1768     };
1769     as->dispatch = d;
1770     memory_listener_register(&d->listener, as);
1771 }
1772
1773 void address_space_destroy_dispatch(AddressSpace *as)
1774 {
1775     AddressSpaceDispatch *d = as->dispatch;
1776
1777     memory_listener_unregister(&d->listener);
1778     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1779     g_free(d);
1780     as->dispatch = NULL;
1781 }
1782
1783 static void memory_map_init(void)
1784 {
1785     system_memory = g_malloc(sizeof(*system_memory));
1786     memory_region_init(system_memory, "system", INT64_MAX);
1787     address_space_init(&address_space_memory, system_memory);
1788     address_space_memory.name = "memory";
1789
1790     system_io = g_malloc(sizeof(*system_io));
1791     memory_region_init(system_io, "io", 65536);
1792     address_space_init(&address_space_io, system_io);
1793     address_space_io.name = "I/O";
1794
1795     memory_listener_register(&core_memory_listener, &address_space_memory);
1796     memory_listener_register(&io_memory_listener, &address_space_io);
1797     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1798
1799     dma_context_init(&dma_context_memory, &address_space_memory,
1800                      NULL, NULL, NULL);
1801 }
1802
1803 MemoryRegion *get_system_memory(void)
1804 {
1805     return system_memory;
1806 }
1807
1808 MemoryRegion *get_system_io(void)
1809 {
1810     return system_io;
1811 }
1812
1813 #endif /* !defined(CONFIG_USER_ONLY) */
1814
1815 /* physical memory access (slow version, mainly for debug) */
1816 #if defined(CONFIG_USER_ONLY)
1817 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1818                         uint8_t *buf, int len, int is_write)
1819 {
1820     int l, flags;
1821     target_ulong page;
1822     void * p;
1823
1824     while (len > 0) {
1825         page = addr & TARGET_PAGE_MASK;
1826         l = (page + TARGET_PAGE_SIZE) - addr;
1827         if (l > len)
1828             l = len;
1829         flags = page_get_flags(page);
1830         if (!(flags & PAGE_VALID))
1831             return -1;
1832         if (is_write) {
1833             if (!(flags & PAGE_WRITE))
1834                 return -1;
1835             /* XXX: this code should not depend on lock_user */
1836             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1837                 return -1;
1838             memcpy(p, buf, l);
1839             unlock_user(p, addr, l);
1840         } else {
1841             if (!(flags & PAGE_READ))
1842                 return -1;
1843             /* XXX: this code should not depend on lock_user */
1844             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1845                 return -1;
1846             memcpy(buf, p, l);
1847             unlock_user(p, addr, 0);
1848         }
1849         len -= l;
1850         buf += l;
1851         addr += l;
1852     }
1853     return 0;
1854 }
1855
1856 #else
1857
1858 static void invalidate_and_set_dirty(hwaddr addr,
1859                                      hwaddr length)
1860 {
1861     if (!cpu_physical_memory_is_dirty(addr)) {
1862         /* invalidate code */
1863         tb_invalidate_phys_page_range(addr, addr + length, 0);
1864         /* set dirty bit */
1865         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1866     }
1867     xen_modified_memory(addr, length);
1868 }
1869
1870 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1871                       int len, bool is_write)
1872 {
1873     AddressSpaceDispatch *d = as->dispatch;
1874     int l;
1875     uint8_t *ptr;
1876     uint32_t val;
1877     hwaddr page;
1878     MemoryRegionSection *section;
1879
1880     while (len > 0) {
1881         page = addr & TARGET_PAGE_MASK;
1882         l = (page + TARGET_PAGE_SIZE) - addr;
1883         if (l > len)
1884             l = len;
1885         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1886
1887         if (is_write) {
1888             if (!memory_region_is_ram(section->mr)) {
1889                 hwaddr addr1;
1890                 addr1 = memory_region_section_addr(section, addr);
1891                 /* XXX: could force cpu_single_env to NULL to avoid
1892                    potential bugs */
1893                 if (l >= 4 && ((addr1 & 3) == 0)) {
1894                     /* 32 bit write access */
1895                     val = ldl_p(buf);
1896                     io_mem_write(section->mr, addr1, val, 4);
1897                     l = 4;
1898                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1899                     /* 16 bit write access */
1900                     val = lduw_p(buf);
1901                     io_mem_write(section->mr, addr1, val, 2);
1902                     l = 2;
1903                 } else {
1904                     /* 8 bit write access */
1905                     val = ldub_p(buf);
1906                     io_mem_write(section->mr, addr1, val, 1);
1907                     l = 1;
1908                 }
1909             } else if (!section->readonly) {
1910                 ram_addr_t addr1;
1911                 addr1 = memory_region_get_ram_addr(section->mr)
1912                     + memory_region_section_addr(section, addr);
1913                 /* RAM case */
1914                 ptr = qemu_get_ram_ptr(addr1);
1915                 memcpy(ptr, buf, l);
1916                 invalidate_and_set_dirty(addr1, l);
1917                 qemu_put_ram_ptr(ptr);
1918             }
1919         } else {
1920             if (!(memory_region_is_ram(section->mr) ||
1921                   memory_region_is_romd(section->mr))) {
1922                 hwaddr addr1;
1923                 /* I/O case */
1924                 addr1 = memory_region_section_addr(section, addr);
1925                 if (l >= 4 && ((addr1 & 3) == 0)) {
1926                     /* 32 bit read access */
1927                     val = io_mem_read(section->mr, addr1, 4);
1928                     stl_p(buf, val);
1929                     l = 4;
1930                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1931                     /* 16 bit read access */
1932                     val = io_mem_read(section->mr, addr1, 2);
1933                     stw_p(buf, val);
1934                     l = 2;
1935                 } else {
1936                     /* 8 bit read access */
1937                     val = io_mem_read(section->mr, addr1, 1);
1938                     stb_p(buf, val);
1939                     l = 1;
1940                 }
1941             } else {
1942                 /* RAM case */
1943                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1944                                        + memory_region_section_addr(section,
1945                                                                     addr));
1946                 memcpy(buf, ptr, l);
1947                 qemu_put_ram_ptr(ptr);
1948             }
1949         }
1950         len -= l;
1951         buf += l;
1952         addr += l;
1953     }
1954 }
1955
1956 void address_space_write(AddressSpace *as, hwaddr addr,
1957                          const uint8_t *buf, int len)
1958 {
1959     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1960 }
1961
1962 /**
1963  * address_space_read: read from an address space.
1964  *
1965  * @as: #AddressSpace to be accessed
1966  * @addr: address within that address space
1967  * @buf: buffer that receives the @len bytes read
1968  */
1969 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1970 {
1971     address_space_rw(as, addr, buf, len, false);
1972 }
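
/* Illustrative usage sketch (editor's addition, not part of the original
 * file): a device model can use the wrappers above for small, non-critical
 * copies to and from guest memory.  The descriptor layout, address and
 * status bit below are hypothetical. */
#if 0
static void example_update_descriptor(AddressSpace *as, hwaddr desc_pa)
{
    uint8_t desc[16];

    /* fetch a 16-byte descriptor from guest memory ... */
    address_space_read(as, desc_pa, desc, sizeof(desc));
    /* ... flag it as completed and write it back */
    desc[0] |= 0x1;
    address_space_write(as, desc_pa, desc, sizeof(desc));
}
#endif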
1973
1974
1975 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1976                             int len, int is_write)
1977 {
1978     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1979 }
1980
1981 /* used for ROM loading: can write in RAM and ROM */
1982 void cpu_physical_memory_write_rom(hwaddr addr,
1983                                    const uint8_t *buf, int len)
1984 {
1985     AddressSpaceDispatch *d = address_space_memory.dispatch;
1986     int l;
1987     uint8_t *ptr;
1988     hwaddr page;
1989     MemoryRegionSection *section;
1990
1991     while (len > 0) {
1992         page = addr & TARGET_PAGE_MASK;
1993         l = (page + TARGET_PAGE_SIZE) - addr;
1994         if (l > len)
1995             l = len;
1996         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1997
1998         if (!(memory_region_is_ram(section->mr) ||
1999               memory_region_is_romd(section->mr))) {
2000             /* do nothing */
2001         } else {
2002             unsigned long addr1;
2003             addr1 = memory_region_get_ram_addr(section->mr)
2004                 + memory_region_section_addr(section, addr);
2005             /* ROM/RAM case */
2006             ptr = qemu_get_ram_ptr(addr1);
2007             memcpy(ptr, buf, l);
2008             invalidate_and_set_dirty(addr1, l);
2009             qemu_put_ram_ptr(ptr);
2010         }
2011         len -= l;
2012         buf += l;
2013         addr += l;
2014     }
2015 }
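
/* Illustrative sketch (editor's addition): a board's firmware loader would
 * use the helper above to copy a BIOS image into a read-only region; a plain
 * cpu_physical_memory_write() to the same range would be silently dropped by
 * the readonly check in address_space_rw().  The blob, size and load address
 * are hypothetical. */
#if 0
static void example_load_firmware(const uint8_t *blob, int size)
{
    hwaddr bios_base = 0xfffc0000; /* hypothetical load address */

    cpu_physical_memory_write_rom(bios_base, blob, size);
}
#endif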
2016
2017 typedef struct {
2018     void *buffer;
2019     hwaddr addr;
2020     hwaddr len;
2021 } BounceBuffer;
2022
2023 static BounceBuffer bounce;
2024
2025 typedef struct MapClient {
2026     void *opaque;
2027     void (*callback)(void *opaque);
2028     QLIST_ENTRY(MapClient) link;
2029 } MapClient;
2030
2031 static QLIST_HEAD(map_client_list, MapClient) map_client_list
2032     = QLIST_HEAD_INITIALIZER(map_client_list);
2033
2034 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
2035 {
2036     MapClient *client = g_malloc(sizeof(*client));
2037
2038     client->opaque = opaque;
2039     client->callback = callback;
2040     QLIST_INSERT_HEAD(&map_client_list, client, link);
2041     return client;
2042 }
2043
2044 static void cpu_unregister_map_client(void *_client)
2045 {
2046     MapClient *client = (MapClient *)_client;
2047
2048     QLIST_REMOVE(client, link);
2049     g_free(client);
2050 }
2051
2052 static void cpu_notify_map_clients(void)
2053 {
2054     MapClient *client;
2055
2056     while (!QLIST_EMPTY(&map_client_list)) {
2057         client = QLIST_FIRST(&map_client_list);
2058         client->callback(client->opaque);
2059         cpu_unregister_map_client(client);
2060     }
2061 }
2062
2063 /* Map a physical memory region into a host virtual address.
2064  * May map a subset of the requested range, given by and returned in *plen.
2065  * May return NULL if resources needed to perform the mapping are exhausted.
2066  * Use only for reads OR writes - not for read-modify-write operations.
2067  * Use cpu_register_map_client() to know when retrying the map operation is
2068  * likely to succeed.
2069  */
2070 void *address_space_map(AddressSpace *as,
2071                         hwaddr addr,
2072                         hwaddr *plen,
2073                         bool is_write)
2074 {
2075     AddressSpaceDispatch *d = as->dispatch;
2076     hwaddr len = *plen;
2077     hwaddr todo = 0;
2078     int l;
2079     hwaddr page;
2080     MemoryRegionSection *section;
2081     ram_addr_t raddr = RAM_ADDR_MAX;
2082     ram_addr_t rlen;
2083     void *ret;
2084
2085     while (len > 0) {
2086         page = addr & TARGET_PAGE_MASK;
2087         l = (page + TARGET_PAGE_SIZE) - addr;
2088         if (l > len)
2089             l = len;
2090         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2091
2092         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2093             if (todo || bounce.buffer) {
2094                 break;
2095             }
2096             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2097             bounce.addr = addr;
2098             bounce.len = l;
2099             if (!is_write) {
2100                 address_space_read(as, addr, bounce.buffer, l);
2101             }
2102
2103             *plen = l;
2104             return bounce.buffer;
2105         }
2106         if (!todo) {
2107             raddr = memory_region_get_ram_addr(section->mr)
2108                 + memory_region_section_addr(section, addr);
2109         }
2110
2111         len -= l;
2112         addr += l;
2113         todo += l;
2114     }
2115     rlen = todo;
2116     ret = qemu_ram_ptr_length(raddr, &rlen);
2117     *plen = rlen;
2118     return ret;
2119 }
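
/* Illustrative sketch (editor's addition): a caller that can tolerate a
 * deferred mapping retries when address_space_map() returns NULL (the single
 * bounce buffer is busy) by registering a map client.  The retry callback
 * and opaque pointer are hypothetical. */
#if 0
static void example_retry_dma(void *opaque); /* re-issues the transfer */

static void *example_try_map(AddressSpace *as, hwaddr addr, hwaddr *plen,
                             bool is_write, void *opaque)
{
    void *host = address_space_map(as, addr, plen, is_write);

    if (!host) {
        /* bounce buffer in use: ask to be called back once it is freed */
        cpu_register_map_client(opaque, example_retry_dma);
    }
    return host;
}
#endif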
2120
2121 /* Unmaps a memory region previously mapped by address_space_map().
2122  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2123  * the amount of memory that was actually read or written by the caller.
2124  */
2125 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2126                          int is_write, hwaddr access_len)
2127 {
2128     if (buffer != bounce.buffer) {
2129         if (is_write) {
2130             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2131             while (access_len) {
2132                 unsigned l;
2133                 l = TARGET_PAGE_SIZE;
2134                 if (l > access_len)
2135                     l = access_len;
2136                 invalidate_and_set_dirty(addr1, l);
2137                 addr1 += l;
2138                 access_len -= l;
2139             }
2140         }
2141         if (xen_enabled()) {
2142             xen_invalidate_map_cache_entry(buffer);
2143         }
2144         return;
2145     }
2146     if (is_write) {
2147         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2148     }
2149     qemu_vfree(bounce.buffer);
2150     bounce.buffer = NULL;
2151     cpu_notify_map_clients();
2152 }
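
/* Illustrative sketch (editor's addition): the usual map/copy/unmap cycle
 * for a zero-copy transfer.  *plen may come back smaller than requested, so
 * a real caller would loop over the remainder (omitted here).  The source
 * buffer is hypothetical. */
#if 0
static void example_dma_write(AddressSpace *as, hwaddr addr,
                              const uint8_t *src, hwaddr size)
{
    hwaddr mapped = size;
    void *host = address_space_map(as, addr, &mapped, true);

    if (host) {
        memcpy(host, src, mapped);
        /* access_len == mapped marks exactly the written range dirty */
        address_space_unmap(as, host, mapped, true, mapped);
    }
}
#endif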
2153
2154 void *cpu_physical_memory_map(hwaddr addr,
2155                               hwaddr *plen,
2156                               int is_write)
2157 {
2158     return address_space_map(&address_space_memory, addr, plen, is_write);
2159 }
2160
2161 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2162                                int is_write, hwaddr access_len)
2163 {
2164     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2165 }
2166
2167 /* warning: addr must be aligned */
2168 static inline uint32_t ldl_phys_internal(hwaddr addr,
2169                                          enum device_endian endian)
2170 {
2171     uint8_t *ptr;
2172     uint32_t val;
2173     MemoryRegionSection *section;
2174
2175     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2176
2177     if (!(memory_region_is_ram(section->mr) ||
2178           memory_region_is_romd(section->mr))) {
2179         /* I/O case */
2180         addr = memory_region_section_addr(section, addr);
2181         val = io_mem_read(section->mr, addr, 4);
2182 #if defined(TARGET_WORDS_BIGENDIAN)
2183         if (endian == DEVICE_LITTLE_ENDIAN) {
2184             val = bswap32(val);
2185         }
2186 #else
2187         if (endian == DEVICE_BIG_ENDIAN) {
2188             val = bswap32(val);
2189         }
2190 #endif
2191     } else {
2192         /* RAM case */
2193         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2194                                 & TARGET_PAGE_MASK)
2195                                + memory_region_section_addr(section, addr));
2196         switch (endian) {
2197         case DEVICE_LITTLE_ENDIAN:
2198             val = ldl_le_p(ptr);
2199             break;
2200         case DEVICE_BIG_ENDIAN:
2201             val = ldl_be_p(ptr);
2202             break;
2203         default:
2204             val = ldl_p(ptr);
2205             break;
2206         }
2207     }
2208     return val;
2209 }
2210
2211 uint32_t ldl_phys(hwaddr addr)
2212 {
2213     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2214 }
2215
2216 uint32_t ldl_le_phys(hwaddr addr)
2217 {
2218     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2219 }
2220
2221 uint32_t ldl_be_phys(hwaddr addr)
2222 {
2223     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2224 }
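
/* Illustrative sketch (editor's addition): the _le/_be variants let device
 * code read guest data structures with a fixed byte order regardless of the
 * target's native endianness.  Per the warning above, the address must be
 * naturally aligned.  The ring layout is hypothetical. */
#if 0
static uint32_t example_read_ring_head(hwaddr ring_base)
{
    /* little-endian 32-bit head index at offset 0 of an aligned ring */
    return ldl_le_phys(ring_base);
}
#endif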
2225
2226 /* warning: addr must be aligned */
2227 static inline uint64_t ldq_phys_internal(hwaddr addr,
2228                                          enum device_endian endian)
2229 {
2230     uint8_t *ptr;
2231     uint64_t val;
2232     MemoryRegionSection *section;
2233
2234     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2235
2236     if (!(memory_region_is_ram(section->mr) ||
2237           memory_region_is_romd(section->mr))) {
2238         /* I/O case */
2239         addr = memory_region_section_addr(section, addr);
2240
2241         /* XXX This is broken when device endian != cpu endian.
2242                Fix and add "endian" variable check */
2243 #ifdef TARGET_WORDS_BIGENDIAN
2244         val = io_mem_read(section->mr, addr, 4) << 32;
2245         val |= io_mem_read(section->mr, addr + 4, 4);
2246 #else
2247         val = io_mem_read(section->mr, addr, 4);
2248         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2249 #endif
2250     } else {
2251         /* RAM case */
2252         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2253                                 & TARGET_PAGE_MASK)
2254                                + memory_region_section_addr(section, addr));
2255         switch (endian) {
2256         case DEVICE_LITTLE_ENDIAN:
2257             val = ldq_le_p(ptr);
2258             break;
2259         case DEVICE_BIG_ENDIAN:
2260             val = ldq_be_p(ptr);
2261             break;
2262         default:
2263             val = ldq_p(ptr);
2264             break;
2265         }
2266     }
2267     return val;
2268 }
2269
2270 uint64_t ldq_phys(hwaddr addr)
2271 {
2272     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2273 }
2274
2275 uint64_t ldq_le_phys(hwaddr addr)
2276 {
2277     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2278 }
2279
2280 uint64_t ldq_be_phys(hwaddr addr)
2281 {
2282     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2283 }
2284
2285 /* XXX: optimize */
2286 uint32_t ldub_phys(hwaddr addr)
2287 {
2288     uint8_t val;
2289     cpu_physical_memory_read(addr, &val, 1);
2290     return val;
2291 }
2292
2293 /* warning: addr must be aligned */
2294 static inline uint32_t lduw_phys_internal(hwaddr addr,
2295                                           enum device_endian endian)
2296 {
2297     uint8_t *ptr;
2298     uint64_t val;
2299     MemoryRegionSection *section;
2300
2301     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2302
2303     if (!(memory_region_is_ram(section->mr) ||
2304           memory_region_is_romd(section->mr))) {
2305         /* I/O case */
2306         addr = memory_region_section_addr(section, addr);
2307         val = io_mem_read(section->mr, addr, 2);
2308 #if defined(TARGET_WORDS_BIGENDIAN)
2309         if (endian == DEVICE_LITTLE_ENDIAN) {
2310             val = bswap16(val);
2311         }
2312 #else
2313         if (endian == DEVICE_BIG_ENDIAN) {
2314             val = bswap16(val);
2315         }
2316 #endif
2317     } else {
2318         /* RAM case */
2319         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2320                                 & TARGET_PAGE_MASK)
2321                                + memory_region_section_addr(section, addr));
2322         switch (endian) {
2323         case DEVICE_LITTLE_ENDIAN:
2324             val = lduw_le_p(ptr);
2325             break;
2326         case DEVICE_BIG_ENDIAN:
2327             val = lduw_be_p(ptr);
2328             break;
2329         default:
2330             val = lduw_p(ptr);
2331             break;
2332         }
2333     }
2334     return val;
2335 }
2336
2337 uint32_t lduw_phys(hwaddr addr)
2338 {
2339     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2340 }
2341
2342 uint32_t lduw_le_phys(hwaddr addr)
2343 {
2344     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2345 }
2346
2347 uint32_t lduw_be_phys(hwaddr addr)
2348 {
2349     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2350 }
2351
2352 /* warning: addr must be aligned. The ram page is not marked as dirty
2353    and the code inside is not invalidated. It is useful if the dirty
2354    bits are used to track modified PTEs */
2355 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2356 {
2357     uint8_t *ptr;
2358     MemoryRegionSection *section;
2359
2360     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2361
2362     if (!memory_region_is_ram(section->mr) || section->readonly) {
2363         addr = memory_region_section_addr(section, addr);
2364         if (memory_region_is_ram(section->mr)) {
2365             section = &phys_sections[phys_section_rom];
2366         }
2367         io_mem_write(section->mr, addr, val, 4);
2368     } else {
2369         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2370                                & TARGET_PAGE_MASK)
2371             + memory_region_section_addr(section, addr);
2372         ptr = qemu_get_ram_ptr(addr1);
2373         stl_p(ptr, val);
2374
2375         if (unlikely(in_migration)) {
2376             if (!cpu_physical_memory_is_dirty(addr1)) {
2377                 /* invalidate code */
2378                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2379                 /* set dirty bit */
2380                 cpu_physical_memory_set_dirty_flags(
2381                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2382             }
2383         }
2384     }
2385 }
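
/* Illustrative sketch (editor's addition): a software page-table walker
 * would typically use the _notdirty variant when setting accessed/dirty bits
 * in a guest PTE, so the update itself does not invalidate translated code
 * in the page.  The PTE layout and bit value are hypothetical. */
#if 0
static void example_set_pte_accessed(hwaddr pte_addr, uint32_t pte)
{
    if (!(pte & 0x20)) { /* hypothetical "accessed" bit */
        stl_phys_notdirty(pte_addr, pte | 0x20);
    }
}
#endif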
2386
2387 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2388 {
2389     uint8_t *ptr;
2390     MemoryRegionSection *section;
2391
2392     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2393
2394     if (!memory_region_is_ram(section->mr) || section->readonly) {
2395         addr = memory_region_section_addr(section, addr);
2396         if (memory_region_is_ram(section->mr)) {
2397             section = &phys_sections[phys_section_rom];
2398         }
2399 #ifdef TARGET_WORDS_BIGENDIAN
2400         io_mem_write(section->mr, addr, val >> 32, 4);
2401         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2402 #else
2403         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2404         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2405 #endif
2406     } else {
2407         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2408                                 & TARGET_PAGE_MASK)
2409                                + memory_region_section_addr(section, addr));
2410         stq_p(ptr, val);
2411     }
2412 }
2413
2414 /* warning: addr must be aligned */
2415 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2416                                      enum device_endian endian)
2417 {
2418     uint8_t *ptr;
2419     MemoryRegionSection *section;
2420
2421     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2422
2423     if (!memory_region_is_ram(section->mr) || section->readonly) {
2424         addr = memory_region_section_addr(section, addr);
2425         if (memory_region_is_ram(section->mr)) {
2426             section = &phys_sections[phys_section_rom];
2427         }
2428 #if defined(TARGET_WORDS_BIGENDIAN)
2429         if (endian == DEVICE_LITTLE_ENDIAN) {
2430             val = bswap32(val);
2431         }
2432 #else
2433         if (endian == DEVICE_BIG_ENDIAN) {
2434             val = bswap32(val);
2435         }
2436 #endif
2437         io_mem_write(section->mr, addr, val, 4);
2438     } else {
2439         unsigned long addr1;
2440         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2441             + memory_region_section_addr(section, addr);
2442         /* RAM case */
2443         ptr = qemu_get_ram_ptr(addr1);
2444         switch (endian) {
2445         case DEVICE_LITTLE_ENDIAN:
2446             stl_le_p(ptr, val);
2447             break;
2448         case DEVICE_BIG_ENDIAN:
2449             stl_be_p(ptr, val);
2450             break;
2451         default:
2452             stl_p(ptr, val);
2453             break;
2454         }
2455         invalidate_and_set_dirty(addr1, 4);
2456     }
2457 }
2458
2459 void stl_phys(hwaddr addr, uint32_t val)
2460 {
2461     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2462 }
2463
2464 void stl_le_phys(hwaddr addr, uint32_t val)
2465 {
2466     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2467 }
2468
2469 void stl_be_phys(hwaddr addr, uint32_t val)
2470 {
2471     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2472 }
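
/* Illustrative sketch (editor's addition): the store helpers obey the same
 * alignment rule, and on the RAM path they invalidate any translated code in
 * the page via invalidate_and_set_dirty().  The completion record below is
 * hypothetical. */
#if 0
static void example_post_completion(hwaddr status_pa, uint32_t status)
{
    /* the guest expects this field in little-endian format */
    stl_le_phys(status_pa, status);
}
#endif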
2473
2474 /* XXX: optimize */
2475 void stb_phys(hwaddr addr, uint32_t val)
2476 {
2477     uint8_t v = val;
2478     cpu_physical_memory_write(addr, &v, 1);
2479 }
2480
2481 /* warning: addr must be aligned */
2482 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2483                                      enum device_endian endian)
2484 {
2485     uint8_t *ptr;
2486     MemoryRegionSection *section;
2487
2488     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2489
2490     if (!memory_region_is_ram(section->mr) || section->readonly) {
2491         addr = memory_region_section_addr(section, addr);
2492         if (memory_region_is_ram(section->mr)) {
2493             section = &phys_sections[phys_section_rom];
2494         }
2495 #if defined(TARGET_WORDS_BIGENDIAN)
2496         if (endian == DEVICE_LITTLE_ENDIAN) {
2497             val = bswap16(val);
2498         }
2499 #else
2500         if (endian == DEVICE_BIG_ENDIAN) {
2501             val = bswap16(val);
2502         }
2503 #endif
2504         io_mem_write(section->mr, addr, val, 2);
2505     } else {
2506         unsigned long addr1;
2507         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2508             + memory_region_section_addr(section, addr);
2509         /* RAM case */
2510         ptr = qemu_get_ram_ptr(addr1);
2511         switch (endian) {
2512         case DEVICE_LITTLE_ENDIAN:
2513             stw_le_p(ptr, val);
2514             break;
2515         case DEVICE_BIG_ENDIAN:
2516             stw_be_p(ptr, val);
2517             break;
2518         default:
2519             stw_p(ptr, val);
2520             break;
2521         }
2522         invalidate_and_set_dirty(addr1, 2);
2523     }
2524 }
2525
2526 void stw_phys(hwaddr addr, uint32_t val)
2527 {
2528     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2529 }
2530
2531 void stw_le_phys(hwaddr addr, uint32_t val)
2532 {
2533     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2534 }
2535
2536 void stw_be_phys(hwaddr addr, uint32_t val)
2537 {
2538     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2539 }
2540
2541 /* XXX: optimize */
2542 void stq_phys(hwaddr addr, uint64_t val)
2543 {
2544     val = tswap64(val);
2545     cpu_physical_memory_write(addr, &val, 8);
2546 }
2547
2548 void stq_le_phys(hwaddr addr, uint64_t val)
2549 {
2550     val = cpu_to_le64(val);
2551     cpu_physical_memory_write(addr, &val, 8);
2552 }
2553
2554 void stq_be_phys(hwaddr addr, uint64_t val)
2555 {
2556     val = cpu_to_be64(val);
2557     cpu_physical_memory_write(addr, &val, 8);
2558 }
2559
2560 /* virtual memory access for debug (includes writing to ROM) */
2561 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2562                         uint8_t *buf, int len, int is_write)
2563 {
2564     int l;
2565     hwaddr phys_addr;
2566     target_ulong page;
2567
2568     while (len > 0) {
2569         page = addr & TARGET_PAGE_MASK;
2570         phys_addr = cpu_get_phys_page_debug(env, page);
2571         /* if no physical page mapped, return an error */
2572         if (phys_addr == -1)
2573             return -1;
2574         l = (page + TARGET_PAGE_SIZE) - addr;
2575         if (l > len)
2576             l = len;
2577         phys_addr += (addr & ~TARGET_PAGE_MASK);
2578         if (is_write)
2579             cpu_physical_memory_write_rom(phys_addr, buf, l);
2580         else
2581             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2582         len -= l;
2583         buf += l;
2584         addr += l;
2585     }
2586     return 0;
2587 }
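
/* Illustrative sketch (editor's addition): this is the path a debugger stub
 * would use to peek at guest virtual memory; it translates page by page via
 * cpu_get_phys_page_debug() and can even patch ROM for software breakpoints.
 * The environment pointer and address are placeholders. */
#if 0
static uint32_t example_debug_peek_long(CPUArchState *env, target_ulong vaddr)
{
    uint8_t buf[4];

    if (cpu_memory_rw_debug(env, vaddr, buf, sizeof(buf), 0) < 0) {
        return 0; /* unmapped: a real caller would report the error */
    }
    return ldl_p(buf);
}
#endif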
2588 #endif
2589
2590 #if !defined(CONFIG_USER_ONLY)
2591
2592 /*
2593  * A helper function for the _utterly broken_ virtio device model to find out if
2594  * it's running on a big endian machine. Don't do this at home kids!
2595  */
2596 bool virtio_is_big_endian(void);
2597 bool virtio_is_big_endian(void)
2598 {
2599 #if defined(TARGET_WORDS_BIGENDIAN)
2600     return true;
2601 #else
2602     return false;
2603 #endif
2604 }
2605
2606 #endif
2607
2608 #ifndef CONFIG_USER_ONLY
2609 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2610 {
2611     MemoryRegionSection *section;
2612
2613     section = phys_page_find(address_space_memory.dispatch,
2614                              phys_addr >> TARGET_PAGE_BITS);
2615
2616     return !(memory_region_is_ram(section->mr) ||
2617              memory_region_is_romd(section->mr));
2618 }
2619 #endif