exec: change RAM list to a TAILQ
[qemu.git] / exec.c
1 /*
2  *  Virtual page mapping
3  *
4  *  Copyright (c) 2003 Fabrice Bellard
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 #include "config.h"
20 #ifdef _WIN32
21 #include <windows.h>
22 #else
23 #include <sys/types.h>
24 #include <sys/mman.h>
25 #endif
26
27 #include "qemu-common.h"
28 #include "cpu.h"
29 #include "tcg.h"
30 #include "hw/hw.h"
31 #include "hw/qdev.h"
32 #include "qemu/osdep.h"
33 #include "sysemu/kvm.h"
34 #include "hw/xen.h"
35 #include "qemu/timer.h"
36 #include "qemu/config-file.h"
37 #include "exec/memory.h"
38 #include "sysemu/dma.h"
39 #include "exec/address-spaces.h"
40 #if defined(CONFIG_USER_ONLY)
41 #include <qemu.h>
42 #else /* !CONFIG_USER_ONLY */
43 #include "sysemu/xen-mapcache.h"
44 #include "trace.h"
45 #endif
46 #include "exec/cpu-all.h"
47
48 #include "exec/cputlb.h"
49 #include "translate-all.h"
50
51 #include "exec/memory-internal.h"
52
53 //#define DEBUG_UNASSIGNED
54 //#define DEBUG_SUBPAGE
55
56 #if !defined(CONFIG_USER_ONLY)
57 int phys_ram_fd;
58 static int in_migration;
59
60 RAMList ram_list = { .blocks = QTAILQ_HEAD_INITIALIZER(ram_list.blocks) };
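/* A minimal sketch of how the TAILQ declared above is accessed throughout
 * this file, using QEMU's QTAILQ helper macros (the "next" argument names
 * the QTAILQ_ENTRY field inside RAMBlock; nothing here is new API):
 *
 *     RAMBlock *block;
 *
 *     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
 *         // e.g. find the block covering a given ram_addr_t offset
 *     }
 *     QTAILQ_INSERT_HEAD(&ram_list.blocks, new_block, next);
 *     QTAILQ_REMOVE(&ram_list.blocks, block, next);
 */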
61
62 static MemoryRegion *system_memory;
63 static MemoryRegion *system_io;
64
65 AddressSpace address_space_io;
66 AddressSpace address_space_memory;
67 DMAContext dma_context_memory;
68
69 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
70 static MemoryRegion io_mem_subpage_ram;
71
72 #endif
73
74 CPUArchState *first_cpu;
75 /* current CPU in the current thread. It is only valid inside
76    cpu_exec() */
77 DEFINE_TLS(CPUArchState *,cpu_single_env);
78 /* 0 = Do not count executed instructions.
79    1 = Precise instruction counting.
80    2 = Adaptive rate instruction counting.  */
81 int use_icount = 0;
82
83 #if !defined(CONFIG_USER_ONLY)
84
85 static MemoryRegionSection *phys_sections;
86 static unsigned phys_sections_nb, phys_sections_nb_alloc;
87 static uint16_t phys_section_unassigned;
88 static uint16_t phys_section_notdirty;
89 static uint16_t phys_section_rom;
90 static uint16_t phys_section_watch;
91
92 /* Simple allocator for PhysPageEntry nodes */
93 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
94 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
95
96 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
97
98 static void io_mem_init(void);
99 static void memory_map_init(void);
100 static void *qemu_safe_ram_ptr(ram_addr_t addr);
101
102 static MemoryRegion io_mem_watch;
103 #endif
104
105 #if !defined(CONFIG_USER_ONLY)
106
107 static void phys_map_node_reserve(unsigned nodes)
108 {
109     if (phys_map_nodes_nb + nodes > phys_map_nodes_nb_alloc) {
110         typedef PhysPageEntry Node[L2_SIZE];
111         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc * 2, 16);
112         phys_map_nodes_nb_alloc = MAX(phys_map_nodes_nb_alloc,
113                                       phys_map_nodes_nb + nodes);
114         phys_map_nodes = g_renew(Node, phys_map_nodes,
115                                  phys_map_nodes_nb_alloc);
116     }
117 }
118
119 static uint16_t phys_map_node_alloc(void)
120 {
121     unsigned i;
122     uint16_t ret;
123
124     ret = phys_map_nodes_nb++;
125     assert(ret != PHYS_MAP_NODE_NIL);
126     assert(ret != phys_map_nodes_nb_alloc);
127     for (i = 0; i < L2_SIZE; ++i) {
128         phys_map_nodes[ret][i].is_leaf = 0;
129         phys_map_nodes[ret][i].ptr = PHYS_MAP_NODE_NIL;
130     }
131     return ret;
132 }
133
134 static void phys_map_nodes_reset(void)
135 {
136     phys_map_nodes_nb = 0;
137 }
138
139
140 static void phys_page_set_level(PhysPageEntry *lp, hwaddr *index,
141                                 hwaddr *nb, uint16_t leaf,
142                                 int level)
143 {
144     PhysPageEntry *p;
145     int i;
146     hwaddr step = (hwaddr)1 << (level * L2_BITS);
147
148     if (!lp->is_leaf && lp->ptr == PHYS_MAP_NODE_NIL) {
149         lp->ptr = phys_map_node_alloc();
150         p = phys_map_nodes[lp->ptr];
151         if (level == 0) {
152             for (i = 0; i < L2_SIZE; i++) {
153                 p[i].is_leaf = 1;
154                 p[i].ptr = phys_section_unassigned;
155             }
156         }
157     } else {
158         p = phys_map_nodes[lp->ptr];
159     }
160     lp = &p[(*index >> (level * L2_BITS)) & (L2_SIZE - 1)];
161
162     while (*nb && lp < &p[L2_SIZE]) {
163         if ((*index & (step - 1)) == 0 && *nb >= step) {
164             lp->is_leaf = true;
165             lp->ptr = leaf;
166             *index += step;
167             *nb -= step;
168         } else {
169             phys_page_set_level(lp, index, nb, leaf, level - 1);
170         }
171         ++lp;
172     }
173 }
174
175 static void phys_page_set(AddressSpaceDispatch *d,
176                           hwaddr index, hwaddr nb,
177                           uint16_t leaf)
178 {
179     /* Wildly overreserve - it doesn't matter much. */
180     phys_map_node_reserve(3 * P_L2_LEVELS);
181
182     phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
183 }
184
185 MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index)
186 {
187     PhysPageEntry lp = d->phys_map;
188     PhysPageEntry *p;
189     int i;
190     uint16_t s_index = phys_section_unassigned;
191
192     for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) {
193         if (lp.ptr == PHYS_MAP_NODE_NIL) {
194             goto not_found;
195         }
196         p = phys_map_nodes[lp.ptr];
197         lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)];
198     }
199
200     s_index = lp.ptr;
201 not_found:
202     return &phys_sections[s_index];
203 }
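/* Illustrative decomposition of the lookup above, assuming
 * L2_SIZE == (1 << L2_BITS), which is what the shift/mask arithmetic in
 * phys_page_set_level() and phys_page_find() relies on:
 *
 *     hwaddr index = paddr >> TARGET_PAGE_BITS;        // page index
 *     // child slot consulted at tree level i:
 *     unsigned slot = (index >> (i * L2_BITS)) & (L2_SIZE - 1);
 *
 * Walking from level P_L2_LEVELS - 1 down to 0 either reaches a leaf,
 * whose ->ptr indexes phys_sections[], or hits PHYS_MAP_NODE_NIL and
 * falls back to phys_section_unassigned.
 */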
204
205 bool memory_region_is_unassigned(MemoryRegion *mr)
206 {
207     return mr != &io_mem_ram && mr != &io_mem_rom
208         && mr != &io_mem_notdirty && !mr->rom_device
209         && mr != &io_mem_watch;
210 }
211 #endif
212
213 void cpu_exec_init_all(void)
214 {
215 #if !defined(CONFIG_USER_ONLY)
216     memory_map_init();
217     io_mem_init();
218 #endif
219 }
220
221 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
222
223 static int cpu_common_post_load(void *opaque, int version_id)
224 {
225     CPUArchState *env = opaque;
226
227     /* 0x01 was CPU_INTERRUPT_EXIT. This line can be removed when the
228        version_id is increased. */
229     env->interrupt_request &= ~0x01;
230     tlb_flush(env, 1);
231
232     return 0;
233 }
234
235 static const VMStateDescription vmstate_cpu_common = {
236     .name = "cpu_common",
237     .version_id = 1,
238     .minimum_version_id = 1,
239     .minimum_version_id_old = 1,
240     .post_load = cpu_common_post_load,
241     .fields      = (VMStateField []) {
242         VMSTATE_UINT32(halted, CPUArchState),
243         VMSTATE_UINT32(interrupt_request, CPUArchState),
244         VMSTATE_END_OF_LIST()
245     }
246 };
247 #endif
248
249 CPUArchState *qemu_get_cpu(int cpu)
250 {
251     CPUArchState *env = first_cpu;
252
253     while (env) {
254         if (env->cpu_index == cpu)
255             break;
256         env = env->next_cpu;
257     }
258
259     return env;
260 }
261
262 void cpu_exec_init(CPUArchState *env)
263 {
264 #ifndef CONFIG_USER_ONLY
265     CPUState *cpu = ENV_GET_CPU(env);
266 #endif
267     CPUArchState **penv;
268     int cpu_index;
269
270 #if defined(CONFIG_USER_ONLY)
271     cpu_list_lock();
272 #endif
273     env->next_cpu = NULL;
274     penv = &first_cpu;
275     cpu_index = 0;
276     while (*penv != NULL) {
277         penv = &(*penv)->next_cpu;
278         cpu_index++;
279     }
280     env->cpu_index = cpu_index;
281     env->numa_node = 0;
282     QTAILQ_INIT(&env->breakpoints);
283     QTAILQ_INIT(&env->watchpoints);
284 #ifndef CONFIG_USER_ONLY
285     cpu->thread_id = qemu_get_thread_id();
286 #endif
287     *penv = env;
288 #if defined(CONFIG_USER_ONLY)
289     cpu_list_unlock();
290 #endif
291 #if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
292     vmstate_register(NULL, cpu_index, &vmstate_cpu_common, env);
293     register_savevm(NULL, "cpu", cpu_index, CPU_SAVE_VERSION,
294                     cpu_save, cpu_load, env);
295 #endif
296 }
297
298 #if defined(TARGET_HAS_ICE)
299 #if defined(CONFIG_USER_ONLY)
300 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
301 {
302     tb_invalidate_phys_page_range(pc, pc + 1, 0);
303 }
304 #else
305 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
306 {
307     tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
308             (pc & ~TARGET_PAGE_MASK));
309 }
310 #endif
311 #endif /* TARGET_HAS_ICE */
312
313 #if defined(CONFIG_USER_ONLY)
314 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
316 {
317 }
318
319 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
320                           int flags, CPUWatchpoint **watchpoint)
321 {
322     return -ENOSYS;
323 }
324 #else
325 /* Add a watchpoint.  */
326 int cpu_watchpoint_insert(CPUArchState *env, target_ulong addr, target_ulong len,
327                           int flags, CPUWatchpoint **watchpoint)
328 {
329     target_ulong len_mask = ~(len - 1);
330     CPUWatchpoint *wp;
331
332     /* sanity checks: allow power-of-2 lengths, deny unaligned watchpoints */
333     if ((len & (len - 1)) || (addr & ~len_mask) ||
334             len == 0 || len > TARGET_PAGE_SIZE) {
335         fprintf(stderr, "qemu: tried to set invalid watchpoint at "
336                 TARGET_FMT_lx ", len=" TARGET_FMT_lu "\n", addr, len);
337         return -EINVAL;
338     }
339     wp = g_malloc(sizeof(*wp));
340
341     wp->vaddr = addr;
342     wp->len_mask = len_mask;
343     wp->flags = flags;
344
345     /* keep all GDB-injected watchpoints in front */
346     if (flags & BP_GDB)
347         QTAILQ_INSERT_HEAD(&env->watchpoints, wp, entry);
348     else
349         QTAILQ_INSERT_TAIL(&env->watchpoints, wp, entry);
350
351     tlb_flush_page(env, addr);
352
353     if (watchpoint)
354         *watchpoint = wp;
355     return 0;
356 }
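/* Hedged usage sketch (hypothetical caller, not code from this file):
 * watch 4 bytes at some guest-virtual `addr` for writes, then drop the
 * watchpoint again.  The length must be a power of two and the address
 * aligned to it, as enforced by the checks above; user-only builds get
 * -ENOSYS from the stub earlier in this file.
 *
 *     CPUWatchpoint *wp;
 *
 *     if (cpu_watchpoint_insert(env, addr, 4, BP_MEM_WRITE | BP_GDB, &wp) < 0) {
 *         // -EINVAL: bad length or alignment
 *     }
 *     ...
 *     cpu_watchpoint_remove_by_ref(env, wp);
 */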
357
358 /* Remove a specific watchpoint.  */
359 int cpu_watchpoint_remove(CPUArchState *env, target_ulong addr, target_ulong len,
360                           int flags)
361 {
362     target_ulong len_mask = ~(len - 1);
363     CPUWatchpoint *wp;
364
365     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
366         if (addr == wp->vaddr && len_mask == wp->len_mask
367                 && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
368             cpu_watchpoint_remove_by_ref(env, wp);
369             return 0;
370         }
371     }
372     return -ENOENT;
373 }
374
375 /* Remove a specific watchpoint by reference.  */
376 void cpu_watchpoint_remove_by_ref(CPUArchState *env, CPUWatchpoint *watchpoint)
377 {
378     QTAILQ_REMOVE(&env->watchpoints, watchpoint, entry);
379
380     tlb_flush_page(env, watchpoint->vaddr);
381
382     g_free(watchpoint);
383 }
384
385 /* Remove all matching watchpoints.  */
386 void cpu_watchpoint_remove_all(CPUArchState *env, int mask)
387 {
388     CPUWatchpoint *wp, *next;
389
390     QTAILQ_FOREACH_SAFE(wp, &env->watchpoints, entry, next) {
391         if (wp->flags & mask)
392             cpu_watchpoint_remove_by_ref(env, wp);
393     }
394 }
395 #endif
396
397 /* Add a breakpoint.  */
398 int cpu_breakpoint_insert(CPUArchState *env, target_ulong pc, int flags,
399                           CPUBreakpoint **breakpoint)
400 {
401 #if defined(TARGET_HAS_ICE)
402     CPUBreakpoint *bp;
403
404     bp = g_malloc(sizeof(*bp));
405
406     bp->pc = pc;
407     bp->flags = flags;
408
409     /* keep all GDB-injected breakpoints in front */
410     if (flags & BP_GDB)
411         QTAILQ_INSERT_HEAD(&env->breakpoints, bp, entry);
412     else
413         QTAILQ_INSERT_TAIL(&env->breakpoints, bp, entry);
414
415     breakpoint_invalidate(env, pc);
416
417     if (breakpoint)
418         *breakpoint = bp;
419     return 0;
420 #else
421     return -ENOSYS;
422 #endif
423 }
424
425 /* Remove a specific breakpoint.  */
426 int cpu_breakpoint_remove(CPUArchState *env, target_ulong pc, int flags)
427 {
428 #if defined(TARGET_HAS_ICE)
429     CPUBreakpoint *bp;
430
431     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
432         if (bp->pc == pc && bp->flags == flags) {
433             cpu_breakpoint_remove_by_ref(env, bp);
434             return 0;
435         }
436     }
437     return -ENOENT;
438 #else
439     return -ENOSYS;
440 #endif
441 }
442
443 /* Remove a specific breakpoint by reference.  */
444 void cpu_breakpoint_remove_by_ref(CPUArchState *env, CPUBreakpoint *breakpoint)
445 {
446 #if defined(TARGET_HAS_ICE)
447     QTAILQ_REMOVE(&env->breakpoints, breakpoint, entry);
448
449     breakpoint_invalidate(env, breakpoint->pc);
450
451     g_free(breakpoint);
452 #endif
453 }
454
455 /* Remove all matching breakpoints. */
456 void cpu_breakpoint_remove_all(CPUArchState *env, int mask)
457 {
458 #if defined(TARGET_HAS_ICE)
459     CPUBreakpoint *bp, *next;
460
461     QTAILQ_FOREACH_SAFE(bp, &env->breakpoints, entry, next) {
462         if (bp->flags & mask)
463             cpu_breakpoint_remove_by_ref(env, bp);
464     }
465 #endif
466 }
467
468 /* enable or disable single step mode. EXCP_DEBUG is returned by the
469    CPU loop after each instruction */
470 void cpu_single_step(CPUArchState *env, int enabled)
471 {
472 #if defined(TARGET_HAS_ICE)
473     if (env->singlestep_enabled != enabled) {
474         env->singlestep_enabled = enabled;
475         if (kvm_enabled())
476             kvm_update_guest_debug(env, 0);
477         else {
478             /* must flush all the translated code to avoid inconsistencies */
479             /* XXX: only flush what is necessary */
480             tb_flush(env);
481         }
482     }
483 #endif
484 }
485
486 void cpu_reset_interrupt(CPUArchState *env, int mask)
487 {
488     env->interrupt_request &= ~mask;
489 }
490
491 void cpu_exit(CPUArchState *env)
492 {
493     env->exit_request = 1;
494     cpu_unlink_tb(env);
495 }
496
497 void cpu_abort(CPUArchState *env, const char *fmt, ...)
498 {
499     va_list ap;
500     va_list ap2;
501
502     va_start(ap, fmt);
503     va_copy(ap2, ap);
504     fprintf(stderr, "qemu: fatal: ");
505     vfprintf(stderr, fmt, ap);
506     fprintf(stderr, "\n");
507     cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
508     if (qemu_log_enabled()) {
509         qemu_log("qemu: fatal: ");
510         qemu_log_vprintf(fmt, ap2);
511         qemu_log("\n");
512         log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
513         qemu_log_flush();
514         qemu_log_close();
515     }
516     va_end(ap2);
517     va_end(ap);
518 #if defined(CONFIG_USER_ONLY)
519     {
520         struct sigaction act;
521         sigfillset(&act.sa_mask);
522         act.sa_handler = SIG_DFL;
523         sigaction(SIGABRT, &act, NULL);
524     }
525 #endif
526     abort();
527 }
528
529 CPUArchState *cpu_copy(CPUArchState *env)
530 {
531     CPUArchState *new_env = cpu_init(env->cpu_model_str);
532     CPUArchState *next_cpu = new_env->next_cpu;
533     int cpu_index = new_env->cpu_index;
534 #if defined(TARGET_HAS_ICE)
535     CPUBreakpoint *bp;
536     CPUWatchpoint *wp;
537 #endif
538
539     memcpy(new_env, env, sizeof(CPUArchState));
540
541     /* Preserve chaining and index. */
542     new_env->next_cpu = next_cpu;
543     new_env->cpu_index = cpu_index;
544
545     /* Clone all break/watchpoints.
546        Note: Once we support ptrace with hw-debug register access, make sure
547        BP_CPU break/watchpoints are handled correctly on clone. */
548     QTAILQ_INIT(&new_env->breakpoints);
549     QTAILQ_INIT(&new_env->watchpoints);
550 #if defined(TARGET_HAS_ICE)
551     QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
552         cpu_breakpoint_insert(new_env, bp->pc, bp->flags, NULL);
553     }
554     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
555         cpu_watchpoint_insert(new_env, wp->vaddr, (~wp->len_mask) + 1,
556                               wp->flags, NULL);
557     }
558 #endif
559
560     return new_env;
561 }
562
563 #if !defined(CONFIG_USER_ONLY)
564 static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t end,
565                                       uintptr_t length)
566 {
567     uintptr_t start1;
568
569     /* we modify the TLB cache so that the dirty bit will be set again
570        when accessing the range */
571     start1 = (uintptr_t)qemu_safe_ram_ptr(start);
572     /* Check that we don't span multiple blocks - this breaks the
573        address comparisons below.  */
574     if ((uintptr_t)qemu_safe_ram_ptr(end - 1) - start1
575             != (end - 1) - start) {
576         abort();
577     }
578     cpu_tlb_reset_dirty_all(start1, length);
579
580 }
581
582 /* Note: start and end must be within the same ram block.  */
583 void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
584                                      int dirty_flags)
585 {
586     uintptr_t length;
587
588     start &= TARGET_PAGE_MASK;
589     end = TARGET_PAGE_ALIGN(end);
590
591     length = end - start;
592     if (length == 0)
593         return;
594     cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
595
596     if (tcg_enabled()) {
597         tlb_reset_dirty_range_all(start, end, length);
598     }
599 }
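/* Illustrative calling pattern (the dirty-flag name below is an assumption
 * from the memory API of this period, not something defined in this file):
 * a client that has consumed the dirty state of a range, e.g. display
 * emulation or live migration, clears its flag so later guest writes are
 * recorded again:
 *
 *     // after processing guest writes in [start, start + length):
 *     cpu_physical_memory_reset_dirty(start, start + length,
 *                                     MIGRATION_DIRTY_FLAG);
 *
 * Under TCG this also pushes the affected TLB entries back onto the
 * "notdirty" slow path (tlb_reset_dirty_range_all above), so the next
 * write goes through notdirty_mem_write() and sets the bits again.
 */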
600
601 static int cpu_physical_memory_set_dirty_tracking(int enable)
602 {
603     int ret = 0;
604     in_migration = enable;
605     return ret;
606 }
607
608 hwaddr memory_region_section_get_iotlb(CPUArchState *env,
609                                        MemoryRegionSection *section,
610                                        target_ulong vaddr,
611                                        hwaddr paddr,
612                                        int prot,
613                                        target_ulong *address)
614 {
615     hwaddr iotlb;
616     CPUWatchpoint *wp;
617
618     if (memory_region_is_ram(section->mr)) {
619         /* Normal RAM.  */
620         iotlb = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
621             + memory_region_section_addr(section, paddr);
622         if (!section->readonly) {
623             iotlb |= phys_section_notdirty;
624         } else {
625             iotlb |= phys_section_rom;
626         }
627     } else {
628         /* IO handlers are currently passed a physical address.
629            It would be nice to pass an offset from the base address
630            of that region.  This would avoid having to special case RAM,
631            and avoid full address decoding in every device.
632            We can't use the high bits of pd for this because
633            IO_MEM_ROMD uses these as a ram address.  */
634         iotlb = section - phys_sections;
635         iotlb += memory_region_section_addr(section, paddr);
636     }
637
638     /* Make accesses to pages with watchpoints go via the
639        watchpoint trap routines.  */
640     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
641         if (vaddr == (wp->vaddr & TARGET_PAGE_MASK)) {
642             /* Avoid trapping reads of pages with a write breakpoint. */
643             if ((prot & PAGE_WRITE) || (wp->flags & BP_MEM_READ)) {
644                 iotlb = phys_section_watch + paddr;
645                 *address |= TLB_MMIO;
646                 break;
647             }
648         }
649     }
650
651     return iotlb;
652 }
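/* Rough summary of the iotlb value built above (illustration only; see
 * iotlb_to_region() further down for the consumer):
 *
 *   RAM page:   page-aligned ram_addr OR'ed with a small phys_sections[]
 *               index - phys_section_notdirty for writable RAM,
 *               phys_section_rom for readonly RAM.
 *   MMIO page:  the section's index into phys_sections[] plus the page's
 *               offset within that section.
 *
 * A page with an armed watchpoint is redirected to phys_section_watch
 * and marked TLB_MMIO instead, so that every access traps into the
 * watch_mem_* handlers below.
 */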
653 #endif /* !defined(CONFIG_USER_ONLY) */
654
655 #if !defined(CONFIG_USER_ONLY)
656
657 #define SUBPAGE_IDX(addr) ((addr) & ~TARGET_PAGE_MASK)
658 typedef struct subpage_t {
659     MemoryRegion iomem;
660     hwaddr base;
661     uint16_t sub_section[TARGET_PAGE_SIZE];
662 } subpage_t;
663
664 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
665                              uint16_t section);
666 static subpage_t *subpage_init(hwaddr base);
667 static void destroy_page_desc(uint16_t section_index)
668 {
669     MemoryRegionSection *section = &phys_sections[section_index];
670     MemoryRegion *mr = section->mr;
671
672     if (mr->subpage) {
673         subpage_t *subpage = container_of(mr, subpage_t, iomem);
674         memory_region_destroy(&subpage->iomem);
675         g_free(subpage);
676     }
677 }
678
679 static void destroy_l2_mapping(PhysPageEntry *lp, unsigned level)
680 {
681     unsigned i;
682     PhysPageEntry *p;
683
684     if (lp->ptr == PHYS_MAP_NODE_NIL) {
685         return;
686     }
687
688     p = phys_map_nodes[lp->ptr];
689     for (i = 0; i < L2_SIZE; ++i) {
690         if (!p[i].is_leaf) {
691             destroy_l2_mapping(&p[i], level - 1);
692         } else {
693             destroy_page_desc(p[i].ptr);
694         }
695     }
696     lp->is_leaf = 0;
697     lp->ptr = PHYS_MAP_NODE_NIL;
698 }
699
700 static void destroy_all_mappings(AddressSpaceDispatch *d)
701 {
702     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
703     phys_map_nodes_reset();
704 }
705
706 static uint16_t phys_section_add(MemoryRegionSection *section)
707 {
708     if (phys_sections_nb == phys_sections_nb_alloc) {
709         phys_sections_nb_alloc = MAX(phys_sections_nb_alloc * 2, 16);
710         phys_sections = g_renew(MemoryRegionSection, phys_sections,
711                                 phys_sections_nb_alloc);
712     }
713     phys_sections[phys_sections_nb] = *section;
714     return phys_sections_nb++;
715 }
716
717 static void phys_sections_clear(void)
718 {
719     phys_sections_nb = 0;
720 }
721
722 static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
723 {
724     subpage_t *subpage;
725     hwaddr base = section->offset_within_address_space
726         & TARGET_PAGE_MASK;
727     MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
728     MemoryRegionSection subsection = {
729         .offset_within_address_space = base,
730         .size = TARGET_PAGE_SIZE,
731     };
732     hwaddr start, end;
733
734     assert(existing->mr->subpage || existing->mr == &io_mem_unassigned);
735
736     if (!(existing->mr->subpage)) {
737         subpage = subpage_init(base);
738         subsection.mr = &subpage->iomem;
739         phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
740                       phys_section_add(&subsection));
741     } else {
742         subpage = container_of(existing->mr, subpage_t, iomem);
743     }
744     start = section->offset_within_address_space & ~TARGET_PAGE_MASK;
745     end = start + section->size - 1;
746     subpage_register(subpage, start, end, phys_section_add(section));
747 }
748
749
750 static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
751 {
752     hwaddr start_addr = section->offset_within_address_space;
753     ram_addr_t size = section->size;
754     hwaddr addr;
755     uint16_t section_index = phys_section_add(section);
756
757     assert(size);
758
759     addr = start_addr;
760     phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
761                   section_index);
762 }
763
764 static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
765 {
766     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
767     MemoryRegionSection now = *section, remain = *section;
768
769     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
770         || (now.size < TARGET_PAGE_SIZE)) {
771         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
772                        - now.offset_within_address_space,
773                        now.size);
774         register_subpage(d, &now);
775         remain.size -= now.size;
776         remain.offset_within_address_space += now.size;
777         remain.offset_within_region += now.size;
778     }
779     while (remain.size >= TARGET_PAGE_SIZE) {
780         now = remain;
781         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
782             now.size = TARGET_PAGE_SIZE;
783             register_subpage(d, &now);
784         } else {
785             now.size &= TARGET_PAGE_MASK;
786             register_multipage(d, &now);
787         }
788         remain.size -= now.size;
789         remain.offset_within_address_space += now.size;
790         remain.offset_within_region += now.size;
791     }
792     now = remain;
793     if (now.size) {
794         register_subpage(d, &now);
795     }
796 }
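/* Worked example of the splitting above (illustrative numbers: 4 KiB
 * target pages, region offset congruent to the bus offset modulo the page
 * size).  A section covering guest-physical [0x1800, 0x4800) is
 * registered in three pieces:
 *
 *     [0x1800, 0x2000)  unaligned head       -> register_subpage()
 *     [0x2000, 0x4000)  fully covered pages  -> register_multipage()
 *     [0x4000, 0x4800)  partial tail         -> register_subpage()
 *
 * Only partially covered pages pay for the extra subpage indirection;
 * fully covered pages map directly to a single phys_sections[] index.
 */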
797
798 void qemu_flush_coalesced_mmio_buffer(void)
799 {
800     if (kvm_enabled())
801         kvm_flush_coalesced_mmio_buffer();
802 }
803
804 #if defined(__linux__) && !defined(TARGET_S390X)
805
806 #include <sys/vfs.h>
807
808 #define HUGETLBFS_MAGIC       0x958458f6
809
810 static long gethugepagesize(const char *path)
811 {
812     struct statfs fs;
813     int ret;
814
815     do {
816         ret = statfs(path, &fs);
817     } while (ret != 0 && errno == EINTR);
818
819     if (ret != 0) {
820         perror(path);
821         return 0;
822     }
823
824     if (fs.f_type != HUGETLBFS_MAGIC)
825         fprintf(stderr, "Warning: path not on HugeTLBFS: %s\n", path);
826
827     return fs.f_bsize;
828 }
829
830 static void *file_ram_alloc(RAMBlock *block,
831                             ram_addr_t memory,
832                             const char *path)
833 {
834     char *filename;
835     void *area;
836     int fd;
837 #ifdef MAP_POPULATE
838     int flags;
839 #endif
840     unsigned long hpagesize;
841
842     hpagesize = gethugepagesize(path);
843     if (!hpagesize) {
844         return NULL;
845     }
846
847     if (memory < hpagesize) {
848         return NULL;
849     }
850
851     if (kvm_enabled() && !kvm_has_sync_mmu()) {
852         fprintf(stderr, "host lacks kvm mmu notifiers, -mem-path unsupported\n");
853         return NULL;
854     }
855
856     if (asprintf(&filename, "%s/qemu_back_mem.XXXXXX", path) == -1) {
857         return NULL;
858     }
859
860     fd = mkstemp(filename);
861     if (fd < 0) {
862         perror("unable to create backing store for hugepages");
863         free(filename);
864         return NULL;
865     }
866     unlink(filename);
867     free(filename);
868
869     memory = (memory+hpagesize-1) & ~(hpagesize-1);
870
871     /*
872      * ftruncate is not supported by hugetlbfs in older
873      * hosts, so don't bother bailing out on errors.
874      * If anything goes wrong with it under other filesystems,
875      * mmap will fail.
876      */
877     if (ftruncate(fd, memory))
878         perror("ftruncate");
879
880 #ifdef MAP_POPULATE
881     /* NB: MAP_POPULATE won't exhaustively alloc all phys pages in the case
882      * MAP_PRIVATE is requested.  For mem_prealloc we mmap as MAP_SHARED
883      * to sidestep this quirk.
884      */
885     flags = mem_prealloc ? MAP_POPULATE | MAP_SHARED : MAP_PRIVATE;
886     area = mmap(0, memory, PROT_READ | PROT_WRITE, flags, fd, 0);
887 #else
888     area = mmap(0, memory, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
889 #endif
890     if (area == MAP_FAILED) {
891         perror("file_ram_alloc: can't mmap RAM pages");
892         close(fd);
893         return (NULL);
894     }
895     block->fd = fd;
896     return area;
897 }
898 #endif
899
900 static ram_addr_t find_ram_offset(ram_addr_t size)
901 {
902     RAMBlock *block, *next_block;
903     ram_addr_t offset = RAM_ADDR_MAX, mingap = RAM_ADDR_MAX;
904
905     if (QTAILQ_EMPTY(&ram_list.blocks))
906         return 0;
907
908     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
909         ram_addr_t end, next = RAM_ADDR_MAX;
910
911         end = block->offset + block->length;
912
913         QTAILQ_FOREACH(next_block, &ram_list.blocks, next) {
914             if (next_block->offset >= end) {
915                 next = MIN(next, next_block->offset);
916             }
917         }
918         if (next - end >= size && next - end < mingap) {
919             offset = end;
920             mingap = next - end;
921         }
922     }
923
924     if (offset == RAM_ADDR_MAX) {
925         fprintf(stderr, "Failed to find gap of requested size: %" PRIu64 "\n",
926                 (uint64_t)size);
927         abort();
928     }
929
930     return offset;
931 }
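/* Example of the best-fit search above (illustrative offsets): with
 * existing blocks [0x00000, 0x10000) and [0x40000, 0x50000), a request
 * for 0x8000 bytes considers the gaps starting at 0x10000 and 0x50000
 * and returns 0x10000, the start of the smallest gap that still fits.
 * If no gap is large enough, the function aborts.
 */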
932
933 ram_addr_t last_ram_offset(void)
934 {
935     RAMBlock *block;
936     ram_addr_t last = 0;
937
938     QTAILQ_FOREACH(block, &ram_list.blocks, next)
939         last = MAX(last, block->offset + block->length);
940
941     return last;
942 }
943
944 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
945 {
946     int ret;
947     QemuOpts *machine_opts;
948
949     /* Use MADV_DONTDUMP if the user doesn't want the guest memory in the core dump */
950     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
951     if (machine_opts &&
952         !qemu_opt_get_bool(machine_opts, "dump-guest-core", true)) {
953         ret = qemu_madvise(addr, size, QEMU_MADV_DONTDUMP);
954         if (ret) {
955             perror("qemu_madvise");
956             fprintf(stderr, "madvise doesn't support MADV_DONTDUMP, "
957                             "but dump-guest-core=off was specified\n");
958         }
959     }
960 }
961
962 void qemu_ram_set_idstr(ram_addr_t addr, const char *name, DeviceState *dev)
963 {
964     RAMBlock *new_block, *block;
965
966     new_block = NULL;
967     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
968         if (block->offset == addr) {
969             new_block = block;
970             break;
971         }
972     }
973     assert(new_block);
974     assert(!new_block->idstr[0]);
975
976     if (dev) {
977         char *id = qdev_get_dev_path(dev);
978         if (id) {
979             snprintf(new_block->idstr, sizeof(new_block->idstr), "%s/", id);
980             g_free(id);
981         }
982     }
983     pstrcat(new_block->idstr, sizeof(new_block->idstr), name);
984
985     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
986         if (block != new_block && !strcmp(block->idstr, new_block->idstr)) {
987             fprintf(stderr, "RAMBlock \"%s\" already registered, abort!\n",
988                     new_block->idstr);
989             abort();
990         }
991     }
992 }
993
994 static int memory_try_enable_merging(void *addr, size_t len)
995 {
996     QemuOpts *opts;
997
998     opts = qemu_opts_find(qemu_find_opts("machine"), 0);
999     if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
1000         /* disabled by the user */
1001         return 0;
1002     }
1003
1004     return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
1005 }
1006
1007 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
1008                                    MemoryRegion *mr)
1009 {
1010     RAMBlock *new_block;
1011
1012     size = TARGET_PAGE_ALIGN(size);
1013     new_block = g_malloc0(sizeof(*new_block));
1014
1015     new_block->mr = mr;
1016     new_block->offset = find_ram_offset(size);
1017     if (host) {
1018         new_block->host = host;
1019         new_block->flags |= RAM_PREALLOC_MASK;
1020     } else {
1021         if (mem_path) {
1022 #if defined (__linux__) && !defined(TARGET_S390X)
1023             new_block->host = file_ram_alloc(new_block, size, mem_path);
1024             if (!new_block->host) {
1025                 new_block->host = qemu_vmalloc(size);
1026                 memory_try_enable_merging(new_block->host, size);
1027             }
1028 #else
1029             fprintf(stderr, "-mem-path option unsupported\n");
1030             exit(1);
1031 #endif
1032         } else {
1033             if (xen_enabled()) {
1034                 xen_ram_alloc(new_block->offset, size, mr);
1035             } else if (kvm_enabled()) {
1036                 /* some s390/kvm configurations have special constraints */
1037                 new_block->host = kvm_vmalloc(size);
1038             } else {
1039                 new_block->host = qemu_vmalloc(size);
1040             }
1041             memory_try_enable_merging(new_block->host, size);
1042         }
1043     }
1044     new_block->length = size;
1045
1046     QTAILQ_INSERT_HEAD(&ram_list.blocks, new_block, next);
1047     ram_list.mru_block = NULL;
1048
1049     ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
1050                                        last_ram_offset() >> TARGET_PAGE_BITS);
1051     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
1052            0, size >> TARGET_PAGE_BITS);
1053     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
1054
1055     qemu_ram_setup_dump(new_block->host, size);
1056     qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
1057
1058     if (kvm_enabled())
1059         kvm_setup_guest_memory(new_block->host, size);
1060
1061     return new_block->offset;
1062 }
1063
1064 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr)
1065 {
1066     return qemu_ram_alloc_from_ptr(size, NULL, mr);
1067 }
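/* Hedged usage sketch: callers normally reach these allocators through
 * the memory API rather than directly - memory_region_init_ram() (in
 * memory.c, not shown here) ends up calling qemu_ram_alloc() and keeps
 * the returned ram_addr_t in the MemoryRegion.  A device that already
 * owns a host buffer (host_buf is hypothetical) can instead do roughly:
 *
 *     ram_addr_t off = qemu_ram_alloc_from_ptr(size, host_buf, mr);
 *
 * in which case the block is flagged RAM_PREALLOC_MASK and qemu_ram_free()
 * will not try to unmap or free the host memory.
 */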
1068
1069 void qemu_ram_free_from_ptr(ram_addr_t addr)
1070 {
1071     RAMBlock *block;
1072
1073     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1074         if (addr == block->offset) {
1075             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1076             ram_list.mru_block = NULL;
1077             g_free(block);
1078             return;
1079         }
1080     }
1081 }
1082
1083 void qemu_ram_free(ram_addr_t addr)
1084 {
1085     RAMBlock *block;
1086
1087     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1088         if (addr == block->offset) {
1089             QTAILQ_REMOVE(&ram_list.blocks, block, next);
1090             ram_list.mru_block = NULL;
1091             if (block->flags & RAM_PREALLOC_MASK) {
1092                 ;
1093             } else if (mem_path) {
1094 #if defined (__linux__) && !defined(TARGET_S390X)
1095                 if (block->fd) {
1096                     munmap(block->host, block->length);
1097                     close(block->fd);
1098                 } else {
1099                     qemu_vfree(block->host);
1100                 }
1101 #else
1102                 abort();
1103 #endif
1104             } else {
1105 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1106                 munmap(block->host, block->length);
1107 #else
1108                 if (xen_enabled()) {
1109                     xen_invalidate_map_cache_entry(block->host);
1110                 } else {
1111                     qemu_vfree(block->host);
1112                 }
1113 #endif
1114             }
1115             g_free(block);
1116             return;
1117         }
1118     }
1119
1120 }
1121
1122 #ifndef _WIN32
1123 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length)
1124 {
1125     RAMBlock *block;
1126     ram_addr_t offset;
1127     int flags;
1128     void *area, *vaddr;
1129
1130     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1131         offset = addr - block->offset;
1132         if (offset < block->length) {
1133             vaddr = block->host + offset;
1134             if (block->flags & RAM_PREALLOC_MASK) {
1135                 ;
1136             } else {
1137                 flags = MAP_FIXED;
1138                 munmap(vaddr, length);
1139                 if (mem_path) {
1140 #if defined(__linux__) && !defined(TARGET_S390X)
1141                     if (block->fd) {
1142 #ifdef MAP_POPULATE
1143                         flags |= mem_prealloc ? MAP_POPULATE | MAP_SHARED :
1144                             MAP_PRIVATE;
1145 #else
1146                         flags |= MAP_PRIVATE;
1147 #endif
1148                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1149                                     flags, block->fd, offset);
1150                     } else {
1151                         flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1152                         area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1153                                     flags, -1, 0);
1154                     }
1155 #else
1156                     abort();
1157 #endif
1158                 } else {
1159 #if defined(TARGET_S390X) && defined(CONFIG_KVM)
1160                     flags |= MAP_SHARED | MAP_ANONYMOUS;
1161                     area = mmap(vaddr, length, PROT_EXEC|PROT_READ|PROT_WRITE,
1162                                 flags, -1, 0);
1163 #else
1164                     flags |= MAP_PRIVATE | MAP_ANONYMOUS;
1165                     area = mmap(vaddr, length, PROT_READ | PROT_WRITE,
1166                                 flags, -1, 0);
1167 #endif
1168                 }
1169                 if (area != vaddr) {
1170                     fprintf(stderr, "Could not remap addr: "
1171                             RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
1172                             length, addr);
1173                     exit(1);
1174                 }
1175                 memory_try_enable_merging(vaddr, length);
1176                 qemu_ram_setup_dump(vaddr, length);
1177             }
1178             return;
1179         }
1180     }
1181 }
1182 #endif /* !_WIN32 */
1183
1184 /* Return a host pointer to ram allocated with qemu_ram_alloc.
1185    With the exception of the softmmu code in this file, this should
1186    only be used for local memory (e.g. video ram) that the device owns,
1187    and knows it isn't going to access beyond the end of the block.
1188
1189    It should not be used for general purpose DMA.
1190    Use cpu_physical_memory_map/cpu_physical_memory_rw instead.
1191  */
1192 void *qemu_get_ram_ptr(ram_addr_t addr)
1193 {
1194     RAMBlock *block;
1195
1196     block = ram_list.mru_block;
1197     if (block && addr - block->offset < block->length) {
1198         goto found;
1199     }
1200     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1201         if (addr - block->offset < block->length) {
1202             goto found;
1203         }
1204     }
1205
1206     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1207     abort();
1208
1209 found:
1210     ram_list.mru_block = block;
1211     if (xen_enabled()) {
1212         /* We need to check if the requested address is in the RAM
1213          * because we don't want to map the entire memory in QEMU.
1214          * In that case just map until the end of the page.
1215          */
1216         if (block->offset == 0) {
1217             return xen_map_cache(addr, 0, 0);
1218         } else if (block->host == NULL) {
1219             block->host =
1220                 xen_map_cache(block->offset, block->length, 1);
1221         }
1222     }
1223     return block->host + (addr - block->offset);
1224 }
1225
1226 /* Return a host pointer to ram allocated with qemu_ram_alloc.  Same as
1227  * qemu_get_ram_ptr but do not touch ram_list.mru_block.
1228  *
1229  * ??? Is this still necessary?
1230  */
1231 static void *qemu_safe_ram_ptr(ram_addr_t addr)
1232 {
1233     RAMBlock *block;
1234
1235     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1236         if (addr - block->offset < block->length) {
1237             if (xen_enabled()) {
1238                 /* We need to check if the requested address is in the RAM
1239                  * because we don't want to map the entire memory in QEMU.
1240                  * In that case just map until the end of the page.
1241                  */
1242                 if (block->offset == 0) {
1243                     return xen_map_cache(addr, 0, 0);
1244                 } else if (block->host == NULL) {
1245                     block->host =
1246                         xen_map_cache(block->offset, block->length, 1);
1247                 }
1248             }
1249             return block->host + (addr - block->offset);
1250         }
1251     }
1252
1253     fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1254     abort();
1255
1256     return NULL;
1257 }
1258
1259 /* Return a host pointer to guest's ram. Similar to qemu_get_ram_ptr
1260  * but takes a size argument */
1261 static void *qemu_ram_ptr_length(ram_addr_t addr, ram_addr_t *size)
1262 {
1263     if (*size == 0) {
1264         return NULL;
1265     }
1266     if (xen_enabled()) {
1267         return xen_map_cache(addr, *size, 1);
1268     } else {
1269         RAMBlock *block;
1270
1271         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1272             if (addr - block->offset < block->length) {
1273                 if (addr - block->offset + *size > block->length)
1274                     *size = block->length - addr + block->offset;
1275                 return block->host + (addr - block->offset);
1276             }
1277         }
1278
1279         fprintf(stderr, "Bad ram offset %" PRIx64 "\n", (uint64_t)addr);
1280         abort();
1281     }
1282 }
1283
1284 void qemu_put_ram_ptr(void *addr)
1285 {
1286     trace_qemu_put_ram_ptr(addr);
1287 }
1288
1289 int qemu_ram_addr_from_host(void *ptr, ram_addr_t *ram_addr)
1290 {
1291     RAMBlock *block;
1292     uint8_t *host = ptr;
1293
1294     if (xen_enabled()) {
1295         *ram_addr = xen_ram_addr_from_mapcache(ptr);
1296         return 0;
1297     }
1298
1299     QTAILQ_FOREACH(block, &ram_list.blocks, next) {
1300         /* This can happen when the block is not mapped. */
1301         if (block->host == NULL) {
1302             continue;
1303         }
1304         if (host - block->host < block->length) {
1305             *ram_addr = block->offset + (host - block->host);
1306             return 0;
1307         }
1308     }
1309
1310     return -1;
1311 }
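/* The two directions form a simple round trip (sketch, non-Xen case):
 *
 *     void *p = qemu_get_ram_ptr(addr);        // ram_addr_t -> host ptr
 *     ram_addr_t back;
 *     assert(qemu_ram_addr_from_host(p, &back) == 0 && back == addr);
 *
 * Both sides are plain offset arithmetic against the owning RAMBlock:
 * host = block->host + (addr - block->offset), and vice versa.  Blocks
 * with block->host == NULL (e.g. not currently mapped under Xen) are
 * skipped in the reverse direction, hence the -1 "not found" return.
 */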
1312
1313 /* Some of the softmmu routines need to translate from a host pointer
1314    (typically a TLB entry) back to a ram offset.  */
1315 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
1316 {
1317     ram_addr_t ram_addr;
1318
1319     if (qemu_ram_addr_from_host(ptr, &ram_addr)) {
1320         fprintf(stderr, "Bad ram pointer %p\n", ptr);
1321         abort();
1322     }
1323     return ram_addr;
1324 }
1325
1326 static uint64_t unassigned_mem_read(void *opaque, hwaddr addr,
1327                                     unsigned size)
1328 {
1329 #ifdef DEBUG_UNASSIGNED
1330     printf("Unassigned mem read " TARGET_FMT_plx "\n", addr);
1331 #endif
1332 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1333     cpu_unassigned_access(cpu_single_env, addr, 0, 0, 0, size);
1334 #endif
1335     return 0;
1336 }
1337
1338 static void unassigned_mem_write(void *opaque, hwaddr addr,
1339                                  uint64_t val, unsigned size)
1340 {
1341 #ifdef DEBUG_UNASSIGNED
1342     printf("Unassigned mem write " TARGET_FMT_plx " = 0x%"PRIx64"\n", addr, val);
1343 #endif
1344 #if defined(TARGET_ALPHA) || defined(TARGET_SPARC) || defined(TARGET_MICROBLAZE)
1345     cpu_unassigned_access(cpu_single_env, addr, 1, 0, 0, size);
1346 #endif
1347 }
1348
1349 static const MemoryRegionOps unassigned_mem_ops = {
1350     .read = unassigned_mem_read,
1351     .write = unassigned_mem_write,
1352     .endianness = DEVICE_NATIVE_ENDIAN,
1353 };
1354
1355 static uint64_t error_mem_read(void *opaque, hwaddr addr,
1356                                unsigned size)
1357 {
1358     abort();
1359 }
1360
1361 static void error_mem_write(void *opaque, hwaddr addr,
1362                             uint64_t value, unsigned size)
1363 {
1364     abort();
1365 }
1366
1367 static const MemoryRegionOps error_mem_ops = {
1368     .read = error_mem_read,
1369     .write = error_mem_write,
1370     .endianness = DEVICE_NATIVE_ENDIAN,
1371 };
1372
1373 static const MemoryRegionOps rom_mem_ops = {
1374     .read = error_mem_read,
1375     .write = unassigned_mem_write,
1376     .endianness = DEVICE_NATIVE_ENDIAN,
1377 };
1378
1379 static void notdirty_mem_write(void *opaque, hwaddr ram_addr,
1380                                uint64_t val, unsigned size)
1381 {
1382     int dirty_flags;
1383     dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1384     if (!(dirty_flags & CODE_DIRTY_FLAG)) {
1385 #if !defined(CONFIG_USER_ONLY)
1386         tb_invalidate_phys_page_fast(ram_addr, size);
1387         dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
1388 #endif
1389     }
1390     switch (size) {
1391     case 1:
1392         stb_p(qemu_get_ram_ptr(ram_addr), val);
1393         break;
1394     case 2:
1395         stw_p(qemu_get_ram_ptr(ram_addr), val);
1396         break;
1397     case 4:
1398         stl_p(qemu_get_ram_ptr(ram_addr), val);
1399         break;
1400     default:
1401         abort();
1402     }
1403     dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
1404     cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
1405     /* we remove the notdirty callback only if the code has been
1406        flushed */
1407     if (dirty_flags == 0xff)
1408         tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
1409 }
1410
1411 static const MemoryRegionOps notdirty_mem_ops = {
1412     .read = error_mem_read,
1413     .write = notdirty_mem_write,
1414     .endianness = DEVICE_NATIVE_ENDIAN,
1415 };
1416
1417 /* Generate a debug exception if a watchpoint has been hit.  */
1418 static void check_watchpoint(int offset, int len_mask, int flags)
1419 {
1420     CPUArchState *env = cpu_single_env;
1421     target_ulong pc, cs_base;
1422     target_ulong vaddr;
1423     CPUWatchpoint *wp;
1424     int cpu_flags;
1425
1426     if (env->watchpoint_hit) {
1427         /* We re-entered the check after replacing the TB. Now raise
1428          * the debug interrupt so that it will trigger after the
1429          * current instruction. */
1430         cpu_interrupt(env, CPU_INTERRUPT_DEBUG);
1431         return;
1432     }
1433     vaddr = (env->mem_io_vaddr & TARGET_PAGE_MASK) + offset;
1434     QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
1435         if ((vaddr == (wp->vaddr & len_mask) ||
1436              (vaddr & wp->len_mask) == wp->vaddr) && (wp->flags & flags)) {
1437             wp->flags |= BP_WATCHPOINT_HIT;
1438             if (!env->watchpoint_hit) {
1439                 env->watchpoint_hit = wp;
1440                 tb_check_watchpoint(env);
1441                 if (wp->flags & BP_STOP_BEFORE_ACCESS) {
1442                     env->exception_index = EXCP_DEBUG;
1443                     cpu_loop_exit(env);
1444                 } else {
1445                     cpu_get_tb_cpu_state(env, &pc, &cs_base, &cpu_flags);
1446                     tb_gen_code(env, pc, cs_base, cpu_flags, 1);
1447                     cpu_resume_from_signal(env, NULL);
1448                 }
1449             }
1450         } else {
1451             wp->flags &= ~BP_WATCHPOINT_HIT;
1452         }
1453     }
1454 }
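/* Summary of how the trap is wired up (no new behaviour, just the flow):
 * memory_region_section_get_iotlb() points the TLB entry of any page
 * containing a watchpoint at phys_section_watch and sets TLB_MMIO, so
 * every load/store on that page is routed through watch_mem_read/write
 * below.  Those call check_watchpoint(), which either exits the CPU loop
 * immediately with EXCP_DEBUG (BP_STOP_BEFORE_ACCESS) or regenerates the
 * current TB with a single instruction and resumes, so the access
 * completes and the debug interrupt is raised on re-entry.
 */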
1455
1456 /* Watchpoint access routines.  Watchpoints are inserted using TLB tricks,
1457    so these check for a hit then pass through to the normal out-of-line
1458    phys routines.  */
1459 static uint64_t watch_mem_read(void *opaque, hwaddr addr,
1460                                unsigned size)
1461 {
1462     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_READ);
1463     switch (size) {
1464     case 1: return ldub_phys(addr);
1465     case 2: return lduw_phys(addr);
1466     case 4: return ldl_phys(addr);
1467     default: abort();
1468     }
1469 }
1470
1471 static void watch_mem_write(void *opaque, hwaddr addr,
1472                             uint64_t val, unsigned size)
1473 {
1474     check_watchpoint(addr & ~TARGET_PAGE_MASK, ~(size - 1), BP_MEM_WRITE);
1475     switch (size) {
1476     case 1:
1477         stb_phys(addr, val);
1478         break;
1479     case 2:
1480         stw_phys(addr, val);
1481         break;
1482     case 4:
1483         stl_phys(addr, val);
1484         break;
1485     default: abort();
1486     }
1487 }
1488
1489 static const MemoryRegionOps watch_mem_ops = {
1490     .read = watch_mem_read,
1491     .write = watch_mem_write,
1492     .endianness = DEVICE_NATIVE_ENDIAN,
1493 };
1494
1495 static uint64_t subpage_read(void *opaque, hwaddr addr,
1496                              unsigned len)
1497 {
1498     subpage_t *mmio = opaque;
1499     unsigned int idx = SUBPAGE_IDX(addr);
1500     MemoryRegionSection *section;
1501 #if defined(DEBUG_SUBPAGE)
1502     printf("%s: subpage %p len %d addr " TARGET_FMT_plx " idx %d\n", __func__,
1503            mmio, len, addr, idx);
1504 #endif
1505
1506     section = &phys_sections[mmio->sub_section[idx]];
1507     addr += mmio->base;
1508     addr -= section->offset_within_address_space;
1509     addr += section->offset_within_region;
1510     return io_mem_read(section->mr, addr, len);
1511 }
1512
1513 static void subpage_write(void *opaque, hwaddr addr,
1514                           uint64_t value, unsigned len)
1515 {
1516     subpage_t *mmio = opaque;
1517     unsigned int idx = SUBPAGE_IDX(addr);
1518     MemoryRegionSection *section;
1519 #if defined(DEBUG_SUBPAGE)
1520     printf("%s: subpage %p len %d addr " TARGET_FMT_plx
1521            " idx %d value %"PRIx64"\n",
1522            __func__, mmio, len, addr, idx, value);
1523 #endif
1524
1525     section = &phys_sections[mmio->sub_section[idx]];
1526     addr += mmio->base;
1527     addr -= section->offset_within_address_space;
1528     addr += section->offset_within_region;
1529     io_mem_write(section->mr, addr, value, len);
1530 }
1531
1532 static const MemoryRegionOps subpage_ops = {
1533     .read = subpage_read,
1534     .write = subpage_write,
1535     .endianness = DEVICE_NATIVE_ENDIAN,
1536 };
1537
1538 static uint64_t subpage_ram_read(void *opaque, hwaddr addr,
1539                                  unsigned size)
1540 {
1541     ram_addr_t raddr = addr;
1542     void *ptr = qemu_get_ram_ptr(raddr);
1543     switch (size) {
1544     case 1: return ldub_p(ptr);
1545     case 2: return lduw_p(ptr);
1546     case 4: return ldl_p(ptr);
1547     default: abort();
1548     }
1549 }
1550
1551 static void subpage_ram_write(void *opaque, hwaddr addr,
1552                               uint64_t value, unsigned size)
1553 {
1554     ram_addr_t raddr = addr;
1555     void *ptr = qemu_get_ram_ptr(raddr);
1556     switch (size) {
1557     case 1: return stb_p(ptr, value);
1558     case 2: return stw_p(ptr, value);
1559     case 4: return stl_p(ptr, value);
1560     default: abort();
1561     }
1562 }
1563
1564 static const MemoryRegionOps subpage_ram_ops = {
1565     .read = subpage_ram_read,
1566     .write = subpage_ram_write,
1567     .endianness = DEVICE_NATIVE_ENDIAN,
1568 };
1569
1570 static int subpage_register (subpage_t *mmio, uint32_t start, uint32_t end,
1571                              uint16_t section)
1572 {
1573     int idx, eidx;
1574
1575     if (start >= TARGET_PAGE_SIZE || end >= TARGET_PAGE_SIZE)
1576         return -1;
1577     idx = SUBPAGE_IDX(start);
1578     eidx = SUBPAGE_IDX(end);
1579 #if defined(DEBUG_SUBPAGE)
1580     printf("%s: %p start %08x end %08x idx %08x eidx %08x section %d\n",
1581            __func__, mmio, start, end, idx, eidx, section);
1582 #endif
1583     if (memory_region_is_ram(phys_sections[section].mr)) {
1584         MemoryRegionSection new_section = phys_sections[section];
1585         new_section.mr = &io_mem_subpage_ram;
1586         section = phys_section_add(&new_section);
1587     }
1588     for (; idx <= eidx; idx++) {
1589         mmio->sub_section[idx] = section;
1590     }
1591
1592     return 0;
1593 }
1594
1595 static subpage_t *subpage_init(hwaddr base)
1596 {
1597     subpage_t *mmio;
1598
1599     mmio = g_malloc0(sizeof(subpage_t));
1600
1601     mmio->base = base;
1602     memory_region_init_io(&mmio->iomem, &subpage_ops, mmio,
1603                           "subpage", TARGET_PAGE_SIZE);
1604     mmio->iomem.subpage = true;
1605 #if defined(DEBUG_SUBPAGE)
1606     printf("%s: %p base " TARGET_FMT_plx " len %08x\n", __func__,
1607            mmio, base, TARGET_PAGE_SIZE);
1608 #endif
1609     subpage_register(mmio, 0, TARGET_PAGE_SIZE-1, phys_section_unassigned);
1610
1611     return mmio;
1612 }
1613
1614 static uint16_t dummy_section(MemoryRegion *mr)
1615 {
1616     MemoryRegionSection section = {
1617         .mr = mr,
1618         .offset_within_address_space = 0,
1619         .offset_within_region = 0,
1620         .size = UINT64_MAX,
1621     };
1622
1623     return phys_section_add(&section);
1624 }
1625
1626 MemoryRegion *iotlb_to_region(hwaddr index)
1627 {
1628     return phys_sections[index & ~TARGET_PAGE_MASK].mr;
1629 }
1630
1631 static void io_mem_init(void)
1632 {
1633     memory_region_init_io(&io_mem_ram, &error_mem_ops, NULL, "ram", UINT64_MAX);
1634     memory_region_init_io(&io_mem_rom, &rom_mem_ops, NULL, "rom", UINT64_MAX);
1635     memory_region_init_io(&io_mem_unassigned, &unassigned_mem_ops, NULL,
1636                           "unassigned", UINT64_MAX);
1637     memory_region_init_io(&io_mem_notdirty, &notdirty_mem_ops, NULL,
1638                           "notdirty", UINT64_MAX);
1639     memory_region_init_io(&io_mem_subpage_ram, &subpage_ram_ops, NULL,
1640                           "subpage-ram", UINT64_MAX);
1641     memory_region_init_io(&io_mem_watch, &watch_mem_ops, NULL,
1642                           "watch", UINT64_MAX);
1643 }
1644
1645 static void mem_begin(MemoryListener *listener)
1646 {
1647     AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
1648
1649     destroy_all_mappings(d);
1650     d->phys_map.ptr = PHYS_MAP_NODE_NIL;
1651 }
1652
1653 static void core_begin(MemoryListener *listener)
1654 {
1655     phys_sections_clear();
1656     phys_section_unassigned = dummy_section(&io_mem_unassigned);
1657     phys_section_notdirty = dummy_section(&io_mem_notdirty);
1658     phys_section_rom = dummy_section(&io_mem_rom);
1659     phys_section_watch = dummy_section(&io_mem_watch);
1660 }
1661
1662 static void tcg_commit(MemoryListener *listener)
1663 {
1664     CPUArchState *env;
1665
1666     /* since each CPU stores ram addresses in its TLB cache, we must
1667        reset the modified entries */
1668     /* XXX: slow ! */
1669     for(env = first_cpu; env != NULL; env = env->next_cpu) {
1670         tlb_flush(env, 1);
1671     }
1672 }
1673
1674 static void core_log_global_start(MemoryListener *listener)
1675 {
1676     cpu_physical_memory_set_dirty_tracking(1);
1677 }
1678
1679 static void core_log_global_stop(MemoryListener *listener)
1680 {
1681     cpu_physical_memory_set_dirty_tracking(0);
1682 }
1683
1684 static void io_region_add(MemoryListener *listener,
1685                           MemoryRegionSection *section)
1686 {
1687     MemoryRegionIORange *mrio = g_new(MemoryRegionIORange, 1);
1688
1689     mrio->mr = section->mr;
1690     mrio->offset = section->offset_within_region;
1691     iorange_init(&mrio->iorange, &memory_region_iorange_ops,
1692                  section->offset_within_address_space, section->size);
1693     ioport_register(&mrio->iorange);
1694 }
1695
1696 static void io_region_del(MemoryListener *listener,
1697                           MemoryRegionSection *section)
1698 {
1699     isa_unassign_ioport(section->offset_within_address_space, section->size);
1700 }
1701
1702 static MemoryListener core_memory_listener = {
1703     .begin = core_begin,
1704     .log_global_start = core_log_global_start,
1705     .log_global_stop = core_log_global_stop,
1706     .priority = 1,
1707 };
1708
1709 static MemoryListener io_memory_listener = {
1710     .region_add = io_region_add,
1711     .region_del = io_region_del,
1712     .priority = 0,
1713 };
1714
1715 static MemoryListener tcg_memory_listener = {
1716     .commit = tcg_commit,
1717 };
1718
1719 void address_space_init_dispatch(AddressSpace *as)
1720 {
1721     AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
1722
1723     d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
1724     d->listener = (MemoryListener) {
1725         .begin = mem_begin,
1726         .region_add = mem_add,
1727         .region_nop = mem_add,
1728         .priority = 0,
1729     };
1730     as->dispatch = d;
1731     memory_listener_register(&d->listener, as);
1732 }
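/* Lifecycle sketch (summary of the listener callbacks defined above):
 * on every memory topology change,
 *
 *     core_begin()  - resets phys_sections[] and re-creates the dummy
 *                     unassigned/notdirty/rom/watch sections
 *     mem_begin()   - drops this address space's old radix tree
 *     mem_add()     - rebuilds it, one MemoryRegionSection at a time
 *     tcg_commit()  - flushes every CPU TLB so stale entries disappear
 *
 * so each AddressSpaceDispatch is regenerated from scratch rather than
 * updated incrementally.
 */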
1733
1734 void address_space_destroy_dispatch(AddressSpace *as)
1735 {
1736     AddressSpaceDispatch *d = as->dispatch;
1737
1738     memory_listener_unregister(&d->listener);
1739     destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
1740     g_free(d);
1741     as->dispatch = NULL;
1742 }
1743
1744 static void memory_map_init(void)
1745 {
1746     system_memory = g_malloc(sizeof(*system_memory));
1747     memory_region_init(system_memory, "system", INT64_MAX);
1748     address_space_init(&address_space_memory, system_memory);
1749     address_space_memory.name = "memory";
1750
1751     system_io = g_malloc(sizeof(*system_io));
1752     memory_region_init(system_io, "io", 65536);
1753     address_space_init(&address_space_io, system_io);
1754     address_space_io.name = "I/O";
1755
1756     memory_listener_register(&core_memory_listener, &address_space_memory);
1757     memory_listener_register(&io_memory_listener, &address_space_io);
1758     memory_listener_register(&tcg_memory_listener, &address_space_memory);
1759
1760     dma_context_init(&dma_context_memory, &address_space_memory,
1761                      NULL, NULL, NULL);
1762 }
1763
1764 MemoryRegion *get_system_memory(void)
1765 {
1766     return system_memory;
1767 }
1768
1769 MemoryRegion *get_system_io(void)
1770 {
1771     return system_io;
1772 }
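
/* Illustrative sketch: how board code typically plugs guest RAM into the
 * system memory tree returned by get_system_memory().  The "my_board" names
 * and the offset-0 mapping are hypothetical. */
static void my_board_ram_init(ram_addr_t ram_size)
{
    MemoryRegion *ram = g_new(MemoryRegion, 1);

    memory_region_init_ram(ram, "my_board.ram", ram_size);
    /* Map the RAM at guest physical address 0. */
    memory_region_add_subregion(get_system_memory(), 0, ram);
}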
1773
1774 #endif /* !defined(CONFIG_USER_ONLY) */
1775
1776 /* physical memory access (slow version, mainly for debug) */
1777 #if defined(CONFIG_USER_ONLY)
1778 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
1779                         uint8_t *buf, int len, int is_write)
1780 {
1781     int l, flags;
1782     target_ulong page;
1783     void * p;
1784
1785     while (len > 0) {
1786         page = addr & TARGET_PAGE_MASK;
1787         l = (page + TARGET_PAGE_SIZE) - addr;
1788         if (l > len)
1789             l = len;
1790         flags = page_get_flags(page);
1791         if (!(flags & PAGE_VALID))
1792             return -1;
1793         if (is_write) {
1794             if (!(flags & PAGE_WRITE))
1795                 return -1;
1796             /* XXX: this code should not depend on lock_user */
1797             if (!(p = lock_user(VERIFY_WRITE, addr, l, 0)))
1798                 return -1;
1799             memcpy(p, buf, l);
1800             unlock_user(p, addr, l);
1801         } else {
1802             if (!(flags & PAGE_READ))
1803                 return -1;
1804             /* XXX: this code should not depend on lock_user */
1805             if (!(p = lock_user(VERIFY_READ, addr, l, 1)))
1806                 return -1;
1807             memcpy(buf, p, l);
1808             unlock_user(p, addr, 0);
1809         }
1810         len -= l;
1811         buf += l;
1812         addr += l;
1813     }
1814     return 0;
1815 }
1816
1817 #else
1818
1819 static void invalidate_and_set_dirty(hwaddr addr,
1820                                      hwaddr length)
1821 {
1822     if (!cpu_physical_memory_is_dirty(addr)) {
1823         /* invalidate code */
1824         tb_invalidate_phys_page_range(addr, addr + length, 0);
1825         /* set dirty bit */
1826         cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
1827     }
1828     xen_modified_memory(addr, length);
1829 }
1830
1831 void address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf,
1832                       int len, bool is_write)
1833 {
1834     AddressSpaceDispatch *d = as->dispatch;
1835     int l;
1836     uint8_t *ptr;
1837     uint32_t val;
1838     hwaddr page;
1839     MemoryRegionSection *section;
1840
1841     while (len > 0) {
1842         page = addr & TARGET_PAGE_MASK;
1843         l = (page + TARGET_PAGE_SIZE) - addr;
1844         if (l > len)
1845             l = len;
1846         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1847
1848         if (is_write) {
1849             if (!memory_region_is_ram(section->mr)) {
1850                 hwaddr addr1;
1851                 addr1 = memory_region_section_addr(section, addr);
1852                 /* XXX: could force cpu_single_env to NULL to avoid
1853                    potential bugs */
1854                 if (l >= 4 && ((addr1 & 3) == 0)) {
1855                     /* 32 bit write access */
1856                     val = ldl_p(buf);
1857                     io_mem_write(section->mr, addr1, val, 4);
1858                     l = 4;
1859                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1860                     /* 16 bit write access */
1861                     val = lduw_p(buf);
1862                     io_mem_write(section->mr, addr1, val, 2);
1863                     l = 2;
1864                 } else {
1865                     /* 8 bit write access */
1866                     val = ldub_p(buf);
1867                     io_mem_write(section->mr, addr1, val, 1);
1868                     l = 1;
1869                 }
1870             } else if (!section->readonly) {
1871                 ram_addr_t addr1;
1872                 addr1 = memory_region_get_ram_addr(section->mr)
1873                     + memory_region_section_addr(section, addr);
1874                 /* RAM case */
1875                 ptr = qemu_get_ram_ptr(addr1);
1876                 memcpy(ptr, buf, l);
1877                 invalidate_and_set_dirty(addr1, l);
1878                 qemu_put_ram_ptr(ptr);
1879             }
1880         } else {
1881             if (!(memory_region_is_ram(section->mr) ||
1882                   memory_region_is_romd(section->mr))) {
1883                 hwaddr addr1;
1884                 /* I/O case */
1885                 addr1 = memory_region_section_addr(section, addr);
1886                 if (l >= 4 && ((addr1 & 3) == 0)) {
1887                     /* 32 bit read access */
1888                     val = io_mem_read(section->mr, addr1, 4);
1889                     stl_p(buf, val);
1890                     l = 4;
1891                 } else if (l >= 2 && ((addr1 & 1) == 0)) {
1892                     /* 16 bit read access */
1893                     val = io_mem_read(section->mr, addr1, 2);
1894                     stw_p(buf, val);
1895                     l = 2;
1896                 } else {
1897                     /* 8 bit read access */
1898                     val = io_mem_read(section->mr, addr1, 1);
1899                     stb_p(buf, val);
1900                     l = 1;
1901                 }
1902             } else {
1903                 /* RAM case */
1904                 ptr = qemu_get_ram_ptr(section->mr->ram_addr
1905                                        + memory_region_section_addr(section,
1906                                                                     addr));
1907                 memcpy(buf, ptr, l);
1908                 qemu_put_ram_ptr(ptr);
1909             }
1910         }
1911         len -= l;
1912         buf += l;
1913         addr += l;
1914     }
1915 }
1916
1917 void address_space_write(AddressSpace *as, hwaddr addr,
1918                          const uint8_t *buf, int len)
1919 {
1920     address_space_rw(as, addr, (uint8_t *)buf, len, true);
1921 }
1922
1923 /**
1924  * address_space_read: read from an address space.
1925  *
1926  * @as: #AddressSpace to be accessed
1927  * @addr: address within that address space
1928  * @buf: buffer with the data transferred
 * @len: length in bytes of the data transferred
1929  */
1930 void address_space_read(AddressSpace *as, hwaddr addr, uint8_t *buf, int len)
1931 {
1932     address_space_rw(as, addr, buf, len, false);
1933 }
1934
1935
1936 void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
1937                             int len, int is_write)
1938 {
1939     return address_space_rw(&address_space_memory, addr, buf, len, is_write);
1940 }
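
/* Illustrative sketch: the usual convenience pattern built on
 * cpu_physical_memory_rw() above -- copy a guest-physical buffer into a host
 * buffer and write it back.  "guest_pa" is a hypothetical address. */
static void example_copy_guest_buffer(hwaddr guest_pa)
{
    uint8_t tmp[64];

    cpu_physical_memory_rw(guest_pa, tmp, sizeof(tmp), 0);  /* read */
    cpu_physical_memory_rw(guest_pa, tmp, sizeof(tmp), 1);  /* write back */
}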
1941
1942 /* Used for ROM loading: can write to both RAM and ROM. */
1943 void cpu_physical_memory_write_rom(hwaddr addr,
1944                                    const uint8_t *buf, int len)
1945 {
1946     AddressSpaceDispatch *d = address_space_memory.dispatch;
1947     int l;
1948     uint8_t *ptr;
1949     hwaddr page;
1950     MemoryRegionSection *section;
1951
1952     while (len > 0) {
1953         page = addr & TARGET_PAGE_MASK;
1954         l = (page + TARGET_PAGE_SIZE) - addr;
1955         if (l > len)
1956             l = len;
1957         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
1958
1959         if (!(memory_region_is_ram(section->mr) ||
1960               memory_region_is_romd(section->mr))) {
1961             /* do nothing */
1962         } else {
1963             unsigned long addr1;
1964             addr1 = memory_region_get_ram_addr(section->mr)
1965                 + memory_region_section_addr(section, addr);
1966             /* ROM/RAM case */
1967             ptr = qemu_get_ram_ptr(addr1);
1968             memcpy(ptr, buf, l);
1969             invalidate_and_set_dirty(addr1, l);
1970             qemu_put_ram_ptr(ptr);
1971         }
1972         len -= l;
1973         buf += l;
1974         addr += l;
1975     }
1976 }
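
/* Illustrative sketch: firmware loaders rely on
 * cpu_physical_memory_write_rom() because, unlike a plain write through
 * address_space_memory, it also patches read-only (ROM) regions.  The
 * "bios_addr" and "blob" parameters are hypothetical. */
static void example_install_firmware(hwaddr bios_addr,
                                     const uint8_t *blob, int blob_size)
{
    cpu_physical_memory_write_rom(bios_addr, blob, blob_size);
}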
1977
1978 typedef struct {
1979     void *buffer;
1980     hwaddr addr;
1981     hwaddr len;
1982 } BounceBuffer;
1983
1984 static BounceBuffer bounce;
1985
1986 typedef struct MapClient {
1987     void *opaque;
1988     void (*callback)(void *opaque);
1989     QLIST_ENTRY(MapClient) link;
1990 } MapClient;
1991
1992 static QLIST_HEAD(map_client_list, MapClient) map_client_list
1993     = QLIST_HEAD_INITIALIZER(map_client_list);
1994
1995 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque))
1996 {
1997     MapClient *client = g_malloc(sizeof(*client));
1998
1999     client->opaque = opaque;
2000     client->callback = callback;
2001     QLIST_INSERT_HEAD(&map_client_list, client, link);
2002     return client;
2003 }
2004
2005 static void cpu_unregister_map_client(void *_client)
2006 {
2007     MapClient *client = (MapClient *)_client;
2008
2009     QLIST_REMOVE(client, link);
2010     g_free(client);
2011 }
2012
2013 static void cpu_notify_map_clients(void)
2014 {
2015     MapClient *client;
2016
2017     while (!QLIST_EMPTY(&map_client_list)) {
2018         client = QLIST_FIRST(&map_client_list);
2019         client->callback(client->opaque);
2020         cpu_unregister_map_client(client);
2021     }
2022 }
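
/* Illustrative sketch: a device whose address_space_map() call failed because
 * the single bounce buffer was busy can register a callback here and retry
 * once the buffer is released.  "MyDevState" and the retry hook are
 * hypothetical. */
typedef struct MyDevState {
    void *map_client;
} MyDevState;

static void my_dev_retry_dma(void *opaque)
{
    MyDevState *s = opaque;

    /* The bounce buffer is free again; the client has already been
     * unregistered, so drop the handle and re-issue the failed mapping. */
    s->map_client = NULL;
}

static void my_dev_map_failed(MyDevState *s)
{
    s->map_client = cpu_register_map_client(s, my_dev_retry_dma);
}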
2023
2024 /* Map a physical memory region into a host virtual address.
2025  * May map a subset of the requested range, given by and returned in *plen.
2026  * May return NULL if resources needed to perform the mapping are exhausted.
2027  * Use only for reads OR writes - not for read-modify-write operations.
2028  * Use cpu_register_map_client() to know when retrying the map operation is
2029  * likely to succeed.
2030  */
2031 void *address_space_map(AddressSpace *as,
2032                         hwaddr addr,
2033                         hwaddr *plen,
2034                         bool is_write)
2035 {
2036     AddressSpaceDispatch *d = as->dispatch;
2037     hwaddr len = *plen;
2038     hwaddr todo = 0;
2039     int l;
2040     hwaddr page;
2041     MemoryRegionSection *section;
2042     ram_addr_t raddr = RAM_ADDR_MAX;
2043     ram_addr_t rlen;
2044     void *ret;
2045
2046     while (len > 0) {
2047         page = addr & TARGET_PAGE_MASK;
2048         l = (page + TARGET_PAGE_SIZE) - addr;
2049         if (l > len)
2050             l = len;
2051         section = phys_page_find(d, page >> TARGET_PAGE_BITS);
2052
2053         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
2054             if (todo || bounce.buffer) {
2055                 break;
2056             }
2057             bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE);
2058             bounce.addr = addr;
2059             bounce.len = l;
2060             if (!is_write) {
2061                 address_space_read(as, addr, bounce.buffer, l);
2062             }
2063
2064             *plen = l;
2065             return bounce.buffer;
2066         }
2067         if (!todo) {
2068             raddr = memory_region_get_ram_addr(section->mr)
2069                 + memory_region_section_addr(section, addr);
2070         }
2071
2072         len -= l;
2073         addr += l;
2074         todo += l;
2075     }
2076     rlen = todo;
2077     ret = qemu_ram_ptr_length(raddr, &rlen);
2078     *plen = rlen;
2079     return ret;
2080 }
2081
2082 /* Unmaps a memory region previously mapped by address_space_map().
2083  * Will also mark the memory as dirty if is_write == 1.  access_len gives
2084  * the amount of memory that was actually read or written by the caller.
2085  */
2086 void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len,
2087                          int is_write, hwaddr access_len)
2088 {
2089     if (buffer != bounce.buffer) {
2090         if (is_write) {
2091             ram_addr_t addr1 = qemu_ram_addr_from_host_nofail(buffer);
2092             while (access_len) {
2093                 unsigned l;
2094                 l = TARGET_PAGE_SIZE;
2095                 if (l > access_len)
2096                     l = access_len;
2097                 invalidate_and_set_dirty(addr1, l);
2098                 addr1 += l;
2099                 access_len -= l;
2100             }
2101         }
2102         if (xen_enabled()) {
2103             xen_invalidate_map_cache_entry(buffer);
2104         }
2105         return;
2106     }
2107     if (is_write) {
2108         address_space_write(as, bounce.addr, bounce.buffer, access_len);
2109     }
2110     qemu_vfree(bounce.buffer);
2111     bounce.buffer = NULL;
2112     cpu_notify_map_clients();
2113 }
2114
2115 void *cpu_physical_memory_map(hwaddr addr,
2116                               hwaddr *plen,
2117                               int is_write)
2118 {
2119     return address_space_map(&address_space_memory, addr, plen, is_write);
2120 }
2121
2122 void cpu_physical_memory_unmap(void *buffer, hwaddr len,
2123                                int is_write, hwaddr access_len)
2124 {
2125     return address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
2126 }
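
/* Illustrative sketch: the zero-copy access pattern that
 * cpu_physical_memory_map()/unmap() are designed for.  The mapping may cover
 * less than the requested length and may fail outright while the bounce
 * buffer is in use.  "guest_pa" is a hypothetical address. */
static int example_zero_guest_region(hwaddr guest_pa, hwaddr size)
{
    hwaddr plen = size;
    void *host = cpu_physical_memory_map(guest_pa, &plen, 1 /* is_write */);

    if (!host) {
        return -1;                 /* resources exhausted; retry later */
    }
    memset(host, 0, plen);         /* only plen bytes are guaranteed mapped */
    cpu_physical_memory_unmap(host, plen, 1, plen);
    return plen == size ? 0 : -1;
}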
2127
2128 /* warning: addr must be aligned */
2129 static inline uint32_t ldl_phys_internal(hwaddr addr,
2130                                          enum device_endian endian)
2131 {
2132     uint8_t *ptr;
2133     uint32_t val;
2134     MemoryRegionSection *section;
2135
2136     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2137
2138     if (!(memory_region_is_ram(section->mr) ||
2139           memory_region_is_romd(section->mr))) {
2140         /* I/O case */
2141         addr = memory_region_section_addr(section, addr);
2142         val = io_mem_read(section->mr, addr, 4);
2143 #if defined(TARGET_WORDS_BIGENDIAN)
2144         if (endian == DEVICE_LITTLE_ENDIAN) {
2145             val = bswap32(val);
2146         }
2147 #else
2148         if (endian == DEVICE_BIG_ENDIAN) {
2149             val = bswap32(val);
2150         }
2151 #endif
2152     } else {
2153         /* RAM case */
2154         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2155                                 & TARGET_PAGE_MASK)
2156                                + memory_region_section_addr(section, addr));
2157         switch (endian) {
2158         case DEVICE_LITTLE_ENDIAN:
2159             val = ldl_le_p(ptr);
2160             break;
2161         case DEVICE_BIG_ENDIAN:
2162             val = ldl_be_p(ptr);
2163             break;
2164         default:
2165             val = ldl_p(ptr);
2166             break;
2167         }
2168     }
2169     return val;
2170 }
2171
2172 uint32_t ldl_phys(hwaddr addr)
2173 {
2174     return ldl_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2175 }
2176
2177 uint32_t ldl_le_phys(hwaddr addr)
2178 {
2179     return ldl_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2180 }
2181
2182 uint32_t ldl_be_phys(hwaddr addr)
2183 {
2184     return ldl_phys_internal(addr, DEVICE_BIG_ENDIAN);
2185 }
2186
2187 /* warning: addr must be aligned */
2188 static inline uint64_t ldq_phys_internal(hwaddr addr,
2189                                          enum device_endian endian)
2190 {
2191     uint8_t *ptr;
2192     uint64_t val;
2193     MemoryRegionSection *section;
2194
2195     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2196
2197     if (!(memory_region_is_ram(section->mr) ||
2198           memory_region_is_romd(section->mr))) {
2199         /* I/O case */
2200         addr = memory_region_section_addr(section, addr);
2201
2202         /* XXX: This is broken when the device endianness differs from the
2203                CPU endianness.  Fix this and honour the "endian" argument. */
2204 #ifdef TARGET_WORDS_BIGENDIAN
2205         val = io_mem_read(section->mr, addr, 4) << 32;
2206         val |= io_mem_read(section->mr, addr + 4, 4);
2207 #else
2208         val = io_mem_read(section->mr, addr, 4);
2209         val |= io_mem_read(section->mr, addr + 4, 4) << 32;
2210 #endif
2211     } else {
2212         /* RAM case */
2213         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2214                                 & TARGET_PAGE_MASK)
2215                                + memory_region_section_addr(section, addr));
2216         switch (endian) {
2217         case DEVICE_LITTLE_ENDIAN:
2218             val = ldq_le_p(ptr);
2219             break;
2220         case DEVICE_BIG_ENDIAN:
2221             val = ldq_be_p(ptr);
2222             break;
2223         default:
2224             val = ldq_p(ptr);
2225             break;
2226         }
2227     }
2228     return val;
2229 }
2230
2231 uint64_t ldq_phys(hwaddr addr)
2232 {
2233     return ldq_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2234 }
2235
2236 uint64_t ldq_le_phys(hwaddr addr)
2237 {
2238     return ldq_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2239 }
2240
2241 uint64_t ldq_be_phys(hwaddr addr)
2242 {
2243     return ldq_phys_internal(addr, DEVICE_BIG_ENDIAN);
2244 }
2245
2246 /* XXX: optimize */
2247 uint32_t ldub_phys(hwaddr addr)
2248 {
2249     uint8_t val;
2250     cpu_physical_memory_read(addr, &val, 1);
2251     return val;
2252 }
2253
2254 /* warning: addr must be aligned */
2255 static inline uint32_t lduw_phys_internal(hwaddr addr,
2256                                           enum device_endian endian)
2257 {
2258     uint8_t *ptr;
2259     uint64_t val;
2260     MemoryRegionSection *section;
2261
2262     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2263
2264     if (!(memory_region_is_ram(section->mr) ||
2265           memory_region_is_romd(section->mr))) {
2266         /* I/O case */
2267         addr = memory_region_section_addr(section, addr);
2268         val = io_mem_read(section->mr, addr, 2);
2269 #if defined(TARGET_WORDS_BIGENDIAN)
2270         if (endian == DEVICE_LITTLE_ENDIAN) {
2271             val = bswap16(val);
2272         }
2273 #else
2274         if (endian == DEVICE_BIG_ENDIAN) {
2275             val = bswap16(val);
2276         }
2277 #endif
2278     } else {
2279         /* RAM case */
2280         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2281                                 & TARGET_PAGE_MASK)
2282                                + memory_region_section_addr(section, addr));
2283         switch (endian) {
2284         case DEVICE_LITTLE_ENDIAN:
2285             val = lduw_le_p(ptr);
2286             break;
2287         case DEVICE_BIG_ENDIAN:
2288             val = lduw_be_p(ptr);
2289             break;
2290         default:
2291             val = lduw_p(ptr);
2292             break;
2293         }
2294     }
2295     return val;
2296 }
2297
2298 uint32_t lduw_phys(hwaddr addr)
2299 {
2300     return lduw_phys_internal(addr, DEVICE_NATIVE_ENDIAN);
2301 }
2302
2303 uint32_t lduw_le_phys(hwaddr addr)
2304 {
2305     return lduw_phys_internal(addr, DEVICE_LITTLE_ENDIAN);
2306 }
2307
2308 uint32_t lduw_be_phys(hwaddr addr)
2309 {
2310     return lduw_phys_internal(addr, DEVICE_BIG_ENDIAN);
2311 }
2312
2313 /* warning: addr must be aligned.  The RAM page is not marked as dirty
2314    and the code inside it is not invalidated.  This is useful when the
2315    dirty bits are used to track modified PTEs. */
2316 void stl_phys_notdirty(hwaddr addr, uint32_t val)
2317 {
2318     uint8_t *ptr;
2319     MemoryRegionSection *section;
2320
2321     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2322
2323     if (!memory_region_is_ram(section->mr) || section->readonly) {
2324         addr = memory_region_section_addr(section, addr);
2325         if (memory_region_is_ram(section->mr)) {
2326             section = &phys_sections[phys_section_rom];
2327         }
2328         io_mem_write(section->mr, addr, val, 4);
2329     } else {
2330         unsigned long addr1 = (memory_region_get_ram_addr(section->mr)
2331                                & TARGET_PAGE_MASK)
2332             + memory_region_section_addr(section, addr);
2333         ptr = qemu_get_ram_ptr(addr1);
2334         stl_p(ptr, val);
2335
2336         if (unlikely(in_migration)) {
2337             if (!cpu_physical_memory_is_dirty(addr1)) {
2338                 /* invalidate code */
2339                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
2340                 /* set dirty bit */
2341                 cpu_physical_memory_set_dirty_flags(
2342                     addr1, (0xff & ~CODE_DIRTY_FLAG));
2343             }
2344         }
2345     }
2346 }
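
/* Illustrative sketch: the use case mentioned in the comment above -- target
 * MMU emulation setting an accessed bit in a guest page-table entry without
 * marking the underlying RAM page dirty.  The PTE address and bit layout are
 * hypothetical. */
static void example_set_pte_accessed(hwaddr pte_addr)
{
    uint32_t pte = ldl_phys(pte_addr);

    stl_phys_notdirty(pte_addr, pte | 0x20 /* hypothetical "accessed" bit */);
}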
2347
2348 void stq_phys_notdirty(hwaddr addr, uint64_t val)
2349 {
2350     uint8_t *ptr;
2351     MemoryRegionSection *section;
2352
2353     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2354
2355     if (!memory_region_is_ram(section->mr) || section->readonly) {
2356         addr = memory_region_section_addr(section, addr);
2357         if (memory_region_is_ram(section->mr)) {
2358             section = &phys_sections[phys_section_rom];
2359         }
2360 #ifdef TARGET_WORDS_BIGENDIAN
2361         io_mem_write(section->mr, addr, val >> 32, 4);
2362         io_mem_write(section->mr, addr + 4, (uint32_t)val, 4);
2363 #else
2364         io_mem_write(section->mr, addr, (uint32_t)val, 4);
2365         io_mem_write(section->mr, addr + 4, val >> 32, 4);
2366 #endif
2367     } else {
2368         ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr)
2369                                 & TARGET_PAGE_MASK)
2370                                + memory_region_section_addr(section, addr));
2371         stq_p(ptr, val);
2372     }
2373 }
2374
2375 /* warning: addr must be aligned */
2376 static inline void stl_phys_internal(hwaddr addr, uint32_t val,
2377                                      enum device_endian endian)
2378 {
2379     uint8_t *ptr;
2380     MemoryRegionSection *section;
2381
2382     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2383
2384     if (!memory_region_is_ram(section->mr) || section->readonly) {
2385         addr = memory_region_section_addr(section, addr);
2386         if (memory_region_is_ram(section->mr)) {
2387             section = &phys_sections[phys_section_rom];
2388         }
2389 #if defined(TARGET_WORDS_BIGENDIAN)
2390         if (endian == DEVICE_LITTLE_ENDIAN) {
2391             val = bswap32(val);
2392         }
2393 #else
2394         if (endian == DEVICE_BIG_ENDIAN) {
2395             val = bswap32(val);
2396         }
2397 #endif
2398         io_mem_write(section->mr, addr, val, 4);
2399     } else {
2400         unsigned long addr1;
2401         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2402             + memory_region_section_addr(section, addr);
2403         /* RAM case */
2404         ptr = qemu_get_ram_ptr(addr1);
2405         switch (endian) {
2406         case DEVICE_LITTLE_ENDIAN:
2407             stl_le_p(ptr, val);
2408             break;
2409         case DEVICE_BIG_ENDIAN:
2410             stl_be_p(ptr, val);
2411             break;
2412         default:
2413             stl_p(ptr, val);
2414             break;
2415         }
2416         invalidate_and_set_dirty(addr1, 4);
2417     }
2418 }
2419
2420 void stl_phys(hwaddr addr, uint32_t val)
2421 {
2422     stl_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2423 }
2424
2425 void stl_le_phys(hwaddr addr, uint32_t val)
2426 {
2427     stl_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2428 }
2429
2430 void stl_be_phys(hwaddr addr, uint32_t val)
2431 {
2432     stl_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2433 }
2434
2435 /* XXX: optimize */
2436 void stb_phys(hwaddr addr, uint32_t val)
2437 {
2438     uint8_t v = val;
2439     cpu_physical_memory_write(addr, &v, 1);
2440 }
2441
2442 /* warning: addr must be aligned */
2443 static inline void stw_phys_internal(hwaddr addr, uint32_t val,
2444                                      enum device_endian endian)
2445 {
2446     uint8_t *ptr;
2447     MemoryRegionSection *section;
2448
2449     section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
2450
2451     if (!memory_region_is_ram(section->mr) || section->readonly) {
2452         addr = memory_region_section_addr(section, addr);
2453         if (memory_region_is_ram(section->mr)) {
2454             section = &phys_sections[phys_section_rom];
2455         }
2456 #if defined(TARGET_WORDS_BIGENDIAN)
2457         if (endian == DEVICE_LITTLE_ENDIAN) {
2458             val = bswap16(val);
2459         }
2460 #else
2461         if (endian == DEVICE_BIG_ENDIAN) {
2462             val = bswap16(val);
2463         }
2464 #endif
2465         io_mem_write(section->mr, addr, val, 2);
2466     } else {
2467         unsigned long addr1;
2468         addr1 = (memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK)
2469             + memory_region_section_addr(section, addr);
2470         /* RAM case */
2471         ptr = qemu_get_ram_ptr(addr1);
2472         switch (endian) {
2473         case DEVICE_LITTLE_ENDIAN:
2474             stw_le_p(ptr, val);
2475             break;
2476         case DEVICE_BIG_ENDIAN:
2477             stw_be_p(ptr, val);
2478             break;
2479         default:
2480             stw_p(ptr, val);
2481             break;
2482         }
2483         invalidate_and_set_dirty(addr1, 2);
2484     }
2485 }
2486
2487 void stw_phys(hwaddr addr, uint32_t val)
2488 {
2489     stw_phys_internal(addr, val, DEVICE_NATIVE_ENDIAN);
2490 }
2491
2492 void stw_le_phys(hwaddr addr, uint32_t val)
2493 {
2494     stw_phys_internal(addr, val, DEVICE_LITTLE_ENDIAN);
2495 }
2496
2497 void stw_be_phys(hwaddr addr, uint32_t val)
2498 {
2499     stw_phys_internal(addr, val, DEVICE_BIG_ENDIAN);
2500 }
2501
2502 /* XXX: optimize */
2503 void stq_phys(hwaddr addr, uint64_t val)
2504 {
2505     val = tswap64(val);
2506     cpu_physical_memory_write(addr, &val, 8);
2507 }
2508
2509 void stq_le_phys(hwaddr addr, uint64_t val)
2510 {
2511     val = cpu_to_le64(val);
2512     cpu_physical_memory_write(addr, &val, 8);
2513 }
2514
2515 void stq_be_phys(hwaddr addr, uint64_t val)
2516 {
2517     val = cpu_to_be64(val);
2518     cpu_physical_memory_write(addr, &val, 8);
2519 }
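
/* Illustrative sketch: the fixed-endian helpers above allow a
 * read-modify-write of a little-endian device register regardless of the
 * target's native byte order.  "reg_addr" and the bit mask are hypothetical. */
static void example_set_enable_bit(hwaddr reg_addr)
{
    uint32_t ctrl = ldl_le_phys(reg_addr);

    stl_le_phys(reg_addr, ctrl | 0x1);   /* set hypothetical "enable" bit */
}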
2520
2521 /* virtual memory access for debug (includes writing to ROM) */
2522 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
2523                         uint8_t *buf, int len, int is_write)
2524 {
2525     int l;
2526     hwaddr phys_addr;
2527     target_ulong page;
2528
2529     while (len > 0) {
2530         page = addr & TARGET_PAGE_MASK;
2531         phys_addr = cpu_get_phys_page_debug(env, page);
2532         /* if no physical page mapped, return an error */
2533         if (phys_addr == -1)
2534             return -1;
2535         l = (page + TARGET_PAGE_SIZE) - addr;
2536         if (l > len)
2537             l = len;
2538         phys_addr += (addr & ~TARGET_PAGE_MASK);
2539         if (is_write)
2540             cpu_physical_memory_write_rom(phys_addr, buf, l);
2541         else
2542             cpu_physical_memory_rw(phys_addr, buf, l, is_write);
2543         len -= l;
2544         buf += l;
2545         addr += l;
2546     }
2547     return 0;
2548 }
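
/* Illustrative sketch: how a debugger front end (for instance the gdbstub)
 * would read guest virtual memory through cpu_memory_rw_debug(), which
 * translates each page with cpu_get_phys_page_debug() and may also patch ROM
 * on writes. */
static int example_peek_guest_vaddr(CPUArchState *env, target_ulong vaddr,
                                    uint8_t *buf, int len)
{
    return cpu_memory_rw_debug(env, vaddr, buf, len, 0 /* read */);
}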
2549 #endif
2550
2551 #if !defined(CONFIG_USER_ONLY)
2552
2553 /*
2554  * A helper function for the _utterly broken_ virtio device model to find out
2555  * whether it is running on a big-endian machine. Don't do this at home, kids!
2556  */
2557 bool virtio_is_big_endian(void);
2558 bool virtio_is_big_endian(void)
2559 {
2560 #if defined(TARGET_WORDS_BIGENDIAN)
2561     return true;
2562 #else
2563     return false;
2564 #endif
2565 }
2566
2567 #endif
2568
2569 #ifndef CONFIG_USER_ONLY
2570 bool cpu_physical_memory_is_io(hwaddr phys_addr)
2571 {
2572     MemoryRegionSection *section;
2573
2574     section = phys_page_find(address_space_memory.dispatch,
2575                              phys_addr >> TARGET_PAGE_BITS);
2576
2577     return !(memory_region_is_ram(section->mr) ||
2578              memory_region_is_romd(section->mr));
2579 }
2580 #endif