/*
 *  Common CPU TLB handling
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
#include "exec/cpu_ldst.h"
#include "exec/cputlb.h"
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
#include "qemu/error-report.h"
#include "exec/log.h"
#include "exec/helper-proto.h"
#include "qemu/atomic.h"
#include "qemu/atomic128.h"
#include "translate-all.h"
/* DEBUG defines, enable DEBUG_TLB_LOG to log to the CPU_LOG_MMU target */
/* #define DEBUG_TLB */
/* #define DEBUG_TLB_LOG */

#ifdef DEBUG_TLB
# define DEBUG_TLB_GATE 1
# ifdef DEBUG_TLB_LOG
#  define DEBUG_TLB_LOG_GATE 1
# else
#  define DEBUG_TLB_LOG_GATE 0
# endif
#else
# define DEBUG_TLB_GATE 0
# define DEBUG_TLB_LOG_GATE 0
#endif

#define tlb_debug(fmt, ...) do { \
    if (DEBUG_TLB_LOG_GATE) { \
        qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
                      ## __VA_ARGS__); \
    } else if (DEBUG_TLB_GATE) { \
        fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
    } \
} while (0)

#define assert_cpu_is_self(cpu) do {                            \
        if (DEBUG_TLB_GATE) {                                   \
            g_assert(!(cpu)->created || qemu_cpu_is_self(cpu)); \
        }                                                       \
    } while (0)
/* run_on_cpu_data.target_ptr should always be big enough for a
 * target_ulong even on 32 bit builds */
QEMU_BUILD_BUG_ON(sizeof(target_ulong) > sizeof(run_on_cpu_data));

/* We currently can't handle more than 16 bits in the MMUIDX bitmask.
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > 16);
#define ALL_MMUIDX_BITS ((1 << NB_MMU_MODES) - 1)
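/*
 * Illustrative sketch (not from the original source): a flush request names
 * the TLBs to touch as a bitmask with one bit per MMU index.  For a
 * hypothetical target with three MMU modes, flushing modes 0 and 2 would be
 * requested as:
 *
 *     uint16_t idxmap = (1 << 0) | (1 << 2);     // binary 101
 *     tlb_flush_by_mmuidx(cpu, idxmap);
 *
 * ALL_MMUIDX_BITS sets every bit below NB_MMU_MODES, which is why the mask
 * must fit in 16 bits (see the build assert above).
 */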
static inline size_t sizeof_tlb(CPUArchState *env, uintptr_t mmu_idx)
{
    return env_tlb(env)->f[mmu_idx].mask + (1 << CPU_TLB_ENTRY_BITS);
}

static void tlb_window_reset(CPUTLBDesc *desc, int64_t ns,
                             size_t max_entries)
{
    desc->window_begin_ns = ns;
    desc->window_max_entries = max_entries;
}

static void tlb_dyn_init(CPUArchState *env)
{
    int i;

    for (i = 0; i < NB_MMU_MODES; i++) {
        CPUTLBDesc *desc = &env_tlb(env)->d[i];
        size_t n_entries = 1 << CPU_TLB_DYN_DEFAULT_BITS;

        tlb_window_reset(desc, get_clock_realtime(), 0);
        desc->n_used_entries = 0;
        env_tlb(env)->f[i].mask = (n_entries - 1) << CPU_TLB_ENTRY_BITS;
        env_tlb(env)->f[i].table = g_new(CPUTLBEntry, n_entries);
        env_tlb(env)->d[i].iotlb = g_new(CPUIOTLBEntry, n_entries);
    }
}
/**
 * tlb_mmu_resize_locked() - perform TLB resize bookkeeping; resize if necessary
 * @env: CPU that owns the TLB
 * @mmu_idx: MMU index of the TLB
 *
 * Called with tlb_lock_held.
 *
 * We have two main constraints when resizing a TLB: (1) we only resize it
 * on a TLB flush (otherwise we'd have to take a perf hit by either rehashing
 * the array or unnecessarily flushing it), which means we do not control how
 * frequently the resizing can occur; (2) we don't have access to the guest's
 * future scheduling decisions, and therefore have to decide the magnitude of
 * the resize based on past observations.
 *
 * In general, a memory-hungry process can benefit greatly from an appropriately
 * sized TLB, since a guest TLB miss is very expensive. This doesn't mean that
 * we just have to make the TLB as large as possible; while an oversized TLB
 * results in minimal TLB miss rates, it also takes longer to be flushed
 * (flushes can be _very_ frequent), and the reduced locality can also hurt
 * performance.
 *
 * To achieve near-optimal performance for all kinds of workloads, we:
 *
 * 1. Aggressively increase the size of the TLB when the use rate of the
 * TLB being flushed is high, since it is likely that in the near future this
 * memory-hungry process will execute again, and its memory hungriness will
 * probably be similar.
 *
 * 2. Slowly reduce the size of the TLB as the use rate declines over a
 * reasonably large time window. The rationale is that if in such a time window
 * we have not observed a high TLB use rate, it is likely that we won't observe
 * it in the near future. In that case, once a time window expires we downsize
 * the TLB to match the maximum use rate observed in the window.
 *
 * 3. Try to keep the maximum use rate in a time window in the 30-70% range,
 * since in that range performance is likely near-optimal. Recall that the TLB
 * is direct mapped, so we want the use rate to be low (or at least not too
 * high), since otherwise we are likely to have a significant amount of
 * conflict misses.
 */
static void tlb_mmu_resize_locked(CPUArchState *env, int mmu_idx)
{
    CPUTLBDesc *desc = &env_tlb(env)->d[mmu_idx];
    size_t old_size = tlb_n_entries(env, mmu_idx);
    size_t rate;
    size_t new_size = old_size;
    int64_t now = get_clock_realtime();
    int64_t window_len_ms = 100;
    int64_t window_len_ns = window_len_ms * 1000 * 1000;
    bool window_expired = now > desc->window_begin_ns + window_len_ns;

    if (desc->n_used_entries > desc->window_max_entries) {
        desc->window_max_entries = desc->n_used_entries;
    }
    rate = desc->window_max_entries * 100 / old_size;

    if (rate > 70) {
        new_size = MIN(old_size << 1, 1 << CPU_TLB_DYN_MAX_BITS);
    } else if (rate < 30 && window_expired) {
        size_t ceil = pow2ceil(desc->window_max_entries);
        size_t expected_rate = desc->window_max_entries * 100 / ceil;

        /*
         * Avoid undersizing when the max number of entries seen is just below
         * a pow2. For instance, if max_entries == 1025, the expected use rate
         * would be 1025/2048==50%. However, if max_entries == 1023, we'd get
         * 1023/1024==99.9% use rate, so we'd likely end up doubling the size
         * later. Thus, make sure that the expected use rate remains below 70%.
         * (and since we double the size, that means the lowest rate we'd
         * expect to get is 35%, which is still in the 30-70% range where
         * we consider that the size is appropriate.)
         */
        if (expected_rate > 70) {
            ceil *= 2;
        }
        new_size = MAX(ceil, 1 << CPU_TLB_DYN_MIN_BITS);
    }

    if (new_size == old_size) {
        if (window_expired) {
            tlb_window_reset(desc, now, desc->n_used_entries);
        }
        return;
    }

    g_free(env_tlb(env)->f[mmu_idx].table);
    g_free(env_tlb(env)->d[mmu_idx].iotlb);

    tlb_window_reset(desc, now, 0);
    /* desc->n_used_entries is cleared by the caller */
    env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;
    env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
    env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    /*
     * If the allocations fail, try smaller sizes. We just freed some
     * memory, so going back to half of new_size has a good chance of working.
     * Increased memory pressure elsewhere in the system might cause the
     * allocations to fail though, so we progressively reduce the allocation
     * size, aborting if we cannot even allocate the smallest TLB we support.
     */
    while (env_tlb(env)->f[mmu_idx].table == NULL ||
           env_tlb(env)->d[mmu_idx].iotlb == NULL) {
        if (new_size == (1 << CPU_TLB_DYN_MIN_BITS)) {
            error_report("%s: %s", __func__, strerror(errno));
            abort();
        }
        new_size = MAX(new_size >> 1, 1 << CPU_TLB_DYN_MIN_BITS);
        env_tlb(env)->f[mmu_idx].mask = (new_size - 1) << CPU_TLB_ENTRY_BITS;

        g_free(env_tlb(env)->f[mmu_idx].table);
        g_free(env_tlb(env)->d[mmu_idx].iotlb);
        env_tlb(env)->f[mmu_idx].table = g_try_new(CPUTLBEntry, new_size);
        env_tlb(env)->d[mmu_idx].iotlb = g_try_new(CPUIOTLBEntry, new_size);
    }
}
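/*
 * Worked example for tlb_mmu_resize_locked() above (illustrative only, not
 * from the original source).  With old_size == 1024 entries:
 *
 *   - window_max_entries == 819 gives rate == 819 * 100 / 1024 == 79, which
 *     is above 70, so the TLB doubles to 2048 entries (capped at
 *     1 << CPU_TLB_DYN_MAX_BITS).
 *
 *   - window_max_entries == 160 with an expired window gives rate == 15;
 *     pow2ceil(160) == 256 and the expected use rate 160 * 100 / 256 == 62
 *     stays below 70, so the TLB shrinks to
 *     MAX(256, 1 << CPU_TLB_DYN_MIN_BITS).
 */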
static inline void tlb_table_flush_by_mmuidx(CPUArchState *env, int mmu_idx)
{
    tlb_mmu_resize_locked(env, mmu_idx);
    memset(env_tlb(env)->f[mmu_idx].table, -1, sizeof_tlb(env, mmu_idx));
    env_tlb(env)->d[mmu_idx].n_used_entries = 0;
}

static inline void tlb_n_used_entries_inc(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries++;
}

static inline void tlb_n_used_entries_dec(CPUArchState *env, uintptr_t mmu_idx)
{
    env_tlb(env)->d[mmu_idx].n_used_entries--;
}

void tlb_init(CPUState *cpu)
{
    CPUArchState *env = cpu->env_ptr;

    qemu_spin_init(&env_tlb(env)->c.lock);

    /* Ensure that cpu_reset performs a full flush.  */
    env_tlb(env)->c.dirty = ALL_MMUIDX_BITS;

    tlb_dyn_init(env);
}
/* flush_all_helper: run fn across all cpus
 *
 * If the wait flag is set then the src cpu's helper will be queued as
 * "safe" work and the loop exited creating a synchronisation point
 * where all queued work will be finished before execution starts
 * again.
 */
static void flush_all_helper(CPUState *src, run_on_cpu_func fn,
                             run_on_cpu_data d)
{
    CPUState *cpu;

    CPU_FOREACH(cpu) {
        if (cpu != src) {
            async_run_on_cpu(cpu, fn, d);
        }
    }
}
270 void tlb_flush_counts(size_t *pfull, size_t *ppart, size_t *pelide)
273 size_t full = 0, part = 0, elide = 0;
276 CPUArchState *env = cpu->env_ptr;
278 full += atomic_read(&env_tlb(env)->c.full_flush_count);
279 part += atomic_read(&env_tlb(env)->c.part_flush_count);
280 elide += atomic_read(&env_tlb(env)->c.elide_flush_count);
287 static void tlb_flush_one_mmuidx_locked(CPUArchState *env, int mmu_idx)
289 tlb_table_flush_by_mmuidx(env, mmu_idx);
290 env_tlb(env)->d[mmu_idx].large_page_addr = -1;
291 env_tlb(env)->d[mmu_idx].large_page_mask = -1;
292 env_tlb(env)->d[mmu_idx].vindex = 0;
293 memset(env_tlb(env)->d[mmu_idx].vtable, -1,
294 sizeof(env_tlb(env)->d[0].vtable));
297 static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
299 CPUArchState *env = cpu->env_ptr;
300 uint16_t asked = data.host_int;
301 uint16_t all_dirty, work, to_clean;
303 assert_cpu_is_self(cpu);
305 tlb_debug("mmu_idx:0x%04" PRIx16 "\n", asked);
307 qemu_spin_lock(&env_tlb(env)->c.lock);
309 all_dirty = env_tlb(env)->c.dirty;
310 to_clean = asked & all_dirty;
311 all_dirty &= ~to_clean;
312 env_tlb(env)->c.dirty = all_dirty;
314 for (work = to_clean; work != 0; work &= work - 1) {
315 int mmu_idx = ctz32(work);
316 tlb_flush_one_mmuidx_locked(env, mmu_idx);
319 qemu_spin_unlock(&env_tlb(env)->c.lock);
321 cpu_tb_jmp_cache_clear(cpu);
323 if (to_clean == ALL_MMUIDX_BITS) {
324 atomic_set(&env_tlb(env)->c.full_flush_count,
325 env_tlb(env)->c.full_flush_count + 1);
327 atomic_set(&env_tlb(env)->c.part_flush_count,
328 env_tlb(env)->c.part_flush_count + ctpop16(to_clean));
329 if (to_clean != asked) {
330 atomic_set(&env_tlb(env)->c.elide_flush_count,
331 env_tlb(env)->c.elide_flush_count +
332 ctpop16(asked & ~to_clean));
337 void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
339 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
341 if (cpu->created && !qemu_cpu_is_self(cpu)) {
342 async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
343 RUN_ON_CPU_HOST_INT(idxmap));
345 tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
349 void tlb_flush(CPUState *cpu)
351 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
354 void tlb_flush_by_mmuidx_all_cpus(CPUState *src_cpu, uint16_t idxmap)
356 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
358 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
360 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
361 fn(src_cpu, RUN_ON_CPU_HOST_INT(idxmap));
364 void tlb_flush_all_cpus(CPUState *src_cpu)
366 tlb_flush_by_mmuidx_all_cpus(src_cpu, ALL_MMUIDX_BITS);
369 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
371 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
373 tlb_debug("mmu_idx: 0x%"PRIx16"\n", idxmap);
375 flush_all_helper(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
376 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_HOST_INT(idxmap));
379 void tlb_flush_all_cpus_synced(CPUState *src_cpu)
381 tlb_flush_by_mmuidx_all_cpus_synced(src_cpu, ALL_MMUIDX_BITS);
384 static inline bool tlb_hit_page_anyprot(CPUTLBEntry *tlb_entry,
387 return tlb_hit_page(tlb_entry->addr_read, page) ||
388 tlb_hit_page(tlb_addr_write(tlb_entry), page) ||
389 tlb_hit_page(tlb_entry->addr_code, page);
393 * tlb_entry_is_empty - return true if the entry is not in use
394 * @te: pointer to CPUTLBEntry
396 static inline bool tlb_entry_is_empty(const CPUTLBEntry *te)
398 return te->addr_read == -1 && te->addr_write == -1 && te->addr_code == -1;
401 /* Called with tlb_c.lock held */
402 static inline bool tlb_flush_entry_locked(CPUTLBEntry *tlb_entry,
405 if (tlb_hit_page_anyprot(tlb_entry, page)) {
406 memset(tlb_entry, -1, sizeof(*tlb_entry));
412 /* Called with tlb_c.lock held */
413 static inline void tlb_flush_vtlb_page_locked(CPUArchState *env, int mmu_idx,
416 CPUTLBDesc *d = &env_tlb(env)->d[mmu_idx];
419 assert_cpu_is_self(env_cpu(env));
420 for (k = 0; k < CPU_VTLB_SIZE; k++) {
421 if (tlb_flush_entry_locked(&d->vtable[k], page)) {
422 tlb_n_used_entries_dec(env, mmu_idx);
427 static void tlb_flush_page_locked(CPUArchState *env, int midx,
430 target_ulong lp_addr = env_tlb(env)->d[midx].large_page_addr;
431 target_ulong lp_mask = env_tlb(env)->d[midx].large_page_mask;
433 /* Check if we need to flush due to large pages. */
434 if ((page & lp_mask) == lp_addr) {
435 tlb_debug("forcing full flush midx %d ("
436 TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
437 midx, lp_addr, lp_mask);
438 tlb_flush_one_mmuidx_locked(env, midx);
440 if (tlb_flush_entry_locked(tlb_entry(env, midx, page), page)) {
441 tlb_n_used_entries_dec(env, midx);
443 tlb_flush_vtlb_page_locked(env, midx, page);
/* As we are going to hijack the bottom bits of the page address for a
 * mmuidx bit mask we need to fail to build if we can't do that
 */
QEMU_BUILD_BUG_ON(NB_MMU_MODES > TARGET_PAGE_BITS_MIN);
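/*
 * Illustrative sketch (not from the original source): the page address and
 * the MMU index bitmap travel in a single target_ulong, which is what the
 * build assert above guarantees is possible.  Packing and unpacking look
 * roughly like:
 *
 *     target_ulong packed = (addr & TARGET_PAGE_MASK) | idxmap;
 *     target_ulong page   = packed & TARGET_PAGE_MASK;
 *     unsigned long bits  = packed & ALL_MMUIDX_BITS;
 *
 * tlb_flush_page_by_mmuidx_async_work() below performs exactly this
 * unpacking before walking the selected MMU indexes.
 */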
452 static void tlb_flush_page_by_mmuidx_async_work(CPUState *cpu,
453 run_on_cpu_data data)
455 CPUArchState *env = cpu->env_ptr;
456 target_ulong addr_and_mmuidx = (target_ulong) data.target_ptr;
457 target_ulong addr = addr_and_mmuidx & TARGET_PAGE_MASK;
458 unsigned long mmu_idx_bitmap = addr_and_mmuidx & ALL_MMUIDX_BITS;
461 assert_cpu_is_self(cpu);
463 tlb_debug("page addr:" TARGET_FMT_lx " mmu_map:0x%lx\n",
464 addr, mmu_idx_bitmap);
466 qemu_spin_lock(&env_tlb(env)->c.lock);
467 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
468 if (test_bit(mmu_idx, &mmu_idx_bitmap)) {
469 tlb_flush_page_locked(env, mmu_idx, addr);
472 qemu_spin_unlock(&env_tlb(env)->c.lock);
474 tb_flush_jmp_cache(cpu, addr);
477 void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, uint16_t idxmap)
479 target_ulong addr_and_mmu_idx;
481 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%" PRIx16 "\n", addr, idxmap);
483 /* This should already be page aligned */
484 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
485 addr_and_mmu_idx |= idxmap;
487 if (!qemu_cpu_is_self(cpu)) {
488 async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_work,
489 RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
491 tlb_flush_page_by_mmuidx_async_work(
492 cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
496 void tlb_flush_page(CPUState *cpu, target_ulong addr)
498 tlb_flush_page_by_mmuidx(cpu, addr, ALL_MMUIDX_BITS);
501 void tlb_flush_page_by_mmuidx_all_cpus(CPUState *src_cpu, target_ulong addr,
504 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
505 target_ulong addr_and_mmu_idx;
507 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
509 /* This should already be page aligned */
510 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
511 addr_and_mmu_idx |= idxmap;
513 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
514 fn(src_cpu, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
517 void tlb_flush_page_all_cpus(CPUState *src, target_ulong addr)
519 tlb_flush_page_by_mmuidx_all_cpus(src, addr, ALL_MMUIDX_BITS);
522 void tlb_flush_page_by_mmuidx_all_cpus_synced(CPUState *src_cpu,
526 const run_on_cpu_func fn = tlb_flush_page_by_mmuidx_async_work;
527 target_ulong addr_and_mmu_idx;
529 tlb_debug("addr: "TARGET_FMT_lx" mmu_idx:%"PRIx16"\n", addr, idxmap);
531 /* This should already be page aligned */
532 addr_and_mmu_idx = addr & TARGET_PAGE_MASK;
533 addr_and_mmu_idx |= idxmap;
535 flush_all_helper(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
536 async_safe_run_on_cpu(src_cpu, fn, RUN_ON_CPU_TARGET_PTR(addr_and_mmu_idx));
539 void tlb_flush_page_all_cpus_synced(CPUState *src, target_ulong addr)
541 tlb_flush_page_by_mmuidx_all_cpus_synced(src, addr, ALL_MMUIDX_BITS);
/* update the TLBs so that writes to code in the virtual page 'addr'
   can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
    cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
                                             DIRTY_MEMORY_CODE);
}
552 /* update the TLB so that writes in physical page 'phys_addr' are no longer
553 tested for self modifying code */
554 void tlb_unprotect_code(ram_addr_t ram_addr)
556 cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
/*
 * Dirty write flag handling
 *
 * When the TCG code writes to a location it looks up the address in
 * the TLB and uses that data to compute the final address. If any of
 * the lower bits of the address are set then the slow path is forced.
 * There are a number of reasons to do this but for normal RAM the
 * most usual is detecting writes to code regions which may invalidate
 * generated code.
 *
 * Other vCPUs might be reading their TLBs during guest execution, so we update
 * te->addr_write with atomic_set. We don't need to worry about this for
 * oversized guests as MTTCG is disabled for them.
 *
 * Called with tlb_c.lock held.
 */
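/*
 * Illustrative note (not from the original source): TLB_NOTDIRTY is one of
 * the flag bits reserved below TARGET_PAGE_SIZE, so setting it in
 * addr_write makes the fast-path tag comparison fail for writes; the
 * generated code then falls back to the store helpers, where
 * notdirty_write() can mark the page dirty and, if needed, invalidate any
 * translated code for it.
 */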
576 static void tlb_reset_dirty_range_locked(CPUTLBEntry *tlb_entry,
577 uintptr_t start, uintptr_t length)
579 uintptr_t addr = tlb_entry->addr_write;
581 if ((addr & (TLB_INVALID_MASK | TLB_MMIO |
582 TLB_DISCARD_WRITE | TLB_NOTDIRTY)) == 0) {
583 addr &= TARGET_PAGE_MASK;
584 addr += tlb_entry->addend;
585 if ((addr - start) < length) {
586 #if TCG_OVERSIZED_GUEST
587 tlb_entry->addr_write |= TLB_NOTDIRTY;
589 atomic_set(&tlb_entry->addr_write,
590 tlb_entry->addr_write | TLB_NOTDIRTY);
/*
 * Called with tlb_c.lock held.
 * Called only from the vCPU context, i.e. the TLB's owner thread.
 */
static inline void copy_tlb_helper_locked(CPUTLBEntry *d, const CPUTLBEntry *s)
{
    *d = *s;
}

/* This is a cross vCPU call (i.e. another vCPU resetting the flags of
 * the TLB entries).
 * We must take tlb_c.lock to avoid racing with another vCPU update. The only
 * thing actually updated is the target TLB entry ->addr_write flags.
 */
610 void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
617 qemu_spin_lock(&env_tlb(env)->c.lock);
618 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
620 unsigned int n = tlb_n_entries(env, mmu_idx);
622 for (i = 0; i < n; i++) {
623 tlb_reset_dirty_range_locked(&env_tlb(env)->f[mmu_idx].table[i],
627 for (i = 0; i < CPU_VTLB_SIZE; i++) {
628 tlb_reset_dirty_range_locked(&env_tlb(env)->d[mmu_idx].vtable[i],
632 qemu_spin_unlock(&env_tlb(env)->c.lock);
635 /* Called with tlb_c.lock held */
636 static inline void tlb_set_dirty1_locked(CPUTLBEntry *tlb_entry,
639 if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
640 tlb_entry->addr_write = vaddr;
644 /* update the TLB corresponding to virtual page vaddr
645 so that it is no longer dirty */
646 void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
648 CPUArchState *env = cpu->env_ptr;
651 assert_cpu_is_self(cpu);
653 vaddr &= TARGET_PAGE_MASK;
654 qemu_spin_lock(&env_tlb(env)->c.lock);
655 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
656 tlb_set_dirty1_locked(tlb_entry(env, mmu_idx, vaddr), vaddr);
659 for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
661 for (k = 0; k < CPU_VTLB_SIZE; k++) {
662 tlb_set_dirty1_locked(&env_tlb(env)->d[mmu_idx].vtable[k], vaddr);
665 qemu_spin_unlock(&env_tlb(env)->c.lock);
668 /* Our TLB does not support large pages, so remember the area covered by
669 large pages and trigger a full TLB flush if these are invalidated. */
670 static void tlb_add_large_page(CPUArchState *env, int mmu_idx,
671 target_ulong vaddr, target_ulong size)
673 target_ulong lp_addr = env_tlb(env)->d[mmu_idx].large_page_addr;
674 target_ulong lp_mask = ~(size - 1);
676 if (lp_addr == (target_ulong)-1) {
677 /* No previous large page. */
680 /* Extend the existing region to include the new page.
681 This is a compromise between unnecessary flushes and
682 the cost of maintaining a full variable size TLB. */
683 lp_mask &= env_tlb(env)->d[mmu_idx].large_page_mask;
684 while (((lp_addr ^ vaddr) & lp_mask) != 0) {
688 env_tlb(env)->d[mmu_idx].large_page_addr = lp_addr & lp_mask;
689 env_tlb(env)->d[mmu_idx].large_page_mask = lp_mask;
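/*
 * Worked example for tlb_add_large_page() above (illustrative only, not from
 * the original source): suppose a 2MB page at 0x40200000 was recorded
 * earlier (lp_addr == 0x40200000, lp_mask == ~0x1fffff) and a new 2MB page
 * is added at 0x40a00000.  The mask is widened one bit at a time until both
 * addresses fall in the same naturally aligned region; here that yields
 * lp_mask == ~0xffffff, i.e. a 16MB region whose invalidation will force a
 * full flush of this MMU index.
 */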
/* Add a new TLB entry. At most one entry for a given virtual address
 * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
 * supplied size is only used by tlb_flush_page.
 *
 * Called from TCG-generated code, which is under an RCU read-side
 * critical section.
 */
699 void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
700 hwaddr paddr, MemTxAttrs attrs, int prot,
701 int mmu_idx, target_ulong size)
703 CPUArchState *env = cpu->env_ptr;
704 CPUTLB *tlb = env_tlb(env);
705 CPUTLBDesc *desc = &tlb->d[mmu_idx];
706 MemoryRegionSection *section;
708 target_ulong address;
709 target_ulong write_address;
712 hwaddr iotlb, xlat, sz, paddr_page;
713 target_ulong vaddr_page;
714 int asidx = cpu_asidx_from_attrs(cpu, attrs);
716 bool is_ram, is_romd;
718 assert_cpu_is_self(cpu);
720 if (size <= TARGET_PAGE_SIZE) {
721 sz = TARGET_PAGE_SIZE;
723 tlb_add_large_page(env, mmu_idx, vaddr, size);
726 vaddr_page = vaddr & TARGET_PAGE_MASK;
727 paddr_page = paddr & TARGET_PAGE_MASK;
729 section = address_space_translate_for_iotlb(cpu, asidx, paddr_page,
730 &xlat, &sz, attrs, &prot);
731 assert(sz >= TARGET_PAGE_SIZE);
733 tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
735 vaddr, paddr, prot, mmu_idx);
737 address = vaddr_page;
738 if (size < TARGET_PAGE_SIZE) {
739 /* Repeat the MMU check and TLB fill on every access. */
740 address |= TLB_INVALID_MASK;
742 if (attrs.byte_swap) {
743 address |= TLB_BSWAP;
746 is_ram = memory_region_is_ram(section->mr);
747 is_romd = memory_region_is_romd(section->mr);
749 if (is_ram || is_romd) {
750 /* RAM and ROMD both have associated host memory. */
751 addend = (uintptr_t)memory_region_get_ram_ptr(section->mr) + xlat;
753 /* I/O does not; force the host address to NULL. */
757 write_address = address;
759 iotlb = memory_region_get_ram_addr(section->mr) + xlat;
761 * Computing is_clean is expensive; avoid all that unless
762 * the page is actually writable.
764 if (prot & PAGE_WRITE) {
765 if (section->readonly) {
766 write_address |= TLB_DISCARD_WRITE;
767 } else if (cpu_physical_memory_is_clean(iotlb)) {
768 write_address |= TLB_NOTDIRTY;
773 iotlb = memory_region_section_get_iotlb(cpu, section) + xlat;
        /*
         * Writes to romd devices must go through MMIO to enable write.
         * Reads to romd devices go through the ram_ptr found above,
         * but of course reads to I/O must go through MMIO.
         */
        write_address |= TLB_MMIO;
781 address = write_address;
785 wp_flags = cpu_watchpoint_address_matches(cpu, vaddr_page,
788 index = tlb_index(env, mmu_idx, vaddr_page);
789 te = tlb_entry(env, mmu_idx, vaddr_page);
792 * Hold the TLB lock for the rest of the function. We could acquire/release
793 * the lock several times in the function, but it is faster to amortize the
794 * acquisition cost by acquiring it just once. Note that this leads to
795 * a longer critical section, but this is not a concern since the TLB lock
796 * is unlikely to be contended.
798 qemu_spin_lock(&tlb->c.lock);
800 /* Note that the tlb is no longer clean. */
801 tlb->c.dirty |= 1 << mmu_idx;
803 /* Make sure there's no cached translation for the new page. */
804 tlb_flush_vtlb_page_locked(env, mmu_idx, vaddr_page);
807 * Only evict the old entry to the victim tlb if it's for a
808 * different page; otherwise just overwrite the stale data.
810 if (!tlb_hit_page_anyprot(te, vaddr_page) && !tlb_entry_is_empty(te)) {
811 unsigned vidx = desc->vindex++ % CPU_VTLB_SIZE;
812 CPUTLBEntry *tv = &desc->vtable[vidx];
814 /* Evict the old entry into the victim tlb. */
815 copy_tlb_helper_locked(tv, te);
816 desc->viotlb[vidx] = desc->iotlb[index];
817 tlb_n_used_entries_dec(env, mmu_idx);
    /*
     * At this point iotlb contains a physical section number in the lower
     * TARGET_PAGE_BITS, and either
     *  + the ram_addr_t of the page base of the target RAM (RAM)
     *  + the offset within section->mr of the page base (I/O, ROMD)
     * We subtract the vaddr_page (which is page aligned and thus won't
     * disturb the low bits) to give an offset which can be added to the
     * (non-page-aligned) vaddr of the eventual memory access to get
     * the MemoryRegion offset for the access. Note that the vaddr we
     * subtract here is that of the page base, and not the same as the
     * vaddr we add back in io_readx()/io_writex()/get_page_addr_code().
     */
833 desc->iotlb[index].addr = iotlb - vaddr_page;
834 desc->iotlb[index].attrs = attrs;
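    /*
     * Worked example (illustrative only, not from the original source): for
     * a RAM page whose ram_addr_t base is 0x12340000 mapped at guest virtual
     * 0x00007000, the stored value is 0x12340000 - 0x00007000 = 0x12339000.
     * A later access to 0x00007123 recovers 0x12339000 + 0x00007123 =
     * 0x12340123, i.e. the correct offset of the access within the region.
     */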
836 /* Now calculate the new entry */
837 tn.addend = addend - vaddr_page;
838 if (prot & PAGE_READ) {
839 tn.addr_read = address;
840 if (wp_flags & BP_MEM_READ) {
841 tn.addr_read |= TLB_WATCHPOINT;
847 if (prot & PAGE_EXEC) {
848 tn.addr_code = address;
854 if (prot & PAGE_WRITE) {
855 tn.addr_write = write_address;
856 if (prot & PAGE_WRITE_INV) {
857 tn.addr_write |= TLB_INVALID_MASK;
859 if (wp_flags & BP_MEM_WRITE) {
860 tn.addr_write |= TLB_WATCHPOINT;
864 copy_tlb_helper_locked(te, &tn);
865 tlb_n_used_entries_inc(env, mmu_idx);
866 qemu_spin_unlock(&tlb->c.lock);
869 /* Add a new TLB entry, but without specifying the memory
870 * transaction attributes to be used.
872 void tlb_set_page(CPUState *cpu, target_ulong vaddr,
873 hwaddr paddr, int prot,
874 int mmu_idx, target_ulong size)
876 tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
877 prot, mmu_idx, size);
880 static inline ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr)
884 ram_addr = qemu_ram_addr_from_host(ptr);
885 if (ram_addr == RAM_ADDR_INVALID) {
886 error_report("Bad ram pointer %p", ptr);
893 * Note: tlb_fill() can trigger a resize of the TLB. This means that all of the
894 * caller's prior references to the TLB table (e.g. CPUTLBEntry pointers) must
895 * be discarded and looked up again (e.g. via tlb_entry()).
897 static void tlb_fill(CPUState *cpu, target_ulong addr, int size,
898 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
900 CPUClass *cc = CPU_GET_CLASS(cpu);
904 * This is not a probe, so only valid return is success; failure
905 * should result in exception + longjmp to the cpu loop.
907 ok = cc->tlb_fill(cpu, addr, size, access_type, mmu_idx, false, retaddr);
911 static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
912 int mmu_idx, target_ulong addr, uintptr_t retaddr,
913 MMUAccessType access_type, MemOp op)
915 CPUState *cpu = env_cpu(env);
917 MemoryRegionSection *section;
923 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
925 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
926 cpu->mem_io_pc = retaddr;
927 if (!cpu->can_do_io) {
928 cpu_io_recompile(cpu, retaddr);
931 cpu->mem_io_access_type = access_type;
933 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
934 qemu_mutex_lock_iothread();
937 r = memory_region_dispatch_read(mr, mr_offset, &val, op, iotlbentry->attrs);
939 hwaddr physaddr = mr_offset +
940 section->offset_within_address_space -
941 section->offset_within_region;
943 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op), access_type,
944 mmu_idx, iotlbentry->attrs, r, retaddr);
947 qemu_mutex_unlock_iothread();
953 static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
954 int mmu_idx, uint64_t val, target_ulong addr,
955 uintptr_t retaddr, MemOp op)
957 CPUState *cpu = env_cpu(env);
959 MemoryRegionSection *section;
964 section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
966 mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
967 if (!cpu->can_do_io) {
968 cpu_io_recompile(cpu, retaddr);
970 cpu->mem_io_pc = retaddr;
972 if (mr->global_locking && !qemu_mutex_iothread_locked()) {
973 qemu_mutex_lock_iothread();
976 r = memory_region_dispatch_write(mr, mr_offset, val, op, iotlbentry->attrs);
978 hwaddr physaddr = mr_offset +
979 section->offset_within_address_space -
980 section->offset_within_region;
982 cpu_transaction_failed(cpu, physaddr, addr, memop_size(op),
983 MMU_DATA_STORE, mmu_idx, iotlbentry->attrs, r,
987 qemu_mutex_unlock_iothread();
991 static inline target_ulong tlb_read_ofs(CPUTLBEntry *entry, size_t ofs)
993 #if TCG_OVERSIZED_GUEST
994 return *(target_ulong *)((uintptr_t)entry + ofs);
996 /* ofs might correspond to .addr_write, so use atomic_read */
997 return atomic_read((target_ulong *)((uintptr_t)entry + ofs));
1001 /* Return true if ADDR is present in the victim tlb, and has been copied
1002 back to the main tlb. */
1003 static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
1004 size_t elt_ofs, target_ulong page)
1008 assert_cpu_is_self(env_cpu(env));
1009 for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
1010 CPUTLBEntry *vtlb = &env_tlb(env)->d[mmu_idx].vtable[vidx];
1013 /* elt_ofs might correspond to .addr_write, so use atomic_read */
1014 #if TCG_OVERSIZED_GUEST
1015 cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
1017 cmp = atomic_read((target_ulong *)((uintptr_t)vtlb + elt_ofs));
1021 /* Found entry in victim tlb, swap tlb and iotlb. */
1022 CPUTLBEntry tmptlb, *tlb = &env_tlb(env)->f[mmu_idx].table[index];
1024 qemu_spin_lock(&env_tlb(env)->c.lock);
1025 copy_tlb_helper_locked(&tmptlb, tlb);
1026 copy_tlb_helper_locked(tlb, vtlb);
1027 copy_tlb_helper_locked(vtlb, &tmptlb);
1028 qemu_spin_unlock(&env_tlb(env)->c.lock);
1030 CPUIOTLBEntry tmpio, *io = &env_tlb(env)->d[mmu_idx].iotlb[index];
1031 CPUIOTLBEntry *vio = &env_tlb(env)->d[mmu_idx].viotlb[vidx];
1032 tmpio = *io; *io = *vio; *vio = tmpio;
1039 /* Macro to call the above, with local variables from the use context. */
1040 #define VICTIM_TLB_HIT(TY, ADDR) \
1041 victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
1042 (ADDR) & TARGET_PAGE_MASK)
/*
 * Return a ram_addr_t for the virtual address for execution.
 *
 * Return -1 if we can't translate and execute from an entire page
 * of RAM.  This will force us to execute by loading and translating
 * one insn at a time, without caching.
 *
 * NOTE: This function will trigger an exception if the page is
 * not executable.
 */
1054 tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
1057 uintptr_t mmu_idx = cpu_mmu_index(env, true);
1058 uintptr_t index = tlb_index(env, mmu_idx, addr);
1059 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1062 if (unlikely(!tlb_hit(entry->addr_code, addr))) {
1063 if (!VICTIM_TLB_HIT(addr_code, addr)) {
1064 tlb_fill(env_cpu(env), addr, 0, MMU_INST_FETCH, mmu_idx, 0);
1065 index = tlb_index(env, mmu_idx, addr);
1066 entry = tlb_entry(env, mmu_idx, addr);
1068 if (unlikely(entry->addr_code & TLB_INVALID_MASK)) {
1070 * The MMU protection covers a smaller range than a target
1071 * page, so we must redo the MMU check for every insn.
1076 assert(tlb_hit(entry->addr_code, addr));
1079 if (unlikely(entry->addr_code & TLB_MMIO)) {
1080 /* The region is not backed by RAM. */
1087 p = (void *)((uintptr_t)addr + entry->addend);
1091 return qemu_ram_addr_from_host_nofail(p);
1094 tb_page_addr_t get_page_addr_code(CPUArchState *env, target_ulong addr)
1096 return get_page_addr_code_hostp(env, addr, NULL);
1099 static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
1100 CPUIOTLBEntry *iotlbentry, uintptr_t retaddr)
1102 ram_addr_t ram_addr = mem_vaddr + iotlbentry->addr;
1104 trace_memory_notdirty_write_access(mem_vaddr, ram_addr, size);
1106 if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
1107 struct page_collection *pages
1108 = page_collection_lock(ram_addr, ram_addr + size);
1109 tb_invalidate_phys_page_fast(pages, ram_addr, size, retaddr);
1110 page_collection_unlock(pages);
1114 * Set both VGA and migration bits for simplicity and to remove
1115 * the notdirty callback faster.
1117 cpu_physical_memory_set_dirty_range(ram_addr, size, DIRTY_CLIENTS_NOCODE);
1119 /* We remove the notdirty callback only if the code has been flushed. */
1120 if (!cpu_physical_memory_is_clean(ram_addr)) {
1121 trace_memory_notdirty_set_dirty(mem_vaddr);
1122 tlb_set_dirty(cpu, mem_vaddr);
/*
 * Probe for whether the specified guest access is permitted. If it is not
 * permitted then an exception will be taken in the same way as if this
 * were a real access (and we will not return).
 * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
 * returns the address of the host page similar to tlb_vaddr_to_host().
 */
1133 void *probe_access(CPUArchState *env, target_ulong addr, int size,
1134 MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
1136 uintptr_t index = tlb_index(env, mmu_idx, addr);
1137 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1138 target_ulong tlb_addr;
1142 g_assert(-(addr | TARGET_PAGE_MASK) >= size);
1144 switch (access_type) {
1146 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1147 wp_access = BP_MEM_READ;
1149 case MMU_DATA_STORE:
1150 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1151 wp_access = BP_MEM_WRITE;
1153 case MMU_INST_FETCH:
1154 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1155 wp_access = BP_MEM_READ;
1158 g_assert_not_reached();
1160 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1162 if (unlikely(!tlb_hit(tlb_addr, addr))) {
1163 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
1164 addr & TARGET_PAGE_MASK)) {
1165 tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
1166 /* TLB resize via tlb_fill may have moved the entry. */
1167 index = tlb_index(env, mmu_idx, addr);
1168 entry = tlb_entry(env, mmu_idx, addr);
1170 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1177 if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
1178 CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1180 /* Reject I/O access, or other required slow-path. */
1181 if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
1185 /* Handle watchpoints. */
1186 if (tlb_addr & TLB_WATCHPOINT) {
1187 cpu_check_watchpoint(env_cpu(env), addr, size,
1188 iotlbentry->attrs, wp_access, retaddr);
1191 /* Handle clean RAM pages. */
1192 if (tlb_addr & TLB_NOTDIRTY) {
1193 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1197 return (void *)((uintptr_t)addr + entry->addend);
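/*
 * Usage sketch for probe_access() (illustrative only, not from the original
 * source): a target helper that wants to know up front whether an 8-byte
 * store can be done as a plain host store might do:
 *
 *     void *host = probe_access(env, addr, 8, MMU_DATA_STORE,
 *                               cpu_mmu_index(env, false), GETPC());
 *     if (host) {
 *         // direct host access is possible
 *     }
 *
 * NULL means the access is allowed but cannot be done as a simple host
 * memory access (zero size, or a page that needs I/O or other slow-path
 * handling); a forbidden access raises the guest exception and does not
 * return here.
 */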
1200 void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
1201 MMUAccessType access_type, int mmu_idx)
1203 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1204 uintptr_t tlb_addr, page;
1207 switch (access_type) {
1209 elt_ofs = offsetof(CPUTLBEntry, addr_read);
1211 case MMU_DATA_STORE:
1212 elt_ofs = offsetof(CPUTLBEntry, addr_write);
1214 case MMU_INST_FETCH:
1215 elt_ofs = offsetof(CPUTLBEntry, addr_code);
1218 g_assert_not_reached();
1221 page = addr & TARGET_PAGE_MASK;
1222 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1224 if (!tlb_hit_page(tlb_addr, page)) {
1225 uintptr_t index = tlb_index(env, mmu_idx, addr);
1227 if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
1228 CPUState *cs = env_cpu(env);
1229 CPUClass *cc = CPU_GET_CLASS(cs);
1231 if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
1232 /* Non-faulting page table read failed. */
1236 /* TLB resize via tlb_fill may have moved the entry. */
1237 entry = tlb_entry(env, mmu_idx, addr);
1239 tlb_addr = tlb_read_ofs(entry, elt_ofs);
1242 if (tlb_addr & ~TARGET_PAGE_MASK) {
1247 return (void *)((uintptr_t)addr + entry->addend);
1250 /* Probe for a read-modify-write atomic operation. Do not allow unaligned
1251 * operations, or io operations to proceed. Return the host address. */
1252 static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
1253 TCGMemOpIdx oi, uintptr_t retaddr)
1255 size_t mmu_idx = get_mmuidx(oi);
1256 uintptr_t index = tlb_index(env, mmu_idx, addr);
1257 CPUTLBEntry *tlbe = tlb_entry(env, mmu_idx, addr);
1258 target_ulong tlb_addr = tlb_addr_write(tlbe);
1259 MemOp mop = get_memop(oi);
1260 int a_bits = get_alignment_bits(mop);
1261 int s_bits = mop & MO_SIZE;
1264 /* Adjust the given return address. */
1265 retaddr -= GETPC_ADJ;
1267 /* Enforce guest required alignment. */
1268 if (unlikely(a_bits > 0 && (addr & ((1 << a_bits) - 1)))) {
1269 /* ??? Maybe indicate atomic op to cpu_unaligned_access */
1270 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1274 /* Enforce qemu required alignment. */
1275 if (unlikely(addr & ((1 << s_bits) - 1))) {
1276 /* We get here if guest alignment was not requested,
1277 or was not enforced by cpu_unaligned_access above.
1278 We might widen the access and emulate, but for now
1279 mark an exception and exit the cpu loop. */
1280 goto stop_the_world;
1283 /* Check TLB entry and enforce page permissions. */
1284 if (!tlb_hit(tlb_addr, addr)) {
1285 if (!VICTIM_TLB_HIT(addr_write, addr)) {
1286 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_STORE,
1288 index = tlb_index(env, mmu_idx, addr);
1289 tlbe = tlb_entry(env, mmu_idx, addr);
1291 tlb_addr = tlb_addr_write(tlbe) & ~TLB_INVALID_MASK;
1294 /* Notice an IO access or a needs-MMU-lookup access */
1295 if (unlikely(tlb_addr & TLB_MMIO)) {
1296 /* There's really nothing that can be done to
1297 support this apart from stop-the-world. */
1298 goto stop_the_world;
1301 /* Let the guest notice RMW on a write-only page. */
1302 if (unlikely(tlbe->addr_read != (tlb_addr & ~TLB_NOTDIRTY))) {
1303 tlb_fill(env_cpu(env), addr, 1 << s_bits, MMU_DATA_LOAD,
1305 /* Since we don't support reads and writes to different addresses,
1306 and we do have the proper page loaded for write, this shouldn't
1307 ever return. But just in case, handle via stop-the-world. */
1308 goto stop_the_world;
1311 hostaddr = (void *)((uintptr_t)addr + tlbe->addend);
1313 if (unlikely(tlb_addr & TLB_NOTDIRTY)) {
1314 notdirty_write(env_cpu(env), addr, 1 << s_bits,
1315 &env_tlb(env)->d[mmu_idx].iotlb[index], retaddr);
1321 cpu_loop_exit_atomic(env_cpu(env), retaddr);
1327 * We support two different access types. SOFTMMU_CODE_ACCESS is
1328 * specifically for reading instructions from system memory. It is
1329 * called by the translation loop and in some helpers where the code
1330 * is disassembled. It shouldn't be called directly by guest code.
1333 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
1334 TCGMemOpIdx oi, uintptr_t retaddr);
1336 static inline uint64_t QEMU_ALWAYS_INLINE
1337 load_memop(const void *haddr, MemOp op)
1341 return ldub_p(haddr);
1343 return lduw_be_p(haddr);
1345 return lduw_le_p(haddr);
1347 return (uint32_t)ldl_be_p(haddr);
1349 return (uint32_t)ldl_le_p(haddr);
1351 return ldq_be_p(haddr);
1353 return ldq_le_p(haddr);
1355 qemu_build_not_reached();
1359 static inline uint64_t QEMU_ALWAYS_INLINE
1360 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
1361 uintptr_t retaddr, MemOp op, bool code_read,
1362 FullLoadHelper *full_load)
1364 uintptr_t mmu_idx = get_mmuidx(oi);
1365 uintptr_t index = tlb_index(env, mmu_idx, addr);
1366 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1367 target_ulong tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1368 const size_t tlb_off = code_read ?
1369 offsetof(CPUTLBEntry, addr_code) : offsetof(CPUTLBEntry, addr_read);
1370 const MMUAccessType access_type =
1371 code_read ? MMU_INST_FETCH : MMU_DATA_LOAD;
1372 unsigned a_bits = get_alignment_bits(get_memop(oi));
1375 size_t size = memop_size(op);
1377 /* Handle CPU specific unaligned behaviour */
1378 if (addr & ((1 << a_bits) - 1)) {
1379 cpu_unaligned_access(env_cpu(env), addr, access_type,
1383 /* If the TLB entry is for a different page, reload and try again. */
1384 if (!tlb_hit(tlb_addr, addr)) {
1385 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1386 addr & TARGET_PAGE_MASK)) {
1387 tlb_fill(env_cpu(env), addr, size,
1388 access_type, mmu_idx, retaddr);
1389 index = tlb_index(env, mmu_idx, addr);
1390 entry = tlb_entry(env, mmu_idx, addr);
1392 tlb_addr = code_read ? entry->addr_code : entry->addr_read;
1393 tlb_addr &= ~TLB_INVALID_MASK;
1396 /* Handle anything that isn't just a straight memory access. */
1397 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1398 CPUIOTLBEntry *iotlbentry;
1401 /* For anything that is unaligned, recurse through full_load. */
1402 if ((addr & (size - 1)) != 0) {
1403 goto do_unaligned_access;
1406 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1408 /* Handle watchpoints. */
1409 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1410 /* On watchpoint hit, this will longjmp out. */
1411 cpu_check_watchpoint(env_cpu(env), addr, size,
1412 iotlbentry->attrs, BP_MEM_READ, retaddr);
1415 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1417 /* Handle I/O access. */
1418 if (likely(tlb_addr & TLB_MMIO)) {
1419 return io_readx(env, iotlbentry, mmu_idx, addr, retaddr,
1420 access_type, op ^ (need_swap * MO_BSWAP));
1423 haddr = (void *)((uintptr_t)addr + entry->addend);
1426 * Keep these two load_memop separate to ensure that the compiler
1427 * is able to fold the entire function to a single instruction.
1428 * There is a build-time assert inside to remind you of this. ;-)
1430 if (unlikely(need_swap)) {
1431 return load_memop(haddr, op ^ MO_BSWAP);
1433 return load_memop(haddr, op);
    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
1440 target_ulong addr1, addr2;
1443 do_unaligned_access:
1444 addr1 = addr & ~((target_ulong)size - 1);
1445 addr2 = addr1 + size;
1446 r1 = full_load(env, addr1, oi, retaddr);
1447 r2 = full_load(env, addr2, oi, retaddr);
1448 shift = (addr & (size - 1)) * 8;
1450 if (memop_big_endian(op)) {
1451 /* Big-endian combine. */
1452 res = (r1 << shift) | (r2 >> ((size * 8) - shift));
1454 /* Little-endian combine. */
1455 res = (r1 >> shift) | (r2 << ((size * 8) - shift));
1457 return res & MAKE_64BIT_MASK(0, size * 8);
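        /*
         * Worked example for the combine above (illustrative only): a 4-byte
         * little-endian load at addr == 0xfffe spans the last two bytes of
         * one page and the first two of the next.  Then addr1 == 0xfffc,
         * addr2 == 0x10000 and shift == 16: the low 16 bits of the result
         * come from the top half of r1 (r1 >> 16) and the high 16 bits from
         * the bottom half of r2 (r2 << 16), masked down to 32 bits.
         */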
1460 haddr = (void *)((uintptr_t)addr + entry->addend);
1461 return load_memop(haddr, op);
1465 * For the benefit of TCG generated code, we want to avoid the
1466 * complication of ABI-specific return type promotion and always
1467 * return a value extended to the register size of the host. This is
1468 * tcg_target_long, except in the case of a 32-bit host and 64-bit
1469 * data, and for that we always have uint64_t.
1471 * We don't bother with this widened value for SOFTMMU_CODE_ACCESS.
1474 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
1475 TCGMemOpIdx oi, uintptr_t retaddr)
1477 return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
1480 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
1481 TCGMemOpIdx oi, uintptr_t retaddr)
1483 return full_ldub_mmu(env, addr, oi, retaddr);
1486 static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1487 TCGMemOpIdx oi, uintptr_t retaddr)
1489 return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
1493 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
1494 TCGMemOpIdx oi, uintptr_t retaddr)
1496 return full_le_lduw_mmu(env, addr, oi, retaddr);
1499 static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1500 TCGMemOpIdx oi, uintptr_t retaddr)
1502 return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
1506 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
1507 TCGMemOpIdx oi, uintptr_t retaddr)
1509 return full_be_lduw_mmu(env, addr, oi, retaddr);
1512 static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1513 TCGMemOpIdx oi, uintptr_t retaddr)
1515 return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
1519 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
1520 TCGMemOpIdx oi, uintptr_t retaddr)
1522 return full_le_ldul_mmu(env, addr, oi, retaddr);
1525 static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1526 TCGMemOpIdx oi, uintptr_t retaddr)
1528 return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
1532 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
1533 TCGMemOpIdx oi, uintptr_t retaddr)
1535 return full_be_ldul_mmu(env, addr, oi, retaddr);
1538 uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
1539 TCGMemOpIdx oi, uintptr_t retaddr)
1541 return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
1545 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
1546 TCGMemOpIdx oi, uintptr_t retaddr)
1548 return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
1553 * Provide signed versions of the load routines as well. We can of course
1554 * avoid this for 64-bit data, or for 32-bit data on 32-bit host.
1558 tcg_target_ulong helper_ret_ldsb_mmu(CPUArchState *env, target_ulong addr,
1559 TCGMemOpIdx oi, uintptr_t retaddr)
1561 return (int8_t)helper_ret_ldub_mmu(env, addr, oi, retaddr);
1564 tcg_target_ulong helper_le_ldsw_mmu(CPUArchState *env, target_ulong addr,
1565 TCGMemOpIdx oi, uintptr_t retaddr)
1567 return (int16_t)helper_le_lduw_mmu(env, addr, oi, retaddr);
1570 tcg_target_ulong helper_be_ldsw_mmu(CPUArchState *env, target_ulong addr,
1571 TCGMemOpIdx oi, uintptr_t retaddr)
1573 return (int16_t)helper_be_lduw_mmu(env, addr, oi, retaddr);
1576 tcg_target_ulong helper_le_ldsl_mmu(CPUArchState *env, target_ulong addr,
1577 TCGMemOpIdx oi, uintptr_t retaddr)
1579 return (int32_t)helper_le_ldul_mmu(env, addr, oi, retaddr);
1582 tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
1583 TCGMemOpIdx oi, uintptr_t retaddr)
1585 return (int32_t)helper_be_ldul_mmu(env, addr, oi, retaddr);
1592 static inline void QEMU_ALWAYS_INLINE
1593 store_memop(void *haddr, uint64_t val, MemOp op)
1600 stw_be_p(haddr, val);
1603 stw_le_p(haddr, val);
1606 stl_be_p(haddr, val);
1609 stl_le_p(haddr, val);
1612 stq_be_p(haddr, val);
1615 stq_le_p(haddr, val);
1618 qemu_build_not_reached();
1622 static inline void QEMU_ALWAYS_INLINE
1623 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
1624 TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
1626 uintptr_t mmu_idx = get_mmuidx(oi);
1627 uintptr_t index = tlb_index(env, mmu_idx, addr);
1628 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
1629 target_ulong tlb_addr = tlb_addr_write(entry);
1630 const size_t tlb_off = offsetof(CPUTLBEntry, addr_write);
1631 unsigned a_bits = get_alignment_bits(get_memop(oi));
1633 size_t size = memop_size(op);
1635 /* Handle CPU specific unaligned behaviour */
1636 if (addr & ((1 << a_bits) - 1)) {
1637 cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
1641 /* If the TLB entry is for a different page, reload and try again. */
1642 if (!tlb_hit(tlb_addr, addr)) {
1643 if (!victim_tlb_hit(env, mmu_idx, index, tlb_off,
1644 addr & TARGET_PAGE_MASK)) {
1645 tlb_fill(env_cpu(env), addr, size, MMU_DATA_STORE,
1647 index = tlb_index(env, mmu_idx, addr);
1648 entry = tlb_entry(env, mmu_idx, addr);
1650 tlb_addr = tlb_addr_write(entry) & ~TLB_INVALID_MASK;
1653 /* Handle anything that isn't just a straight memory access. */
1654 if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
1655 CPUIOTLBEntry *iotlbentry;
1658 /* For anything that is unaligned, recurse through byte stores. */
1659 if ((addr & (size - 1)) != 0) {
1660 goto do_unaligned_access;
1663 iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
1665 /* Handle watchpoints. */
1666 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1667 /* On watchpoint hit, this will longjmp out. */
1668 cpu_check_watchpoint(env_cpu(env), addr, size,
1669 iotlbentry->attrs, BP_MEM_WRITE, retaddr);
1672 need_swap = size > 1 && (tlb_addr & TLB_BSWAP);
1674 /* Handle I/O access. */
1675 if (tlb_addr & TLB_MMIO) {
1676 io_writex(env, iotlbentry, mmu_idx, val, addr, retaddr,
1677 op ^ (need_swap * MO_BSWAP));
1681 /* Ignore writes to ROM. */
1682 if (unlikely(tlb_addr & TLB_DISCARD_WRITE)) {
1686 /* Handle clean RAM pages. */
1687 if (tlb_addr & TLB_NOTDIRTY) {
1688 notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
1691 haddr = (void *)((uintptr_t)addr + entry->addend);
1694 * Keep these two store_memop separate to ensure that the compiler
1695 * is able to fold the entire function to a single instruction.
1696 * There is a build-time assert inside to remind you of this. ;-)
1698 if (unlikely(need_swap)) {
1699 store_memop(haddr, val, op ^ MO_BSWAP);
1701 store_memop(haddr, val, op);
    /* Handle slow unaligned access (it spans two pages or IO).  */
    if (size > 1
        && unlikely((addr & ~TARGET_PAGE_MASK) + size - 1
                    >= TARGET_PAGE_SIZE)) {
1712 CPUTLBEntry *entry2;
1713 target_ulong page2, tlb_addr2;
1716 do_unaligned_access:
1718 * Ensure the second page is in the TLB. Note that the first page
1719 * is already guaranteed to be filled, and that the second page
1720 * cannot evict the first.
1722 page2 = (addr + size) & TARGET_PAGE_MASK;
1723 size2 = (addr + size) & ~TARGET_PAGE_MASK;
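        /*
         * Worked example (illustrative only, not from the original source):
         * an 8-byte store at addr == 0xffb with 4KB pages spans two pages.
         * page2 == 0x1000 and size2 == 3, so bytes 0..4 of the value belong
         * to the first page and bytes 5..7 to the second; the byte-by-byte
         * loop below writes them in ascending address order.
         */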
1724 index2 = tlb_index(env, mmu_idx, page2);
1725 entry2 = tlb_entry(env, mmu_idx, page2);
1726 tlb_addr2 = tlb_addr_write(entry2);
1727 if (!tlb_hit_page(tlb_addr2, page2)) {
1728 if (!victim_tlb_hit(env, mmu_idx, index2, tlb_off, page2)) {
1729 tlb_fill(env_cpu(env), page2, size2, MMU_DATA_STORE,
1731 index2 = tlb_index(env, mmu_idx, page2);
1732 entry2 = tlb_entry(env, mmu_idx, page2);
1734 tlb_addr2 = tlb_addr_write(entry2);
1738 * Handle watchpoints. Since this may trap, all checks
1739 * must happen before any store.
1741 if (unlikely(tlb_addr & TLB_WATCHPOINT)) {
1742 cpu_check_watchpoint(env_cpu(env), addr, size - size2,
1743 env_tlb(env)->d[mmu_idx].iotlb[index].attrs,
1744 BP_MEM_WRITE, retaddr);
1746 if (unlikely(tlb_addr2 & TLB_WATCHPOINT)) {
1747 cpu_check_watchpoint(env_cpu(env), page2, size2,
1748 env_tlb(env)->d[mmu_idx].iotlb[index2].attrs,
1749 BP_MEM_WRITE, retaddr);
1753 * XXX: not efficient, but simple.
1754 * This loop must go in the forward direction to avoid issues
1755 * with self-modifying code in Windows 64-bit.
1757 for (i = 0; i < size; ++i) {
1759 if (memop_big_endian(op)) {
1760 /* Big-endian extract. */
1761 val8 = val >> (((size - 1) * 8) - (i * 8));
1763 /* Little-endian extract. */
1764 val8 = val >> (i * 8);
1766 helper_ret_stb_mmu(env, addr + i, val8, oi, retaddr);
1771 haddr = (void *)((uintptr_t)addr + entry->addend);
1772 store_memop(haddr, val, op);
1775 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
1776 TCGMemOpIdx oi, uintptr_t retaddr)
1778 store_helper(env, addr, val, oi, retaddr, MO_UB);
1781 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1782 TCGMemOpIdx oi, uintptr_t retaddr)
1784 store_helper(env, addr, val, oi, retaddr, MO_LEUW);
1787 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
1788 TCGMemOpIdx oi, uintptr_t retaddr)
1790 store_helper(env, addr, val, oi, retaddr, MO_BEUW);
1793 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1794 TCGMemOpIdx oi, uintptr_t retaddr)
1796 store_helper(env, addr, val, oi, retaddr, MO_LEUL);
1799 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
1800 TCGMemOpIdx oi, uintptr_t retaddr)
1802 store_helper(env, addr, val, oi, retaddr, MO_BEUL);
1805 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1806 TCGMemOpIdx oi, uintptr_t retaddr)
1808 store_helper(env, addr, val, oi, retaddr, MO_LEQ);
1811 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
1812 TCGMemOpIdx oi, uintptr_t retaddr)
1814 store_helper(env, addr, val, oi, retaddr, MO_BEQ);
1817 /* First set of helpers allows passing in of OI and RETADDR. This makes
1818 them callable from other helpers. */
1820 #define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr
1821 #define ATOMIC_NAME(X) \
1822 HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu))
1823 #define ATOMIC_MMU_DECLS
1824 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, retaddr)
1825 #define ATOMIC_MMU_CLEANUP
1826 #define ATOMIC_MMU_IDX get_mmuidx(oi)
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif

#if HAVE_CMPXCHG128 || HAVE_ATOMIC128
#define DATA_SIZE 16
#include "atomic_template.h"
#endif
1847 /* Second set of helpers are directly callable from TCG as helpers. */
1851 #undef ATOMIC_MMU_LOOKUP
1852 #define EXTRA_ARGS , TCGMemOpIdx oi
1853 #define ATOMIC_NAME(X) HELPER(glue(glue(atomic_ ## X, SUFFIX), END))
1854 #define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, oi, GETPC())
#define DATA_SIZE 1
#include "atomic_template.h"

#define DATA_SIZE 2
#include "atomic_template.h"

#define DATA_SIZE 4
#include "atomic_template.h"

#ifdef CONFIG_ATOMIC64
#define DATA_SIZE 8
#include "atomic_template.h"
#endif
1869 #undef ATOMIC_MMU_IDX
1871 /* Code access functions. */
1873 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
1874 TCGMemOpIdx oi, uintptr_t retaddr)
1876 return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
1879 uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,
1880 TCGMemOpIdx oi, uintptr_t retaddr)
1882 return full_ldub_cmmu(env, addr, oi, retaddr);
1885 static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
1886 TCGMemOpIdx oi, uintptr_t retaddr)
1888 return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
1892 uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,
1893 TCGMemOpIdx oi, uintptr_t retaddr)
1895 return full_le_lduw_cmmu(env, addr, oi, retaddr);
1898 static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
1899 TCGMemOpIdx oi, uintptr_t retaddr)
1901 return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
1905 uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,
1906 TCGMemOpIdx oi, uintptr_t retaddr)
1908 return full_be_lduw_cmmu(env, addr, oi, retaddr);
1911 static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
1912 TCGMemOpIdx oi, uintptr_t retaddr)
1914 return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
1918 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
1919 TCGMemOpIdx oi, uintptr_t retaddr)
1921 return full_le_ldul_cmmu(env, addr, oi, retaddr);
1924 static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
1925 TCGMemOpIdx oi, uintptr_t retaddr)
1927 return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
1931 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
1932 TCGMemOpIdx oi, uintptr_t retaddr)
1934 return full_be_ldul_cmmu(env, addr, oi, retaddr);
1937 uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
1938 TCGMemOpIdx oi, uintptr_t retaddr)
1940 return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
1941 helper_le_ldq_cmmu);
1944 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
1945 TCGMemOpIdx oi, uintptr_t retaddr)
1947 return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
1948 helper_be_ldq_cmmu);