2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
43 #define NO_CPU_IO_DEFS
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
57 # define ELF_CLASS ELFCLASS64
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
62 # define ELF_DATA ELFDATA2LSB
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74 intptr_t value, intptr_t addend);
76 /* The CIE and FDE header definitions will be common to all hosts. */
78 uint32_t len __attribute__((aligned((sizeof(void *)))));
84 uint8_t return_column;
87 typedef struct QEMU_PACKED {
88 uint32_t len __attribute__((aligned((sizeof(void *)))));
92 } DebugFrameFDEHeader;
94 typedef struct QEMU_PACKED {
96 DebugFrameFDEHeader fde;
99 static void tcg_register_jit_int(const void *buf, size_t size,
100 const void *debug_frame,
101 size_t debug_frame_size)
102 __attribute__((unused));
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109 TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111 const TCGArg args[TCG_MAX_OP_ARGS],
112 const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115 TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121 unsigned vecl, unsigned vece,
122 const TCGArg args[TCG_MAX_OP_ARGS],
123 const int const_args[TCG_MAX_OP_ARGS]);
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126 TCGReg dst, TCGReg src)
128 g_assert_not_reached();
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131 TCGReg dst, TCGReg base, intptr_t offset)
133 g_assert_not_reached();
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136 TCGReg dst, int64_t arg)
138 g_assert_not_reached();
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141 unsigned vecl, unsigned vece,
142 const TCGArg args[TCG_MAX_OP_ARGS],
143 const int const_args[TCG_MAX_OP_ARGS])
145 g_assert_not_reached();
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154 const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
159 #define TCG_HIGHWATER 1024
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
164 const void *tcg_code_gen_epilogue;
165 uintptr_t tcg_splitwx_diff;
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn *tcg_qemu_tb_exec;
171 struct tcg_region_tree {
174 /* padding to avoid false sharing is computed at run-time */
178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
179 * dynamically allocate from as demand dictates. Given appropriate region
180 * sizing, this minimizes flushes even when some TCG threads generate a lot
181 * more code than others.
183 struct tcg_region_state {
186 /* fields set at init time */
191 size_t size; /* size of one region */
192 size_t stride; /* .size + guard size */
194 /* fields protected by the lock */
195 size_t current; /* current region index */
196 size_t agg_size_full; /* aggregate size of full regions */
199 static struct tcg_region_state region;
201 * This is an array of struct tcg_region_tree's, with padding.
202 * We use void * to simplify the computation of region_trees[i]; each
203 * struct is found every tree_size bytes.
205 static void *region_trees;
206 static size_t tree_size;
207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
208 static TCGRegSet tcg_target_call_clobber_regs;
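/*
 * Descriptive note (added, not in the original source): the tcg_out{8,16,32,64}
 * helpers below append a value to the generated code at s->code_ptr, and the
 * matching tcg_patch{8,16,32,64} helpers rewrite a value at an already-emitted
 * location.  Whenever the value is not exactly one host insn unit wide, it is
 * written byte-wise with memcpy and code_ptr advances by the equivalent number
 * of insn units.
 */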
210 #if TCG_TARGET_INSN_UNIT_SIZE == 1
211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
226 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
229 tcg_insn_unit *p = s->code_ptr;
230 memcpy(p, &v, sizeof(v));
231 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
238 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
241 memcpy(p, &v, sizeof(v));
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
249 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
252 tcg_insn_unit *p = s->code_ptr;
253 memcpy(p, &v, sizeof(v));
254 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
261 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
264 memcpy(p, &v, sizeof(v));
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
272 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
275 tcg_insn_unit *p = s->code_ptr;
276 memcpy(p, &v, sizeof(v));
277 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
284 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
287 memcpy(p, &v, sizeof(v));
292 /* label relocation processing */
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295 TCGLabel *l, intptr_t addend)
297 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
302 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
305 static void tcg_out_label(TCGContext *s, TCGLabel *l)
307 tcg_debug_assert(!l->has_value);
309 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
312 TCGLabel *gen_new_label(void)
314 TCGContext *s = tcg_ctx;
315 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
317 memset(l, 0, sizeof(TCGLabel));
318 l->id = s->nb_labels++;
319 QSIMPLEQ_INIT(&l->relocs);
321 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
326 static bool tcg_resolve_relocs(TCGContext *s)
330 QSIMPLEQ_FOREACH(l, &s->labels, next) {
332 uintptr_t value = l->u.value;
334 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
343 static void set_jmp_reset_offset(TCGContext *s, int which)
346 * We will check for overflow at the end of the opcode loop in
347 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
349 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
352 /* Signal overflow, starting over with fewer guest insns. */
353 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
355 siglongjmp(s->jmp_trans, -2);
358 #define C_PFX1(P, A) P##A
359 #define C_PFX2(P, A, B) P##A##_##B
360 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
361 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
362 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
363 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
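/*
 * Illustrative note (added, not in the original source): the C_PFXn macros
 * paste their arguments into a single token.  For example, a hypothetical
 * C_O1_I2(r, r, ri) line in tcg-target-con-set.h expands below to
 * C_PFX3(c_o1_i2_, r, r, ri), i.e. the enumerator c_o1_i2_r_r_ri, while the
 * later redefinition of C_O1_I2 turns the very same line into the array
 * entry { .args_ct_str = { "r", "r", "ri" } }.
 */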
365 /* Define an enumeration for the various combinations. */
367 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
368 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
369 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
370 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
372 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
373 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
374 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
375 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
377 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
379 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
380 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
381 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
382 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
385 #include "tcg-target-con-set.h"
386 } TCGConstraintSetIndex;
388 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
404 /* Put all of the constraint sets into an array, indexed by the enum. */
406 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
407 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
408 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
409 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
411 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
412 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
413 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
414 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
416 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
418 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
419 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
420 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
421 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
423 static const TCGTargetOpDef constraint_sets[] = {
424 #include "tcg-target-con-set.h"
442 /* Expand the enumerator to be returned from tcg_target_op_def(). */
444 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
445 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
446 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
447 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
449 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
450 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
451 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
452 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
454 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
456 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
457 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
458 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
459 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
461 #include "tcg-target.c.inc"
463 /* compare a pointer @ptr and a tb_tc @s */
464 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
466 if (ptr >= s->ptr + s->size) {
468 } else if (ptr < s->ptr) {
474 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
476 const struct tb_tc *a = ap;
477 const struct tb_tc *b = bp;
480 * When both sizes are set, we know this isn't a lookup.
481 * This is the most likely case: every TB must be inserted; lookups
482 * are a lot less frequent.
484 if (likely(a->size && b->size)) {
485 if (a->ptr > b->ptr) {
487 } else if (a->ptr < b->ptr) {
490 /* a->ptr == b->ptr should happen only on deletions */
491 g_assert(a->size == b->size);
495 * All lookups have one of the two .size fields set to 0.
496 * From the glib sources we see that @ap is always the lookup key. However
497 * the docs provide no guarantee, so we just mark this case as likely.
499 if (likely(a->size == 0)) {
500 return ptr_cmp_tb_tc(a->ptr, b);
502 return ptr_cmp_tb_tc(b->ptr, a);
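/*
 * Note (added for clarity): a lookup key passed to tb_tc_cmp has only .ptr
 * set, so the size checks above fall through to ptr_cmp_tb_tc(), which
 * reports "equal" for any pointer inside [tb.ptr, tb.ptr + tb.size).  That is
 * what lets tcg_tb_lookup() map an address inside a TB back to the
 * TranslationBlock that contains it.
 */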
505 static void tcg_region_trees_init(void)
509 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
510 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
511 for (i = 0; i < region.n; i++) {
512 struct tcg_region_tree *rt = region_trees + i * tree_size;
514 qemu_mutex_init(&rt->lock);
515 rt->tree = g_tree_new(tb_tc_cmp);
519 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
524 * Like tcg_splitwx_to_rw, with no assert. The pc may come from
525 * a signal handler over which the caller has no control.
527 if (!in_code_gen_buffer(p)) {
528 p -= tcg_splitwx_diff;
529 if (!in_code_gen_buffer(p)) {
534 if (p < region.start_aligned) {
537 ptrdiff_t offset = p - region.start_aligned;
539 if (offset > region.stride * (region.n - 1)) {
540 region_idx = region.n - 1;
542 region_idx = offset / region.stride;
545 return region_trees + region_idx * tree_size;
548 void tcg_tb_insert(TranslationBlock *tb)
550 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
552 g_assert(rt != NULL);
553 qemu_mutex_lock(&rt->lock);
554 g_tree_insert(rt->tree, &tb->tc, tb);
555 qemu_mutex_unlock(&rt->lock);
558 void tcg_tb_remove(TranslationBlock *tb)
560 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
562 g_assert(rt != NULL);
563 qemu_mutex_lock(&rt->lock);
564 g_tree_remove(rt->tree, &tb->tc);
565 qemu_mutex_unlock(&rt->lock);
569 * Find the TB 'tb' such that
570 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
571 * Return NULL if not found.
573 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
575 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
576 TranslationBlock *tb;
577 struct tb_tc s = { .ptr = (void *)tc_ptr };
583 qemu_mutex_lock(&rt->lock);
584 tb = g_tree_lookup(rt->tree, &s);
585 qemu_mutex_unlock(&rt->lock);
589 static void tcg_region_tree_lock_all(void)
593 for (i = 0; i < region.n; i++) {
594 struct tcg_region_tree *rt = region_trees + i * tree_size;
596 qemu_mutex_lock(&rt->lock);
600 static void tcg_region_tree_unlock_all(void)
604 for (i = 0; i < region.n; i++) {
605 struct tcg_region_tree *rt = region_trees + i * tree_size;
607 qemu_mutex_unlock(&rt->lock);
611 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
615 tcg_region_tree_lock_all();
616 for (i = 0; i < region.n; i++) {
617 struct tcg_region_tree *rt = region_trees + i * tree_size;
619 g_tree_foreach(rt->tree, func, user_data);
621 tcg_region_tree_unlock_all();
624 size_t tcg_nb_tbs(void)
629 tcg_region_tree_lock_all();
630 for (i = 0; i < region.n; i++) {
631 struct tcg_region_tree *rt = region_trees + i * tree_size;
633 nb_tbs += g_tree_nnodes(rt->tree);
635 tcg_region_tree_unlock_all();
639 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
641 TranslationBlock *tb = v;
647 static void tcg_region_tree_reset_all(void)
651 tcg_region_tree_lock_all();
652 for (i = 0; i < region.n; i++) {
653 struct tcg_region_tree *rt = region_trees + i * tree_size;
655 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
656 /* Increment the refcount first so that destroy acts as a reset */
657 g_tree_ref(rt->tree);
658 g_tree_destroy(rt->tree);
660 tcg_region_tree_unlock_all();
663 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
667 start = region.start_aligned + curr_region * region.stride;
668 end = start + region.size;
670 if (curr_region == 0) {
671 start = region.start;
673 if (curr_region == region.n - 1) {
681 static void tcg_region_assign(TCGContext *s, size_t curr_region)
685 tcg_region_bounds(curr_region, &start, &end);
687 s->code_gen_buffer = start;
688 s->code_gen_ptr = start;
689 s->code_gen_buffer_size = end - start;
690 s->code_gen_highwater = end - TCG_HIGHWATER;
693 static bool tcg_region_alloc__locked(TCGContext *s)
695 if (region.current == region.n) {
698 tcg_region_assign(s, region.current);
704 * Request a new region once the one in use has filled up.
705 * Returns true on error.
707 static bool tcg_region_alloc(TCGContext *s)
710 /* read the region size now; alloc__locked will overwrite it on success */
711 size_t size_full = s->code_gen_buffer_size;
713 qemu_mutex_lock(&region.lock);
714 err = tcg_region_alloc__locked(s);
716 region.agg_size_full += size_full - TCG_HIGHWATER;
718 qemu_mutex_unlock(&region.lock);
723 * Perform a context's first region allocation.
724 * This function does _not_ increment region.agg_size_full.
726 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
728 return tcg_region_alloc__locked(s);
731 /* Call from a safe-work context */
732 void tcg_region_reset_all(void)
734 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
737 qemu_mutex_lock(&region.lock);
739 region.agg_size_full = 0;
741 for (i = 0; i < n_ctxs; i++) {
742 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
743 bool err = tcg_region_initial_alloc__locked(s);
747 qemu_mutex_unlock(&region.lock);
749 tcg_region_tree_reset_all();
752 #ifdef CONFIG_USER_ONLY
753 static size_t tcg_n_regions(void)
759 * It is likely that some vCPUs will translate more code than others, so we
760 * first try to set more regions than max_cpus, with those regions being of
761 * reasonable size. If that's not possible we make do by evenly dividing
762 * the code_gen_buffer among the vCPUs.
764 static size_t tcg_n_regions(void)
768 /* Use a single region if all we have is one vCPU thread */
769 #if !defined(CONFIG_USER_ONLY)
770 MachineState *ms = MACHINE(qdev_get_machine());
771 unsigned int max_cpus = ms->smp.max_cpus;
773 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
777 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
778 for (i = 8; i > 0; i--) {
779 size_t regions_per_thread = i;
782 region_size = tcg_init_ctx.code_gen_buffer_size;
783 region_size /= max_cpus * regions_per_thread;
785 if (region_size >= 2 * 1024u * 1024) {
786 return max_cpus * regions_per_thread;
789 /* If we can't, then just allocate one region per vCPU thread */
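/*
 * Worked example (hypothetical numbers, not from the original source): with
 * max_cpus = 8 and a 64 MiB code_gen_buffer, the loop above rejects
 * i = 8..5 (the regions would be smaller than 2 MB) and succeeds at i = 4,
 * since 64 MiB / (8 * 4) = 2 MiB, so 32 regions are used.
 */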
795 * Initializes region partitioning.
797 * Called at init time from the parent thread (i.e. the one calling
798 * tcg_context_init), after the target's TCG globals have been set.
800 * Region partitioning works by splitting code_gen_buffer into separate regions,
801 * and then assigning regions to TCG threads so that the threads can translate
802 * code in parallel without synchronization.
804 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
805 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
806 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
807 * must have been parsed before calling this function, since it calls
808 * qemu_tcg_mttcg_enabled().
810 * In user-mode we use a single region. Having multiple regions in user-mode
811 * is not supported, because the number of vCPU threads (recall that each thread
812 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
813 * OS, and usually this number is huge (tens of thousands is not uncommon).
814 * Thus, given this large bound on the number of vCPU threads and the fact
815 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
816 * the availability of at least one region per vCPU thread.
818 * However, this user-mode limitation is unlikely to be a significant problem
819 * in practice. Multi-threaded guests share most if not all of their translated
820 * code, which makes parallel code generation less appealing than in softmmu.
822 void tcg_region_init(void)
824 void *buf = tcg_init_ctx.code_gen_buffer;
826 size_t size = tcg_init_ctx.code_gen_buffer_size;
827 size_t page_size = qemu_real_host_page_size;
832 n_regions = tcg_n_regions();
834 /* The first region will be 'aligned - buf' bytes larger than the others */
835 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
836 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
838 * Make region_size a multiple of page_size, using aligned as the start.
839 * As a result of this we might end up with a few extra pages at the end of
840 * the buffer; we will assign those to the last region.
842 region_size = (size - (aligned - buf)) / n_regions;
843 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
845 /* A region must have at least 2 pages; one code, one guard */
846 g_assert(region_size >= 2 * page_size);
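/*
 * Worked example (hypothetical numbers, not from the original source): with a
 * 64 MiB buffer whose start is already page-aligned, 4 KiB pages and
 * n_regions = 32, region_size comes out as 2 MiB; each region then exposes
 * 2 MiB - 4 KiB for code (region.size) and keeps its final page as a guard
 * page (region.stride - region.size).
 */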
848 /* init the region struct */
849 qemu_mutex_init(&region.lock);
850 region.n = n_regions;
851 region.size = region_size - page_size;
852 region.stride = region_size;
854 region.start_aligned = aligned;
855 /* page-align the end, since its last page will be a guard page */
856 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
857 /* account for that last guard page */
858 region.end -= page_size;
861 * Set guard pages in the rw buffer, as that's the one into which
862 * buffer overruns could occur. Do not set guard pages in the rx
863 * buffer -- let that one use hugepages throughout.
865 for (i = 0; i < region.n; i++) {
869 tcg_region_bounds(i, &start, &end);
870 rc = qemu_mprotect_none(end, page_size);
874 tcg_region_trees_init();
876 /* In user-mode we support only one ctx, so do the initial allocation now */
877 #ifdef CONFIG_USER_ONLY
879 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
886 #ifdef CONFIG_DEBUG_TCG
887 const void *tcg_splitwx_to_rx(void *rw)
889 /* Pass NULL pointers unchanged. */
891 g_assert(in_code_gen_buffer(rw));
892 rw += tcg_splitwx_diff;
897 void *tcg_splitwx_to_rw(const void *rx)
899 /* Pass NULL pointers unchanged. */
901 rx -= tcg_splitwx_diff;
902 /* Assert that we end with a pointer in the rw region. */
903 g_assert(in_code_gen_buffer(rx));
907 #endif /* CONFIG_DEBUG_TCG */
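/*
 * Note (added for clarity): with a split read-write/read-execute mapping the
 * two views of code_gen_buffer differ by the constant tcg_splitwx_diff, so
 * rx == rw + tcg_splitwx_diff.  The debug variants above only add assertions
 * that the pointer being converted really lies inside the buffer.
 */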
909 static void alloc_tcg_plugin_context(TCGContext *s)
912 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
913 s->plugin_tb->insns =
914 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
919 * All TCG threads except the parent (i.e. the one that called tcg_context_init
920 * and registered the target's TCG globals) must register with this function
921 * before initiating translation.
923 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
924 * of tcg_region_init() for the reasoning behind this.
926 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
927 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
928 * is not used anymore for translation once this function is called.
930 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
931 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
933 #ifdef CONFIG_USER_ONLY
934 void tcg_register_thread(void)
936 tcg_ctx = &tcg_init_ctx;
939 void tcg_register_thread(void)
941 MachineState *ms = MACHINE(qdev_get_machine());
942 TCGContext *s = g_malloc(sizeof(*s));
948 /* Relink mem_base. */
949 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
950 if (tcg_init_ctx.temps[i].mem_base) {
951 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
952 tcg_debug_assert(b >= 0 && b < n);
953 s->temps[i].mem_base = &s->temps[b];
957 /* Claim an entry in tcg_ctxs */
958 n = qatomic_fetch_inc(&n_tcg_ctxs);
959 g_assert(n < ms->smp.max_cpus);
960 qatomic_set(&tcg_ctxs[n], s);
963 alloc_tcg_plugin_context(s);
967 qemu_mutex_lock(&region.lock);
968 err = tcg_region_initial_alloc__locked(tcg_ctx);
970 qemu_mutex_unlock(&region.lock);
972 #endif /* !CONFIG_USER_ONLY */
975 * Returns the size (in bytes) of all translated code (i.e. from all regions)
976 * currently in the cache.
977 * See also: tcg_code_capacity()
978 * Do not confuse with tcg_current_code_size(); that one applies to a single TCG context.
981 size_t tcg_code_size(void)
983 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
987 qemu_mutex_lock(&region.lock);
988 total = region.agg_size_full;
989 for (i = 0; i < n_ctxs; i++) {
990 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
993 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
994 g_assert(size <= s->code_gen_buffer_size);
997 qemu_mutex_unlock(&region.lock);
1002 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
1004 * See also: tcg_code_size()
1006 size_t tcg_code_capacity(void)
1008 size_t guard_size, capacity;
1010 /* no need for synchronization; these variables are set at init time */
1011 guard_size = region.stride - region.size;
1012 capacity = region.end + guard_size - region.start;
1013 capacity -= region.n * (guard_size + TCG_HIGHWATER);
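/*
 * Note (added for clarity): i.e. the whole span of all regions minus, per
 * region, the guard page and the TCG_HIGHWATER slack reserved below each
 * region's high-water mark.
 */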
1017 size_t tcg_tb_phys_invalidate_count(void)
1019 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
1023 for (i = 0; i < n_ctxs; i++) {
1024 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1026 total += qatomic_read(&s->tb_phys_invalidate_count);
1031 /* pool based memory allocation */
1032 void *tcg_malloc_internal(TCGContext *s, int size)
1037 if (size > TCG_POOL_CHUNK_SIZE) {
1038 /* big malloc: insert a new pool (XXX: could optimize) */
1039 p = g_malloc(sizeof(TCGPool) + size);
1041 p->next = s->pool_first_large;
1042 s->pool_first_large = p;
1045 p = s->pool_current;
1053 pool_size = TCG_POOL_CHUNK_SIZE;
1054 p = g_malloc(sizeof(TCGPool) + pool_size);
1055 p->size = pool_size;
1057 if (s->pool_current)
1058 s->pool_current->next = p;
1066 s->pool_current = p;
1067 s->pool_cur = p->data + size;
1068 s->pool_end = p->data + p->size;
1072 void tcg_pool_reset(TCGContext *s)
1075 for (p = s->pool_first_large; p; p = t) {
1079 s->pool_first_large = NULL;
1080 s->pool_cur = s->pool_end = NULL;
1081 s->pool_current = NULL;
1084 typedef struct TCGHelperInfo {
1091 #include "exec/helper-proto.h"
1093 static const TCGHelperInfo all_helpers[] = {
1094 #include "exec/helper-tcg.h"
1096 static GHashTable *helper_table;
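/*
 * Note (added for clarity): helper_table maps a helper's function pointer
 * (hence the direct-pointer hash, i.e. g_hash_table_new(NULL, NULL)) to its
 * TCGHelperInfo entry in all_helpers[]; it is consulted by tcg_gen_callN()
 * for the call flags and sizemask, and by tcg_find_helper() to recover the
 * helper name for the opcode dump.
 */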
1098 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1099 static void process_op_defs(TCGContext *s);
1100 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1101 TCGReg reg, const char *name);
1103 void tcg_context_init(TCGContext *s)
1105 int op, total_args, n, i;
1107 TCGArgConstraint *args_ct;
1110 memset(s, 0, sizeof(*s));
1113 /* Count total number of arguments and allocate the corresponding
1116 for (op = 0; op < NB_OPS; op++) {
1117 def = &tcg_op_defs[op];
1118 n = def->nb_iargs + def->nb_oargs;
1122 args_ct = g_new0(TCGArgConstraint, total_args);
1124 for (op = 0; op < NB_OPS; op++) {
1125 def = &tcg_op_defs[op];
1126 def->args_ct = args_ct;
1127 n = def->nb_iargs + def->nb_oargs;
1131 /* Register helpers. */
1132 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1133 helper_table = g_hash_table_new(NULL, NULL);
1135 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1136 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1137 (gpointer)&all_helpers[i]);
1143 /* Reverse the order of the saved registers, assuming they're all at
1144 the start of tcg_target_reg_alloc_order. */
1145 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1146 int r = tcg_target_reg_alloc_order[n];
1147 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1151 for (i = 0; i < n; ++i) {
1152 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1154 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1155 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1158 alloc_tcg_plugin_context(s);
1162 * In user-mode we simply share the init context among threads, since we
1163 * use a single region. See the documentation of tcg_region_init() for the
1164 * reasoning behind this.
1165 * In softmmu we will have at most max_cpus TCG threads.
1167 #ifdef CONFIG_USER_ONLY
1168 tcg_ctxs = &tcg_ctx;
1171 MachineState *ms = MACHINE(qdev_get_machine());
1172 unsigned int max_cpus = ms->smp.max_cpus;
1173 tcg_ctxs = g_new(TCGContext *, max_cpus);
1176 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1177 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1178 cpu_env = temp_tcgv_ptr(ts);
1182 * Allocate TBs right before their corresponding translated code, making
1183 * sure that TBs and code are on different cache lines.
1185 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1187 uintptr_t align = qemu_icache_linesize;
1188 TranslationBlock *tb;
1192 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1193 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1195 if (unlikely(next > s->code_gen_highwater)) {
1196 if (tcg_region_alloc(s)) {
1201 qatomic_set(&s->code_gen_ptr, next);
1202 s->data_gen_ptr = NULL;
1206 void tcg_prologue_init(TCGContext *s)
1208 size_t prologue_size, total_size;
1211 /* Put the prologue at the beginning of code_gen_buffer. */
1212 buf0 = s->code_gen_buffer;
1213 total_size = s->code_gen_buffer_size;
1216 s->data_gen_ptr = NULL;
1219 * The region trees are not yet configured, but tcg_splitwx_to_rx
1220 * needs the bounds for an assert.
1222 region.start = buf0;
1223 region.end = buf0 + total_size;
1225 #ifndef CONFIG_TCG_INTERPRETER
1226 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1229 /* Compute a high-water mark, at which we voluntarily flush the buffer
1230 and start over. The size here is arbitrary, significantly larger
1231 than we expect the code generation for any one opcode to require. */
1232 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1234 #ifdef TCG_TARGET_NEED_POOL_LABELS
1235 s->pool_labels = NULL;
1238 qemu_thread_jit_write();
1239 /* Generate the prologue. */
1240 tcg_target_qemu_prologue(s);
1242 #ifdef TCG_TARGET_NEED_POOL_LABELS
1243 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1245 int result = tcg_out_pool_finalize(s);
1246 tcg_debug_assert(result == 0);
1251 #ifndef CONFIG_TCG_INTERPRETER
1252 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1253 tcg_ptr_byte_diff(buf1, buf0));
1256 /* Deduct the prologue from the buffer. */
1257 prologue_size = tcg_current_code_size(s);
1258 s->code_gen_ptr = buf1;
1259 s->code_gen_buffer = buf1;
1261 total_size -= prologue_size;
1262 s->code_gen_buffer_size = total_size;
1264 tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);
1267 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1268 FILE *logfile = qemu_log_lock();
1269 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1270 if (s->data_gen_ptr) {
1271 size_t code_size = s->data_gen_ptr - buf0;
1272 size_t data_size = prologue_size - code_size;
1275 log_disas(buf0, code_size);
1277 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1278 if (sizeof(tcg_target_ulong) == 8) {
1279 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1280 (uintptr_t)s->data_gen_ptr + i,
1281 *(uint64_t *)(s->data_gen_ptr + i));
1283 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1284 (uintptr_t)s->data_gen_ptr + i,
1285 *(uint32_t *)(s->data_gen_ptr + i));
1289 log_disas(buf0, prologue_size);
1293 qemu_log_unlock(logfile);
1297 /* Assert that goto_ptr is implemented completely. */
1298 if (TCG_TARGET_HAS_goto_ptr) {
1299 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1303 void tcg_func_start(TCGContext *s)
1306 s->nb_temps = s->nb_globals;
1308 /* No temps have been previously allocated for size or locality. */
1309 memset(s->free_temps, 0, sizeof(s->free_temps));
1311 /* No constant temps have been previously allocated. */
1312 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1313 if (s->const_table[i]) {
1314 g_hash_table_remove_all(s->const_table[i]);
1320 s->current_frame_offset = s->frame_start;
1322 #ifdef CONFIG_DEBUG_TCG
1323 s->goto_tb_issue_mask = 0;
1326 QTAILQ_INIT(&s->ops);
1327 QTAILQ_INIT(&s->free_ops);
1328 QSIMPLEQ_INIT(&s->labels);
1331 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1333 int n = s->nb_temps++;
1335 if (n >= TCG_MAX_TEMPS) {
1336 tcg_raise_tb_overflow(s);
1338 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1341 static TCGTemp *tcg_global_alloc(TCGContext *s)
1345 tcg_debug_assert(s->nb_globals == s->nb_temps);
1346 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1348 ts = tcg_temp_alloc(s);
1349 ts->kind = TEMP_GLOBAL;
1354 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1355 TCGReg reg, const char *name)
1359 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1363 ts = tcg_global_alloc(s);
1364 ts->base_type = type;
1366 ts->kind = TEMP_FIXED;
1369 tcg_regset_set_reg(s->reserved_regs, reg);
1374 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1376 s->frame_start = start;
1377 s->frame_end = start + size;
1379 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1382 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1383 intptr_t offset, const char *name)
1385 TCGContext *s = tcg_ctx;
1386 TCGTemp *base_ts = tcgv_ptr_temp(base);
1387 TCGTemp *ts = tcg_global_alloc(s);
1388 int indirect_reg = 0, bigendian = 0;
1389 #ifdef HOST_WORDS_BIGENDIAN
1393 switch (base_ts->kind) {
1397 /* We do not support double-indirect registers. */
1398 tcg_debug_assert(!base_ts->indirect_reg);
1399 base_ts->indirect_base = 1;
1400 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1405 g_assert_not_reached();
1408 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1409 TCGTemp *ts2 = tcg_global_alloc(s);
1412 ts->base_type = TCG_TYPE_I64;
1413 ts->type = TCG_TYPE_I32;
1414 ts->indirect_reg = indirect_reg;
1415 ts->mem_allocated = 1;
1416 ts->mem_base = base_ts;
1417 ts->mem_offset = offset + bigendian * 4;
1418 pstrcpy(buf, sizeof(buf), name);
1419 pstrcat(buf, sizeof(buf), "_0");
1420 ts->name = strdup(buf);
1422 tcg_debug_assert(ts2 == ts + 1);
1423 ts2->base_type = TCG_TYPE_I64;
1424 ts2->type = TCG_TYPE_I32;
1425 ts2->indirect_reg = indirect_reg;
1426 ts2->mem_allocated = 1;
1427 ts2->mem_base = base_ts;
1428 ts2->mem_offset = offset + (1 - bigendian) * 4;
1429 pstrcpy(buf, sizeof(buf), name);
1430 pstrcat(buf, sizeof(buf), "_1");
1431 ts2->name = strdup(buf);
1433 ts->base_type = type;
1435 ts->indirect_reg = indirect_reg;
1436 ts->mem_allocated = 1;
1437 ts->mem_base = base_ts;
1438 ts->mem_offset = offset;
1444 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1446 TCGContext *s = tcg_ctx;
1447 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
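/*
 * Note (added for clarity): free temps are tracked in per-type bitmaps, with
 * a second bank of TCG_TYPE_COUNT entries for local temps, so a freed
 * TEMP_NORMAL is never recycled as a TEMP_LOCAL or vice versa.
 */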
1451 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1452 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1453 if (idx < TCG_MAX_TEMPS) {
1454 /* There is already an available temp with the right type. */
1455 clear_bit(idx, s->free_temps[k].l);
1457 ts = &s->temps[idx];
1458 ts->temp_allocated = 1;
1459 tcg_debug_assert(ts->base_type == type);
1460 tcg_debug_assert(ts->kind == kind);
1462 ts = tcg_temp_alloc(s);
1463 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1464 TCGTemp *ts2 = tcg_temp_alloc(s);
1466 ts->base_type = type;
1467 ts->type = TCG_TYPE_I32;
1468 ts->temp_allocated = 1;
1471 tcg_debug_assert(ts2 == ts + 1);
1472 ts2->base_type = TCG_TYPE_I64;
1473 ts2->type = TCG_TYPE_I32;
1474 ts2->temp_allocated = 1;
1477 ts->base_type = type;
1479 ts->temp_allocated = 1;
1484 #if defined(CONFIG_DEBUG_TCG)
1490 TCGv_vec tcg_temp_new_vec(TCGType type)
1494 #ifdef CONFIG_DEBUG_TCG
1497 assert(TCG_TARGET_HAS_v64);
1500 assert(TCG_TARGET_HAS_v128);
1503 assert(TCG_TARGET_HAS_v256);
1506 g_assert_not_reached();
1510 t = tcg_temp_new_internal(type, 0);
1511 return temp_tcgv_vec(t);
1514 /* Create a new temp of the same type as an existing temp. */
1515 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1517 TCGTemp *t = tcgv_vec_temp(match);
1519 tcg_debug_assert(t->temp_allocated != 0);
1521 t = tcg_temp_new_internal(t->base_type, 0);
1522 return temp_tcgv_vec(t);
1525 void tcg_temp_free_internal(TCGTemp *ts)
1527 TCGContext *s = tcg_ctx;
1530 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1531 if (ts->kind == TEMP_CONST) {
1535 #if defined(CONFIG_DEBUG_TCG)
1537 if (s->temps_in_use < 0) {
1538 fprintf(stderr, "More temporaries freed than allocated!\n");
1542 tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1543 tcg_debug_assert(ts->temp_allocated != 0);
1544 ts->temp_allocated = 0;
1547 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1548 set_bit(idx, s->free_temps[k].l);
1551 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1553 TCGContext *s = tcg_ctx;
1554 GHashTable *h = s->const_table[type];
1558 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1559 s->const_table[type] = h;
1562 ts = g_hash_table_lookup(h, &val);
1564 ts = tcg_temp_alloc(s);
1566 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1567 TCGTemp *ts2 = tcg_temp_alloc(s);
1569 ts->base_type = TCG_TYPE_I64;
1570 ts->type = TCG_TYPE_I32;
1571 ts->kind = TEMP_CONST;
1572 ts->temp_allocated = 1;
1574 * Retain the full value of the 64-bit constant in the low
1575 * part, so that the hash table works. Actual uses will
1576 * truncate the value to the low part.
1580 tcg_debug_assert(ts2 == ts + 1);
1581 ts2->base_type = TCG_TYPE_I64;
1582 ts2->type = TCG_TYPE_I32;
1583 ts2->kind = TEMP_CONST;
1584 ts2->temp_allocated = 1;
1585 ts2->val = val >> 32;
1587 ts->base_type = type;
1589 ts->kind = TEMP_CONST;
1590 ts->temp_allocated = 1;
1593 g_hash_table_insert(h, &ts->val, ts);
1599 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1601 val = dup_const(vece, val);
1602 return temp_tcgv_vec(tcg_constant_internal(type, val));
1605 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1607 TCGTemp *t = tcgv_vec_temp(match);
1609 tcg_debug_assert(t->temp_allocated != 0);
1610 return tcg_constant_vec(t->base_type, vece, val);
1613 TCGv_i32 tcg_const_i32(int32_t val)
1616 t0 = tcg_temp_new_i32();
1617 tcg_gen_movi_i32(t0, val);
1621 TCGv_i64 tcg_const_i64(int64_t val)
1624 t0 = tcg_temp_new_i64();
1625 tcg_gen_movi_i64(t0, val);
1629 TCGv_i32 tcg_const_local_i32(int32_t val)
1632 t0 = tcg_temp_local_new_i32();
1633 tcg_gen_movi_i32(t0, val);
1637 TCGv_i64 tcg_const_local_i64(int64_t val)
1640 t0 = tcg_temp_local_new_i64();
1641 tcg_gen_movi_i64(t0, val);
1645 #if defined(CONFIG_DEBUG_TCG)
1646 void tcg_clear_temp_count(void)
1648 TCGContext *s = tcg_ctx;
1649 s->temps_in_use = 0;
1652 int tcg_check_temp_count(void)
1654 TCGContext *s = tcg_ctx;
1655 if (s->temps_in_use) {
1656 /* Clear the count so that we don't give another
1657 * warning immediately next time around.
1659 s->temps_in_use = 0;
1666 /* Return true if OP may appear in the opcode stream.
1667 Test the runtime variable that controls each opcode. */
1668 bool tcg_op_supported(TCGOpcode op)
1671 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1674 case INDEX_op_discard:
1675 case INDEX_op_set_label:
1679 case INDEX_op_insn_start:
1680 case INDEX_op_exit_tb:
1681 case INDEX_op_goto_tb:
1682 case INDEX_op_qemu_ld_i32:
1683 case INDEX_op_qemu_st_i32:
1684 case INDEX_op_qemu_ld_i64:
1685 case INDEX_op_qemu_st_i64:
1688 case INDEX_op_qemu_st8_i32:
1689 return TCG_TARGET_HAS_qemu_st8_i32;
1691 case INDEX_op_goto_ptr:
1692 return TCG_TARGET_HAS_goto_ptr;
1694 case INDEX_op_mov_i32:
1695 case INDEX_op_setcond_i32:
1696 case INDEX_op_brcond_i32:
1697 case INDEX_op_ld8u_i32:
1698 case INDEX_op_ld8s_i32:
1699 case INDEX_op_ld16u_i32:
1700 case INDEX_op_ld16s_i32:
1701 case INDEX_op_ld_i32:
1702 case INDEX_op_st8_i32:
1703 case INDEX_op_st16_i32:
1704 case INDEX_op_st_i32:
1705 case INDEX_op_add_i32:
1706 case INDEX_op_sub_i32:
1707 case INDEX_op_mul_i32:
1708 case INDEX_op_and_i32:
1709 case INDEX_op_or_i32:
1710 case INDEX_op_xor_i32:
1711 case INDEX_op_shl_i32:
1712 case INDEX_op_shr_i32:
1713 case INDEX_op_sar_i32:
1716 case INDEX_op_movcond_i32:
1717 return TCG_TARGET_HAS_movcond_i32;
1718 case INDEX_op_div_i32:
1719 case INDEX_op_divu_i32:
1720 return TCG_TARGET_HAS_div_i32;
1721 case INDEX_op_rem_i32:
1722 case INDEX_op_remu_i32:
1723 return TCG_TARGET_HAS_rem_i32;
1724 case INDEX_op_div2_i32:
1725 case INDEX_op_divu2_i32:
1726 return TCG_TARGET_HAS_div2_i32;
1727 case INDEX_op_rotl_i32:
1728 case INDEX_op_rotr_i32:
1729 return TCG_TARGET_HAS_rot_i32;
1730 case INDEX_op_deposit_i32:
1731 return TCG_TARGET_HAS_deposit_i32;
1732 case INDEX_op_extract_i32:
1733 return TCG_TARGET_HAS_extract_i32;
1734 case INDEX_op_sextract_i32:
1735 return TCG_TARGET_HAS_sextract_i32;
1736 case INDEX_op_extract2_i32:
1737 return TCG_TARGET_HAS_extract2_i32;
1738 case INDEX_op_add2_i32:
1739 return TCG_TARGET_HAS_add2_i32;
1740 case INDEX_op_sub2_i32:
1741 return TCG_TARGET_HAS_sub2_i32;
1742 case INDEX_op_mulu2_i32:
1743 return TCG_TARGET_HAS_mulu2_i32;
1744 case INDEX_op_muls2_i32:
1745 return TCG_TARGET_HAS_muls2_i32;
1746 case INDEX_op_muluh_i32:
1747 return TCG_TARGET_HAS_muluh_i32;
1748 case INDEX_op_mulsh_i32:
1749 return TCG_TARGET_HAS_mulsh_i32;
1750 case INDEX_op_ext8s_i32:
1751 return TCG_TARGET_HAS_ext8s_i32;
1752 case INDEX_op_ext16s_i32:
1753 return TCG_TARGET_HAS_ext16s_i32;
1754 case INDEX_op_ext8u_i32:
1755 return TCG_TARGET_HAS_ext8u_i32;
1756 case INDEX_op_ext16u_i32:
1757 return TCG_TARGET_HAS_ext16u_i32;
1758 case INDEX_op_bswap16_i32:
1759 return TCG_TARGET_HAS_bswap16_i32;
1760 case INDEX_op_bswap32_i32:
1761 return TCG_TARGET_HAS_bswap32_i32;
1762 case INDEX_op_not_i32:
1763 return TCG_TARGET_HAS_not_i32;
1764 case INDEX_op_neg_i32:
1765 return TCG_TARGET_HAS_neg_i32;
1766 case INDEX_op_andc_i32:
1767 return TCG_TARGET_HAS_andc_i32;
1768 case INDEX_op_orc_i32:
1769 return TCG_TARGET_HAS_orc_i32;
1770 case INDEX_op_eqv_i32:
1771 return TCG_TARGET_HAS_eqv_i32;
1772 case INDEX_op_nand_i32:
1773 return TCG_TARGET_HAS_nand_i32;
1774 case INDEX_op_nor_i32:
1775 return TCG_TARGET_HAS_nor_i32;
1776 case INDEX_op_clz_i32:
1777 return TCG_TARGET_HAS_clz_i32;
1778 case INDEX_op_ctz_i32:
1779 return TCG_TARGET_HAS_ctz_i32;
1780 case INDEX_op_ctpop_i32:
1781 return TCG_TARGET_HAS_ctpop_i32;
1783 case INDEX_op_brcond2_i32:
1784 case INDEX_op_setcond2_i32:
1785 return TCG_TARGET_REG_BITS == 32;
1787 case INDEX_op_mov_i64:
1788 case INDEX_op_setcond_i64:
1789 case INDEX_op_brcond_i64:
1790 case INDEX_op_ld8u_i64:
1791 case INDEX_op_ld8s_i64:
1792 case INDEX_op_ld16u_i64:
1793 case INDEX_op_ld16s_i64:
1794 case INDEX_op_ld32u_i64:
1795 case INDEX_op_ld32s_i64:
1796 case INDEX_op_ld_i64:
1797 case INDEX_op_st8_i64:
1798 case INDEX_op_st16_i64:
1799 case INDEX_op_st32_i64:
1800 case INDEX_op_st_i64:
1801 case INDEX_op_add_i64:
1802 case INDEX_op_sub_i64:
1803 case INDEX_op_mul_i64:
1804 case INDEX_op_and_i64:
1805 case INDEX_op_or_i64:
1806 case INDEX_op_xor_i64:
1807 case INDEX_op_shl_i64:
1808 case INDEX_op_shr_i64:
1809 case INDEX_op_sar_i64:
1810 case INDEX_op_ext_i32_i64:
1811 case INDEX_op_extu_i32_i64:
1812 return TCG_TARGET_REG_BITS == 64;
1814 case INDEX_op_movcond_i64:
1815 return TCG_TARGET_HAS_movcond_i64;
1816 case INDEX_op_div_i64:
1817 case INDEX_op_divu_i64:
1818 return TCG_TARGET_HAS_div_i64;
1819 case INDEX_op_rem_i64:
1820 case INDEX_op_remu_i64:
1821 return TCG_TARGET_HAS_rem_i64;
1822 case INDEX_op_div2_i64:
1823 case INDEX_op_divu2_i64:
1824 return TCG_TARGET_HAS_div2_i64;
1825 case INDEX_op_rotl_i64:
1826 case INDEX_op_rotr_i64:
1827 return TCG_TARGET_HAS_rot_i64;
1828 case INDEX_op_deposit_i64:
1829 return TCG_TARGET_HAS_deposit_i64;
1830 case INDEX_op_extract_i64:
1831 return TCG_TARGET_HAS_extract_i64;
1832 case INDEX_op_sextract_i64:
1833 return TCG_TARGET_HAS_sextract_i64;
1834 case INDEX_op_extract2_i64:
1835 return TCG_TARGET_HAS_extract2_i64;
1836 case INDEX_op_extrl_i64_i32:
1837 return TCG_TARGET_HAS_extrl_i64_i32;
1838 case INDEX_op_extrh_i64_i32:
1839 return TCG_TARGET_HAS_extrh_i64_i32;
1840 case INDEX_op_ext8s_i64:
1841 return TCG_TARGET_HAS_ext8s_i64;
1842 case INDEX_op_ext16s_i64:
1843 return TCG_TARGET_HAS_ext16s_i64;
1844 case INDEX_op_ext32s_i64:
1845 return TCG_TARGET_HAS_ext32s_i64;
1846 case INDEX_op_ext8u_i64:
1847 return TCG_TARGET_HAS_ext8u_i64;
1848 case INDEX_op_ext16u_i64:
1849 return TCG_TARGET_HAS_ext16u_i64;
1850 case INDEX_op_ext32u_i64:
1851 return TCG_TARGET_HAS_ext32u_i64;
1852 case INDEX_op_bswap16_i64:
1853 return TCG_TARGET_HAS_bswap16_i64;
1854 case INDEX_op_bswap32_i64:
1855 return TCG_TARGET_HAS_bswap32_i64;
1856 case INDEX_op_bswap64_i64:
1857 return TCG_TARGET_HAS_bswap64_i64;
1858 case INDEX_op_not_i64:
1859 return TCG_TARGET_HAS_not_i64;
1860 case INDEX_op_neg_i64:
1861 return TCG_TARGET_HAS_neg_i64;
1862 case INDEX_op_andc_i64:
1863 return TCG_TARGET_HAS_andc_i64;
1864 case INDEX_op_orc_i64:
1865 return TCG_TARGET_HAS_orc_i64;
1866 case INDEX_op_eqv_i64:
1867 return TCG_TARGET_HAS_eqv_i64;
1868 case INDEX_op_nand_i64:
1869 return TCG_TARGET_HAS_nand_i64;
1870 case INDEX_op_nor_i64:
1871 return TCG_TARGET_HAS_nor_i64;
1872 case INDEX_op_clz_i64:
1873 return TCG_TARGET_HAS_clz_i64;
1874 case INDEX_op_ctz_i64:
1875 return TCG_TARGET_HAS_ctz_i64;
1876 case INDEX_op_ctpop_i64:
1877 return TCG_TARGET_HAS_ctpop_i64;
1878 case INDEX_op_add2_i64:
1879 return TCG_TARGET_HAS_add2_i64;
1880 case INDEX_op_sub2_i64:
1881 return TCG_TARGET_HAS_sub2_i64;
1882 case INDEX_op_mulu2_i64:
1883 return TCG_TARGET_HAS_mulu2_i64;
1884 case INDEX_op_muls2_i64:
1885 return TCG_TARGET_HAS_muls2_i64;
1886 case INDEX_op_muluh_i64:
1887 return TCG_TARGET_HAS_muluh_i64;
1888 case INDEX_op_mulsh_i64:
1889 return TCG_TARGET_HAS_mulsh_i64;
1891 case INDEX_op_mov_vec:
1892 case INDEX_op_dup_vec:
1893 case INDEX_op_dupm_vec:
1894 case INDEX_op_ld_vec:
1895 case INDEX_op_st_vec:
1896 case INDEX_op_add_vec:
1897 case INDEX_op_sub_vec:
1898 case INDEX_op_and_vec:
1899 case INDEX_op_or_vec:
1900 case INDEX_op_xor_vec:
1901 case INDEX_op_cmp_vec:
1903 case INDEX_op_dup2_vec:
1904 return have_vec && TCG_TARGET_REG_BITS == 32;
1905 case INDEX_op_not_vec:
1906 return have_vec && TCG_TARGET_HAS_not_vec;
1907 case INDEX_op_neg_vec:
1908 return have_vec && TCG_TARGET_HAS_neg_vec;
1909 case INDEX_op_abs_vec:
1910 return have_vec && TCG_TARGET_HAS_abs_vec;
1911 case INDEX_op_andc_vec:
1912 return have_vec && TCG_TARGET_HAS_andc_vec;
1913 case INDEX_op_orc_vec:
1914 return have_vec && TCG_TARGET_HAS_orc_vec;
1915 case INDEX_op_mul_vec:
1916 return have_vec && TCG_TARGET_HAS_mul_vec;
1917 case INDEX_op_shli_vec:
1918 case INDEX_op_shri_vec:
1919 case INDEX_op_sari_vec:
1920 return have_vec && TCG_TARGET_HAS_shi_vec;
1921 case INDEX_op_shls_vec:
1922 case INDEX_op_shrs_vec:
1923 case INDEX_op_sars_vec:
1924 return have_vec && TCG_TARGET_HAS_shs_vec;
1925 case INDEX_op_shlv_vec:
1926 case INDEX_op_shrv_vec:
1927 case INDEX_op_sarv_vec:
1928 return have_vec && TCG_TARGET_HAS_shv_vec;
1929 case INDEX_op_rotli_vec:
1930 return have_vec && TCG_TARGET_HAS_roti_vec;
1931 case INDEX_op_rotls_vec:
1932 return have_vec && TCG_TARGET_HAS_rots_vec;
1933 case INDEX_op_rotlv_vec:
1934 case INDEX_op_rotrv_vec:
1935 return have_vec && TCG_TARGET_HAS_rotv_vec;
1936 case INDEX_op_ssadd_vec:
1937 case INDEX_op_usadd_vec:
1938 case INDEX_op_sssub_vec:
1939 case INDEX_op_ussub_vec:
1940 return have_vec && TCG_TARGET_HAS_sat_vec;
1941 case INDEX_op_smin_vec:
1942 case INDEX_op_umin_vec:
1943 case INDEX_op_smax_vec:
1944 case INDEX_op_umax_vec:
1945 return have_vec && TCG_TARGET_HAS_minmax_vec;
1946 case INDEX_op_bitsel_vec:
1947 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1948 case INDEX_op_cmpsel_vec:
1949 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1952 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1957 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1958 and endian swap. Maybe it would be better to do the alignment
1959 and endian swap in tcg_reg_alloc_call(). */
1960 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1962 int i, real_args, nb_rets, pi;
1963 unsigned sizemask, flags;
1964 TCGHelperInfo *info;
1967 info = g_hash_table_lookup(helper_table, (gpointer)func);
1968 flags = info->flags;
1969 sizemask = info->sizemask;
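/*
 * Note (added for clarity, inferred from the uses below): sizemask packs two
 * bits per value -- the low bit of each pair marks a 64-bit value, the high
 * bit a signed one -- with pair 0 describing the return value and pair i+1
 * describing argument i.
 */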
1971 #ifdef CONFIG_PLUGIN
1972 /* detect non-plugin helpers */
1973 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1974 tcg_ctx->plugin_insn->calls_helpers = true;
1978 #if defined(__sparc__) && !defined(__arch64__) \
1979 && !defined(CONFIG_TCG_INTERPRETER)
1980 /* We have 64-bit values in one register, but need to pass as two
1981 separate parameters. Split them. */
1982 int orig_sizemask = sizemask;
1983 int orig_nargs = nargs;
1984 TCGv_i64 retl, reth;
1985 TCGTemp *split_args[MAX_OPC_PARAM];
1989 if (sizemask != 0) {
1990 for (i = real_args = 0; i < nargs; ++i) {
1991 int is_64bit = sizemask & (1 << (i+1)*2);
1993 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1994 TCGv_i32 h = tcg_temp_new_i32();
1995 TCGv_i32 l = tcg_temp_new_i32();
1996 tcg_gen_extr_i64_i32(l, h, orig);
1997 split_args[real_args++] = tcgv_i32_temp(h);
1998 split_args[real_args++] = tcgv_i32_temp(l);
2000 split_args[real_args++] = args[i];
2007 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2008 for (i = 0; i < nargs; ++i) {
2009 int is_64bit = sizemask & (1 << (i+1)*2);
2010 int is_signed = sizemask & (2 << (i+1)*2);
2012 TCGv_i64 temp = tcg_temp_new_i64();
2013 TCGv_i64 orig = temp_tcgv_i64(args[i]);
2015 tcg_gen_ext32s_i64(temp, orig);
2017 tcg_gen_ext32u_i64(temp, orig);
2019 args[i] = tcgv_i64_temp(temp);
2022 #endif /* TCG_TARGET_EXTEND_ARGS */
2024 op = tcg_emit_op(INDEX_op_call);
2028 #if defined(__sparc__) && !defined(__arch64__) \
2029 && !defined(CONFIG_TCG_INTERPRETER)
2030 if (orig_sizemask & 1) {
2031 /* The 32-bit ABI is going to return the 64-bit value in
2032 the %o0/%o1 register pair. Prepare for this by using
2033 two return temporaries, and reassemble below. */
2034 retl = tcg_temp_new_i64();
2035 reth = tcg_temp_new_i64();
2036 op->args[pi++] = tcgv_i64_arg(reth);
2037 op->args[pi++] = tcgv_i64_arg(retl);
2040 op->args[pi++] = temp_arg(ret);
2044 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2045 #ifdef HOST_WORDS_BIGENDIAN
2046 op->args[pi++] = temp_arg(ret + 1);
2047 op->args[pi++] = temp_arg(ret);
2049 op->args[pi++] = temp_arg(ret);
2050 op->args[pi++] = temp_arg(ret + 1);
2054 op->args[pi++] = temp_arg(ret);
2061 TCGOP_CALLO(op) = nb_rets;
2064 for (i = 0; i < nargs; i++) {
2065 int is_64bit = sizemask & (1 << (i+1)*2);
2066 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2067 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2068 /* some targets want aligned 64 bit args */
2069 if (real_args & 1) {
2070 op->args[pi++] = TCG_CALL_DUMMY_ARG;
2074 /* If stack grows up, then we will be placing successive
2075 arguments at lower addresses, which means we need to
2076 reverse the order compared to how we would normally
2077 treat either big or little-endian. For those arguments
2078 that will wind up in registers, this still works for
2079 HPPA (the only current STACK_GROWSUP target) since the
2080 argument registers are *also* allocated in decreasing
2081 order. If another such target is added, this logic may
2082 have to get more complicated to differentiate between
2083 stack arguments and register arguments. */
2084 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2085 op->args[pi++] = temp_arg(args[i] + 1);
2086 op->args[pi++] = temp_arg(args[i]);
2088 op->args[pi++] = temp_arg(args[i]);
2089 op->args[pi++] = temp_arg(args[i] + 1);
2095 op->args[pi++] = temp_arg(args[i]);
2098 op->args[pi++] = (uintptr_t)func;
2099 op->args[pi++] = flags;
2100 TCGOP_CALLI(op) = real_args;
2102 /* Make sure the fields didn't overflow. */
2103 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2104 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2106 #if defined(__sparc__) && !defined(__arch64__) \
2107 && !defined(CONFIG_TCG_INTERPRETER)
2108 /* Free all of the parts we allocated above. */
2109 for (i = real_args = 0; i < orig_nargs; ++i) {
2110 int is_64bit = orig_sizemask & (1 << (i+1)*2);
2112 tcg_temp_free_internal(args[real_args++]);
2113 tcg_temp_free_internal(args[real_args++]);
2118 if (orig_sizemask & 1) {
2119 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
2120 Note that describing these as TCGv_i64 eliminates an unnecessary
2121 zero-extension that tcg_gen_concat_i32_i64 would create. */
2122 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2123 tcg_temp_free_i64(retl);
2124 tcg_temp_free_i64(reth);
2126 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2127 for (i = 0; i < nargs; ++i) {
2128 int is_64bit = sizemask & (1 << (i+1)*2);
2130 tcg_temp_free_internal(args[i]);
2133 #endif /* TCG_TARGET_EXTEND_ARGS */
2136 static void tcg_reg_alloc_start(TCGContext *s)
2140 for (i = 0, n = s->nb_temps; i < n; i++) {
2141 TCGTemp *ts = &s->temps[i];
2142 TCGTempVal val = TEMP_VAL_MEM;
2146 val = TEMP_VAL_CONST;
2154 val = TEMP_VAL_DEAD;
2157 ts->mem_allocated = 0;
2160 g_assert_not_reached();
2165 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2168 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2171 int idx = temp_idx(ts);
2176 pstrcpy(buf, buf_size, ts->name);
2179 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2182 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2187 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2189 #if TCG_TARGET_REG_BITS > 32
2191 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2197 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2198 64 << (ts->type - TCG_TYPE_V64), ts->val);
2201 g_assert_not_reached();
2208 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2209 int buf_size, TCGArg arg)
2211 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2214 /* Find helper name. */
2215 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2217 const char *ret = NULL;
2219 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2227 static const char * const cond_name[] =
2229 [TCG_COND_NEVER] = "never",
2230 [TCG_COND_ALWAYS] = "always",
2231 [TCG_COND_EQ] = "eq",
2232 [TCG_COND_NE] = "ne",
2233 [TCG_COND_LT] = "lt",
2234 [TCG_COND_GE] = "ge",
2235 [TCG_COND_LE] = "le",
2236 [TCG_COND_GT] = "gt",
2237 [TCG_COND_LTU] = "ltu",
2238 [TCG_COND_GEU] = "geu",
2239 [TCG_COND_LEU] = "leu",
2240 [TCG_COND_GTU] = "gtu"
2243 static const char * const ldst_name[] =
2259 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2260 #ifdef TARGET_ALIGNED_ONLY
2261 [MO_UNALN >> MO_ASHIFT] = "un+",
2262 [MO_ALIGN >> MO_ASHIFT] = "",
2264 [MO_UNALN >> MO_ASHIFT] = "",
2265 [MO_ALIGN >> MO_ASHIFT] = "al+",
2267 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2268 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2269 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2270 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2271 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2272 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2275 static inline bool tcg_regset_single(TCGRegSet d)
2277 return (d & (d - 1)) == 0;
2280 static inline TCGReg tcg_regset_first(TCGRegSet d)
2282 if (TCG_TARGET_NB_REGS <= 32) {
2289 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2294 QTAILQ_FOREACH(op, &s->ops, link) {
2295 int i, k, nb_oargs, nb_iargs, nb_cargs;
2296 const TCGOpDef *def;
2301 def = &tcg_op_defs[c];
2303 if (c == INDEX_op_insn_start) {
2305 col += qemu_log("\n ----");
2307 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2309 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2310 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2314 col += qemu_log(" " TARGET_FMT_lx, a);
2316 } else if (c == INDEX_op_call) {
2317 /* variable number of arguments */
2318 nb_oargs = TCGOP_CALLO(op);
2319 nb_iargs = TCGOP_CALLI(op);
2320 nb_cargs = def->nb_cargs;
2322 /* function name, flags, out args */
2323 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2324 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2325 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2326 for (i = 0; i < nb_oargs; i++) {
2327 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2330 for (i = 0; i < nb_iargs; i++) {
2331 TCGArg arg = op->args[nb_oargs + i];
2332 const char *t = "<dummy>";
2333 if (arg != TCG_CALL_DUMMY_ARG) {
2334 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2336 col += qemu_log(",%s", t);
2339 col += qemu_log(" %s ", def->name);
2341 nb_oargs = def->nb_oargs;
2342 nb_iargs = def->nb_iargs;
2343 nb_cargs = def->nb_cargs;
2345 if (def->flags & TCG_OPF_VECTOR) {
2346 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2347 8 << TCGOP_VECE(op));
2351 for (i = 0; i < nb_oargs; i++) {
2353 col += qemu_log(",");
2355 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2358 for (i = 0; i < nb_iargs; i++) {
2360 col += qemu_log(",");
2362 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2366 case INDEX_op_brcond_i32:
2367 case INDEX_op_setcond_i32:
2368 case INDEX_op_movcond_i32:
2369 case INDEX_op_brcond2_i32:
2370 case INDEX_op_setcond2_i32:
2371 case INDEX_op_brcond_i64:
2372 case INDEX_op_setcond_i64:
2373 case INDEX_op_movcond_i64:
2374 case INDEX_op_cmp_vec:
2375 case INDEX_op_cmpsel_vec:
2376 if (op->args[k] < ARRAY_SIZE(cond_name)
2377 && cond_name[op->args[k]]) {
2378 col += qemu_log(",%s", cond_name[op->args[k++]]);
2380 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2384 case INDEX_op_qemu_ld_i32:
2385 case INDEX_op_qemu_st_i32:
2386 case INDEX_op_qemu_st8_i32:
2387 case INDEX_op_qemu_ld_i64:
2388 case INDEX_op_qemu_st_i64:
2390 TCGMemOpIdx oi = op->args[k++];
2391 MemOp op = get_memop(oi);
2392 unsigned ix = get_mmuidx(oi);
2394 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2395 col += qemu_log(",$0x%x,%u", op, ix);
2397 const char *s_al, *s_op;
2398 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2399 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2400 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2410 case INDEX_op_set_label:
2412 case INDEX_op_brcond_i32:
2413 case INDEX_op_brcond_i64:
2414 case INDEX_op_brcond2_i32:
2415 col += qemu_log("%s$L%d", k ? "," : "",
2416 arg_label(op->args[k])->id);
2422 for (; i < nb_cargs; i++, k++) {
2423 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2427 if (have_prefs || op->life) {
2429 QemuLogFile *logfile;
2432 logfile = qatomic_rcu_read(&qemu_logfile);
2434 for (; col < 40; ++col) {
2435 putc(' ', logfile->fd);
2442 unsigned life = op->life;
2444 if (life & (SYNC_ARG * 3)) {
2446 for (i = 0; i < 2; ++i) {
2447 if (life & (SYNC_ARG << i)) {
2455 for (i = 0; life; ++i, life >>= 1) {
2464 for (i = 0; i < nb_oargs; ++i) {
2465 TCGRegSet set = op->output_pref[i];
2474 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2476 #ifdef CONFIG_DEBUG_TCG
2477 } else if (tcg_regset_single(set)) {
2478 TCGReg reg = tcg_regset_first(set);
2479 qemu_log("%s", tcg_target_reg_names[reg]);
2481 } else if (TCG_TARGET_NB_REGS <= 32) {
2482 qemu_log("%#x", (uint32_t)set);
2484 qemu_log("%#" PRIx64, (uint64_t)set);
2493 /* We give more priority to constraints with fewer registers. */
2494 static int get_constraint_priority(const TCGOpDef *def, int k)
2496 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2499 if (arg_ct->oalias) {
2500 /* an alias is equivalent to a single register */
2503 n = ctpop64(arg_ct->regs);
2505 return TCG_TARGET_NB_REGS - n + 1;
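/*
 * Worked example, assuming a host with TCG_TARGET_NB_REGS == 16: a
 * constraint accepting any register (n == 16) gets priority 1, while a
 * constraint tied to a single register (n == 1) gets priority 16, so
 * sort_constraints() below allocates the most restrictive operands first.
 */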
2508 /* sort from highest priority to lowest */
2509 static void sort_constraints(TCGOpDef *def, int start, int n)
2512 TCGArgConstraint *a = def->args_ct;
2514 for (i = 0; i < n; i++) {
2515 a[start + i].sort_index = start + i;
2520 for (i = 0; i < n - 1; i++) {
2521 for (j = i + 1; j < n; j++) {
2522 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2523 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2525 int tmp = a[start + i].sort_index;
2526 a[start + i].sort_index = a[start + j].sort_index;
2527 a[start + j].sort_index = tmp;
2533 static void process_op_defs(TCGContext *s)
2537 for (op = 0; op < NB_OPS; op++) {
2538 TCGOpDef *def = &tcg_op_defs[op];
2539 const TCGTargetOpDef *tdefs;
2542 if (def->flags & TCG_OPF_NOT_PRESENT) {
2546 nb_args = def->nb_iargs + def->nb_oargs;
2552 * Macro magic should make it impossible, but double-check that
2553 * the array index is in range. Since the signedness of an enum
2554 * is implementation defined, force the result to unsigned.
2556 unsigned con_set = tcg_target_op_def(op);
2557 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2558 tdefs = &constraint_sets[con_set];
2560 for (i = 0; i < nb_args; i++) {
2561 const char *ct_str = tdefs->args_ct_str[i];
2562 /* Incomplete TCGTargetOpDef entry. */
2563 tcg_debug_assert(ct_str != NULL);
2565 while (*ct_str != '\0') {
2569 int oarg = *ct_str - '0';
2570 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2571 tcg_debug_assert(oarg < def->nb_oargs);
2572 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2573 def->args_ct[i] = def->args_ct[oarg];
2574 /* The output sets oalias. */
2575 def->args_ct[oarg].oalias = true;
2576 def->args_ct[oarg].alias_index = i;
2577 /* The input sets ialias. */
2578 def->args_ct[i].ialias = true;
2579 def->args_ct[i].alias_index = oarg;
2584 def->args_ct[i].newreg = true;
2588 def->args_ct[i].ct |= TCG_CT_CONST;
2592 /* Include all of the target-specific constraints. */
2595 #define CONST(CASE, MASK) \
2596 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2597 #define REGS(CASE, MASK) \
2598 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2600 #include "tcg-target-con-str.h"
2605 /* Typo in TCGTargetOpDef constraint. */
2606 g_assert_not_reached();
2611 /* TCGTargetOpDef entry with too much information? */
2612 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2614 /* sort the constraints (XXX: this is just a heuristic) */
2615 sort_constraints(def, 0, def->nb_oargs);
2616 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2620 void tcg_op_remove(TCGContext *s, TCGOp *op)
2626 label = arg_label(op->args[0]);
2629 case INDEX_op_brcond_i32:
2630 case INDEX_op_brcond_i64:
2631 label = arg_label(op->args[3]);
2634 case INDEX_op_brcond2_i32:
2635 label = arg_label(op->args[5]);
2642 QTAILQ_REMOVE(&s->ops, op, link);
2643 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2646 #ifdef CONFIG_PROFILER
2647 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2651 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2653 TCGContext *s = tcg_ctx;
2656 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2657 op = tcg_malloc(sizeof(TCGOp));
2659 op = QTAILQ_FIRST(&s->free_ops);
2660 QTAILQ_REMOVE(&s->free_ops, op, link);
2662 memset(op, 0, offsetof(TCGOp, link));
2669 TCGOp *tcg_emit_op(TCGOpcode opc)
2671 TCGOp *op = tcg_op_alloc(opc);
2672 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2676 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2678 TCGOp *new_op = tcg_op_alloc(opc);
2679 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2683 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2685 TCGOp *new_op = tcg_op_alloc(opc);
2686 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2690 /* Reachable analysis : remove unreachable code. */
2691 static void reachable_code_pass(TCGContext *s)
2693 TCGOp *op, *op_next;
2696 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2702 case INDEX_op_set_label:
2703 label = arg_label(op->args[0]);
2704 if (label->refs == 0) {
2706 * While there is an occasional backward branch, virtually
2707 * all branches generated by the translators are forward.
2708 * Which means that generally we will have already removed
2709 * all of the references to the label that will ever exist, and there is
2710 * little to be gained by iterating.
2714 /* Once we see a label, insns become live again. */
2719 * Optimization can fold conditional branches to unconditional.
2720 * If we find a label with one reference which is preceded by
2721 * an unconditional branch to it, remove both. This step has to
2722 * wait until the dead code in between them has been removed.
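 *
 * Example: once the optimizer has folded the condition, a remaining
 *     br $L3
 *     set_label $L3
 * pair is deleted here as a unit.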
2724 if (label->refs == 1) {
2725 TCGOp *op_prev = QTAILQ_PREV(op, link);
2726 if (op_prev->opc == INDEX_op_br &&
2727 label == arg_label(op_prev->args[0])) {
2728 tcg_op_remove(s, op_prev);
2736 case INDEX_op_exit_tb:
2737 case INDEX_op_goto_ptr:
2738 /* Unconditional branches; everything following is dead. */
2743 /* Notice noreturn helper calls, raising exceptions. */
2744 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2745 if (call_flags & TCG_CALL_NO_RETURN) {
2750 case INDEX_op_insn_start:
2751 /* Never remove -- we need to keep these for unwind. */
2760 tcg_op_remove(s, op);
2768 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2769 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2771 /* For liveness_pass_1, the register preferences for a given temp. */
2772 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2774 return ts->state_ptr;
2777 /* For liveness_pass_1, reset the preferences for a given temp to the
2778 * maximal regset for its type.
2780 static inline void la_reset_pref(TCGTemp *ts)
2783 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2786 /* liveness analysis: end of function: all temps are dead, and globals
2787 should be in memory. */
2788 static void la_func_end(TCGContext *s, int ng, int nt)
2792 for (i = 0; i < ng; ++i) {
2793 s->temps[i].state = TS_DEAD | TS_MEM;
2794 la_reset_pref(&s->temps[i]);
2796 for (i = ng; i < nt; ++i) {
2797 s->temps[i].state = TS_DEAD;
2798 la_reset_pref(&s->temps[i]);
2802 /* liveness analysis: end of basic block: all temps are dead, globals
2803 and local temps should be in memory. */
2804 static void la_bb_end(TCGContext *s, int ng, int nt)
2808 for (i = 0; i < nt; ++i) {
2809 TCGTemp *ts = &s->temps[i];
2816 state = TS_DEAD | TS_MEM;
2823 g_assert_not_reached();
2830 /* liveness analysis: sync globals back to memory. */
2831 static void la_global_sync(TCGContext *s, int ng)
2835 for (i = 0; i < ng; ++i) {
2836 int state = s->temps[i].state;
2837 s->temps[i].state = state | TS_MEM;
2838 if (state == TS_DEAD) {
2839 /* If the global was previously dead, reset prefs. */
2840 la_reset_pref(&s->temps[i]);
2846 * liveness analysis: conditional branch: all temps are dead,
2847 * globals and local temps should be synced.
2849 static void la_bb_sync(TCGContext *s, int ng, int nt)
2851 la_global_sync(s, ng);
2853 for (int i = ng; i < nt; ++i) {
2854 TCGTemp *ts = &s->temps[i];
2860 ts->state = state | TS_MEM;
2861 if (state != TS_DEAD) {
2866 s->temps[i].state = TS_DEAD;
2871 g_assert_not_reached();
2873 la_reset_pref(&s->temps[i]);
2877 /* liveness analysis: sync globals back to memory and kill. */
2878 static void la_global_kill(TCGContext *s, int ng)
2882 for (i = 0; i < ng; i++) {
2883 s->temps[i].state = TS_DEAD | TS_MEM;
2884 la_reset_pref(&s->temps[i]);
2888 /* liveness analysis: note live globals crossing calls. */
2889 static void la_cross_call(TCGContext *s, int nt)
2891 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2894 for (i = 0; i < nt; i++) {
2895 TCGTemp *ts = &s->temps[i];
2896 if (!(ts->state & TS_DEAD)) {
2897 TCGRegSet *pset = la_temp_pref(ts);
2898 TCGRegSet set = *pset;
2901 /* If the combination is not possible, restart. */
2903 set = tcg_target_available_regs[ts->type] & mask;
2910 /* Liveness analysis: update the opc_arg_life array to tell if a
2911 given input argument is dead. Instructions updating dead
2912 temporaries are removed. */
2913 static void liveness_pass_1(TCGContext *s)
2915 int nb_globals = s->nb_globals;
2916 int nb_temps = s->nb_temps;
2917 TCGOp *op, *op_prev;
2921 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2922 for (i = 0; i < nb_temps; ++i) {
2923 s->temps[i].state_ptr = prefs + i;
2926 /* ??? Should be redundant with the exit_tb that ends the TB. */
2927 la_func_end(s, nb_globals, nb_temps);
2929 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2930 int nb_iargs, nb_oargs;
2931 TCGOpcode opc_new, opc_new2;
2933 TCGLifeData arg_life = 0;
2935 TCGOpcode opc = op->opc;
2936 const TCGOpDef *def = &tcg_op_defs[opc];
2944 nb_oargs = TCGOP_CALLO(op);
2945 nb_iargs = TCGOP_CALLI(op);
2946 call_flags = op->args[nb_oargs + nb_iargs + 1];
2948 /* pure functions can be removed if their result is unused */
2949 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2950 for (i = 0; i < nb_oargs; i++) {
2951 ts = arg_temp(op->args[i]);
2952 if (ts->state != TS_DEAD) {
2953 goto do_not_remove_call;
2960 /* Output args are dead. */
2961 for (i = 0; i < nb_oargs; i++) {
2962 ts = arg_temp(op->args[i]);
2963 if (ts->state & TS_DEAD) {
2964 arg_life |= DEAD_ARG << i;
2966 if (ts->state & TS_MEM) {
2967 arg_life |= SYNC_ARG << i;
2969 ts->state = TS_DEAD;
2972 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2973 op->output_pref[i] = 0;
2976 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2977 TCG_CALL_NO_READ_GLOBALS))) {
2978 la_global_kill(s, nb_globals);
2979 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2980 la_global_sync(s, nb_globals);
2983 /* Record arguments that die in this helper. */
2984 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2985 ts = arg_temp(op->args[i]);
2986 if (ts && ts->state & TS_DEAD) {
2987 arg_life |= DEAD_ARG << i;
2991 /* For all live registers, remove call-clobbered prefs. */
2992 la_cross_call(s, nb_temps);
2994 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2996 /* Input arguments are live for preceding opcodes. */
2997 for (i = 0; i < nb_iargs; i++) {
2998 ts = arg_temp(op->args[i + nb_oargs]);
2999 if (ts && ts->state & TS_DEAD) {
3000 /* For those arguments that die, and will be allocated
3001 * in registers, clear the register set for that arg,
3002 * to be filled in below. For args that will be on
3003 * the stack, reset to any available reg.
3006 = (i < nb_call_regs ? 0 :
3007 tcg_target_available_regs[ts->type]);
3008 ts->state &= ~TS_DEAD;
3012 /* For each input argument, add its input register to prefs.
3013 If a temp is used once, this produces a single set bit. */
3014 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3015 ts = arg_temp(op->args[i + nb_oargs]);
3017 tcg_regset_set_reg(*la_temp_pref(ts),
3018 tcg_target_call_iarg_regs[i]);
3023 case INDEX_op_insn_start:
3025 case INDEX_op_discard:
3026 /* mark the temporary as dead */
3027 ts = arg_temp(op->args[0]);
3028 ts->state = TS_DEAD;
3032 case INDEX_op_add2_i32:
3033 opc_new = INDEX_op_add_i32;
3035 case INDEX_op_sub2_i32:
3036 opc_new = INDEX_op_sub_i32;
3038 case INDEX_op_add2_i64:
3039 opc_new = INDEX_op_add_i64;
3041 case INDEX_op_sub2_i64:
3042 opc_new = INDEX_op_sub_i64;
3046 /* Test if the high part of the operation is dead, but not
3047 the low part. The result can be optimized to a simple
3048 add or sub. This happens often for an x86_64 guest when the
3049 CPU mode is set to 32-bit.
3050 if (arg_temp(op->args[1])->state == TS_DEAD) {
3051 if (arg_temp(op->args[0])->state == TS_DEAD) {
3054 /* Replace the opcode and adjust the args in place,
3055 leaving 3 unused args at the end. */
3056 op->opc = opc = opc_new;
3057 op->args[1] = op->args[2];
3058 op->args[2] = op->args[4];
3059 /* Fall through and mark the single-word operation live. */
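/*
 * Worked example: add2_i32 rl,rh,al,ah,bl,bh with rh dead but rl live is
 * rewritten in place to add_i32 rl,al,bl; the dead high half and its
 * inputs are dropped.
 */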
3065 case INDEX_op_mulu2_i32:
3066 opc_new = INDEX_op_mul_i32;
3067 opc_new2 = INDEX_op_muluh_i32;
3068 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3070 case INDEX_op_muls2_i32:
3071 opc_new = INDEX_op_mul_i32;
3072 opc_new2 = INDEX_op_mulsh_i32;
3073 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3075 case INDEX_op_mulu2_i64:
3076 opc_new = INDEX_op_mul_i64;
3077 opc_new2 = INDEX_op_muluh_i64;
3078 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3080 case INDEX_op_muls2_i64:
3081 opc_new = INDEX_op_mul_i64;
3082 opc_new2 = INDEX_op_mulsh_i64;
3083 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3088 if (arg_temp(op->args[1])->state == TS_DEAD) {
3089 if (arg_temp(op->args[0])->state == TS_DEAD) {
3090 /* Both parts of the operation are dead. */
3093 /* The high part of the operation is dead; generate the low. */
3094 op->opc = opc = opc_new;
3095 op->args[1] = op->args[2];
3096 op->args[2] = op->args[3];
3097 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3098 /* The low part of the operation is dead; generate the high. */
3099 op->opc = opc = opc_new2;
3100 op->args[0] = op->args[1];
3101 op->args[1] = op->args[2];
3102 op->args[2] = op->args[3];
3106 /* Mark the single-word operation live. */
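/*
 * Worked example: mulu2_i32 rl,rh,a,b becomes mul_i32 rl,a,b when only
 * the low half is live, or muluh_i32 rh,a,b when only the high half is
 * live and the host provides the muluh opcode.
 */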
3111 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3112 nb_iargs = def->nb_iargs;
3113 nb_oargs = def->nb_oargs;
3115 /* Test if the operation can be removed because all
3116 its outputs are dead. We assume that nb_oargs == 0
3117 implies side effects */
3118 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3119 for (i = 0; i < nb_oargs; i++) {
3120 if (arg_temp(op->args[i])->state != TS_DEAD) {
3129 tcg_op_remove(s, op);
3133 for (i = 0; i < nb_oargs; i++) {
3134 ts = arg_temp(op->args[i]);
3136 /* Remember the preference of the uses that followed. */
3137 op->output_pref[i] = *la_temp_pref(ts);
3139 /* Output args are dead. */
3140 if (ts->state & TS_DEAD) {
3141 arg_life |= DEAD_ARG << i;
3143 if (ts->state & TS_MEM) {
3144 arg_life |= SYNC_ARG << i;
3146 ts->state = TS_DEAD;
3150 /* If end of basic block, update. */
3151 if (def->flags & TCG_OPF_BB_EXIT) {
3152 la_func_end(s, nb_globals, nb_temps);
3153 } else if (def->flags & TCG_OPF_COND_BRANCH) {
3154 la_bb_sync(s, nb_globals, nb_temps);
3155 } else if (def->flags & TCG_OPF_BB_END) {
3156 la_bb_end(s, nb_globals, nb_temps);
3157 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3158 la_global_sync(s, nb_globals);
3159 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3160 la_cross_call(s, nb_temps);
3164 /* Record arguments that die in this opcode. */
3165 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3166 ts = arg_temp(op->args[i]);
3167 if (ts->state & TS_DEAD) {
3168 arg_life |= DEAD_ARG << i;
3172 /* Input arguments are live for preceding opcodes. */
3173 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3174 ts = arg_temp(op->args[i]);
3175 if (ts->state & TS_DEAD) {
3176 /* For operands that were dead, initially allow
3177 all regs for the type. */
3178 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3179 ts->state &= ~TS_DEAD;
3183 /* Incorporate constraints for this operand. */
3185 case INDEX_op_mov_i32:
3186 case INDEX_op_mov_i64:
3187 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3188 have proper constraints. That said, special case
3189 moves to propagate preferences backward. */
3190 if (IS_DEAD_ARG(1)) {
3191 *la_temp_pref(arg_temp(op->args[0]))
3192 = *la_temp_pref(arg_temp(op->args[1]));
3197 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3198 const TCGArgConstraint *ct = &def->args_ct[i];
3199 TCGRegSet set, *pset;
3201 ts = arg_temp(op->args[i]);
3202 pset = la_temp_pref(ts);
3207 set &= op->output_pref[ct->alias_index];
3209 /* If the combination is not possible, restart. */
3219 op->life = arg_life;
3223 /* Liveness analysis: Convert indirect regs to direct temporaries. */
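/*
 * Sketch of the transformation: an indirect global (one whose backing
 * storage is reached through a separately loaded base register) is
 * shadowed by a plain temporary; a ld_i32/ld_i64 from mem_base +
 * mem_offset is inserted before a use while the shadow is dead, and a
 * st_i32/st_i64 back to the slot is inserted after a write that must be
 * synced.
 */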
3224 static bool liveness_pass_2(TCGContext *s)
3226 int nb_globals = s->nb_globals;
3228 bool changes = false;
3229 TCGOp *op, *op_next;
3231 /* Create a temporary for each indirect global. */
3232 for (i = 0; i < nb_globals; ++i) {
3233 TCGTemp *its = &s->temps[i];
3234 if (its->indirect_reg) {
3235 TCGTemp *dts = tcg_temp_alloc(s);
3236 dts->type = its->type;
3237 dts->base_type = its->base_type;
3238 its->state_ptr = dts;
3240 its->state_ptr = NULL;
3242 /* All globals begin dead. */
3243 its->state = TS_DEAD;
3245 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3246 TCGTemp *its = &s->temps[i];
3247 its->state_ptr = NULL;
3248 its->state = TS_DEAD;
3251 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3252 TCGOpcode opc = op->opc;
3253 const TCGOpDef *def = &tcg_op_defs[opc];
3254 TCGLifeData arg_life = op->life;
3255 int nb_iargs, nb_oargs, call_flags;
3256 TCGTemp *arg_ts, *dir_ts;
3258 if (opc == INDEX_op_call) {
3259 nb_oargs = TCGOP_CALLO(op);
3260 nb_iargs = TCGOP_CALLI(op);
3261 call_flags = op->args[nb_oargs + nb_iargs + 1];
3263 nb_iargs = def->nb_iargs;
3264 nb_oargs = def->nb_oargs;
3266 /* Set flags similar to how calls require. */
3267 if (def->flags & TCG_OPF_COND_BRANCH) {
3268 /* Like reading globals: sync_globals */
3269 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3270 } else if (def->flags & TCG_OPF_BB_END) {
3271 /* Like writing globals: save_globals */
3273 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3274 /* Like reading globals: sync_globals */
3275 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3277 /* No effect on globals. */
3278 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3279 TCG_CALL_NO_WRITE_GLOBALS);
3283 /* Make sure that input arguments are available. */
3284 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3285 arg_ts = arg_temp(op->args[i]);
3287 dir_ts = arg_ts->state_ptr;
3288 if (dir_ts && arg_ts->state == TS_DEAD) {
3289 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3292 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3294 lop->args[0] = temp_arg(dir_ts);
3295 lop->args[1] = temp_arg(arg_ts->mem_base);
3296 lop->args[2] = arg_ts->mem_offset;
3298 /* Loaded, but synced with memory. */
3299 arg_ts->state = TS_MEM;
3304 /* Perform input replacement, and mark inputs that became dead.
3305 No action is required except keeping temp_state up to date
3306 so that we reload when needed. */
3307 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3308 arg_ts = arg_temp(op->args[i]);
3310 dir_ts = arg_ts->state_ptr;
3312 op->args[i] = temp_arg(dir_ts);
3314 if (IS_DEAD_ARG(i)) {
3315 arg_ts->state = TS_DEAD;
3321 /* Liveness analysis should ensure that the following are
3322 all correct, for call sites and basic block end points. */
3323 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3325 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3326 for (i = 0; i < nb_globals; ++i) {
3327 /* Liveness should see that globals are synced back,
3328 that is, either TS_DEAD or TS_MEM. */
3329 arg_ts = &s->temps[i];
3330 tcg_debug_assert(arg_ts->state_ptr == 0
3331 || arg_ts->state != 0);
3334 for (i = 0; i < nb_globals; ++i) {
3335 /* Liveness should see that globals are saved back,
3336 that is, TS_DEAD, waiting to be reloaded. */
3337 arg_ts = &s->temps[i];
3338 tcg_debug_assert(arg_ts->state_ptr == 0
3339 || arg_ts->state == TS_DEAD);
3343 /* Outputs become available. */
3344 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3345 arg_ts = arg_temp(op->args[0]);
3346 dir_ts = arg_ts->state_ptr;
3348 op->args[0] = temp_arg(dir_ts);
3351 /* The output is now live and modified. */
3354 if (NEED_SYNC_ARG(0)) {
3355 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3358 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3359 TCGTemp *out_ts = dir_ts;
3361 if (IS_DEAD_ARG(0)) {
3362 out_ts = arg_temp(op->args[1]);
3363 arg_ts->state = TS_DEAD;
3364 tcg_op_remove(s, op);
3366 arg_ts->state = TS_MEM;
3369 sop->args[0] = temp_arg(out_ts);
3370 sop->args[1] = temp_arg(arg_ts->mem_base);
3371 sop->args[2] = arg_ts->mem_offset;
3373 tcg_debug_assert(!IS_DEAD_ARG(0));
3377 for (i = 0; i < nb_oargs; i++) {
3378 arg_ts = arg_temp(op->args[i]);
3379 dir_ts = arg_ts->state_ptr;
3383 op->args[i] = temp_arg(dir_ts);
3386 /* The output is now live and modified. */
3389 /* Sync outputs upon their last write. */
3390 if (NEED_SYNC_ARG(i)) {
3391 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3394 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3396 sop->args[0] = temp_arg(dir_ts);
3397 sop->args[1] = temp_arg(arg_ts->mem_base);
3398 sop->args[2] = arg_ts->mem_offset;
3400 arg_ts->state = TS_MEM;
3402 /* Drop outputs that are dead. */
3403 if (IS_DEAD_ARG(i)) {
3404 arg_ts->state = TS_DEAD;
3413 #ifdef CONFIG_DEBUG_TCG
3414 static void dump_regs(TCGContext *s)
3420 for(i = 0; i < s->nb_temps; i++) {
3422 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3423 switch(ts->val_type) {
3425 printf("%s", tcg_target_reg_names[ts->reg]);
3428 printf("%d(%s)", (int)ts->mem_offset,
3429 tcg_target_reg_names[ts->mem_base->reg]);
3431 case TEMP_VAL_CONST:
3432 printf("$0x%" PRIx64, ts->val);
3444 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3445 if (s->reg_to_temp[i] != NULL) {
3447 tcg_target_reg_names[i],
3448 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3453 static void check_regs(TCGContext *s)
3460 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3461 ts = s->reg_to_temp[reg];
3463 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3464 printf("Inconsistency for register %s:\n",
3465 tcg_target_reg_names[reg]);
3470 for (k = 0; k < s->nb_temps; k++) {
3472 if (ts->val_type == TEMP_VAL_REG
3473 && ts->kind != TEMP_FIXED
3474 && s->reg_to_temp[ts->reg] != ts) {
3475 printf("Inconsistency for temp %s:\n",
3476 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3478 printf("reg state:\n");
3486 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3488 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3489 /* The Sparc64 stack is accessed with an offset of 2047. */
3490 s->current_frame_offset = (s->current_frame_offset +
3491 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3492 ~(sizeof(tcg_target_long) - 1);
3494 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3498 ts->mem_offset = s->current_frame_offset;
3499 ts->mem_base = s->frame_temp;
3500 ts->mem_allocated = 1;
3501 s->current_frame_offset += sizeof(tcg_target_long);
3504 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3506 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3507 mark it free; otherwise mark it dead. */
3508 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3510 TCGTempVal new_type;
3517 new_type = TEMP_VAL_MEM;
3520 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3523 new_type = TEMP_VAL_CONST;
3526 g_assert_not_reached();
3528 if (ts->val_type == TEMP_VAL_REG) {
3529 s->reg_to_temp[ts->reg] = NULL;
3531 ts->val_type = new_type;
3534 /* Mark a temporary as dead. */
3535 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3537 temp_free_or_dead(s, ts, 1);
3540 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3541 register needs to be allocated to store a constant. If 'free_or_dead'
3542 is non-zero, subsequently release the temporary; if it is positive, the
3543 temp is dead; if it is negative, the temp is free. */
3544 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3545 TCGRegSet preferred_regs, int free_or_dead)
3547 if (!temp_readonly(ts) && !ts->mem_coherent) {
3548 if (!ts->mem_allocated) {
3549 temp_allocate_frame(s, ts);
3551 switch (ts->val_type) {
3552 case TEMP_VAL_CONST:
3553 /* If we're going to free the temp immediately, then we won't
3554 require it later in a register, so attempt to store the
3555 constant to memory directly. */
3557 && tcg_out_sti(s, ts->type, ts->val,
3558 ts->mem_base->reg, ts->mem_offset)) {
3561 temp_load(s, ts, tcg_target_available_regs[ts->type],
3562 allocated_regs, preferred_regs);
3566 tcg_out_st(s, ts->type, ts->reg,
3567 ts->mem_base->reg, ts->mem_offset);
3577 ts->mem_coherent = 1;
3580 temp_free_or_dead(s, ts, free_or_dead);
3584 /* free register 'reg' by spilling the corresponding temporary if necessary */
3585 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3587 TCGTemp *ts = s->reg_to_temp[reg];
3589 temp_sync(s, ts, allocated_regs, 0, -1);
3595 * @required_regs: Set of registers in which we must allocate.
3596 * @allocated_regs: Set of registers which must be avoided.
3597 * @preferred_regs: Set of registers we should prefer.
3598 * @rev: True if we search the registers in "indirect" order.
3600 * The allocated register must be in @required_regs & ~@allocated_regs,
3601 * but if we can put it in @preferred_regs we may save a move later.
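 *
 * Worked example with hypothetical registers r1..r3: for required =
 * {r1,r2,r3}, allocated = {r1} and preferred = {r3}, the search below
 * first tries the free registers in {r3}, then in {r2,r3}, and only
 * spills when none of those is free.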
3603 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3604 TCGRegSet allocated_regs,
3605 TCGRegSet preferred_regs, bool rev)
3607 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3608 TCGRegSet reg_ct[2];
3611 reg_ct[1] = required_regs & ~allocated_regs;
3612 tcg_debug_assert(reg_ct[1] != 0);
3613 reg_ct[0] = reg_ct[1] & preferred_regs;
3615 /* Skip the preferred_regs option if it cannot be satisfied,
3616 or if the preference made no difference. */
3617 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3619 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3621 /* Try free registers, preferences first. */
3622 for (j = f; j < 2; j++) {
3623 TCGRegSet set = reg_ct[j];
3625 if (tcg_regset_single(set)) {
3626 /* One register in the set. */
3627 TCGReg reg = tcg_regset_first(set);
3628 if (s->reg_to_temp[reg] == NULL) {
3632 for (i = 0; i < n; i++) {
3633 TCGReg reg = order[i];
3634 if (s->reg_to_temp[reg] == NULL &&
3635 tcg_regset_test_reg(set, reg)) {
3642 /* We must spill something. */
3643 for (j = f; j < 2; j++) {
3644 TCGRegSet set = reg_ct[j];
3646 if (tcg_regset_single(set)) {
3647 /* One register in the set. */
3648 TCGReg reg = tcg_regset_first(set);
3649 tcg_reg_free(s, reg, allocated_regs);
3652 for (i = 0; i < n; i++) {
3653 TCGReg reg = order[i];
3654 if (tcg_regset_test_reg(set, reg)) {
3655 tcg_reg_free(s, reg, allocated_regs);
3665 /* Make sure the temporary is in a register. If needed, allocate the register
3666 from DESIRED while avoiding ALLOCATED. */
3667 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3668 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3672 switch (ts->val_type) {
3675 case TEMP_VAL_CONST:
3676 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3677 preferred_regs, ts->indirect_base);
3678 if (ts->type <= TCG_TYPE_I64) {
3679 tcg_out_movi(s, ts->type, reg, ts->val);
3681 uint64_t val = ts->val;
3685 * Find the minimal vector element that matches the constant.
3686 * The targets will, in general, have to do this search anyway,
3687 * so do this generically.
3689 if (val == dup_const(MO_8, val)) {
3691 } else if (val == dup_const(MO_16, val)) {
3693 } else if (val == dup_const(MO_32, val)) {
3697 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
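/*
 * Example: a constant whose eight bytes are all 0x42 matches
 * dup_const(MO_8, val) and is emitted as a byte dup, while a value such
 * as 0xdeadbeefdeadbeef only matches at MO_32.
 */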
3699 ts->mem_coherent = 0;
3702 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3703 preferred_regs, ts->indirect_base);
3704 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3705 ts->mem_coherent = 1;
3712 ts->val_type = TEMP_VAL_REG;
3713 s->reg_to_temp[reg] = ts;
3716 /* Save a temporary to memory. 'allocated_regs' is used in case a
3717 temporary register needs to be allocated to store a constant. */
3718 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3720 /* The liveness analysis already ensures that globals are back
3721 in memory. Keep a tcg_debug_assert for safety. */
3722 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3725 /* save globals to their canonical location and assume they can be
3726 modified by the following code. 'allocated_regs' is used in case a
3727 temporary register needs to be allocated to store a constant. */
3728 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3732 for (i = 0, n = s->nb_globals; i < n; i++) {
3733 temp_save(s, &s->temps[i], allocated_regs);
3737 /* sync globals to their canonical location and assume they can be
3738 read by the following code. 'allocated_regs' is used in case a
3739 temporary register needs to be allocated to store a constant. */
3740 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3744 for (i = 0, n = s->nb_globals; i < n; i++) {
3745 TCGTemp *ts = &s->temps[i];
3746 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3747 || ts->kind == TEMP_FIXED
3748 || ts->mem_coherent);
3752 /* at the end of a basic block, we assume all temporaries are dead and
3753 all globals are stored at their canonical location. */
3754 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3758 for (i = s->nb_globals; i < s->nb_temps; i++) {
3759 TCGTemp *ts = &s->temps[i];
3763 temp_save(s, ts, allocated_regs);
3766 /* The liveness analysis already ensures that temps are dead.
3767 Keep a tcg_debug_assert for safety. */
3768 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3771 /* Similarly, we should have freed any allocated register. */
3772 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3775 g_assert_not_reached();
3779 save_globals(s, allocated_regs);
3783 * At a conditional branch, we assume all temporaries are dead and
3784 * all globals and local temps are synced to their location.
3786 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3788 sync_globals(s, allocated_regs);
3790 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3791 TCGTemp *ts = &s->temps[i];
3793 * The liveness analysis already ensures that temps are dead.
3794 * Keep tcg_debug_asserts for safety.
3798 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3801 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3806 g_assert_not_reached();
3812 * Specialized code generation for INDEX_op_mov_* with a constant.
3814 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3815 tcg_target_ulong val, TCGLifeData arg_life,
3816 TCGRegSet preferred_regs)
3818 /* ENV should not be modified. */
3819 tcg_debug_assert(!temp_readonly(ots));
3821 /* The movi is not explicitly generated here. */
3822 if (ots->val_type == TEMP_VAL_REG) {
3823 s->reg_to_temp[ots->reg] = NULL;
3825 ots->val_type = TEMP_VAL_CONST;
3827 ots->mem_coherent = 0;
3828 if (NEED_SYNC_ARG(0)) {
3829 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3830 } else if (IS_DEAD_ARG(0)) {
3836 * Specialized code generation for INDEX_op_mov_*.
3838 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3840 const TCGLifeData arg_life = op->life;
3841 TCGRegSet allocated_regs, preferred_regs;
3843 TCGType otype, itype;
3845 allocated_regs = s->reserved_regs;
3846 preferred_regs = op->output_pref[0];
3847 ots = arg_temp(op->args[0]);
3848 ts = arg_temp(op->args[1]);
3850 /* ENV should not be modified. */
3851 tcg_debug_assert(!temp_readonly(ots));
3853 /* Note that otype != itype for no-op truncation. */
3857 if (ts->val_type == TEMP_VAL_CONST) {
3858 /* propagate constant or generate sti */
3859 tcg_target_ulong val = ts->val;
3860 if (IS_DEAD_ARG(1)) {
3863 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3867 /* If the source value is in memory we're going to be forced
3868 to have it in a register in order to perform the copy. Copy
3869 the SOURCE value into its own register first; that way we
3870 don't have to reload SOURCE the next time it is used. */
3871 if (ts->val_type == TEMP_VAL_MEM) {
3872 temp_load(s, ts, tcg_target_available_regs[itype],
3873 allocated_regs, preferred_regs);
3876 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3877 if (IS_DEAD_ARG(0)) {
3878 /* mov to a non-saved dead register makes no sense (even with
3879 liveness analysis disabled). */
3880 tcg_debug_assert(NEED_SYNC_ARG(0));
3881 if (!ots->mem_allocated) {
3882 temp_allocate_frame(s, ots);
3884 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3885 if (IS_DEAD_ARG(1)) {
3890 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3891 /* the mov can be suppressed */
3892 if (ots->val_type == TEMP_VAL_REG) {
3893 s->reg_to_temp[ots->reg] = NULL;
3898 if (ots->val_type != TEMP_VAL_REG) {
3899 /* When allocating a new register, make sure to not spill the
3900    input one. */
3901 tcg_regset_set_reg(allocated_regs, ts->reg);
3902 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3903 allocated_regs, preferred_regs,
3904 ots->indirect_base);
3906 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3908 * Cross register class move not supported.
3909 * Store the source register into the destination slot
3910 * and leave the destination temp as TEMP_VAL_MEM.
3912 assert(!temp_readonly(ots));
3913 if (!ts->mem_allocated) {
3914 temp_allocate_frame(s, ots);
3916 tcg_out_st(s, ts->type, ts->reg,
3917 ots->mem_base->reg, ots->mem_offset);
3918 ots->mem_coherent = 1;
3919 temp_free_or_dead(s, ots, -1);
3923 ots->val_type = TEMP_VAL_REG;
3924 ots->mem_coherent = 0;
3925 s->reg_to_temp[ots->reg] = ots;
3926 if (NEED_SYNC_ARG(0)) {
3927 temp_sync(s, ots, allocated_regs, 0, 0);
3933 * Specialized code generation for INDEX_op_dup_vec.
3935 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3937 const TCGLifeData arg_life = op->life;
3938 TCGRegSet dup_out_regs, dup_in_regs;
3940 TCGType itype, vtype;
3941 intptr_t endian_fixup;
3945 ots = arg_temp(op->args[0]);
3946 its = arg_temp(op->args[1]);
3948 /* ENV should not be modified. */
3949 tcg_debug_assert(!temp_readonly(ots));
3952 vece = TCGOP_VECE(op);
3953 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3955 if (its->val_type == TEMP_VAL_CONST) {
3956 /* Propagate constant via movi -> dupi. */
3957 tcg_target_ulong val = its->val;
3958 if (IS_DEAD_ARG(1)) {
3961 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3965 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3966 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3968 /* Allocate the output register now. */
3969 if (ots->val_type != TEMP_VAL_REG) {
3970 TCGRegSet allocated_regs = s->reserved_regs;
3972 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3973 /* Make sure to not spill the input register. */
3974 tcg_regset_set_reg(allocated_regs, its->reg);
3976 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3977 op->output_pref[0], ots->indirect_base);
3978 ots->val_type = TEMP_VAL_REG;
3979 ots->mem_coherent = 0;
3980 s->reg_to_temp[ots->reg] = ots;
3983 switch (its->val_type) {
3986 * The dup constraints must be broad, covering all possible VECE.
3987 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3988 * to fail, indicating that extra moves are required for that case.
3990 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3991 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3994 /* Try again from memory or a vector input register. */
3996 if (!its->mem_coherent) {
3998 * The input register is not synced, and so an extra store
3999 * would be required to use memory. Attempt an integer-vector
4000 * register move first. We do not have a TCGRegSet for this.
4002 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4005 /* Sync the temp back to its slot and load from there. */
4006 temp_sync(s, its, s->reserved_regs, 0, 0);
4011 #ifdef HOST_WORDS_BIGENDIAN
4012 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4013 endian_fixup -= 1 << vece;
4017 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4018 its->mem_offset + endian_fixup)) {
4021 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4025 g_assert_not_reached();
4028 /* We now have a vector input register, so dup must succeed. */
4029 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4030 tcg_debug_assert(ok);
4033 if (IS_DEAD_ARG(1)) {
4036 if (NEED_SYNC_ARG(0)) {
4037 temp_sync(s, ots, s->reserved_regs, 0, 0);
4039 if (IS_DEAD_ARG(0)) {
4044 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4046 const TCGLifeData arg_life = op->life;
4047 const TCGOpDef * const def = &tcg_op_defs[op->opc];
4048 TCGRegSet i_allocated_regs;
4049 TCGRegSet o_allocated_regs;
4050 int i, k, nb_iargs, nb_oargs;
4053 const TCGArgConstraint *arg_ct;
4055 TCGArg new_args[TCG_MAX_OP_ARGS];
4056 int const_args[TCG_MAX_OP_ARGS];
4058 nb_oargs = def->nb_oargs;
4059 nb_iargs = def->nb_iargs;
4061 /* copy constants */
4062 memcpy(new_args + nb_oargs + nb_iargs,
4063 op->args + nb_oargs + nb_iargs,
4064 sizeof(TCGArg) * def->nb_cargs);
4066 i_allocated_regs = s->reserved_regs;
4067 o_allocated_regs = s->reserved_regs;
4069 /* satisfy input constraints */
4070 for (k = 0; k < nb_iargs; k++) {
4071 TCGRegSet i_preferred_regs, o_preferred_regs;
4073 i = def->args_ct[nb_oargs + k].sort_index;
4075 arg_ct = &def->args_ct[i];
4078 if (ts->val_type == TEMP_VAL_CONST
4079 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4080 /* constant is OK for instruction */
4082 new_args[i] = ts->val;
4086 i_preferred_regs = o_preferred_regs = 0;
4087 if (arg_ct->ialias) {
4088 o_preferred_regs = op->output_pref[arg_ct->alias_index];
4091 * If the input is readonly, then it cannot also be an
4092 * output and aliased to itself. If the input is not
4093 * dead after the instruction, we must allocate a new
4094 * register and move it.
4096 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4097 goto allocate_in_reg;
4101 * Check if the current register has already been allocated
4102 * for another input aliased to an output.
4104 if (ts->val_type == TEMP_VAL_REG) {
4106 for (int k2 = 0; k2 < k; k2++) {
4107 int i2 = def->args_ct[nb_oargs + k2].sort_index;
4108 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4109 goto allocate_in_reg;
4113 i_preferred_regs = o_preferred_regs;
4116 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4119 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4122 * Allocate a new register matching the constraint
4123 * and move the temporary register into it.
4125 temp_load(s, ts, tcg_target_available_regs[ts->type],
4126 i_allocated_regs, 0);
4127 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4128 o_preferred_regs, ts->indirect_base);
4129 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4131 * Cross register class move not supported. Sync the
4132 * temp back to its slot and load from there.
4134 temp_sync(s, ts, i_allocated_regs, 0, 0);
4135 tcg_out_ld(s, ts->type, reg,
4136 ts->mem_base->reg, ts->mem_offset);
4141 tcg_regset_set_reg(i_allocated_regs, reg);
4144 /* mark dead temporaries and free the associated registers */
4145 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4146 if (IS_DEAD_ARG(i)) {
4147 temp_dead(s, arg_temp(op->args[i]));
4151 if (def->flags & TCG_OPF_COND_BRANCH) {
4152 tcg_reg_alloc_cbranch(s, i_allocated_regs);
4153 } else if (def->flags & TCG_OPF_BB_END) {
4154 tcg_reg_alloc_bb_end(s, i_allocated_regs);
4156 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4157 /* XXX: permit a generic clobber register list? */
4158 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4159 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4160 tcg_reg_free(s, i, i_allocated_regs);
4164 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4165 /* sync globals if the op has side effects and might trigger
4166    an exception. */
4167 sync_globals(s, i_allocated_regs);
4170 /* satisfy the output constraints */
4171 for(k = 0; k < nb_oargs; k++) {
4172 i = def->args_ct[k].sort_index;
4174 arg_ct = &def->args_ct[i];
4177 /* ENV should not be modified. */
4178 tcg_debug_assert(!temp_readonly(ts));
4180 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4181 reg = new_args[arg_ct->alias_index];
4182 } else if (arg_ct->newreg) {
4183 reg = tcg_reg_alloc(s, arg_ct->regs,
4184 i_allocated_regs | o_allocated_regs,
4185 op->output_pref[k], ts->indirect_base);
4187 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4188 op->output_pref[k], ts->indirect_base);
4190 tcg_regset_set_reg(o_allocated_regs, reg);
4191 if (ts->val_type == TEMP_VAL_REG) {
4192 s->reg_to_temp[ts->reg] = NULL;
4194 ts->val_type = TEMP_VAL_REG;
4197 * Temp value is modified, so the value kept in memory is
4198 * potentially not the same.
4200 ts->mem_coherent = 0;
4201 s->reg_to_temp[reg] = ts;
4206 /* emit instruction */
4207 if (def->flags & TCG_OPF_VECTOR) {
4208 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4209 new_args, const_args);
4211 tcg_out_op(s, op->opc, new_args, const_args);
4214 /* move the outputs in the correct register if needed */
4215 for(i = 0; i < nb_oargs; i++) {
4216 ts = arg_temp(op->args[i]);
4218 /* ENV should not be modified. */
4219 tcg_debug_assert(!temp_readonly(ts));
4221 if (NEED_SYNC_ARG(i)) {
4222 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4223 } else if (IS_DEAD_ARG(i)) {
4229 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4231 const TCGLifeData arg_life = op->life;
4232 TCGTemp *ots, *itsl, *itsh;
4233 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4235 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4236 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4237 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4239 ots = arg_temp(op->args[0]);
4240 itsl = arg_temp(op->args[1]);
4241 itsh = arg_temp(op->args[2]);
4243 /* ENV should not be modified. */
4244 tcg_debug_assert(!temp_readonly(ots));
4246 /* Allocate the output register now. */
4247 if (ots->val_type != TEMP_VAL_REG) {
4248 TCGRegSet allocated_regs = s->reserved_regs;
4249 TCGRegSet dup_out_regs =
4250 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4252 /* Make sure to not spill the input registers. */
4253 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4254 tcg_regset_set_reg(allocated_regs, itsl->reg);
4256 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4257 tcg_regset_set_reg(allocated_regs, itsh->reg);
4260 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4261 op->output_pref[0], ots->indirect_base);
4262 ots->val_type = TEMP_VAL_REG;
4263 ots->mem_coherent = 0;
4264 s->reg_to_temp[ots->reg] = ots;
4267 /* Promote dup2 of immediates to dupi_vec. */
4268 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4269 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4272 if (val == dup_const(MO_8, val)) {
4274 } else if (val == dup_const(MO_16, val)) {
4276 } else if (val == dup_const(MO_32, val)) {
4280 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4284 /* If the two inputs form one 64-bit value, try dupm_vec. */
4285 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4286 if (!itsl->mem_coherent) {
4287 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4289 if (!itsh->mem_coherent) {
4290 temp_sync(s, itsh, s->reserved_regs, 0, 0);
4292 #ifdef HOST_WORDS_BIGENDIAN
4293 TCGTemp *its = itsh;
4295 TCGTemp *its = itsl;
4297 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4298 its->mem_base->reg, its->mem_offset)) {
4303 /* Fall back to generic expansion. */
4307 if (IS_DEAD_ARG(1)) {
4310 if (IS_DEAD_ARG(2)) {
4313 if (NEED_SYNC_ARG(0)) {
4314 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4315 } else if (IS_DEAD_ARG(0)) {
4321 #ifdef TCG_TARGET_STACK_GROWSUP
4322 #define STACK_DIR(x) (-(x))
4324 #define STACK_DIR(x) (x)
4327 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4329 const int nb_oargs = TCGOP_CALLO(op);
4330 const int nb_iargs = TCGOP_CALLI(op);
4331 const TCGLifeData arg_life = op->life;
4332 int flags, nb_regs, i;
4336 intptr_t stack_offset;
4337 size_t call_stack_size;
4338 tcg_insn_unit *func_addr;
4340 TCGRegSet allocated_regs;
4342 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4343 flags = op->args[nb_oargs + nb_iargs + 1];
4345 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4346 if (nb_regs > nb_iargs) {
4350 /* assign stack slots first */
4351 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4352 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4353 ~(TCG_TARGET_STACK_ALIGN - 1);
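/*
 * Example: with 8 argument registers and 10 word-sized arguments, two
 * words (16 bytes on a 64-bit host) spill to the stack; that size is
 * rounded up to TCG_TARGET_STACK_ALIGN before being compared against
 * TCG_STATIC_CALL_ARGS_SIZE below.
 */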
4354 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4355 if (allocate_args) {
4356 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4357 preallocate call stack */
4361 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4362 for (i = nb_regs; i < nb_iargs; i++) {
4363 arg = op->args[nb_oargs + i];
4364 #ifdef TCG_TARGET_STACK_GROWSUP
4365 stack_offset -= sizeof(tcg_target_long);
4367 if (arg != TCG_CALL_DUMMY_ARG) {
4369 temp_load(s, ts, tcg_target_available_regs[ts->type],
4370 s->reserved_regs, 0);
4371 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4373 #ifndef TCG_TARGET_STACK_GROWSUP
4374 stack_offset += sizeof(tcg_target_long);
4378 /* assign input registers */
4379 allocated_regs = s->reserved_regs;
4380 for (i = 0; i < nb_regs; i++) {
4381 arg = op->args[nb_oargs + i];
4382 if (arg != TCG_CALL_DUMMY_ARG) {
4384 reg = tcg_target_call_iarg_regs[i];
4386 if (ts->val_type == TEMP_VAL_REG) {
4387 if (ts->reg != reg) {
4388 tcg_reg_free(s, reg, allocated_regs);
4389 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4391 * Cross register class move not supported. Sync the
4392 * temp back to its slot and load from there.
4394 temp_sync(s, ts, allocated_regs, 0, 0);
4395 tcg_out_ld(s, ts->type, reg,
4396 ts->mem_base->reg, ts->mem_offset);
4400 TCGRegSet arg_set = 0;
4402 tcg_reg_free(s, reg, allocated_regs);
4403 tcg_regset_set_reg(arg_set, reg);
4404 temp_load(s, ts, arg_set, allocated_regs, 0);
4407 tcg_regset_set_reg(allocated_regs, reg);
4411 /* mark dead temporaries and free the associated registers */
4412 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4413 if (IS_DEAD_ARG(i)) {
4414 temp_dead(s, arg_temp(op->args[i]));
4418 /* clobber call registers */
4419 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4420 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4421 tcg_reg_free(s, i, allocated_regs);
4425 /* Save globals if they might be written by the helper, sync them if
4426 they might be read. */
4427 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4429 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4430 sync_globals(s, allocated_regs);
4432 save_globals(s, allocated_regs);
4435 tcg_out_call(s, func_addr);
4437 /* assign output registers and emit moves if needed */
4438 for(i = 0; i < nb_oargs; i++) {
4442 /* ENV should not be modified. */
4443 tcg_debug_assert(!temp_readonly(ts));
4445 reg = tcg_target_call_oarg_regs[i];
4446 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4447 if (ts->val_type == TEMP_VAL_REG) {
4448 s->reg_to_temp[ts->reg] = NULL;
4450 ts->val_type = TEMP_VAL_REG;
4452 ts->mem_coherent = 0;
4453 s->reg_to_temp[reg] = ts;
4454 if (NEED_SYNC_ARG(i)) {
4455 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4456 } else if (IS_DEAD_ARG(i)) {
4462 #ifdef CONFIG_PROFILER
4464 /* avoid copy/paste errors */
4465 #define PROF_ADD(to, from, field) \
4467 (to)->field += qatomic_read(&((from)->field)); \
4470 #define PROF_MAX(to, from, field) \
4472 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4473 if (val__ > (to)->field) { \
4474 (to)->field = val__; \
4478 /* Pass in a zero'ed @prof */
4480 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4482 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4485 for (i = 0; i < n_ctxs; i++) {
4486 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4487 const TCGProfile *orig = &s->prof;
4490 PROF_ADD(prof, orig, cpu_exec_time);
4491 PROF_ADD(prof, orig, tb_count1);
4492 PROF_ADD(prof, orig, tb_count);
4493 PROF_ADD(prof, orig, op_count);
4494 PROF_MAX(prof, orig, op_count_max);
4495 PROF_ADD(prof, orig, temp_count);
4496 PROF_MAX(prof, orig, temp_count_max);
4497 PROF_ADD(prof, orig, del_op_count);
4498 PROF_ADD(prof, orig, code_in_len);
4499 PROF_ADD(prof, orig, code_out_len);
4500 PROF_ADD(prof, orig, search_out_len);
4501 PROF_ADD(prof, orig, interm_time);
4502 PROF_ADD(prof, orig, code_time);
4503 PROF_ADD(prof, orig, la_time);
4504 PROF_ADD(prof, orig, opt_time);
4505 PROF_ADD(prof, orig, restore_count);
4506 PROF_ADD(prof, orig, restore_time);
4511 for (i = 0; i < NB_OPS; i++) {
4512 PROF_ADD(prof, orig, table_op_count[i]);
4521 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4523 tcg_profile_snapshot(prof, true, false);
4526 static void tcg_profile_snapshot_table(TCGProfile *prof)
4528 tcg_profile_snapshot(prof, false, true);
4531 void tcg_dump_op_count(void)
4533 TCGProfile prof = {};
4536 tcg_profile_snapshot_table(&prof);
4537 for (i = 0; i < NB_OPS; i++) {
4538 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4539 prof.table_op_count[i]);
4543 int64_t tcg_cpu_exec_time(void)
4545 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4549 for (i = 0; i < n_ctxs; i++) {
4550 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4551 const TCGProfile *prof = &s->prof;
4553 ret += qatomic_read(&prof->cpu_exec_time);
4558 void tcg_dump_op_count(void)
4560 qemu_printf("[TCG profiler not compiled]\n");
4563 int64_t tcg_cpu_exec_time(void)
4565 error_report("%s: TCG profiler not compiled", __func__);
4571 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4573 #ifdef CONFIG_PROFILER
4574 TCGProfile *prof = &s->prof;
4579 #ifdef CONFIG_PROFILER
4583 QTAILQ_FOREACH(op, &s->ops, link) {
4586 qatomic_set(&prof->op_count, prof->op_count + n);
4587 if (n > prof->op_count_max) {
4588 qatomic_set(&prof->op_count_max, n);
4592 qatomic_set(&prof->temp_count, prof->temp_count + n);
4593 if (n > prof->temp_count_max) {
4594 qatomic_set(&prof->temp_count_max, n);
4600 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4601 && qemu_log_in_addr_range(tb->pc))) {
4602 FILE *logfile = qemu_log_lock();
4604 tcg_dump_ops(s, false);
4606 qemu_log_unlock(logfile);
4610 #ifdef CONFIG_DEBUG_TCG
4611 /* Ensure all labels referenced have been emitted. */
4616 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4617 if (unlikely(!l->present) && l->refs) {
4618 qemu_log_mask(CPU_LOG_TB_OP,
4619 "$L%d referenced but not present.\n", l->id);
4627 #ifdef CONFIG_PROFILER
4628 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4631 #ifdef USE_TCG_OPTIMIZATIONS
4635 #ifdef CONFIG_PROFILER
4636 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4637 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4640 reachable_code_pass(s);
4643 if (s->nb_indirects > 0) {
4645 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4646 && qemu_log_in_addr_range(tb->pc))) {
4647 FILE *logfile = qemu_log_lock();
4648 qemu_log("OP before indirect lowering:\n");
4649 tcg_dump_ops(s, false);
4651 qemu_log_unlock(logfile);
4654 /* Replace indirect temps with direct temps. */
4655 if (liveness_pass_2(s)) {
4656 /* If changes were made, re-run liveness. */
4661 #ifdef CONFIG_PROFILER
4662 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4666 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4667 && qemu_log_in_addr_range(tb->pc))) {
4668 FILE *logfile = qemu_log_lock();
4669 qemu_log("OP after optimization and liveness analysis:\n");
4670 tcg_dump_ops(s, true);
4672 qemu_log_unlock(logfile);
4676 tcg_reg_alloc_start(s);
4679 * Reset the buffer pointers when restarting after overflow.
4680 * TODO: Move this into translate-all.c with the rest of the
4681 * buffer management. Having only this done here is confusing.
4683 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4684 s->code_ptr = s->code_buf;
4686 #ifdef TCG_TARGET_NEED_LDST_LABELS
4687 QSIMPLEQ_INIT(&s->ldst_labels);
4689 #ifdef TCG_TARGET_NEED_POOL_LABELS
4690 s->pool_labels = NULL;
4694 QTAILQ_FOREACH(op, &s->ops, link) {
4695 TCGOpcode opc = op->opc;
4697 #ifdef CONFIG_PROFILER
4698 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4702 case INDEX_op_mov_i32:
4703 case INDEX_op_mov_i64:
4704 case INDEX_op_mov_vec:
4705 tcg_reg_alloc_mov(s, op);
4707 case INDEX_op_dup_vec:
4708 tcg_reg_alloc_dup(s, op);
4710 case INDEX_op_insn_start:
4711 if (num_insns >= 0) {
4712 size_t off = tcg_current_code_size(s);
4713 s->gen_insn_end_off[num_insns] = off;
4714 /* Assert that we do not overflow our stored offset. */
4715 assert(s->gen_insn_end_off[num_insns] == off);
4718 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4720 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4721 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4725 s->gen_insn_data[num_insns][i] = a;
4728 case INDEX_op_discard:
4729 temp_dead(s, arg_temp(op->args[0]));
4731 case INDEX_op_set_label:
4732 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4733 tcg_out_label(s, arg_label(op->args[0]));
4736 tcg_reg_alloc_call(s, op);
4738 case INDEX_op_dup2_vec:
4739 if (tcg_reg_alloc_dup2(s, op)) {
4744 /* Sanity check that we've not introduced any unhandled opcodes. */
4745 tcg_debug_assert(tcg_op_supported(opc));
4746 /* Note: in order to speed up the code, it would be much
4747 faster to have specialized register allocator functions for
4748 some common argument patterns. */
4749 tcg_reg_alloc_op(s, op);
4752 #ifdef CONFIG_DEBUG_TCG
4755 /* Test for (pending) buffer overflow. The assumption is that any
4756 one operation beginning below the high water mark cannot overrun
4757 the buffer completely. Thus we can test for overflow after
4758 generating code without having to check during generation. */
4759 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4762 /* Test for TB overflow, as seen by gen_insn_end_off. */
4763 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4767 tcg_debug_assert(num_insns >= 0);
4768 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4770 /* Generate TB finalization at the end of block */
4771 #ifdef TCG_TARGET_NEED_LDST_LABELS
4772 i = tcg_out_ldst_finalize(s);
4777 #ifdef TCG_TARGET_NEED_POOL_LABELS
4778 i = tcg_out_pool_finalize(s);
4783 if (!tcg_resolve_relocs(s)) {
4787 #ifndef CONFIG_TCG_INTERPRETER
4788 /* flush instruction cache */
4789 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4790 (uintptr_t)s->code_buf,
4791 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4794 return tcg_current_code_size(s);
4797 #ifdef CONFIG_PROFILER
4798 void tcg_dump_info(void)
4800 TCGProfile prof = {};
4801 const TCGProfile *s;
4803 int64_t tb_div_count;
4806 tcg_profile_snapshot_counters(&prof);
4808 tb_count = s->tb_count;
4809 tb_div_count = tb_count ? tb_count : 1;
4810 tot = s->interm_time + s->code_time;
4812 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4814 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4816 tb_count, s->tb_count1 - tb_count,
4817 (double)(s->tb_count1 - s->tb_count)
4818 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4819 qemu_printf("avg ops/TB %0.1f max=%d\n",
4820 (double)s->op_count / tb_div_count, s->op_count_max);
4821 qemu_printf("deleted ops/TB %0.2f\n",
4822 (double)s->del_op_count / tb_div_count);
4823 qemu_printf("avg temps/TB %0.2f max=%d\n",
4824 (double)s->temp_count / tb_div_count, s->temp_count_max);
4825 qemu_printf("avg host code/TB %0.1f\n",
4826 (double)s->code_out_len / tb_div_count);
4827 qemu_printf("avg search data/TB %0.1f\n",
4828 (double)s->search_out_len / tb_div_count);
4830 qemu_printf("cycles/op %0.1f\n",
4831 s->op_count ? (double)tot / s->op_count : 0);
4832 qemu_printf("cycles/in byte %0.1f\n",
4833 s->code_in_len ? (double)tot / s->code_in_len : 0);
4834 qemu_printf("cycles/out byte %0.1f\n",
4835 s->code_out_len ? (double)tot / s->code_out_len : 0);
4836 qemu_printf("cycles/search byte %0.1f\n",
4837 s->search_out_len ? (double)tot / s->search_out_len : 0);
4841 qemu_printf(" gen_interm time %0.1f%%\n",
4842 (double)s->interm_time / tot * 100.0);
4843 qemu_printf(" gen_code time %0.1f%%\n",
4844 (double)s->code_time / tot * 100.0);
4845 qemu_printf("optim./code time %0.1f%%\n",
4846 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4848 qemu_printf("liveness/code time %0.1f%%\n",
4849 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4850 qemu_printf("cpu_restore count %" PRId64 "\n",
4852 qemu_printf(" avg cycles %0.1f\n",
4853 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4856 void tcg_dump_info(void)
4858 qemu_printf("[TCG profiler not compiled]\n");
4862 #ifdef ELF_HOST_MACHINE
4863 /* In order to use this feature, the backend needs to do three things:
4865 (1) Define ELF_HOST_MACHINE to indicate both what value to
4866 put into the ELF image and to indicate support for the feature.
4868 (2) Define tcg_register_jit. This should create a buffer containing
4869 the contents of a .debug_frame section that describes the post-
4870 prologue unwind info for the tcg machine.
4872    (3) Call tcg_register_jit_int, with the constructed .debug_frame.  */
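/*
 * As a concrete illustration of steps (2) and (3), a minimal sketch only,
 * kept out of the build with "#if 0": a backend typically wraps the common
 * DebugFrameHeader in a host-specific structure, fills in its CFI bytes from
 * the host ABI, and forwards everything to tcg_register_jit_int().  The
 * "HostDebugFrame" type and its empty CFI payload below are placeholders,
 * not copied from any real tcg-target.c.inc.
 */
#if 0
typedef struct QEMU_PACKED {
    DebugFrameHeader h;      /* common CIE + FDE headers declared above */
    uint8_t fde_cfi[8];      /* host-specific call frame instructions */
} HostDebugFrame;

static const HostDebugFrame debug_frame = {
    /* CIE/FDE fields describing the prologue go here, host specific;
       fde.func_start and fde.func_len are patched by tcg_register_jit_int. */
    .fde_cfi = { 0 },
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif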
4875 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4882 struct jit_code_entry {
4883 struct jit_code_entry *next_entry;
4884 struct jit_code_entry *prev_entry;
4885 const void *symfile_addr;
4886 uint64_t symfile_size;
4889 struct jit_descriptor {
4890     uint32_t version;
4891     uint32_t action_flag;
4892 struct jit_code_entry *relevant_entry;
4893 struct jit_code_entry *first_entry;
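/*
 * GDB's side of the protocol: it plants a breakpoint on the deliberately
 * empty, noinline __jit_debug_register_code() below and re-reads
 * __jit_debug_descriptor each time that breakpoint is hit.
 */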
4896 void __jit_debug_register_code(void) __attribute__((noinline));
4897 void __jit_debug_register_code(void)
4902 /* Must statically initialize the version, because GDB may check
4903 the version before we can set it. */
4904 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4906 /* End GDB interface. */
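/*
 * Return the byte offset of STR within the section string table STRTAB;
 * the table begins with a NUL byte, hence the "strtab + 1" below.  The
 * result is the value expected by the sh_name and st_name fields.
 */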
4908 static int find_string(const char *strtab, const char *str)
4910 const char *p = strtab + 1;
4913 if (strcmp(p, str) == 0) {
4920 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4921 const void *debug_frame,
4922 size_t debug_frame_size)
4924 struct __attribute__((packed)) DebugInfo {
4931 uintptr_t cu_low_pc;
4932 uintptr_t cu_high_pc;
4935 uintptr_t fn_low_pc;
4936 uintptr_t fn_high_pc;
4945 struct DebugInfo di;
4950 struct ElfImage *img;
4952 static const struct ElfImage img_template = {
4954 .e_ident[EI_MAG0] = ELFMAG0,
4955 .e_ident[EI_MAG1] = ELFMAG1,
4956 .e_ident[EI_MAG2] = ELFMAG2,
4957 .e_ident[EI_MAG3] = ELFMAG3,
4958 .e_ident[EI_CLASS] = ELF_CLASS,
4959 .e_ident[EI_DATA] = ELF_DATA,
4960 .e_ident[EI_VERSION] = EV_CURRENT,
4962 .e_machine = ELF_HOST_MACHINE,
4963 .e_version = EV_CURRENT,
4964 .e_phoff = offsetof(struct ElfImage, phdr),
4965 .e_shoff = offsetof(struct ElfImage, shdr),
4966     .e_ehsize = sizeof(ElfW(Ehdr)),
4967 .e_phentsize = sizeof(ElfW(Phdr)),
4969 .e_shentsize = sizeof(ElfW(Shdr)),
4970 .e_shnum = ARRAY_SIZE(img->shdr),
4971 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4972 #ifdef ELF_HOST_FLAGS
4973 .e_flags = ELF_HOST_FLAGS,
4976 .e_ident[EI_OSABI] = ELF_OSABI,
4984 [0] = { .sh_type = SHT_NULL },
4985         /* Trick: The contents of code_gen_buffer are not present in
4986            this fake ELF file; the buffer is allocated elsewhere.  Thus
4987            we mark .text as SHT_NOBITS (similar to .bss) so that readers
4988            will not look for contents.  We can record any address. */
4990 .sh_type = SHT_NOBITS,
4991 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4993 [2] = { /* .debug_info */
4994 .sh_type = SHT_PROGBITS,
4995 .sh_offset = offsetof(struct ElfImage, di),
4996 .sh_size = sizeof(struct DebugInfo),
4998 [3] = { /* .debug_abbrev */
4999 .sh_type = SHT_PROGBITS,
5000 .sh_offset = offsetof(struct ElfImage, da),
5001 .sh_size = sizeof(img->da),
5003 [4] = { /* .debug_frame */
5004 .sh_type = SHT_PROGBITS,
5005 .sh_offset = sizeof(struct ElfImage),
5007 [5] = { /* .symtab */
5008 .sh_type = SHT_SYMTAB,
5009 .sh_offset = offsetof(struct ElfImage, sym),
5010 .sh_size = sizeof(img->sym),
5012 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5013 .sh_entsize = sizeof(ElfW(Sym)),
5015 [6] = { /* .strtab */
5016 .sh_type = SHT_STRTAB,
5017 .sh_offset = offsetof(struct ElfImage, str),
5018 .sh_size = sizeof(img->str),
5022 [1] = { /* code_gen_buffer */
5023 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5028 .len = sizeof(struct DebugInfo) - 4,
5030 .ptr_size = sizeof(void *),
5032 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
5034 .fn_name = "code_gen_buffer"
5037 1, /* abbrev number (the cu) */
5038 0x11, 1, /* DW_TAG_compile_unit, has children */
5039 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
5040 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5041 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5042 0, 0, /* end of abbrev */
5043 2, /* abbrev number (the fn) */
5044 0x2e, 0, /* DW_TAG_subprogram, no children */
5045 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
5046 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5047 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5048 0, 0, /* end of abbrev */
5049 0 /* no more abbrev */
5051 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5052 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5055 /* We only need a single jit entry; statically allocate it. */
5056 static struct jit_code_entry one_entry;
5058 uintptr_t buf = (uintptr_t)buf_ptr;
5059 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5060 DebugFrameHeader *dfh;
5062 img = g_malloc(img_size);
5063 *img = img_template;
5065 img->phdr.p_vaddr = buf;
5066 img->phdr.p_paddr = buf;
5067 img->phdr.p_memsz = buf_size;
5069 img->shdr[1].sh_name = find_string(img->str, ".text");
5070 img->shdr[1].sh_addr = buf;
5071 img->shdr[1].sh_size = buf_size;
5073 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5074 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5076 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5077 img->shdr[4].sh_size = debug_frame_size;
5079 img->shdr[5].sh_name = find_string(img->str, ".symtab");
5080 img->shdr[6].sh_name = find_string(img->str, ".strtab");
5082 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5083 img->sym[1].st_value = buf;
5084 img->sym[1].st_size = buf_size;
5086 img->di.cu_low_pc = buf;
5087 img->di.cu_high_pc = buf + buf_size;
5088 img->di.fn_low_pc = buf;
5089 img->di.fn_high_pc = buf + buf_size;
5091 dfh = (DebugFrameHeader *)(img + 1);
5092 memcpy(dfh, debug_frame, debug_frame_size);
5093 dfh->fde.func_start = buf;
5094 dfh->fde.func_len = buf_size;
5097     /* Enable this block to debug the creation of the ELF image file.
5098        One can use readelf, objdump, or other inspection utilities. */
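    /*
     * For example, once a dump has been written:
     *     readelf --sections --debug-dump=frames /tmp/qemu.jit
     *     objdump -h /tmp/qemu.jit
     */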
5100 FILE *f = fopen("/tmp/qemu.jit", "w+b");
5102         if (fwrite(img, img_size, 1, f) != 1) {
5103             /* Nothing to do; this just avoids fwrite's unused-result warning. */
5110 one_entry.symfile_addr = img;
5111 one_entry.symfile_size = img_size;
5113 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5114 __jit_debug_descriptor.relevant_entry = &one_entry;
5115 __jit_debug_descriptor.first_entry = &one_entry;
5116 __jit_debug_register_code();
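    /*
     * From this point an attached gdb can see the generated code; for
     * instance, "info functions code_gen_buffer" lists the symbol
     * registered above.
     */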
5119 /* No support for the feature. Provide the entry point expected by exec.c,
5120 and implement the internal function we declared earlier. */
5122 static void tcg_register_jit_int(const void *buf, size_t size,
5123 const void *debug_frame,
5124 size_t debug_frame_size)
5128 void tcg_register_jit(const void *buf, size_t buf_size)
5131 #endif /* ELF_HOST_MACHINE */
5133 #if !TCG_TARGET_MAYBE_vec
5134 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5136 g_assert_not_reached();