2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st instructions.
43 #define NO_CPU_IO_DEFS
45 #include "exec/exec-all.h"
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
51 #include "tcg/tcg-op.h"
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS ELFCLASS32
56 # define ELF_CLASS ELFCLASS64
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA ELFDATA2MSB
61 # define ELF_DATA ELFDATA2LSB
67 /* Forward declarations for functions declared in tcg-target.c.inc and used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72 intptr_t value, intptr_t addend);
74 /* The CIE and FDE header definitions will be common to all hosts. */
76 uint32_t len __attribute__((aligned((sizeof(void *)))));
82 uint8_t return_column;
85 typedef struct QEMU_PACKED {
86 uint32_t len __attribute__((aligned((sizeof(void *)))));
90 } DebugFrameFDEHeader;
92 typedef struct QEMU_PACKED {
94 DebugFrameFDEHeader fde;
97 static void tcg_register_jit_int(const void *buf, size_t size,
98 const void *debug_frame,
99 size_t debug_frame_size)
100 __attribute__((unused));
102 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
105 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107 TCGReg ret, tcg_target_long arg);
108 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
109 const TCGArg args[TCG_MAX_OP_ARGS],
110 const int const_args[TCG_MAX_OP_ARGS]);
111 #if TCG_TARGET_MAYBE_vec
112 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
113 TCGReg dst, TCGReg src);
114 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
115 TCGReg dst, TCGReg base, intptr_t offset);
116 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, int64_t arg);
118 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
119 unsigned vecl, unsigned vece,
120 const TCGArg args[TCG_MAX_OP_ARGS],
121 const int const_args[TCG_MAX_OP_ARGS]);
123 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
124 TCGReg dst, TCGReg src)
126 g_assert_not_reached();
128 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
129 TCGReg dst, TCGReg base, intptr_t offset)
131 g_assert_not_reached();
133 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
134 TCGReg dst, int64_t arg)
136 g_assert_not_reached();
138 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
139 unsigned vecl, unsigned vece,
140 const TCGArg args[TCG_MAX_OP_ARGS],
141 const int const_args[TCG_MAX_OP_ARGS])
143 g_assert_not_reached();
146 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
149 TCGReg base, intptr_t ofs);
150 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
151 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
152 #ifdef TCG_TARGET_NEED_LDST_LABELS
153 static int tcg_out_ldst_finalize(TCGContext *s);
156 #define TCG_HIGHWATER 1024
158 static TCGContext **tcg_ctxs;
159 static unsigned int n_tcg_ctxs;
160 TCGv_env cpu_env = 0;
161 const void *tcg_code_gen_epilogue;
162 uintptr_t tcg_splitwx_diff;
164 #ifndef CONFIG_TCG_INTERPRETER
165 tcg_prologue_fn *tcg_qemu_tb_exec;
168 struct tcg_region_tree {
171 /* padding to avoid false sharing is computed at run-time */
175 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
176 * dynamically allocate from as demand dictates. Given appropriate region
177 * sizing, this minimizes flushes even when some TCG threads generate a lot
178 * more code than others.
180 struct tcg_region_state {
183 /* fields set at init time */
188 size_t size; /* size of one region */
189 size_t stride; /* .size + guard size */
191 /* fields protected by the lock */
192 size_t current; /* current region index */
193 size_t agg_size_full; /* aggregate size of full regions */
196 static struct tcg_region_state region;
198 * This is an array of struct tcg_region_tree's, with padding.
199 * We use void * to simplify the computation of region_trees[i]; each
200 * struct is found every tree_size bytes.
202 static void *region_trees;
203 static size_t tree_size;
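/* For example, the i-th tree lives at
   (struct tcg_region_tree *)(region_trees + i * tree_size), which is the
   "region_trees + i * tree_size" arithmetic used throughout this file. */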
204 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
205 static TCGRegSet tcg_target_call_clobber_regs;
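/*
 * The tcg_out{8,16,32,64} helpers below append a value to the instruction
 * stream and the tcg_patch* helpers rewrite one in place.  When the value is
 * wider than one tcg_insn_unit, it is copied with memcpy and code_ptr is
 * advanced by size / TCG_TARGET_INSN_UNIT_SIZE units.
 */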
207 #if TCG_TARGET_INSN_UNIT_SIZE == 1
208 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
213 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
220 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
221 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
223 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
226 tcg_insn_unit *p = s->code_ptr;
227 memcpy(p, &v, sizeof(v));
228 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
232 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
235 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
238 memcpy(p, &v, sizeof(v));
243 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
244 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
246 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
249 tcg_insn_unit *p = s->code_ptr;
250 memcpy(p, &v, sizeof(v));
251 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
255 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
258 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
261 memcpy(p, &v, sizeof(v));
266 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
267 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
269 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
272 tcg_insn_unit *p = s->code_ptr;
273 memcpy(p, &v, sizeof(v));
274 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
278 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
281 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
284 memcpy(p, &v, sizeof(v));
289 /* label relocation processing */
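/*
 * A label may be referenced before its address is known: tcg_out_reloc()
 * queues each such reference on the label, tcg_out_label() records the
 * label's final address, and tcg_resolve_relocs() then patches every queued
 * site via the backend's patch_reloc().
 */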
291 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
292 TCGLabel *l, intptr_t addend)
294 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
299 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
302 static void tcg_out_label(TCGContext *s, TCGLabel *l)
304 tcg_debug_assert(!l->has_value);
306 l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
309 TCGLabel *gen_new_label(void)
311 TCGContext *s = tcg_ctx;
312 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
314 memset(l, 0, sizeof(TCGLabel));
315 l->id = s->nb_labels++;
316 QSIMPLEQ_INIT(&l->relocs);
318 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
323 static bool tcg_resolve_relocs(TCGContext *s)
327 QSIMPLEQ_FOREACH(l, &s->labels, next) {
329 uintptr_t value = l->u.value;
331 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
332 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
340 static void set_jmp_reset_offset(TCGContext *s, int which)
343 * We will check for overflow at the end of the opcode loop in
344 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
346 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
349 /* Signal overflow, starting over with fewer guest insns. */
350 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
352 siglongjmp(s->jmp_trans, -2);
355 #define C_PFX1(P, A) P##A
356 #define C_PFX2(P, A, B) P##A##_##B
357 #define C_PFX3(P, A, B, C) P##A##_##B##_##C
358 #define C_PFX4(P, A, B, C, D) P##A##_##B##_##C##_##D
359 #define C_PFX5(P, A, B, C, D, E) P##A##_##B##_##C##_##D##_##E
360 #define C_PFX6(P, A, B, C, D, E, F) P##A##_##B##_##C##_##D##_##E##_##F
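/* For example, C_PFX3(c_o1_i2_, r, r, ri) pastes to c_o1_i2_r_r_ri; the
   operand letters here are only illustrative, the real ones come from each
   backend's tcg-target-con-set.h. */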
362 /* Define an enumeration for the various combinations. */
364 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1),
365 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2),
366 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3),
367 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4),
369 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1),
370 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2),
371 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3),
372 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
374 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2),
376 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1),
377 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2),
378 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
379 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
382 #include "tcg-target-con-set.h"
383 } TCGConstraintSetIndex;
385 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
401 /* Put all of the constraint sets into an array, indexed by the enum. */
403 #define C_O0_I1(I1) { .args_ct_str = { #I1 } },
404 #define C_O0_I2(I1, I2) { .args_ct_str = { #I1, #I2 } },
405 #define C_O0_I3(I1, I2, I3) { .args_ct_str = { #I1, #I2, #I3 } },
406 #define C_O0_I4(I1, I2, I3, I4) { .args_ct_str = { #I1, #I2, #I3, #I4 } },
408 #define C_O1_I1(O1, I1) { .args_ct_str = { #O1, #I1 } },
409 #define C_O1_I2(O1, I1, I2) { .args_ct_str = { #O1, #I1, #I2 } },
410 #define C_O1_I3(O1, I1, I2, I3) { .args_ct_str = { #O1, #I1, #I2, #I3 } },
411 #define C_O1_I4(O1, I1, I2, I3, I4) { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
413 #define C_N1_I2(O1, I1, I2) { .args_ct_str = { "&" #O1, #I1, #I2 } },
415 #define C_O2_I1(O1, O2, I1) { .args_ct_str = { #O1, #O2, #I1 } },
416 #define C_O2_I2(O1, O2, I1, I2) { .args_ct_str = { #O1, #O2, #I1, #I2 } },
417 #define C_O2_I3(O1, O2, I1, I2, I3) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
418 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
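/* With these definitions an entry such as C_O1_I2(r, r, ri) (illustrative
   operands again) expands to { .args_ct_str = { "r", "r", "ri" } }, so each
   enumerator above indexes its constraint-string set in constraint_sets[]. */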
420 static const TCGTargetOpDef constraint_sets[] = {
421 #include "tcg-target-con-set.h"
439 /* Expand the enumerator to be returned from tcg_target_op_def(). */
441 #define C_O0_I1(I1) C_PFX1(c_o0_i1_, I1)
442 #define C_O0_I2(I1, I2) C_PFX2(c_o0_i2_, I1, I2)
443 #define C_O0_I3(I1, I2, I3) C_PFX3(c_o0_i3_, I1, I2, I3)
444 #define C_O0_I4(I1, I2, I3, I4) C_PFX4(c_o0_i4_, I1, I2, I3, I4)
446 #define C_O1_I1(O1, I1) C_PFX2(c_o1_i1_, O1, I1)
447 #define C_O1_I2(O1, I1, I2) C_PFX3(c_o1_i2_, O1, I1, I2)
448 #define C_O1_I3(O1, I1, I2, I3) C_PFX4(c_o1_i3_, O1, I1, I2, I3)
449 #define C_O1_I4(O1, I1, I2, I3, I4) C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
451 #define C_N1_I2(O1, I1, I2) C_PFX3(c_n1_i2_, O1, I1, I2)
453 #define C_O2_I1(O1, O2, I1) C_PFX3(c_o2_i1_, O1, O2, I1)
454 #define C_O2_I2(O1, O2, I1, I2) C_PFX4(c_o2_i2_, O1, O2, I1, I2)
455 #define C_O2_I3(O1, O2, I1, I2, I3) C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
456 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
458 #include "tcg-target.c.inc"
460 /* compare a pointer @ptr and a tb_tc @s */
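/* Presumably a three-way result: positive when @ptr lies past the TB's code,
   negative when it lies before it, and zero when it falls within
   [s->ptr, s->ptr + s->size). */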
461 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
463 if (ptr >= s->ptr + s->size) {
465 } else if (ptr < s->ptr) {
471 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
473 const struct tb_tc *a = ap;
474 const struct tb_tc *b = bp;
477 * When both sizes are set, we know this isn't a lookup.
478 * This is the most likely case: every TB must be inserted; lookups
479 * are a lot less frequent.
481 if (likely(a->size && b->size)) {
482 if (a->ptr > b->ptr) {
484 } else if (a->ptr < b->ptr) {
487 /* a->ptr == b->ptr should happen only on deletions */
488 g_assert(a->size == b->size);
492 * All lookups have one of the two .size fields set to 0.
493 * From the glib sources we see that @ap is always the lookup key. However
494 * the docs provide no guarantee, so we just mark this case as likely.
496 if (likely(a->size == 0)) {
497 return ptr_cmp_tb_tc(a->ptr, b);
499 return ptr_cmp_tb_tc(b->ptr, a);
502 static void tcg_region_trees_init(void)
506 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
507 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
508 for (i = 0; i < region.n; i++) {
509 struct tcg_region_tree *rt = region_trees + i * tree_size;
511 qemu_mutex_init(&rt->lock);
512 rt->tree = g_tree_new(tb_tc_cmp);
516 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
521 * Like tcg_splitwx_to_rw, with no assert. The pc may come from
522 * a signal handler over which the caller has no control.
524 if (!in_code_gen_buffer(p)) {
525 p -= tcg_splitwx_diff;
526 if (!in_code_gen_buffer(p)) {
531 if (p < region.start_aligned) {
534 ptrdiff_t offset = p - region.start_aligned;
536 if (offset > region.stride * (region.n - 1)) {
537 region_idx = region.n - 1;
539 region_idx = offset / region.stride;
542 return region_trees + region_idx * tree_size;
545 void tcg_tb_insert(TranslationBlock *tb)
547 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
549 g_assert(rt != NULL);
550 qemu_mutex_lock(&rt->lock);
551 g_tree_insert(rt->tree, &tb->tc, tb);
552 qemu_mutex_unlock(&rt->lock);
555 void tcg_tb_remove(TranslationBlock *tb)
557 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
559 g_assert(rt != NULL);
560 qemu_mutex_lock(&rt->lock);
561 g_tree_remove(rt->tree, &tb->tc);
562 qemu_mutex_unlock(&rt->lock);
566 * Find the TB 'tb' such that
567 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
568 * Return NULL if not found.
570 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
572 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
573 TranslationBlock *tb;
574 struct tb_tc s = { .ptr = (void *)tc_ptr };
580 qemu_mutex_lock(&rt->lock);
581 tb = g_tree_lookup(rt->tree, &s);
582 qemu_mutex_unlock(&rt->lock);
586 static void tcg_region_tree_lock_all(void)
590 for (i = 0; i < region.n; i++) {
591 struct tcg_region_tree *rt = region_trees + i * tree_size;
593 qemu_mutex_lock(&rt->lock);
597 static void tcg_region_tree_unlock_all(void)
601 for (i = 0; i < region.n; i++) {
602 struct tcg_region_tree *rt = region_trees + i * tree_size;
604 qemu_mutex_unlock(&rt->lock);
608 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
612 tcg_region_tree_lock_all();
613 for (i = 0; i < region.n; i++) {
614 struct tcg_region_tree *rt = region_trees + i * tree_size;
616 g_tree_foreach(rt->tree, func, user_data);
618 tcg_region_tree_unlock_all();
621 size_t tcg_nb_tbs(void)
626 tcg_region_tree_lock_all();
627 for (i = 0; i < region.n; i++) {
628 struct tcg_region_tree *rt = region_trees + i * tree_size;
630 nb_tbs += g_tree_nnodes(rt->tree);
632 tcg_region_tree_unlock_all();
636 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
638 TranslationBlock *tb = v;
644 static void tcg_region_tree_reset_all(void)
648 tcg_region_tree_lock_all();
649 for (i = 0; i < region.n; i++) {
650 struct tcg_region_tree *rt = region_trees + i * tree_size;
652 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
653 /* Increment the refcount first so that destroy acts as a reset */
654 g_tree_ref(rt->tree);
655 g_tree_destroy(rt->tree);
657 tcg_region_tree_unlock_all();
660 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
664 start = region.start_aligned + curr_region * region.stride;
665 end = start + region.size;
667 if (curr_region == 0) {
668 start = region.start;
670 if (curr_region == region.n - 1) {
678 static void tcg_region_assign(TCGContext *s, size_t curr_region)
682 tcg_region_bounds(curr_region, &start, &end);
684 s->code_gen_buffer = start;
685 s->code_gen_ptr = start;
686 s->code_gen_buffer_size = end - start;
687 s->code_gen_highwater = end - TCG_HIGHWATER;
690 static bool tcg_region_alloc__locked(TCGContext *s)
692 if (region.current == region.n) {
695 tcg_region_assign(s, region.current);
701 * Request a new region once the one in use has filled up.
702 * Returns true on error.
704 static bool tcg_region_alloc(TCGContext *s)
707 /* read the region size now; alloc__locked will overwrite it on success */
708 size_t size_full = s->code_gen_buffer_size;
710 qemu_mutex_lock(&region.lock);
711 err = tcg_region_alloc__locked(s);
713 region.agg_size_full += size_full - TCG_HIGHWATER;
715 qemu_mutex_unlock(&region.lock);
720 * Perform a context's first region allocation.
721 * This function does _not_ increment region.agg_size_full.
723 static void tcg_region_initial_alloc__locked(TCGContext *s)
725 bool err = tcg_region_alloc__locked(s);
729 #ifndef CONFIG_USER_ONLY
730 static void tcg_region_initial_alloc(TCGContext *s)
732 qemu_mutex_lock(&region.lock);
733 tcg_region_initial_alloc__locked(s);
734 qemu_mutex_unlock(&region.lock);
738 /* Call from a safe-work context */
739 void tcg_region_reset_all(void)
741 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
744 qemu_mutex_lock(&region.lock);
746 region.agg_size_full = 0;
748 for (i = 0; i < n_ctxs; i++) {
749 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
750 tcg_region_initial_alloc__locked(s);
752 qemu_mutex_unlock(&region.lock);
754 tcg_region_tree_reset_all();
757 #ifdef CONFIG_USER_ONLY
758 static size_t tcg_n_regions(void)
764 * It is likely that some vCPUs will translate more code than others, so we
765 * first try to set more regions than max_cpus, with those regions being of
766 * reasonable size. If that's not possible we make do by evenly dividing
767 * the code_gen_buffer among the vCPUs.
769 static size_t tcg_n_regions(void)
773 /* Use a single region if all we have is one vCPU thread */
774 #if !defined(CONFIG_USER_ONLY)
775 MachineState *ms = MACHINE(qdev_get_machine());
776 unsigned int max_cpus = ms->smp.max_cpus;
778 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
782 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
783 for (i = 8; i > 0; i--) {
784 size_t regions_per_thread = i;
787 region_size = tcg_init_ctx.code_gen_buffer_size;
788 region_size /= max_cpus * regions_per_thread;
790 if (region_size >= 2 * 1024u * 1024) {
791 return max_cpus * regions_per_thread;
794 /* If we can't, then just allocate one region per vCPU thread */
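/* Worked example with illustrative numbers: a 512 MiB code_gen_buffer and
   max_cpus == 8 gives 512 MiB / (8 * 8) == 8 MiB per region on the first
   iteration (i == 8), which already meets the 2 MiB minimum, so 64 regions
   are used. */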
800 * Initializes region partitioning.
802 * Called at init time from the parent thread (i.e. the one calling
803 * tcg_context_init), after the target's TCG globals have been set.
805 * Region partitioning works by splitting code_gen_buffer into separate regions,
806 * and then assigning regions to TCG threads so that the threads can translate
807 * code in parallel without synchronization.
809 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
810 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
811 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
812 * must have been parsed before calling this function, since it calls
813 * qemu_tcg_mttcg_enabled().
815 * In user-mode we use a single region. Having multiple regions in user-mode
816 * is not supported, because the number of vCPU threads (recall that each thread
817 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
818 * OS, and usually this number is huge (tens of thousands is not uncommon).
819 * Thus, given this large bound on the number of vCPU threads and the fact
820 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
821 * the availability of at least one region per vCPU thread.
823 * However, this user-mode limitation is unlikely to be a significant problem
824 * in practice. Multi-threaded guests share most if not all of their translated
825 * code, which makes parallel code generation less appealing than in softmmu.
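/*
 * Concretely (illustrative numbers): a 1 GiB buffer split into 64 regions
 * gives regions of roughly 16 MiB each, the last host page of which serves
 * as a guard page (compare region.size and region.stride below).
 */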
827 void tcg_region_init(void)
829 void *buf = tcg_init_ctx.code_gen_buffer;
831 size_t size = tcg_init_ctx.code_gen_buffer_size;
832 size_t page_size = qemu_real_host_page_size;
837 n_regions = tcg_n_regions();
839 /* The first region will be 'aligned - buf' bytes larger than the others */
840 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
841 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
843 * Make region_size a multiple of page_size, using aligned as the start.
844 * As a result of this we might end up with a few extra pages at the end of
845 * the buffer; we will assign those to the last region.
847 region_size = (size - (aligned - buf)) / n_regions;
848 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
850 /* A region must have at least 2 pages; one code, one guard */
851 g_assert(region_size >= 2 * page_size);
853 /* init the region struct */
854 qemu_mutex_init(&region.lock);
855 region.n = n_regions;
856 region.size = region_size - page_size;
857 region.stride = region_size;
859 region.start_aligned = aligned;
860 /* page-align the end, since its last page will be a guard page */
861 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
862 /* account for that last guard page */
863 region.end -= page_size;
866 * Set guard pages in the rw buffer, as that's the one into which
867 * buffer overruns could occur. Do not set guard pages in the rx
868 * buffer -- let that one use hugepages throughout.
870 for (i = 0; i < region.n; i++) {
873 tcg_region_bounds(i, &start, &end);
876 * macOS 11.2 has a bug (Apple Feedback FB8994773) in which mprotect
877 * rejects a permission change from RWX -> NONE. Guard pages are
878 * nice for bug detection but are not essential; ignore any failure.
880 (void)qemu_mprotect_none(end, page_size);
883 tcg_region_trees_init();
886 * Leave the initial context initialized to the first region.
887 * This will be the context into which we generate the prologue.
888 * It is also the only context for CONFIG_USER_ONLY.
890 tcg_region_initial_alloc__locked(&tcg_init_ctx);
893 static void tcg_region_prologue_set(TCGContext *s)
895 /* Deduct the prologue from the first region. */
896 g_assert(region.start == s->code_gen_buffer);
897 region.start = s->code_ptr;
899 /* Recompute boundaries of the first region. */
900 tcg_region_assign(s, 0);
902 /* Register the balance of the buffer with gdb. */
903 tcg_register_jit(tcg_splitwx_to_rx(region.start),
904 region.end - region.start);
907 #ifdef CONFIG_DEBUG_TCG
908 const void *tcg_splitwx_to_rx(void *rw)
910 /* Pass NULL pointers unchanged. */
912 g_assert(in_code_gen_buffer(rw));
913 rw += tcg_splitwx_diff;
918 void *tcg_splitwx_to_rw(const void *rx)
920 /* Pass NULL pointers unchanged. */
922 rx -= tcg_splitwx_diff;
923 /* Assert that we end with a pointer in the rw region. */
924 g_assert(in_code_gen_buffer(rx));
928 #endif /* CONFIG_DEBUG_TCG */
930 static void alloc_tcg_plugin_context(TCGContext *s)
933 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
934 s->plugin_tb->insns =
935 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
940 * All TCG threads except the parent (i.e. the one that called tcg_context_init
941 * and registered the target's TCG globals) must register with this function
942 * before initiating translation.
944 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
945 * of tcg_region_init() for the reasoning behind this.
947 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
948 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
949 * is not used anymore for translation once this function is called.
951 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
952 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
954 #ifdef CONFIG_USER_ONLY
955 void tcg_register_thread(void)
957 tcg_ctx = &tcg_init_ctx;
960 void tcg_register_thread(void)
962 MachineState *ms = MACHINE(qdev_get_machine());
963 TCGContext *s = g_malloc(sizeof(*s));
968 /* Relink mem_base. */
969 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
970 if (tcg_init_ctx.temps[i].mem_base) {
971 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
972 tcg_debug_assert(b >= 0 && b < n);
973 s->temps[i].mem_base = &s->temps[b];
977 /* Claim an entry in tcg_ctxs */
978 n = qatomic_fetch_inc(&n_tcg_ctxs);
979 g_assert(n < ms->smp.max_cpus);
980 qatomic_set(&tcg_ctxs[n], s);
983 alloc_tcg_plugin_context(s);
984 tcg_region_initial_alloc(s);
989 #endif /* !CONFIG_USER_ONLY */
992 * Returns the size (in bytes) of all translated code (i.e. from all regions)
993 * currently in the cache.
994 * See also: tcg_code_capacity()
995 * Do not confuse with tcg_current_code_size(); that one applies to a single TCG context.
998 size_t tcg_code_size(void)
1000 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
1004 qemu_mutex_lock(&region.lock);
1005 total = region.agg_size_full;
1006 for (i = 0; i < n_ctxs; i++) {
1007 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1010 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
1011 g_assert(size <= s->code_gen_buffer_size);
1014 qemu_mutex_unlock(&region.lock);
1019 * Returns the code capacity (in bytes) of the entire cache, i.e. including all regions.
1021 * See also: tcg_code_size()
1023 size_t tcg_code_capacity(void)
1025 size_t guard_size, capacity;
1027 /* no need for synchronization; these variables are set at init time */
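/* Capacity is the whole buffer span including the trailing guard page,
   minus, for every region, its own guard page plus the TCG_HIGHWATER slack
   that tcg_tb_alloc() refuses to allocate beyond. */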
1028 guard_size = region.stride - region.size;
1029 capacity = region.end + guard_size - region.start;
1030 capacity -= region.n * (guard_size + TCG_HIGHWATER);
1034 size_t tcg_tb_phys_invalidate_count(void)
1036 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
1040 for (i = 0; i < n_ctxs; i++) {
1041 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
1043 total += qatomic_read(&s->tb_phys_invalidate_count);
1048 /* pool based memory allocation */
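/* Requests larger than TCG_POOL_CHUNK_SIZE get a dedicated chunk chained on
   pool_first_large; smaller requests are bump-allocated from the current
   chunk, and tcg_pool_reset() frees the large chunks while keeping the
   regular ones for reuse. */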
1049 void *tcg_malloc_internal(TCGContext *s, int size)
1054 if (size > TCG_POOL_CHUNK_SIZE) {
1055 /* big malloc: insert a new pool (XXX: could optimize) */
1056 p = g_malloc(sizeof(TCGPool) + size);
1058 p->next = s->pool_first_large;
1059 s->pool_first_large = p;
1062 p = s->pool_current;
1070 pool_size = TCG_POOL_CHUNK_SIZE;
1071 p = g_malloc(sizeof(TCGPool) + pool_size);
1072 p->size = pool_size;
1074 if (s->pool_current)
1075 s->pool_current->next = p;
1083 s->pool_current = p;
1084 s->pool_cur = p->data + size;
1085 s->pool_end = p->data + p->size;
1089 void tcg_pool_reset(TCGContext *s)
1092 for (p = s->pool_first_large; p; p = t) {
1096 s->pool_first_large = NULL;
1097 s->pool_cur = s->pool_end = NULL;
1098 s->pool_current = NULL;
1101 typedef struct TCGHelperInfo {
1108 #include "exec/helper-proto.h"
1110 static const TCGHelperInfo all_helpers[] = {
1111 #include "exec/helper-tcg.h"
1113 static GHashTable *helper_table;
1115 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1116 static void process_op_defs(TCGContext *s);
1117 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1118 TCGReg reg, const char *name);
1120 void tcg_context_init(TCGContext *s)
1122 int op, total_args, n, i;
1124 TCGArgConstraint *args_ct;
1127 memset(s, 0, sizeof(*s));
1130 /* Count total number of arguments and allocate the corresponding space. */
1133 for(op = 0; op < NB_OPS; op++) {
1134 def = &tcg_op_defs[op];
1135 n = def->nb_iargs + def->nb_oargs;
1139 args_ct = g_new0(TCGArgConstraint, total_args);
1141 for(op = 0; op < NB_OPS; op++) {
1142 def = &tcg_op_defs[op];
1143 def->args_ct = args_ct;
1144 n = def->nb_iargs + def->nb_oargs;
1148 /* Register helpers. */
1149 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1150 helper_table = g_hash_table_new(NULL, NULL);
1152 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1153 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1154 (gpointer)&all_helpers[i]);
1160 /* Reverse the order of the saved registers, assuming they're all at
1161 the start of tcg_target_reg_alloc_order. */
1162 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1163 int r = tcg_target_reg_alloc_order[n];
1164 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1168 for (i = 0; i < n; ++i) {
1169 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1171 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1172 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1175 alloc_tcg_plugin_context(s);
1179 * In user-mode we simply share the init context among threads, since we
1180 * use a single region. See the documentation tcg_region_init() for the
1181 * reasoning behind this.
1182 * In softmmu we will have at most max_cpus TCG threads.
1184 #ifdef CONFIG_USER_ONLY
1185 tcg_ctxs = &tcg_ctx;
1188 MachineState *ms = MACHINE(qdev_get_machine());
1189 unsigned int max_cpus = ms->smp.max_cpus;
1190 tcg_ctxs = g_new(TCGContext *, max_cpus);
1193 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1194 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1195 cpu_env = temp_tcgv_ptr(ts);
1199 * Allocate TBs right before their corresponding translated code, making
1200 * sure that TBs and code are on different cache lines.
1202 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1204 uintptr_t align = qemu_icache_linesize;
1205 TranslationBlock *tb;
1209 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1210 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1212 if (unlikely(next > s->code_gen_highwater)) {
1213 if (tcg_region_alloc(s)) {
1218 qatomic_set(&s->code_gen_ptr, next);
1219 s->data_gen_ptr = NULL;
1223 void tcg_prologue_init(TCGContext *s)
1225 size_t prologue_size;
1227 s->code_ptr = s->code_gen_ptr;
1228 s->code_buf = s->code_gen_ptr;
1229 s->data_gen_ptr = NULL;
1231 #ifndef CONFIG_TCG_INTERPRETER
1232 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1235 #ifdef TCG_TARGET_NEED_POOL_LABELS
1236 s->pool_labels = NULL;
1239 qemu_thread_jit_write();
1240 /* Generate the prologue. */
1241 tcg_target_qemu_prologue(s);
1243 #ifdef TCG_TARGET_NEED_POOL_LABELS
1244 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1246 int result = tcg_out_pool_finalize(s);
1247 tcg_debug_assert(result == 0);
1251 prologue_size = tcg_current_code_size(s);
1253 #ifndef CONFIG_TCG_INTERPRETER
1254 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1255 (uintptr_t)s->code_buf, prologue_size);
1258 tcg_region_prologue_set(s);
1261 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1262 FILE *logfile = qemu_log_lock();
1263 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1264 if (s->data_gen_ptr) {
1265 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1266 size_t data_size = prologue_size - code_size;
1269 log_disas(s->code_gen_ptr, code_size);
1271 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1272 if (sizeof(tcg_target_ulong) == 8) {
1273 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1274 (uintptr_t)s->data_gen_ptr + i,
1275 *(uint64_t *)(s->data_gen_ptr + i));
1277 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1278 (uintptr_t)s->data_gen_ptr + i,
1279 *(uint32_t *)(s->data_gen_ptr + i));
1283 log_disas(s->code_gen_ptr, prologue_size);
1287 qemu_log_unlock(logfile);
1291 /* Assert that goto_ptr is implemented completely. */
1292 if (TCG_TARGET_HAS_goto_ptr) {
1293 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1297 void tcg_func_start(TCGContext *s)
1300 s->nb_temps = s->nb_globals;
1302 /* No temps have been previously allocated for size or locality. */
1303 memset(s->free_temps, 0, sizeof(s->free_temps));
1305 /* No constant temps have been previously allocated. */
1306 for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1307 if (s->const_table[i]) {
1308 g_hash_table_remove_all(s->const_table[i]);
1314 s->current_frame_offset = s->frame_start;
1316 #ifdef CONFIG_DEBUG_TCG
1317 s->goto_tb_issue_mask = 0;
1320 QTAILQ_INIT(&s->ops);
1321 QTAILQ_INIT(&s->free_ops);
1322 QSIMPLEQ_INIT(&s->labels);
1325 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1327 int n = s->nb_temps++;
1329 if (n >= TCG_MAX_TEMPS) {
1330 tcg_raise_tb_overflow(s);
1332 return memset(&s->temps[n], 0, sizeof(TCGTemp));
1335 static TCGTemp *tcg_global_alloc(TCGContext *s)
1339 tcg_debug_assert(s->nb_globals == s->nb_temps);
1340 tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1342 ts = tcg_temp_alloc(s);
1343 ts->kind = TEMP_GLOBAL;
1348 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1349 TCGReg reg, const char *name)
1353 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1357 ts = tcg_global_alloc(s);
1358 ts->base_type = type;
1360 ts->kind = TEMP_FIXED;
1363 tcg_regset_set_reg(s->reserved_regs, reg);
1368 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1370 s->frame_start = start;
1371 s->frame_end = start + size;
1373 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1376 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1377 intptr_t offset, const char *name)
1379 TCGContext *s = tcg_ctx;
1380 TCGTemp *base_ts = tcgv_ptr_temp(base);
1381 TCGTemp *ts = tcg_global_alloc(s);
1382 int indirect_reg = 0, bigendian = 0;
1383 #ifdef HOST_WORDS_BIGENDIAN
1387 switch (base_ts->kind) {
1391 /* We do not support double-indirect registers. */
1392 tcg_debug_assert(!base_ts->indirect_reg);
1393 base_ts->indirect_base = 1;
1394 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1399 g_assert_not_reached();
1402 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1403 TCGTemp *ts2 = tcg_global_alloc(s);
1406 ts->base_type = TCG_TYPE_I64;
1407 ts->type = TCG_TYPE_I32;
1408 ts->indirect_reg = indirect_reg;
1409 ts->mem_allocated = 1;
1410 ts->mem_base = base_ts;
1411 ts->mem_offset = offset + bigendian * 4;
1412 pstrcpy(buf, sizeof(buf), name);
1413 pstrcat(buf, sizeof(buf), "_0");
1414 ts->name = strdup(buf);
1416 tcg_debug_assert(ts2 == ts + 1);
1417 ts2->base_type = TCG_TYPE_I64;
1418 ts2->type = TCG_TYPE_I32;
1419 ts2->indirect_reg = indirect_reg;
1420 ts2->mem_allocated = 1;
1421 ts2->mem_base = base_ts;
1422 ts2->mem_offset = offset + (1 - bigendian) * 4;
1423 pstrcpy(buf, sizeof(buf), name);
1424 pstrcat(buf, sizeof(buf), "_1");
1425 ts2->name = strdup(buf);
1427 ts->base_type = type;
1429 ts->indirect_reg = indirect_reg;
1430 ts->mem_allocated = 1;
1431 ts->mem_base = base_ts;
1432 ts->mem_offset = offset;
1438 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1440 TCGContext *s = tcg_ctx;
1441 TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1445 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
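/* free_temps keeps one bitmap per (type, locality) pair: the first
   TCG_TYPE_COUNT entries cover normal temps, the following ones cover
   TEMP_LOCAL temps of the same type (mirrored in tcg_temp_free_internal). */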
1446 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1447 if (idx < TCG_MAX_TEMPS) {
1448 /* There is already an available temp with the right type. */
1449 clear_bit(idx, s->free_temps[k].l);
1451 ts = &s->temps[idx];
1452 ts->temp_allocated = 1;
1453 tcg_debug_assert(ts->base_type == type);
1454 tcg_debug_assert(ts->kind == kind);
1456 ts = tcg_temp_alloc(s);
1457 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1458 TCGTemp *ts2 = tcg_temp_alloc(s);
1460 ts->base_type = type;
1461 ts->type = TCG_TYPE_I32;
1462 ts->temp_allocated = 1;
1465 tcg_debug_assert(ts2 == ts + 1);
1466 ts2->base_type = TCG_TYPE_I64;
1467 ts2->type = TCG_TYPE_I32;
1468 ts2->temp_allocated = 1;
1471 ts->base_type = type;
1473 ts->temp_allocated = 1;
1478 #if defined(CONFIG_DEBUG_TCG)
1484 TCGv_vec tcg_temp_new_vec(TCGType type)
1488 #ifdef CONFIG_DEBUG_TCG
1491 assert(TCG_TARGET_HAS_v64);
1494 assert(TCG_TARGET_HAS_v128);
1497 assert(TCG_TARGET_HAS_v256);
1500 g_assert_not_reached();
1504 t = tcg_temp_new_internal(type, 0);
1505 return temp_tcgv_vec(t);
1508 /* Create a new temp of the same type as an existing temp. */
1509 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1511 TCGTemp *t = tcgv_vec_temp(match);
1513 tcg_debug_assert(t->temp_allocated != 0);
1515 t = tcg_temp_new_internal(t->base_type, 0);
1516 return temp_tcgv_vec(t);
1519 void tcg_temp_free_internal(TCGTemp *ts)
1521 TCGContext *s = tcg_ctx;
1524 /* In order to simplify users of tcg_constant_*, silently ignore free. */
1525 if (ts->kind == TEMP_CONST) {
1529 #if defined(CONFIG_DEBUG_TCG)
1531 if (s->temps_in_use < 0) {
1532 fprintf(stderr, "More temporaries freed than allocated!\n");
1536 tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1537 tcg_debug_assert(ts->temp_allocated != 0);
1538 ts->temp_allocated = 0;
1541 k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1542 set_bit(idx, s->free_temps[k].l);
1545 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1547 TCGContext *s = tcg_ctx;
1548 GHashTable *h = s->const_table[type];
1552 h = g_hash_table_new(g_int64_hash, g_int64_equal);
1553 s->const_table[type] = h;
1556 ts = g_hash_table_lookup(h, &val);
1558 ts = tcg_temp_alloc(s);
1560 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1561 TCGTemp *ts2 = tcg_temp_alloc(s);
1563 ts->base_type = TCG_TYPE_I64;
1564 ts->type = TCG_TYPE_I32;
1565 ts->kind = TEMP_CONST;
1566 ts->temp_allocated = 1;
1568 * Retain the full value of the 64-bit constant in the low
1569 * part, so that the hash table works. Actual uses will
1570 * truncate the value to the low part.
1574 tcg_debug_assert(ts2 == ts + 1);
1575 ts2->base_type = TCG_TYPE_I64;
1576 ts2->type = TCG_TYPE_I32;
1577 ts2->kind = TEMP_CONST;
1578 ts2->temp_allocated = 1;
1579 ts2->val = val >> 32;
1581 ts->base_type = type;
1583 ts->kind = TEMP_CONST;
1584 ts->temp_allocated = 1;
1587 g_hash_table_insert(h, &ts->val, ts);
1593 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1595 val = dup_const(vece, val);
1596 return temp_tcgv_vec(tcg_constant_internal(type, val));
1599 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1601 TCGTemp *t = tcgv_vec_temp(match);
1603 tcg_debug_assert(t->temp_allocated != 0);
1604 return tcg_constant_vec(t->base_type, vece, val);
1607 TCGv_i32 tcg_const_i32(int32_t val)
1610 t0 = tcg_temp_new_i32();
1611 tcg_gen_movi_i32(t0, val);
1615 TCGv_i64 tcg_const_i64(int64_t val)
1618 t0 = tcg_temp_new_i64();
1619 tcg_gen_movi_i64(t0, val);
1623 TCGv_i32 tcg_const_local_i32(int32_t val)
1626 t0 = tcg_temp_local_new_i32();
1627 tcg_gen_movi_i32(t0, val);
1631 TCGv_i64 tcg_const_local_i64(int64_t val)
1634 t0 = tcg_temp_local_new_i64();
1635 tcg_gen_movi_i64(t0, val);
1639 #if defined(CONFIG_DEBUG_TCG)
1640 void tcg_clear_temp_count(void)
1642 TCGContext *s = tcg_ctx;
1643 s->temps_in_use = 0;
1646 int tcg_check_temp_count(void)
1648 TCGContext *s = tcg_ctx;
1649 if (s->temps_in_use) {
1650 /* Clear the count so that we don't give another
1651 * warning immediately next time around.
1653 s->temps_in_use = 0;
1660 /* Return true if OP may appear in the opcode stream.
1661 Test the runtime variable that controls each opcode. */
1662 bool tcg_op_supported(TCGOpcode op)
1665 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1668 case INDEX_op_discard:
1669 case INDEX_op_set_label:
1673 case INDEX_op_insn_start:
1674 case INDEX_op_exit_tb:
1675 case INDEX_op_goto_tb:
1676 case INDEX_op_qemu_ld_i32:
1677 case INDEX_op_qemu_st_i32:
1678 case INDEX_op_qemu_ld_i64:
1679 case INDEX_op_qemu_st_i64:
1682 case INDEX_op_qemu_st8_i32:
1683 return TCG_TARGET_HAS_qemu_st8_i32;
1685 case INDEX_op_goto_ptr:
1686 return TCG_TARGET_HAS_goto_ptr;
1688 case INDEX_op_mov_i32:
1689 case INDEX_op_setcond_i32:
1690 case INDEX_op_brcond_i32:
1691 case INDEX_op_ld8u_i32:
1692 case INDEX_op_ld8s_i32:
1693 case INDEX_op_ld16u_i32:
1694 case INDEX_op_ld16s_i32:
1695 case INDEX_op_ld_i32:
1696 case INDEX_op_st8_i32:
1697 case INDEX_op_st16_i32:
1698 case INDEX_op_st_i32:
1699 case INDEX_op_add_i32:
1700 case INDEX_op_sub_i32:
1701 case INDEX_op_mul_i32:
1702 case INDEX_op_and_i32:
1703 case INDEX_op_or_i32:
1704 case INDEX_op_xor_i32:
1705 case INDEX_op_shl_i32:
1706 case INDEX_op_shr_i32:
1707 case INDEX_op_sar_i32:
1710 case INDEX_op_movcond_i32:
1711 return TCG_TARGET_HAS_movcond_i32;
1712 case INDEX_op_div_i32:
1713 case INDEX_op_divu_i32:
1714 return TCG_TARGET_HAS_div_i32;
1715 case INDEX_op_rem_i32:
1716 case INDEX_op_remu_i32:
1717 return TCG_TARGET_HAS_rem_i32;
1718 case INDEX_op_div2_i32:
1719 case INDEX_op_divu2_i32:
1720 return TCG_TARGET_HAS_div2_i32;
1721 case INDEX_op_rotl_i32:
1722 case INDEX_op_rotr_i32:
1723 return TCG_TARGET_HAS_rot_i32;
1724 case INDEX_op_deposit_i32:
1725 return TCG_TARGET_HAS_deposit_i32;
1726 case INDEX_op_extract_i32:
1727 return TCG_TARGET_HAS_extract_i32;
1728 case INDEX_op_sextract_i32:
1729 return TCG_TARGET_HAS_sextract_i32;
1730 case INDEX_op_extract2_i32:
1731 return TCG_TARGET_HAS_extract2_i32;
1732 case INDEX_op_add2_i32:
1733 return TCG_TARGET_HAS_add2_i32;
1734 case INDEX_op_sub2_i32:
1735 return TCG_TARGET_HAS_sub2_i32;
1736 case INDEX_op_mulu2_i32:
1737 return TCG_TARGET_HAS_mulu2_i32;
1738 case INDEX_op_muls2_i32:
1739 return TCG_TARGET_HAS_muls2_i32;
1740 case INDEX_op_muluh_i32:
1741 return TCG_TARGET_HAS_muluh_i32;
1742 case INDEX_op_mulsh_i32:
1743 return TCG_TARGET_HAS_mulsh_i32;
1744 case INDEX_op_ext8s_i32:
1745 return TCG_TARGET_HAS_ext8s_i32;
1746 case INDEX_op_ext16s_i32:
1747 return TCG_TARGET_HAS_ext16s_i32;
1748 case INDEX_op_ext8u_i32:
1749 return TCG_TARGET_HAS_ext8u_i32;
1750 case INDEX_op_ext16u_i32:
1751 return TCG_TARGET_HAS_ext16u_i32;
1752 case INDEX_op_bswap16_i32:
1753 return TCG_TARGET_HAS_bswap16_i32;
1754 case INDEX_op_bswap32_i32:
1755 return TCG_TARGET_HAS_bswap32_i32;
1756 case INDEX_op_not_i32:
1757 return TCG_TARGET_HAS_not_i32;
1758 case INDEX_op_neg_i32:
1759 return TCG_TARGET_HAS_neg_i32;
1760 case INDEX_op_andc_i32:
1761 return TCG_TARGET_HAS_andc_i32;
1762 case INDEX_op_orc_i32:
1763 return TCG_TARGET_HAS_orc_i32;
1764 case INDEX_op_eqv_i32:
1765 return TCG_TARGET_HAS_eqv_i32;
1766 case INDEX_op_nand_i32:
1767 return TCG_TARGET_HAS_nand_i32;
1768 case INDEX_op_nor_i32:
1769 return TCG_TARGET_HAS_nor_i32;
1770 case INDEX_op_clz_i32:
1771 return TCG_TARGET_HAS_clz_i32;
1772 case INDEX_op_ctz_i32:
1773 return TCG_TARGET_HAS_ctz_i32;
1774 case INDEX_op_ctpop_i32:
1775 return TCG_TARGET_HAS_ctpop_i32;
1777 case INDEX_op_brcond2_i32:
1778 case INDEX_op_setcond2_i32:
1779 return TCG_TARGET_REG_BITS == 32;
1781 case INDEX_op_mov_i64:
1782 case INDEX_op_setcond_i64:
1783 case INDEX_op_brcond_i64:
1784 case INDEX_op_ld8u_i64:
1785 case INDEX_op_ld8s_i64:
1786 case INDEX_op_ld16u_i64:
1787 case INDEX_op_ld16s_i64:
1788 case INDEX_op_ld32u_i64:
1789 case INDEX_op_ld32s_i64:
1790 case INDEX_op_ld_i64:
1791 case INDEX_op_st8_i64:
1792 case INDEX_op_st16_i64:
1793 case INDEX_op_st32_i64:
1794 case INDEX_op_st_i64:
1795 case INDEX_op_add_i64:
1796 case INDEX_op_sub_i64:
1797 case INDEX_op_mul_i64:
1798 case INDEX_op_and_i64:
1799 case INDEX_op_or_i64:
1800 case INDEX_op_xor_i64:
1801 case INDEX_op_shl_i64:
1802 case INDEX_op_shr_i64:
1803 case INDEX_op_sar_i64:
1804 case INDEX_op_ext_i32_i64:
1805 case INDEX_op_extu_i32_i64:
1806 return TCG_TARGET_REG_BITS == 64;
1808 case INDEX_op_movcond_i64:
1809 return TCG_TARGET_HAS_movcond_i64;
1810 case INDEX_op_div_i64:
1811 case INDEX_op_divu_i64:
1812 return TCG_TARGET_HAS_div_i64;
1813 case INDEX_op_rem_i64:
1814 case INDEX_op_remu_i64:
1815 return TCG_TARGET_HAS_rem_i64;
1816 case INDEX_op_div2_i64:
1817 case INDEX_op_divu2_i64:
1818 return TCG_TARGET_HAS_div2_i64;
1819 case INDEX_op_rotl_i64:
1820 case INDEX_op_rotr_i64:
1821 return TCG_TARGET_HAS_rot_i64;
1822 case INDEX_op_deposit_i64:
1823 return TCG_TARGET_HAS_deposit_i64;
1824 case INDEX_op_extract_i64:
1825 return TCG_TARGET_HAS_extract_i64;
1826 case INDEX_op_sextract_i64:
1827 return TCG_TARGET_HAS_sextract_i64;
1828 case INDEX_op_extract2_i64:
1829 return TCG_TARGET_HAS_extract2_i64;
1830 case INDEX_op_extrl_i64_i32:
1831 return TCG_TARGET_HAS_extrl_i64_i32;
1832 case INDEX_op_extrh_i64_i32:
1833 return TCG_TARGET_HAS_extrh_i64_i32;
1834 case INDEX_op_ext8s_i64:
1835 return TCG_TARGET_HAS_ext8s_i64;
1836 case INDEX_op_ext16s_i64:
1837 return TCG_TARGET_HAS_ext16s_i64;
1838 case INDEX_op_ext32s_i64:
1839 return TCG_TARGET_HAS_ext32s_i64;
1840 case INDEX_op_ext8u_i64:
1841 return TCG_TARGET_HAS_ext8u_i64;
1842 case INDEX_op_ext16u_i64:
1843 return TCG_TARGET_HAS_ext16u_i64;
1844 case INDEX_op_ext32u_i64:
1845 return TCG_TARGET_HAS_ext32u_i64;
1846 case INDEX_op_bswap16_i64:
1847 return TCG_TARGET_HAS_bswap16_i64;
1848 case INDEX_op_bswap32_i64:
1849 return TCG_TARGET_HAS_bswap32_i64;
1850 case INDEX_op_bswap64_i64:
1851 return TCG_TARGET_HAS_bswap64_i64;
1852 case INDEX_op_not_i64:
1853 return TCG_TARGET_HAS_not_i64;
1854 case INDEX_op_neg_i64:
1855 return TCG_TARGET_HAS_neg_i64;
1856 case INDEX_op_andc_i64:
1857 return TCG_TARGET_HAS_andc_i64;
1858 case INDEX_op_orc_i64:
1859 return TCG_TARGET_HAS_orc_i64;
1860 case INDEX_op_eqv_i64:
1861 return TCG_TARGET_HAS_eqv_i64;
1862 case INDEX_op_nand_i64:
1863 return TCG_TARGET_HAS_nand_i64;
1864 case INDEX_op_nor_i64:
1865 return TCG_TARGET_HAS_nor_i64;
1866 case INDEX_op_clz_i64:
1867 return TCG_TARGET_HAS_clz_i64;
1868 case INDEX_op_ctz_i64:
1869 return TCG_TARGET_HAS_ctz_i64;
1870 case INDEX_op_ctpop_i64:
1871 return TCG_TARGET_HAS_ctpop_i64;
1872 case INDEX_op_add2_i64:
1873 return TCG_TARGET_HAS_add2_i64;
1874 case INDEX_op_sub2_i64:
1875 return TCG_TARGET_HAS_sub2_i64;
1876 case INDEX_op_mulu2_i64:
1877 return TCG_TARGET_HAS_mulu2_i64;
1878 case INDEX_op_muls2_i64:
1879 return TCG_TARGET_HAS_muls2_i64;
1880 case INDEX_op_muluh_i64:
1881 return TCG_TARGET_HAS_muluh_i64;
1882 case INDEX_op_mulsh_i64:
1883 return TCG_TARGET_HAS_mulsh_i64;
1885 case INDEX_op_mov_vec:
1886 case INDEX_op_dup_vec:
1887 case INDEX_op_dupm_vec:
1888 case INDEX_op_ld_vec:
1889 case INDEX_op_st_vec:
1890 case INDEX_op_add_vec:
1891 case INDEX_op_sub_vec:
1892 case INDEX_op_and_vec:
1893 case INDEX_op_or_vec:
1894 case INDEX_op_xor_vec:
1895 case INDEX_op_cmp_vec:
1897 case INDEX_op_dup2_vec:
1898 return have_vec && TCG_TARGET_REG_BITS == 32;
1899 case INDEX_op_not_vec:
1900 return have_vec && TCG_TARGET_HAS_not_vec;
1901 case INDEX_op_neg_vec:
1902 return have_vec && TCG_TARGET_HAS_neg_vec;
1903 case INDEX_op_abs_vec:
1904 return have_vec && TCG_TARGET_HAS_abs_vec;
1905 case INDEX_op_andc_vec:
1906 return have_vec && TCG_TARGET_HAS_andc_vec;
1907 case INDEX_op_orc_vec:
1908 return have_vec && TCG_TARGET_HAS_orc_vec;
1909 case INDEX_op_mul_vec:
1910 return have_vec && TCG_TARGET_HAS_mul_vec;
1911 case INDEX_op_shli_vec:
1912 case INDEX_op_shri_vec:
1913 case INDEX_op_sari_vec:
1914 return have_vec && TCG_TARGET_HAS_shi_vec;
1915 case INDEX_op_shls_vec:
1916 case INDEX_op_shrs_vec:
1917 case INDEX_op_sars_vec:
1918 return have_vec && TCG_TARGET_HAS_shs_vec;
1919 case INDEX_op_shlv_vec:
1920 case INDEX_op_shrv_vec:
1921 case INDEX_op_sarv_vec:
1922 return have_vec && TCG_TARGET_HAS_shv_vec;
1923 case INDEX_op_rotli_vec:
1924 return have_vec && TCG_TARGET_HAS_roti_vec;
1925 case INDEX_op_rotls_vec:
1926 return have_vec && TCG_TARGET_HAS_rots_vec;
1927 case INDEX_op_rotlv_vec:
1928 case INDEX_op_rotrv_vec:
1929 return have_vec && TCG_TARGET_HAS_rotv_vec;
1930 case INDEX_op_ssadd_vec:
1931 case INDEX_op_usadd_vec:
1932 case INDEX_op_sssub_vec:
1933 case INDEX_op_ussub_vec:
1934 return have_vec && TCG_TARGET_HAS_sat_vec;
1935 case INDEX_op_smin_vec:
1936 case INDEX_op_umin_vec:
1937 case INDEX_op_smax_vec:
1938 case INDEX_op_umax_vec:
1939 return have_vec && TCG_TARGET_HAS_minmax_vec;
1940 case INDEX_op_bitsel_vec:
1941 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1942 case INDEX_op_cmpsel_vec:
1943 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1946 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1951 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1952 and endian swap. Maybe it would be better to do the alignment
1953 and endian swap in tcg_reg_alloc_call(). */
1954 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1956 int i, real_args, nb_rets, pi;
1957 unsigned sizemask, flags;
1958 TCGHelperInfo *info;
1961 info = g_hash_table_lookup(helper_table, (gpointer)func);
1962 flags = info->flags;
1963 sizemask = info->sizemask;
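/* sizemask packs two bits per value: bit 0 is set for a 64-bit return and
   bit 1 for a signed one, while bits (i+1)*2 and (i+1)*2 + 1 describe
   argument i the same way (see the is_64bit / is_signed tests below). */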
1965 #ifdef CONFIG_PLUGIN
1966 /* detect non-plugin helpers */
1967 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1968 tcg_ctx->plugin_insn->calls_helpers = true;
1972 #if defined(__sparc__) && !defined(__arch64__) \
1973 && !defined(CONFIG_TCG_INTERPRETER)
1974 /* We have 64-bit values in one register, but need to pass as two
1975 separate parameters. Split them. */
1976 int orig_sizemask = sizemask;
1977 int orig_nargs = nargs;
1978 TCGv_i64 retl, reth;
1979 TCGTemp *split_args[MAX_OPC_PARAM];
1983 if (sizemask != 0) {
1984 for (i = real_args = 0; i < nargs; ++i) {
1985 int is_64bit = sizemask & (1 << (i+1)*2);
1987 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1988 TCGv_i32 h = tcg_temp_new_i32();
1989 TCGv_i32 l = tcg_temp_new_i32();
1990 tcg_gen_extr_i64_i32(l, h, orig);
1991 split_args[real_args++] = tcgv_i32_temp(h);
1992 split_args[real_args++] = tcgv_i32_temp(l);
1994 split_args[real_args++] = args[i];
2001 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2002 for (i = 0; i < nargs; ++i) {
2003 int is_64bit = sizemask & (1 << (i+1)*2);
2004 int is_signed = sizemask & (2 << (i+1)*2);
2006 TCGv_i64 temp = tcg_temp_new_i64();
2007 TCGv_i64 orig = temp_tcgv_i64(args[i]);
2009 tcg_gen_ext32s_i64(temp, orig);
2011 tcg_gen_ext32u_i64(temp, orig);
2013 args[i] = tcgv_i64_temp(temp);
2016 #endif /* TCG_TARGET_EXTEND_ARGS */
2018 op = tcg_emit_op(INDEX_op_call);
2022 #if defined(__sparc__) && !defined(__arch64__) \
2023 && !defined(CONFIG_TCG_INTERPRETER)
2024 if (orig_sizemask & 1) {
2025 /* The 32-bit ABI is going to return the 64-bit value in
2026 the %o0/%o1 register pair. Prepare for this by using
2027 two return temporaries, and reassemble below. */
2028 retl = tcg_temp_new_i64();
2029 reth = tcg_temp_new_i64();
2030 op->args[pi++] = tcgv_i64_arg(reth);
2031 op->args[pi++] = tcgv_i64_arg(retl);
2034 op->args[pi++] = temp_arg(ret);
2038 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2039 #ifdef HOST_WORDS_BIGENDIAN
2040 op->args[pi++] = temp_arg(ret + 1);
2041 op->args[pi++] = temp_arg(ret);
2043 op->args[pi++] = temp_arg(ret);
2044 op->args[pi++] = temp_arg(ret + 1);
2048 op->args[pi++] = temp_arg(ret);
2055 TCGOP_CALLO(op) = nb_rets;
2058 for (i = 0; i < nargs; i++) {
2059 int is_64bit = sizemask & (1 << (i+1)*2);
2060 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2061 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2062 /* some targets want aligned 64 bit args */
2063 if (real_args & 1) {
2064 op->args[pi++] = TCG_CALL_DUMMY_ARG;
2068 /* If stack grows up, then we will be placing successive
2069 arguments at lower addresses, which means we need to
2070 reverse the order compared to how we would normally
2071 treat either big or little-endian. For those arguments
2072 that will wind up in registers, this still works for
2073 HPPA (the only current STACK_GROWSUP target) since the
2074 argument registers are *also* allocated in decreasing
2075 order. If another such target is added, this logic may
2076 have to get more complicated to differentiate between
2077 stack arguments and register arguments. */
2078 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2079 op->args[pi++] = temp_arg(args[i] + 1);
2080 op->args[pi++] = temp_arg(args[i]);
2082 op->args[pi++] = temp_arg(args[i]);
2083 op->args[pi++] = temp_arg(args[i] + 1);
2089 op->args[pi++] = temp_arg(args[i]);
2092 op->args[pi++] = (uintptr_t)func;
2093 op->args[pi++] = flags;
2094 TCGOP_CALLI(op) = real_args;
2096 /* Make sure the fields didn't overflow. */
2097 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2098 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2100 #if defined(__sparc__) && !defined(__arch64__) \
2101 && !defined(CONFIG_TCG_INTERPRETER)
2102 /* Free all of the parts we allocated above. */
2103 for (i = real_args = 0; i < orig_nargs; ++i) {
2104 int is_64bit = orig_sizemask & (1 << (i+1)*2);
2106 tcg_temp_free_internal(args[real_args++]);
2107 tcg_temp_free_internal(args[real_args++]);
2112 if (orig_sizemask & 1) {
2113 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
2114 Note that describing these as TCGv_i64 eliminates an unnecessary
2115 zero-extension that tcg_gen_concat_i32_i64 would create. */
2116 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2117 tcg_temp_free_i64(retl);
2118 tcg_temp_free_i64(reth);
2120 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2121 for (i = 0; i < nargs; ++i) {
2122 int is_64bit = sizemask & (1 << (i+1)*2);
2124 tcg_temp_free_internal(args[i]);
2127 #endif /* TCG_TARGET_EXTEND_ARGS */
2130 static void tcg_reg_alloc_start(TCGContext *s)
2134 for (i = 0, n = s->nb_temps; i < n; i++) {
2135 TCGTemp *ts = &s->temps[i];
2136 TCGTempVal val = TEMP_VAL_MEM;
2140 val = TEMP_VAL_CONST;
2148 val = TEMP_VAL_DEAD;
2151 ts->mem_allocated = 0;
2154 g_assert_not_reached();
2159 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2162 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2165 int idx = temp_idx(ts);
2170 pstrcpy(buf, buf_size, ts->name);
2173 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2176 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2181 snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2183 #if TCG_TARGET_REG_BITS > 32
2185 snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2191 snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2192 64 << (ts->type - TCG_TYPE_V64), ts->val);
2195 g_assert_not_reached();
2202 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2203 int buf_size, TCGArg arg)
2205 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2208 /* Find helper name. */
2209 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2211 const char *ret = NULL;
2213 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2221 static const char * const cond_name[] =
2223 [TCG_COND_NEVER] = "never",
2224 [TCG_COND_ALWAYS] = "always",
2225 [TCG_COND_EQ] = "eq",
2226 [TCG_COND_NE] = "ne",
2227 [TCG_COND_LT] = "lt",
2228 [TCG_COND_GE] = "ge",
2229 [TCG_COND_LE] = "le",
2230 [TCG_COND_GT] = "gt",
2231 [TCG_COND_LTU] = "ltu",
2232 [TCG_COND_GEU] = "geu",
2233 [TCG_COND_LEU] = "leu",
2234 [TCG_COND_GTU] = "gtu"
2237 static const char * const ldst_name[] =
2253 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2254 #ifdef TARGET_ALIGNED_ONLY
2255 [MO_UNALN >> MO_ASHIFT] = "un+",
2256 [MO_ALIGN >> MO_ASHIFT] = "",
2258 [MO_UNALN >> MO_ASHIFT] = "",
2259 [MO_ALIGN >> MO_ASHIFT] = "al+",
2261 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2262 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2263 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2264 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2265 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2266 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2269 static inline bool tcg_regset_single(TCGRegSet d)
2271 return (d & (d - 1)) == 0;
2274 static inline TCGReg tcg_regset_first(TCGRegSet d)
2276 if (TCG_TARGET_NB_REGS <= 32) {
2283 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2288 QTAILQ_FOREACH(op, &s->ops, link) {
2289 int i, k, nb_oargs, nb_iargs, nb_cargs;
2290 const TCGOpDef *def;
2295 def = &tcg_op_defs[c];
2297 if (c == INDEX_op_insn_start) {
2299 col += qemu_log("\n ----");
2301 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2303 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2304 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2308 col += qemu_log(" " TARGET_FMT_lx, a);
2310 } else if (c == INDEX_op_call) {
2311 /* variable number of arguments */
2312 nb_oargs = TCGOP_CALLO(op);
2313 nb_iargs = TCGOP_CALLI(op);
2314 nb_cargs = def->nb_cargs;
2316 /* function name, flags, out args */
2317 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2318 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2319 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2320 for (i = 0; i < nb_oargs; i++) {
2321 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2324 for (i = 0; i < nb_iargs; i++) {
2325 TCGArg arg = op->args[nb_oargs + i];
2326 const char *t = "<dummy>";
2327 if (arg != TCG_CALL_DUMMY_ARG) {
2328 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2330 col += qemu_log(",%s", t);
2333 col += qemu_log(" %s ", def->name);
2335 nb_oargs = def->nb_oargs;
2336 nb_iargs = def->nb_iargs;
2337 nb_cargs = def->nb_cargs;
2339 if (def->flags & TCG_OPF_VECTOR) {
2340 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2341 8 << TCGOP_VECE(op));
2345 for (i = 0; i < nb_oargs; i++) {
2347 col += qemu_log(",");
2349 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2352 for (i = 0; i < nb_iargs; i++) {
2354 col += qemu_log(",");
2356 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2360 case INDEX_op_brcond_i32:
2361 case INDEX_op_setcond_i32:
2362 case INDEX_op_movcond_i32:
2363 case INDEX_op_brcond2_i32:
2364 case INDEX_op_setcond2_i32:
2365 case INDEX_op_brcond_i64:
2366 case INDEX_op_setcond_i64:
2367 case INDEX_op_movcond_i64:
2368 case INDEX_op_cmp_vec:
2369 case INDEX_op_cmpsel_vec:
2370 if (op->args[k] < ARRAY_SIZE(cond_name)
2371 && cond_name[op->args[k]]) {
2372 col += qemu_log(",%s", cond_name[op->args[k++]]);
2374 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2378 case INDEX_op_qemu_ld_i32:
2379 case INDEX_op_qemu_st_i32:
2380 case INDEX_op_qemu_st8_i32:
2381 case INDEX_op_qemu_ld_i64:
2382 case INDEX_op_qemu_st_i64:
2384 TCGMemOpIdx oi = op->args[k++];
2385 MemOp op = get_memop(oi);
2386 unsigned ix = get_mmuidx(oi);
2388 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2389 col += qemu_log(",$0x%x,%u", op, ix);
2391 const char *s_al, *s_op;
2392 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2393 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2394 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
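                /*
                 * Illustrative output (a sketch, assuming the usual memop
                 * names): a little-endian aligned 32-bit load in mmu index 1
                 * prints as ",al+leul,1" (",leul,1" when alignment is the
                 * default), while unrecognised bits fall back to the raw
                 * ",$0x...,ix" form above.
                 */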
2404 case INDEX_op_set_label:
2406 case INDEX_op_brcond_i32:
2407 case INDEX_op_brcond_i64:
2408 case INDEX_op_brcond2_i32:
2409 col += qemu_log("%s$L%d", k ? "," : "",
2410 arg_label(op->args[k])->id);
2416 for (; i < nb_cargs; i++, k++) {
2417 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2421 if (have_prefs || op->life) {
2423 QemuLogFile *logfile;
2426 logfile = qatomic_rcu_read(&qemu_logfile);
2428 for (; col < 40; ++col) {
2429 putc(' ', logfile->fd);
2436 unsigned life = op->life;
2438 if (life & (SYNC_ARG * 3)) {
2440 for (i = 0; i < 2; ++i) {
2441 if (life & (SYNC_ARG << i)) {
2449 for (i = 0; life; ++i, life >>= 1) {
2458 for (i = 0; i < nb_oargs; ++i) {
2459 TCGRegSet set = op->output_pref[i];
2468 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2470 #ifdef CONFIG_DEBUG_TCG
2471 } else if (tcg_regset_single(set)) {
2472 TCGReg reg = tcg_regset_first(set);
2473 qemu_log("%s", tcg_target_reg_names[reg]);
2475 } else if (TCG_TARGET_NB_REGS <= 32) {
2476 qemu_log("%#x", (uint32_t)set);
2478 qemu_log("%#" PRIx64, (uint64_t)set);
2487 /* we give more priority to constraints with fewer registers */
2488 static int get_constraint_priority(const TCGOpDef *def, int k)
2490 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2493 if (arg_ct->oalias) {
2494 /* an alias is equivalent to a single register */
2497 n = ctpop64(arg_ct->regs);
2499 return TCG_TARGET_NB_REGS - n + 1;
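/*
 * Worked example (illustrative): on a host with 16 allocatable registers,
 * a constraint accepting all 16 yields priority 16 - 16 + 1 = 1, while a
 * single-register constraint (or an output alias) yields 16 - 1 + 1 = 16,
 * so the tighter constraint is handled first by sort_constraints().
 */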
2502 /* sort from highest priority to lowest */
2503 static void sort_constraints(TCGOpDef *def, int start, int n)
2506 TCGArgConstraint *a = def->args_ct;
2508 for (i = 0; i < n; i++) {
2509 a[start + i].sort_index = start + i;
2514 for (i = 0; i < n - 1; i++) {
2515 for (j = i + 1; j < n; j++) {
2516 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2517 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2519 int tmp = a[start + i].sort_index;
2520 a[start + i].sort_index = a[start + j].sort_index;
2521 a[start + j].sort_index = tmp;
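            /*
             * Illustrative note: this simple O(n^2) sort only permutes
             * sort_index, i.e. the order in which tcg_reg_alloc_op() will
             * visit the arguments; the constraints themselves stay at their
             * original argument positions.  E.g. with inputs (in1, in2)
             * where in2 accepts a single register and in1 accepts any,
             * the visit order becomes in2, then in1.
             */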
2527 static void process_op_defs(TCGContext *s)
2531 for (op = 0; op < NB_OPS; op++) {
2532 TCGOpDef *def = &tcg_op_defs[op];
2533 const TCGTargetOpDef *tdefs;
2536 if (def->flags & TCG_OPF_NOT_PRESENT) {
2540 nb_args = def->nb_iargs + def->nb_oargs;
2546 * Macro magic should make it impossible, but double-check that
2547      * the array index is in range. Since the signedness of an enum
2548 * is implementation defined, force the result to unsigned.
2550 unsigned con_set = tcg_target_op_def(op);
2551 tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2552 tdefs = &constraint_sets[con_set];
2554 for (i = 0; i < nb_args; i++) {
2555 const char *ct_str = tdefs->args_ct_str[i];
2556 /* Incomplete TCGTargetOpDef entry. */
2557 tcg_debug_assert(ct_str != NULL);
2559 while (*ct_str != '\0') {
2563 int oarg = *ct_str - '0';
2564 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2565 tcg_debug_assert(oarg < def->nb_oargs);
2566 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2567 def->args_ct[i] = def->args_ct[oarg];
2568 /* The output sets oalias. */
2569 def->args_ct[oarg].oalias = true;
2570 def->args_ct[oarg].alias_index = i;
2571 /* The input sets ialias. */
2572 def->args_ct[i].ialias = true;
2573 def->args_ct[i].alias_index = oarg;
2578 def->args_ct[i].newreg = true;
2582 def->args_ct[i].ct |= TCG_CT_CONST;
2586 /* Include all of the target-specific constraints. */
2589 #define CONST(CASE, MASK) \
2590 case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2591 #define REGS(CASE, MASK) \
2592 case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2594 #include "tcg-target-con-str.h"
2599 /* Typo in TCGTargetOpDef constraint. */
2600 g_assert_not_reached();
2605 /* TCGTargetOpDef entry with too much information? */
2606 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2608         /* sort the constraints (XXX: this is just a heuristic) */
2609 sort_constraints(def, 0, def->nb_oargs);
2610 sort_constraints(def, def->nb_oargs, def->nb_iargs);
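        /*
         * Illustrative example (constraint names borrowed from the i386
         * backend purely as an example): a set such as C_O1_I2(r, 0, re)
         * expands to the per-argument strings { "r", "0", "re" }.  "r"
         * selects a register file via a REGS() entry in
         * tcg-target-con-str.h, "0" makes the input alias output 0 via the
         * digit case above, and the extra "e" accepts a sign-extended
         * 32-bit immediate via a CONST() entry.
         */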
2614 void tcg_op_remove(TCGContext *s, TCGOp *op)
2620 label = arg_label(op->args[0]);
2623 case INDEX_op_brcond_i32:
2624 case INDEX_op_brcond_i64:
2625 label = arg_label(op->args[3]);
2628 case INDEX_op_brcond2_i32:
2629 label = arg_label(op->args[5]);
2636 QTAILQ_REMOVE(&s->ops, op, link);
2637 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2640 #ifdef CONFIG_PROFILER
2641 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2645 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2647 TCGContext *s = tcg_ctx;
2650 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2651 op = tcg_malloc(sizeof(TCGOp));
2653 op = QTAILQ_FIRST(&s->free_ops);
2654 QTAILQ_REMOVE(&s->free_ops, op, link);
2656 memset(op, 0, offsetof(TCGOp, link));
2663 TCGOp *tcg_emit_op(TCGOpcode opc)
2665 TCGOp *op = tcg_op_alloc(opc);
2666 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2670 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2672 TCGOp *new_op = tcg_op_alloc(opc);
2673 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2677 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2679 TCGOp *new_op = tcg_op_alloc(opc);
2680 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2684 /* Reachability analysis: remove unreachable code. */
2685 static void reachable_code_pass(TCGContext *s)
2687 TCGOp *op, *op_next;
2690 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2696 case INDEX_op_set_label:
2697 label = arg_label(op->args[0]);
2698 if (label->refs == 0) {
2700 * While there is an occasional backward branch, virtually
2701 * all branches generated by the translators are forward.
2702                  * Which means that, by the time we encounter a label
2703                  * here, we will generally have already removed all
2704                  * references to it, and there is little to be gained
                      * by iterating.
2708 /* Once we see a label, insns become live again. */
2713 * Optimization can fold conditional branches to unconditional.
2714 * If we find a label with one reference which is preceded by
2715              * an unconditional branch to it, remove both.  This has to
2716              * wait until the dead code between them has been removed.
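            /*
             * Illustrative sequence (sketch): once the optimizer has turned
             *     brcond_i32 t0, t1, cond, $L3
             * into
             *     br $L3
             * and the dead ops up to "set_label $L3" were removed by earlier
             * iterations of this loop, the remaining
             *     br $L3
             *     set_label $L3
             * pair is deleted here as well, since falling through is
             * equivalent.
             */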
2718 if (label->refs == 1) {
2719 TCGOp *op_prev = QTAILQ_PREV(op, link);
2720 if (op_prev->opc == INDEX_op_br &&
2721 label == arg_label(op_prev->args[0])) {
2722 tcg_op_remove(s, op_prev);
2730 case INDEX_op_exit_tb:
2731 case INDEX_op_goto_ptr:
2732 /* Unconditional branches; everything following is dead. */
2737 /* Notice noreturn helper calls, raising exceptions. */
2738 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2739 if (call_flags & TCG_CALL_NO_RETURN) {
2744 case INDEX_op_insn_start:
2745 /* Never remove -- we need to keep these for unwind. */
2754 tcg_op_remove(s, op);
2762 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2763 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2765 /* For liveness_pass_1, the register preferences for a given temp. */
2766 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2768 return ts->state_ptr;
2771 /* For liveness_pass_1, reset the preferences for a given temp to the
2772 * maximal regset for its type.
2774 static inline void la_reset_pref(TCGTemp *ts)
2777 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2780 /* liveness analysis: end of function: all temps are dead, and globals
2781 should be in memory. */
2782 static void la_func_end(TCGContext *s, int ng, int nt)
2786 for (i = 0; i < ng; ++i) {
2787 s->temps[i].state = TS_DEAD | TS_MEM;
2788 la_reset_pref(&s->temps[i]);
2790 for (i = ng; i < nt; ++i) {
2791 s->temps[i].state = TS_DEAD;
2792 la_reset_pref(&s->temps[i]);
2796 /* liveness analysis: end of basic block: all temps are dead, globals
2797 and local temps should be in memory. */
2798 static void la_bb_end(TCGContext *s, int ng, int nt)
2802 for (i = 0; i < nt; ++i) {
2803 TCGTemp *ts = &s->temps[i];
2810 state = TS_DEAD | TS_MEM;
2817 g_assert_not_reached();
2824 /* liveness analysis: sync globals back to memory. */
2825 static void la_global_sync(TCGContext *s, int ng)
2829 for (i = 0; i < ng; ++i) {
2830 int state = s->temps[i].state;
2831 s->temps[i].state = state | TS_MEM;
2832 if (state == TS_DEAD) {
2833 /* If the global was previously dead, reset prefs. */
2834 la_reset_pref(&s->temps[i]);
2840 * liveness analysis: conditional branch: all temps are dead,
2841 * globals and local temps should be synced.
2843 static void la_bb_sync(TCGContext *s, int ng, int nt)
2845 la_global_sync(s, ng);
2847 for (int i = ng; i < nt; ++i) {
2848 TCGTemp *ts = &s->temps[i];
2854 ts->state = state | TS_MEM;
2855 if (state != TS_DEAD) {
2860 s->temps[i].state = TS_DEAD;
2865 g_assert_not_reached();
2867 la_reset_pref(&s->temps[i]);
2871 /* liveness analysis: sync globals back to memory and kill. */
2872 static void la_global_kill(TCGContext *s, int ng)
2876 for (i = 0; i < ng; i++) {
2877 s->temps[i].state = TS_DEAD | TS_MEM;
2878 la_reset_pref(&s->temps[i]);
2882 /* liveness analysis: note live globals crossing calls. */
2883 static void la_cross_call(TCGContext *s, int nt)
2885 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2888 for (i = 0; i < nt; i++) {
2889 TCGTemp *ts = &s->temps[i];
2890 if (!(ts->state & TS_DEAD)) {
2891 TCGRegSet *pset = la_temp_pref(ts);
2892 TCGRegSet set = *pset;
2895 /* If the combination is not possible, restart. */
2897 set = tcg_target_available_regs[ts->type] & mask;
2904 /* Liveness analysis: update the opc_arg_life array to tell if a
2905    given input argument is dead. Instructions updating dead
2906    temporaries are removed. */
2907 static void liveness_pass_1(TCGContext *s)
2909 int nb_globals = s->nb_globals;
2910 int nb_temps = s->nb_temps;
2911 TCGOp *op, *op_prev;
2915 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2916 for (i = 0; i < nb_temps; ++i) {
2917 s->temps[i].state_ptr = prefs + i;
2920 /* ??? Should be redundant with the exit_tb that ends the TB. */
2921 la_func_end(s, nb_globals, nb_temps);
2923 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2924 int nb_iargs, nb_oargs;
2925 TCGOpcode opc_new, opc_new2;
2927 TCGLifeData arg_life = 0;
2929 TCGOpcode opc = op->opc;
2930 const TCGOpDef *def = &tcg_op_defs[opc];
2938 nb_oargs = TCGOP_CALLO(op);
2939 nb_iargs = TCGOP_CALLI(op);
2940 call_flags = op->args[nb_oargs + nb_iargs + 1];
2942 /* pure functions can be removed if their result is unused */
2943 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2944 for (i = 0; i < nb_oargs; i++) {
2945 ts = arg_temp(op->args[i]);
2946 if (ts->state != TS_DEAD) {
2947 goto do_not_remove_call;
2954 /* Output args are dead. */
2955 for (i = 0; i < nb_oargs; i++) {
2956 ts = arg_temp(op->args[i]);
2957 if (ts->state & TS_DEAD) {
2958 arg_life |= DEAD_ARG << i;
2960 if (ts->state & TS_MEM) {
2961 arg_life |= SYNC_ARG << i;
2963 ts->state = TS_DEAD;
2966 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2967 op->output_pref[i] = 0;
2970 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2971 TCG_CALL_NO_READ_GLOBALS))) {
2972 la_global_kill(s, nb_globals);
2973 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2974 la_global_sync(s, nb_globals);
2977 /* Record arguments that die in this helper. */
2978 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2979 ts = arg_temp(op->args[i]);
2980 if (ts && ts->state & TS_DEAD) {
2981 arg_life |= DEAD_ARG << i;
2985 /* For all live registers, remove call-clobbered prefs. */
2986 la_cross_call(s, nb_temps);
2988 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2990 /* Input arguments are live for preceding opcodes. */
2991 for (i = 0; i < nb_iargs; i++) {
2992 ts = arg_temp(op->args[i + nb_oargs]);
2993 if (ts && ts->state & TS_DEAD) {
2994 /* For those arguments that die, and will be allocated
2995 * in registers, clear the register set for that arg,
2996 * to be filled in below. For args that will be on
2997 * the stack, reset to any available reg.
3000 = (i < nb_call_regs ? 0 :
3001 tcg_target_available_regs[ts->type]);
3002 ts->state &= ~TS_DEAD;
3006 /* For each input argument, add its input register to prefs.
3007 If a temp is used once, this produces a single set bit. */
3008 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3009 ts = arg_temp(op->args[i + nb_oargs]);
3011 tcg_regset_set_reg(*la_temp_pref(ts),
3012 tcg_target_call_iarg_regs[i]);
3017 case INDEX_op_insn_start:
3019 case INDEX_op_discard:
3020 /* mark the temporary as dead */
3021 ts = arg_temp(op->args[0]);
3022 ts->state = TS_DEAD;
3026 case INDEX_op_add2_i32:
3027 opc_new = INDEX_op_add_i32;
3029 case INDEX_op_sub2_i32:
3030 opc_new = INDEX_op_sub_i32;
3032 case INDEX_op_add2_i64:
3033 opc_new = INDEX_op_add_i64;
3035 case INDEX_op_sub2_i64:
3036 opc_new = INDEX_op_sub_i64;
3040 /* Test if the high part of the operation is dead, but not
3041 the low part. The result can be optimized to a simple
3042                add or sub. This happens often for an x86_64 guest when
3043                the CPU mode is set to 32-bit. */
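            /*
             * Illustrative example: "add2_i32 lo, hi, al, ah, bl, bh" with a
             * dead "hi" is rewritten below into "add_i32 lo, al, bl" by
             * shifting args[2] and args[4] into the args[1]/args[2] slots;
             * if "lo" is dead as well, the whole op is removed instead.
             */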
3044 if (arg_temp(op->args[1])->state == TS_DEAD) {
3045 if (arg_temp(op->args[0])->state == TS_DEAD) {
3048 /* Replace the opcode and adjust the args in place,
3049 leaving 3 unused args at the end. */
3050 op->opc = opc = opc_new;
3051 op->args[1] = op->args[2];
3052 op->args[2] = op->args[4];
3053 /* Fall through and mark the single-word operation live. */
3059 case INDEX_op_mulu2_i32:
3060 opc_new = INDEX_op_mul_i32;
3061 opc_new2 = INDEX_op_muluh_i32;
3062 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3064 case INDEX_op_muls2_i32:
3065 opc_new = INDEX_op_mul_i32;
3066 opc_new2 = INDEX_op_mulsh_i32;
3067 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3069 case INDEX_op_mulu2_i64:
3070 opc_new = INDEX_op_mul_i64;
3071 opc_new2 = INDEX_op_muluh_i64;
3072 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3074 case INDEX_op_muls2_i64:
3075 opc_new = INDEX_op_mul_i64;
3076 opc_new2 = INDEX_op_mulsh_i64;
3077 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3082 if (arg_temp(op->args[1])->state == TS_DEAD) {
3083 if (arg_temp(op->args[0])->state == TS_DEAD) {
3084 /* Both parts of the operation are dead. */
3087 /* The high part of the operation is dead; generate the low. */
3088 op->opc = opc = opc_new;
3089 op->args[1] = op->args[2];
3090 op->args[2] = op->args[3];
3091 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3092 /* The low part of the operation is dead; generate the high. */
3093 op->opc = opc = opc_new2;
3094 op->args[0] = op->args[1];
3095 op->args[1] = op->args[2];
3096 op->args[2] = op->args[3];
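            /*
             * Illustrative example: for "mulu2_i32 lo, hi, a, b", a dead
             * "hi" yields "mul_i32 lo, a, b", while a dead "lo" yields
             * "muluh_i32 hi, a, b" instead, provided TCG_TARGET_HAS_muluh_i32
             * reports that the host implements it.
             */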
3100 /* Mark the single-word operation live. */
3105 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3106 nb_iargs = def->nb_iargs;
3107 nb_oargs = def->nb_oargs;
3109 /* Test if the operation can be removed because all
3110 its outputs are dead. We assume that nb_oargs == 0
3111                implies side effects. */
3112 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3113 for (i = 0; i < nb_oargs; i++) {
3114 if (arg_temp(op->args[i])->state != TS_DEAD) {
3123 tcg_op_remove(s, op);
3127 for (i = 0; i < nb_oargs; i++) {
3128 ts = arg_temp(op->args[i]);
3130 /* Remember the preference of the uses that followed. */
3131 op->output_pref[i] = *la_temp_pref(ts);
3133 /* Output args are dead. */
3134 if (ts->state & TS_DEAD) {
3135 arg_life |= DEAD_ARG << i;
3137 if (ts->state & TS_MEM) {
3138 arg_life |= SYNC_ARG << i;
3140 ts->state = TS_DEAD;
3144 /* If end of basic block, update. */
3145 if (def->flags & TCG_OPF_BB_EXIT) {
3146 la_func_end(s, nb_globals, nb_temps);
3147 } else if (def->flags & TCG_OPF_COND_BRANCH) {
3148 la_bb_sync(s, nb_globals, nb_temps);
3149 } else if (def->flags & TCG_OPF_BB_END) {
3150 la_bb_end(s, nb_globals, nb_temps);
3151 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3152 la_global_sync(s, nb_globals);
3153 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3154 la_cross_call(s, nb_temps);
3158 /* Record arguments that die in this opcode. */
3159 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3160 ts = arg_temp(op->args[i]);
3161 if (ts->state & TS_DEAD) {
3162 arg_life |= DEAD_ARG << i;
3166 /* Input arguments are live for preceding opcodes. */
3167 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3168 ts = arg_temp(op->args[i]);
3169 if (ts->state & TS_DEAD) {
3170 /* For operands that were dead, initially allow
3171 all regs for the type. */
3172 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3173 ts->state &= ~TS_DEAD;
3177 /* Incorporate constraints for this operand. */
3179 case INDEX_op_mov_i32:
3180 case INDEX_op_mov_i64:
3181 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3182 have proper constraints. That said, special case
3183 moves to propagate preferences backward. */
3184 if (IS_DEAD_ARG(1)) {
3185 *la_temp_pref(arg_temp(op->args[0]))
3186 = *la_temp_pref(arg_temp(op->args[1]));
3191 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3192 const TCGArgConstraint *ct = &def->args_ct[i];
3193 TCGRegSet set, *pset;
3195 ts = arg_temp(op->args[i]);
3196 pset = la_temp_pref(ts);
3201 set &= op->output_pref[ct->alias_index];
3203 /* If the combination is not possible, restart. */
3213 op->life = arg_life;
3217 /* Liveness analysis: Convert indirect regs to direct temporaries. */
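/*
 * Illustrative sketch of the transformation: an indirect global "x" kept in
 * memory at mem_base + mem_offset is shadowed by a new direct temp x'.  A
 * use such as
 *     add_i32 t2, x, t1
 * becomes
 *     ld_i32  x', env, $0x...      (only when the shadow copy is not valid)
 *     add_i32 t2, x', t1
 * and a write to x gains a matching "st_i32 x', env, $0x..." after it, so
 * that memory remains the authoritative copy around calls and branches.
 */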
3218 static bool liveness_pass_2(TCGContext *s)
3220 int nb_globals = s->nb_globals;
3222 bool changes = false;
3223 TCGOp *op, *op_next;
3225 /* Create a temporary for each indirect global. */
3226 for (i = 0; i < nb_globals; ++i) {
3227 TCGTemp *its = &s->temps[i];
3228 if (its->indirect_reg) {
3229 TCGTemp *dts = tcg_temp_alloc(s);
3230 dts->type = its->type;
3231 dts->base_type = its->base_type;
3232 its->state_ptr = dts;
3234 its->state_ptr = NULL;
3236 /* All globals begin dead. */
3237 its->state = TS_DEAD;
3239 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3240 TCGTemp *its = &s->temps[i];
3241 its->state_ptr = NULL;
3242 its->state = TS_DEAD;
3245 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3246 TCGOpcode opc = op->opc;
3247 const TCGOpDef *def = &tcg_op_defs[opc];
3248 TCGLifeData arg_life = op->life;
3249 int nb_iargs, nb_oargs, call_flags;
3250 TCGTemp *arg_ts, *dir_ts;
3252 if (opc == INDEX_op_call) {
3253 nb_oargs = TCGOP_CALLO(op);
3254 nb_iargs = TCGOP_CALLI(op);
3255 call_flags = op->args[nb_oargs + nb_iargs + 1];
3257 nb_iargs = def->nb_iargs;
3258 nb_oargs = def->nb_oargs;
3260 /* Set flags similar to how calls require. */
3261 if (def->flags & TCG_OPF_COND_BRANCH) {
3262 /* Like reading globals: sync_globals */
3263 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3264 } else if (def->flags & TCG_OPF_BB_END) {
3265 /* Like writing globals: save_globals */
3267 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3268 /* Like reading globals: sync_globals */
3269 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3271 /* No effect on globals. */
3272 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3273 TCG_CALL_NO_WRITE_GLOBALS);
3277 /* Make sure that input arguments are available. */
3278 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3279 arg_ts = arg_temp(op->args[i]);
3281 dir_ts = arg_ts->state_ptr;
3282 if (dir_ts && arg_ts->state == TS_DEAD) {
3283 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3286 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3288 lop->args[0] = temp_arg(dir_ts);
3289 lop->args[1] = temp_arg(arg_ts->mem_base);
3290 lop->args[2] = arg_ts->mem_offset;
3292 /* Loaded, but synced with memory. */
3293 arg_ts->state = TS_MEM;
3298 /* Perform input replacement, and mark inputs that became dead.
3299 No action is required except keeping temp_state up to date
3300 so that we reload when needed. */
3301 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3302 arg_ts = arg_temp(op->args[i]);
3304 dir_ts = arg_ts->state_ptr;
3306 op->args[i] = temp_arg(dir_ts);
3308 if (IS_DEAD_ARG(i)) {
3309 arg_ts->state = TS_DEAD;
3315 /* Liveness analysis should ensure that the following are
3316 all correct, for call sites and basic block end points. */
3317 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3319 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3320 for (i = 0; i < nb_globals; ++i) {
3321 /* Liveness should see that globals are synced back,
3322 that is, either TS_DEAD or TS_MEM. */
3323 arg_ts = &s->temps[i];
3324 tcg_debug_assert(arg_ts->state_ptr == 0
3325 || arg_ts->state != 0);
3328 for (i = 0; i < nb_globals; ++i) {
3329 /* Liveness should see that globals are saved back,
3330 that is, TS_DEAD, waiting to be reloaded. */
3331 arg_ts = &s->temps[i];
3332 tcg_debug_assert(arg_ts->state_ptr == 0
3333 || arg_ts->state == TS_DEAD);
3337 /* Outputs become available. */
3338 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3339 arg_ts = arg_temp(op->args[0]);
3340 dir_ts = arg_ts->state_ptr;
3342 op->args[0] = temp_arg(dir_ts);
3345 /* The output is now live and modified. */
3348 if (NEED_SYNC_ARG(0)) {
3349 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3352 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3353 TCGTemp *out_ts = dir_ts;
3355 if (IS_DEAD_ARG(0)) {
3356 out_ts = arg_temp(op->args[1]);
3357 arg_ts->state = TS_DEAD;
3358 tcg_op_remove(s, op);
3360 arg_ts->state = TS_MEM;
3363 sop->args[0] = temp_arg(out_ts);
3364 sop->args[1] = temp_arg(arg_ts->mem_base);
3365 sop->args[2] = arg_ts->mem_offset;
3367 tcg_debug_assert(!IS_DEAD_ARG(0));
3371 for (i = 0; i < nb_oargs; i++) {
3372 arg_ts = arg_temp(op->args[i]);
3373 dir_ts = arg_ts->state_ptr;
3377 op->args[i] = temp_arg(dir_ts);
3380 /* The output is now live and modified. */
3383 /* Sync outputs upon their last write. */
3384 if (NEED_SYNC_ARG(i)) {
3385 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3388 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3390 sop->args[0] = temp_arg(dir_ts);
3391 sop->args[1] = temp_arg(arg_ts->mem_base);
3392 sop->args[2] = arg_ts->mem_offset;
3394 arg_ts->state = TS_MEM;
3396 /* Drop outputs that are dead. */
3397 if (IS_DEAD_ARG(i)) {
3398 arg_ts->state = TS_DEAD;
3407 #ifdef CONFIG_DEBUG_TCG
3408 static void dump_regs(TCGContext *s)
3414     for (i = 0; i < s->nb_temps; i++) {
3416 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3417         switch (ts->val_type) {
3419 printf("%s", tcg_target_reg_names[ts->reg]);
3422 printf("%d(%s)", (int)ts->mem_offset,
3423 tcg_target_reg_names[ts->mem_base->reg]);
3425 case TEMP_VAL_CONST:
3426 printf("$0x%" PRIx64, ts->val);
3438     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3439 if (s->reg_to_temp[i] != NULL) {
3441 tcg_target_reg_names[i],
3442 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3447 static void check_regs(TCGContext *s)
3454 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3455 ts = s->reg_to_temp[reg];
3457 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3458 printf("Inconsistency for register %s:\n",
3459 tcg_target_reg_names[reg]);
3464 for (k = 0; k < s->nb_temps; k++) {
3466 if (ts->val_type == TEMP_VAL_REG
3467 && ts->kind != TEMP_FIXED
3468 && s->reg_to_temp[ts->reg] != ts) {
3469 printf("Inconsistency for temp %s:\n",
3470 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3472 printf("reg state:\n");
3480 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3482 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3483     /* Sparc64 stack is accessed with an offset of 2047 */
3484 s->current_frame_offset = (s->current_frame_offset +
3485 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3486 ~(sizeof(tcg_target_long) - 1);
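    /*
     * Worked example (illustrative): on a 64-bit host a current offset of
     * 13 rounds up as (13 + 7) & ~7 = 16 before the 8-byte slot for this
     * temp is carved out below.
     */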
3488 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3492 ts->mem_offset = s->current_frame_offset;
3493 ts->mem_base = s->frame_temp;
3494 ts->mem_allocated = 1;
3495 s->current_frame_offset += sizeof(tcg_target_long);
3498 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3500 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3501 mark it free; otherwise mark it dead. */
3502 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3504 TCGTempVal new_type;
3511 new_type = TEMP_VAL_MEM;
3514 new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3517 new_type = TEMP_VAL_CONST;
3520 g_assert_not_reached();
3522 if (ts->val_type == TEMP_VAL_REG) {
3523 s->reg_to_temp[ts->reg] = NULL;
3525 ts->val_type = new_type;
3528 /* Mark a temporary as dead. */
3529 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3531 temp_free_or_dead(s, ts, 1);
3534 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3535    register needs to be allocated to store a constant. If 'free_or_dead'
3536 is non-zero, subsequently release the temporary; if it is positive, the
3537 temp is dead; if it is negative, the temp is free. */
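/*
 * Example of the sign convention (illustrative): tcg_reg_free() below calls
 * temp_sync(s, ts, allocated_regs, 0, -1), i.e. "store the value back if it
 * is not memory-coherent, then mark the temp free"; passing 1 instead would
 * mark the temp dead after the sync, and 0 would keep it live.
 */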
3538 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3539 TCGRegSet preferred_regs, int free_or_dead)
3541 if (!temp_readonly(ts) && !ts->mem_coherent) {
3542 if (!ts->mem_allocated) {
3543 temp_allocate_frame(s, ts);
3545 switch (ts->val_type) {
3546 case TEMP_VAL_CONST:
3547 /* If we're going to free the temp immediately, then we won't
3548 require it later in a register, so attempt to store the
3549 constant to memory directly. */
3551 && tcg_out_sti(s, ts->type, ts->val,
3552 ts->mem_base->reg, ts->mem_offset)) {
3555 temp_load(s, ts, tcg_target_available_regs[ts->type],
3556 allocated_regs, preferred_regs);
3560 tcg_out_st(s, ts->type, ts->reg,
3561 ts->mem_base->reg, ts->mem_offset);
3571 ts->mem_coherent = 1;
3574 temp_free_or_dead(s, ts, free_or_dead);
3578 /* free register 'reg' by spilling the corresponding temporary if necessary */
3579 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3581 TCGTemp *ts = s->reg_to_temp[reg];
3583 temp_sync(s, ts, allocated_regs, 0, -1);
3589 * @required_regs: Set of registers in which we must allocate.
3590 * @allocated_regs: Set of registers which must be avoided.
3591 * @preferred_regs: Set of registers we should prefer.
3592 * @rev: True if we search the registers in "indirect" order.
3594 * The allocated register must be in @required_regs & ~@allocated_regs,
3595 * but if we can put it in @preferred_regs we may save a move later.
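/*
 * Worked example (illustrative): with required_regs = { r0..r3 },
 * allocated_regs = { r0 } and preferred_regs = { r2 }, the candidate sets
 * below become reg_ct[1] = { r1, r2, r3 } and reg_ct[0] = { r2 }.  A free
 * r2 is taken first; failing that, any free register from reg_ct[1];
 * failing that, one of them is spilled via tcg_reg_free().
 */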
3597 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3598 TCGRegSet allocated_regs,
3599 TCGRegSet preferred_regs, bool rev)
3601 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3602 TCGRegSet reg_ct[2];
3605 reg_ct[1] = required_regs & ~allocated_regs;
3606 tcg_debug_assert(reg_ct[1] != 0);
3607 reg_ct[0] = reg_ct[1] & preferred_regs;
3609 /* Skip the preferred_regs option if it cannot be satisfied,
3610 or if the preference made no difference. */
3611 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3613 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3615 /* Try free registers, preferences first. */
3616 for (j = f; j < 2; j++) {
3617 TCGRegSet set = reg_ct[j];
3619 if (tcg_regset_single(set)) {
3620 /* One register in the set. */
3621 TCGReg reg = tcg_regset_first(set);
3622 if (s->reg_to_temp[reg] == NULL) {
3626 for (i = 0; i < n; i++) {
3627 TCGReg reg = order[i];
3628 if (s->reg_to_temp[reg] == NULL &&
3629 tcg_regset_test_reg(set, reg)) {
3636 /* We must spill something. */
3637 for (j = f; j < 2; j++) {
3638 TCGRegSet set = reg_ct[j];
3640 if (tcg_regset_single(set)) {
3641 /* One register in the set. */
3642 TCGReg reg = tcg_regset_first(set);
3643 tcg_reg_free(s, reg, allocated_regs);
3646 for (i = 0; i < n; i++) {
3647 TCGReg reg = order[i];
3648 if (tcg_regset_test_reg(set, reg)) {
3649 tcg_reg_free(s, reg, allocated_regs);
3659 /* Make sure the temporary is in a register. If needed, allocate the register
3660 from DESIRED while avoiding ALLOCATED. */
3661 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3662 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3666 switch (ts->val_type) {
3669 case TEMP_VAL_CONST:
3670 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3671 preferred_regs, ts->indirect_base);
3672 if (ts->type <= TCG_TYPE_I64) {
3673 tcg_out_movi(s, ts->type, reg, ts->val);
3675 uint64_t val = ts->val;
3679 * Find the minimal vector element that matches the constant.
3680              * The targets will, in general, have to do this search anyway,
3681              * so do it generically here.
3683 if (val == dup_const(MO_8, val)) {
3685 } else if (val == dup_const(MO_16, val)) {
3687 } else if (val == dup_const(MO_32, val)) {
3691 tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
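            /*
             * Worked example (illustrative): val = 0x1234123412341234 equals
             * dup_const(MO_16, 0x1234), so vece = MO_16 is chosen; a value
             * such as 0x0123456789abcdef matches none of the narrower
             * patterns and is emitted as a full 64-bit dupi.
             */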
3693 ts->mem_coherent = 0;
3696 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3697 preferred_regs, ts->indirect_base);
3698 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3699 ts->mem_coherent = 1;
3706 ts->val_type = TEMP_VAL_REG;
3707 s->reg_to_temp[reg] = ts;
3710 /* Save a temporary to memory. 'allocated_regs' is used in case a
3711    temporary register needs to be allocated to store a constant. */
3712 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3714 /* The liveness analysis already ensures that globals are back
3715    in memory. Keep a tcg_debug_assert for safety. */
3716 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3719 /* save globals to their canonical location and assume they can be
3720    modified by the following code. 'allocated_regs' is used in case a
3721    temporary register needs to be allocated to store a constant. */
3722 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3726 for (i = 0, n = s->nb_globals; i < n; i++) {
3727 temp_save(s, &s->temps[i], allocated_regs);
3731 /* sync globals to their canonical location and assume they can be
3732 read by the following code. 'allocated_regs' is used in case a
3733    temporary register needs to be allocated to store a constant. */
3734 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3738 for (i = 0, n = s->nb_globals; i < n; i++) {
3739 TCGTemp *ts = &s->temps[i];
3740 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3741 || ts->kind == TEMP_FIXED
3742 || ts->mem_coherent);
3746 /* at the end of a basic block, we assume all temporaries are dead and
3747 all globals are stored at their canonical location. */
3748 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3752 for (i = s->nb_globals; i < s->nb_temps; i++) {
3753 TCGTemp *ts = &s->temps[i];
3757 temp_save(s, ts, allocated_regs);
3760 /* The liveness analysis already ensures that temps are dead.
3761        Keep a tcg_debug_assert for safety. */
3762 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3765 /* Similarly, we should have freed any allocated register. */
3766 tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3769 g_assert_not_reached();
3773 save_globals(s, allocated_regs);
3777 * At a conditional branch, we assume all temporaries are dead and
3778 * all globals and local temps are synced to their location.
3780 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3782 sync_globals(s, allocated_regs);
3784 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3785 TCGTemp *ts = &s->temps[i];
3787 * The liveness analysis already ensures that temps are dead.
3788 * Keep tcg_debug_asserts for safety.
3792 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3795 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3800 g_assert_not_reached();
3806 * Specialized code generation for INDEX_op_mov_* with a constant.
3808 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3809 tcg_target_ulong val, TCGLifeData arg_life,
3810 TCGRegSet preferred_regs)
3812 /* ENV should not be modified. */
3813 tcg_debug_assert(!temp_readonly(ots));
3815 /* The movi is not explicitly generated here. */
3816 if (ots->val_type == TEMP_VAL_REG) {
3817 s->reg_to_temp[ots->reg] = NULL;
3819 ots->val_type = TEMP_VAL_CONST;
3821 ots->mem_coherent = 0;
3822 if (NEED_SYNC_ARG(0)) {
3823 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3824 } else if (IS_DEAD_ARG(0)) {
3830 * Specialized code generation for INDEX_op_mov_*.
3832 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3834 const TCGLifeData arg_life = op->life;
3835 TCGRegSet allocated_regs, preferred_regs;
3837 TCGType otype, itype;
3839 allocated_regs = s->reserved_regs;
3840 preferred_regs = op->output_pref[0];
3841 ots = arg_temp(op->args[0]);
3842 ts = arg_temp(op->args[1]);
3844 /* ENV should not be modified. */
3845 tcg_debug_assert(!temp_readonly(ots));
3847 /* Note that otype != itype for no-op truncation. */
3851 if (ts->val_type == TEMP_VAL_CONST) {
3852 /* propagate constant or generate sti */
3853 tcg_target_ulong val = ts->val;
3854 if (IS_DEAD_ARG(1)) {
3857 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3861 /* If the source value is in memory we're going to be forced
3862 to have it in a register in order to perform the copy. Copy
3863       the SOURCE value into its own register first, so that we
3864       don't have to reload SOURCE the next time it is used. */
3865 if (ts->val_type == TEMP_VAL_MEM) {
3866 temp_load(s, ts, tcg_target_available_regs[itype],
3867 allocated_regs, preferred_regs);
3870 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3871 if (IS_DEAD_ARG(0)) {
3872 /* mov to a non-saved dead register makes no sense (even with
3873 liveness analysis disabled). */
3874 tcg_debug_assert(NEED_SYNC_ARG(0));
3875 if (!ots->mem_allocated) {
3876 temp_allocate_frame(s, ots);
3878 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3879 if (IS_DEAD_ARG(1)) {
3884 if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3885 /* the mov can be suppressed */
3886 if (ots->val_type == TEMP_VAL_REG) {
3887 s->reg_to_temp[ots->reg] = NULL;
3892 if (ots->val_type != TEMP_VAL_REG) {
3893 /* When allocating a new register, make sure to not spill the
3895 tcg_regset_set_reg(allocated_regs, ts->reg);
3896 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3897 allocated_regs, preferred_regs,
3898 ots->indirect_base);
3900 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3902 * Cross register class move not supported.
3903 * Store the source register into the destination slot
3904 * and leave the destination temp as TEMP_VAL_MEM.
3906 assert(!temp_readonly(ots));
3907 if (!ts->mem_allocated) {
3908 temp_allocate_frame(s, ots);
3910 tcg_out_st(s, ts->type, ts->reg,
3911 ots->mem_base->reg, ots->mem_offset);
3912 ots->mem_coherent = 1;
3913 temp_free_or_dead(s, ots, -1);
3917 ots->val_type = TEMP_VAL_REG;
3918 ots->mem_coherent = 0;
3919 s->reg_to_temp[ots->reg] = ots;
3920 if (NEED_SYNC_ARG(0)) {
3921 temp_sync(s, ots, allocated_regs, 0, 0);
3927 * Specialized code generation for INDEX_op_dup_vec.
3929 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3931 const TCGLifeData arg_life = op->life;
3932 TCGRegSet dup_out_regs, dup_in_regs;
3934 TCGType itype, vtype;
3935 intptr_t endian_fixup;
3939 ots = arg_temp(op->args[0]);
3940 its = arg_temp(op->args[1]);
3942 /* ENV should not be modified. */
3943 tcg_debug_assert(!temp_readonly(ots));
3946 vece = TCGOP_VECE(op);
3947 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3949 if (its->val_type == TEMP_VAL_CONST) {
3950 /* Propagate constant via movi -> dupi. */
3951 tcg_target_ulong val = its->val;
3952 if (IS_DEAD_ARG(1)) {
3955 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3959 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3960 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3962 /* Allocate the output register now. */
3963 if (ots->val_type != TEMP_VAL_REG) {
3964 TCGRegSet allocated_regs = s->reserved_regs;
3966 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3967 /* Make sure to not spill the input register. */
3968 tcg_regset_set_reg(allocated_regs, its->reg);
3970 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3971 op->output_pref[0], ots->indirect_base);
3972 ots->val_type = TEMP_VAL_REG;
3973 ots->mem_coherent = 0;
3974 s->reg_to_temp[ots->reg] = ots;
3977 switch (its->val_type) {
3980          * The dup constraints must be broad, covering all possible VECE.
3981          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3982 * to fail, indicating that extra moves are required for that case.
3984 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3985 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3988 /* Try again from memory or a vector input register. */
3990 if (!its->mem_coherent) {
3992 * The input register is not synced, and so an extra store
3993 * would be required to use memory. Attempt an integer-vector
3994 * register move first. We do not have a TCGRegSet for this.
3996 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3999 /* Sync the temp back to its slot and load from there. */
4000 temp_sync(s, its, s->reserved_regs, 0, 0);
4005 #ifdef HOST_WORDS_BIGENDIAN
4006 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4007 endian_fixup -= 1 << vece;
4011 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4012 its->mem_offset + endian_fixup)) {
4015 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4019 g_assert_not_reached();
4022 /* We now have a vector input register, so dup must succeed. */
4023 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4024 tcg_debug_assert(ok);
4027 if (IS_DEAD_ARG(1)) {
4030 if (NEED_SYNC_ARG(0)) {
4031 temp_sync(s, ots, s->reserved_regs, 0, 0);
4033 if (IS_DEAD_ARG(0)) {
4038 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4040 const TCGLifeData arg_life = op->life;
4041 const TCGOpDef * const def = &tcg_op_defs[op->opc];
4042 TCGRegSet i_allocated_regs;
4043 TCGRegSet o_allocated_regs;
4044 int i, k, nb_iargs, nb_oargs;
4047 const TCGArgConstraint *arg_ct;
4049 TCGArg new_args[TCG_MAX_OP_ARGS];
4050 int const_args[TCG_MAX_OP_ARGS];
4052 nb_oargs = def->nb_oargs;
4053 nb_iargs = def->nb_iargs;
4055 /* copy constants */
4056 memcpy(new_args + nb_oargs + nb_iargs,
4057 op->args + nb_oargs + nb_iargs,
4058 sizeof(TCGArg) * def->nb_cargs);
4060 i_allocated_regs = s->reserved_regs;
4061 o_allocated_regs = s->reserved_regs;
4063 /* satisfy input constraints */
4064 for (k = 0; k < nb_iargs; k++) {
4065 TCGRegSet i_preferred_regs, o_preferred_regs;
4067 i = def->args_ct[nb_oargs + k].sort_index;
4069 arg_ct = &def->args_ct[i];
4072 if (ts->val_type == TEMP_VAL_CONST
4073 && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
4074 /* constant is OK for instruction */
4076 new_args[i] = ts->val;
4080 i_preferred_regs = o_preferred_regs = 0;
4081 if (arg_ct->ialias) {
4082 o_preferred_regs = op->output_pref[arg_ct->alias_index];
4085 * If the input is readonly, then it cannot also be an
4086 * output and aliased to itself. If the input is not
4087 * dead after the instruction, we must allocate a new
4088 * register and move it.
4090 if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4091 goto allocate_in_reg;
4095 * Check if the current register has already been allocated
4096 * for another input aliased to an output.
4098 if (ts->val_type == TEMP_VAL_REG) {
4100 for (int k2 = 0; k2 < k; k2++) {
4101 int i2 = def->args_ct[nb_oargs + k2].sort_index;
4102 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4103 goto allocate_in_reg;
4107 i_preferred_regs = o_preferred_regs;
4110 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4113 if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4116 * Allocate a new register matching the constraint
4117 * and move the temporary register into it.
4119 temp_load(s, ts, tcg_target_available_regs[ts->type],
4120 i_allocated_regs, 0);
4121 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4122 o_preferred_regs, ts->indirect_base);
4123 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4125 * Cross register class move not supported. Sync the
4126 * temp back to its slot and load from there.
4128 temp_sync(s, ts, i_allocated_regs, 0, 0);
4129 tcg_out_ld(s, ts->type, reg,
4130 ts->mem_base->reg, ts->mem_offset);
4135 tcg_regset_set_reg(i_allocated_regs, reg);
4138 /* mark dead temporaries and free the associated registers */
4139 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4140 if (IS_DEAD_ARG(i)) {
4141 temp_dead(s, arg_temp(op->args[i]));
4145 if (def->flags & TCG_OPF_COND_BRANCH) {
4146 tcg_reg_alloc_cbranch(s, i_allocated_regs);
4147 } else if (def->flags & TCG_OPF_BB_END) {
4148 tcg_reg_alloc_bb_end(s, i_allocated_regs);
4150 if (def->flags & TCG_OPF_CALL_CLOBBER) {
4151 /* XXX: permit generic clobber register list ? */
4152 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4153 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4154 tcg_reg_free(s, i, i_allocated_regs);
4158 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4159 /* sync globals if the op has side effects and might trigger
4161 sync_globals(s, i_allocated_regs);
4164 /* satisfy the output constraints */
4165     for (k = 0; k < nb_oargs; k++) {
4166 i = def->args_ct[k].sort_index;
4168 arg_ct = &def->args_ct[i];
4171 /* ENV should not be modified. */
4172 tcg_debug_assert(!temp_readonly(ts));
4174 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4175 reg = new_args[arg_ct->alias_index];
4176 } else if (arg_ct->newreg) {
4177 reg = tcg_reg_alloc(s, arg_ct->regs,
4178 i_allocated_regs | o_allocated_regs,
4179 op->output_pref[k], ts->indirect_base);
4181 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4182 op->output_pref[k], ts->indirect_base);
4184 tcg_regset_set_reg(o_allocated_regs, reg);
4185 if (ts->val_type == TEMP_VAL_REG) {
4186 s->reg_to_temp[ts->reg] = NULL;
4188 ts->val_type = TEMP_VAL_REG;
4191 * Temp value is modified, so the value kept in memory is
4192 * potentially not the same.
4194 ts->mem_coherent = 0;
4195 s->reg_to_temp[reg] = ts;
4200 /* emit instruction */
4201 if (def->flags & TCG_OPF_VECTOR) {
4202 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4203 new_args, const_args);
4205 tcg_out_op(s, op->opc, new_args, const_args);
4208 /* move the outputs in the correct register if needed */
4209     for (i = 0; i < nb_oargs; i++) {
4210 ts = arg_temp(op->args[i]);
4212 /* ENV should not be modified. */
4213 tcg_debug_assert(!temp_readonly(ts));
4215 if (NEED_SYNC_ARG(i)) {
4216 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4217 } else if (IS_DEAD_ARG(i)) {
4223 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4225 const TCGLifeData arg_life = op->life;
4226 TCGTemp *ots, *itsl, *itsh;
4227 TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4229 /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4230 tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4231 tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4233 ots = arg_temp(op->args[0]);
4234 itsl = arg_temp(op->args[1]);
4235 itsh = arg_temp(op->args[2]);
4237 /* ENV should not be modified. */
4238 tcg_debug_assert(!temp_readonly(ots));
4240 /* Allocate the output register now. */
4241 if (ots->val_type != TEMP_VAL_REG) {
4242 TCGRegSet allocated_regs = s->reserved_regs;
4243 TCGRegSet dup_out_regs =
4244 tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4246 /* Make sure to not spill the input registers. */
4247 if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4248 tcg_regset_set_reg(allocated_regs, itsl->reg);
4250 if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4251 tcg_regset_set_reg(allocated_regs, itsh->reg);
4254 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4255 op->output_pref[0], ots->indirect_base);
4256 ots->val_type = TEMP_VAL_REG;
4257 ots->mem_coherent = 0;
4258 s->reg_to_temp[ots->reg] = ots;
4261 /* Promote dup2 of immediates to dupi_vec. */
4262 if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4263 uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4266 if (val == dup_const(MO_8, val)) {
4268 } else if (val == dup_const(MO_16, val)) {
4270 } else if (val == dup_const(MO_32, val)) {
4274 tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
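        /*
         * Worked example (illustrative): itsl->val = 0xdeadbeef and
         * itsh->val = 0xdeadbeef combine into val = 0xdeadbeefdeadbeef,
         * which equals dup_const(MO_32, 0xdeadbeef), so a single MO_32 dupi
         * is emitted instead of a two-register dup2 expansion.
         */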
4278 /* If the two inputs form one 64-bit value, try dupm_vec. */
4279 if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4280 if (!itsl->mem_coherent) {
4281 temp_sync(s, itsl, s->reserved_regs, 0, 0);
4283 if (!itsh->mem_coherent) {
4284 temp_sync(s, itsh, s->reserved_regs, 0, 0);
4286 #ifdef HOST_WORDS_BIGENDIAN
4287 TCGTemp *its = itsh;
4289 TCGTemp *its = itsl;
4291 if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4292 its->mem_base->reg, its->mem_offset)) {
4297 /* Fall back to generic expansion. */
4301 if (IS_DEAD_ARG(1)) {
4304 if (IS_DEAD_ARG(2)) {
4307 if (NEED_SYNC_ARG(0)) {
4308 temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4309 } else if (IS_DEAD_ARG(0)) {
4315 #ifdef TCG_TARGET_STACK_GROWSUP
4316 #define STACK_DIR(x) (-(x))
4318 #define STACK_DIR(x) (x)
4321 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4323 const int nb_oargs = TCGOP_CALLO(op);
4324 const int nb_iargs = TCGOP_CALLI(op);
4325 const TCGLifeData arg_life = op->life;
4326 int flags, nb_regs, i;
4330 intptr_t stack_offset;
4331 size_t call_stack_size;
4332 tcg_insn_unit *func_addr;
4334 TCGRegSet allocated_regs;
4336 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4337 flags = op->args[nb_oargs + nb_iargs + 1];
4339 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4340 if (nb_regs > nb_iargs) {
4344 /* assign stack slots first */
4345 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4346 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4347 ~(TCG_TARGET_STACK_ALIGN - 1);
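    /*
     * Worked example (illustrative): on a 64-bit host with 6 argument
     * registers and TCG_TARGET_STACK_ALIGN == 16, a call taking 9 integer
     * arguments leaves 3 for the stack: 3 * 8 = 24 bytes, rounded up to 32.
     */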
4348 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4349 if (allocate_args) {
4350 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4351 preallocate call stack */
4355 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4356 for (i = nb_regs; i < nb_iargs; i++) {
4357 arg = op->args[nb_oargs + i];
4358 #ifdef TCG_TARGET_STACK_GROWSUP
4359 stack_offset -= sizeof(tcg_target_long);
4361 if (arg != TCG_CALL_DUMMY_ARG) {
4363 temp_load(s, ts, tcg_target_available_regs[ts->type],
4364 s->reserved_regs, 0);
4365 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4367 #ifndef TCG_TARGET_STACK_GROWSUP
4368 stack_offset += sizeof(tcg_target_long);
4372 /* assign input registers */
4373 allocated_regs = s->reserved_regs;
4374 for (i = 0; i < nb_regs; i++) {
4375 arg = op->args[nb_oargs + i];
4376 if (arg != TCG_CALL_DUMMY_ARG) {
4378 reg = tcg_target_call_iarg_regs[i];
4380 if (ts->val_type == TEMP_VAL_REG) {
4381 if (ts->reg != reg) {
4382 tcg_reg_free(s, reg, allocated_regs);
4383 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4385 * Cross register class move not supported. Sync the
4386 * temp back to its slot and load from there.
4388 temp_sync(s, ts, allocated_regs, 0, 0);
4389 tcg_out_ld(s, ts->type, reg,
4390 ts->mem_base->reg, ts->mem_offset);
4394 TCGRegSet arg_set = 0;
4396 tcg_reg_free(s, reg, allocated_regs);
4397 tcg_regset_set_reg(arg_set, reg);
4398 temp_load(s, ts, arg_set, allocated_regs, 0);
4401 tcg_regset_set_reg(allocated_regs, reg);
4405 /* mark dead temporaries and free the associated registers */
4406 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4407 if (IS_DEAD_ARG(i)) {
4408 temp_dead(s, arg_temp(op->args[i]));
4412 /* clobber call registers */
4413 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4414 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4415 tcg_reg_free(s, i, allocated_regs);
4419 /* Save globals if they might be written by the helper, sync them if
4420 they might be read. */
4421 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4423 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4424 sync_globals(s, allocated_regs);
4426 save_globals(s, allocated_regs);
4429 tcg_out_call(s, func_addr);
4431 /* assign output registers and emit moves if needed */
4432     for (i = 0; i < nb_oargs; i++) {
4436 /* ENV should not be modified. */
4437 tcg_debug_assert(!temp_readonly(ts));
4439 reg = tcg_target_call_oarg_regs[i];
4440 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4441 if (ts->val_type == TEMP_VAL_REG) {
4442 s->reg_to_temp[ts->reg] = NULL;
4444 ts->val_type = TEMP_VAL_REG;
4446 ts->mem_coherent = 0;
4447 s->reg_to_temp[reg] = ts;
4448 if (NEED_SYNC_ARG(i)) {
4449 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4450 } else if (IS_DEAD_ARG(i)) {
4456 #ifdef CONFIG_PROFILER
4458 /* avoid copy/paste errors */
4459 #define PROF_ADD(to, from, field) \
4461 (to)->field += qatomic_read(&((from)->field)); \
4464 #define PROF_MAX(to, from, field) \
4466 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4467 if (val__ > (to)->field) { \
4468 (to)->field = val__; \
4472 /* Pass in a zero'ed @prof */
4474 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4476 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4479 for (i = 0; i < n_ctxs; i++) {
4480 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4481 const TCGProfile *orig = &s->prof;
4484 PROF_ADD(prof, orig, cpu_exec_time);
4485 PROF_ADD(prof, orig, tb_count1);
4486 PROF_ADD(prof, orig, tb_count);
4487 PROF_ADD(prof, orig, op_count);
4488 PROF_MAX(prof, orig, op_count_max);
4489 PROF_ADD(prof, orig, temp_count);
4490 PROF_MAX(prof, orig, temp_count_max);
4491 PROF_ADD(prof, orig, del_op_count);
4492 PROF_ADD(prof, orig, code_in_len);
4493 PROF_ADD(prof, orig, code_out_len);
4494 PROF_ADD(prof, orig, search_out_len);
4495 PROF_ADD(prof, orig, interm_time);
4496 PROF_ADD(prof, orig, code_time);
4497 PROF_ADD(prof, orig, la_time);
4498 PROF_ADD(prof, orig, opt_time);
4499 PROF_ADD(prof, orig, restore_count);
4500 PROF_ADD(prof, orig, restore_time);
4505 for (i = 0; i < NB_OPS; i++) {
4506 PROF_ADD(prof, orig, table_op_count[i]);
4515 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4517 tcg_profile_snapshot(prof, true, false);
4520 static void tcg_profile_snapshot_table(TCGProfile *prof)
4522 tcg_profile_snapshot(prof, false, true);
4525 void tcg_dump_op_count(void)
4527 TCGProfile prof = {};
4530 tcg_profile_snapshot_table(&prof);
4531 for (i = 0; i < NB_OPS; i++) {
4532 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4533 prof.table_op_count[i]);
4537 int64_t tcg_cpu_exec_time(void)
4539 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4543 for (i = 0; i < n_ctxs; i++) {
4544 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4545 const TCGProfile *prof = &s->prof;
4547 ret += qatomic_read(&prof->cpu_exec_time);
4552 void tcg_dump_op_count(void)
4554 qemu_printf("[TCG profiler not compiled]\n");
4557 int64_t tcg_cpu_exec_time(void)
4559 error_report("%s: TCG profiler not compiled", __func__);
4565 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4567 #ifdef CONFIG_PROFILER
4568 TCGProfile *prof = &s->prof;
4573 #ifdef CONFIG_PROFILER
4577 QTAILQ_FOREACH(op, &s->ops, link) {
4580 qatomic_set(&prof->op_count, prof->op_count + n);
4581 if (n > prof->op_count_max) {
4582 qatomic_set(&prof->op_count_max, n);
4586 qatomic_set(&prof->temp_count, prof->temp_count + n);
4587 if (n > prof->temp_count_max) {
4588 qatomic_set(&prof->temp_count_max, n);
4594 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4595 && qemu_log_in_addr_range(tb->pc))) {
4596 FILE *logfile = qemu_log_lock();
4598 tcg_dump_ops(s, false);
4600 qemu_log_unlock(logfile);
4604 #ifdef CONFIG_DEBUG_TCG
4605 /* Ensure all labels referenced have been emitted. */
4610 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4611 if (unlikely(!l->present) && l->refs) {
4612 qemu_log_mask(CPU_LOG_TB_OP,
4613 "$L%d referenced but not present.\n", l->id);
4621 #ifdef CONFIG_PROFILER
4622 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4625 #ifdef USE_TCG_OPTIMIZATIONS
4629 #ifdef CONFIG_PROFILER
4630 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4631 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4634 reachable_code_pass(s);
4637 if (s->nb_indirects > 0) {
4639 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4640 && qemu_log_in_addr_range(tb->pc))) {
4641 FILE *logfile = qemu_log_lock();
4642 qemu_log("OP before indirect lowering:\n");
4643 tcg_dump_ops(s, false);
4645 qemu_log_unlock(logfile);
4648 /* Replace indirect temps with direct temps. */
4649 if (liveness_pass_2(s)) {
4650 /* If changes were made, re-run liveness. */
4655 #ifdef CONFIG_PROFILER
4656 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4660 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4661 && qemu_log_in_addr_range(tb->pc))) {
4662 FILE *logfile = qemu_log_lock();
4663 qemu_log("OP after optimization and liveness analysis:\n");
4664 tcg_dump_ops(s, true);
4666 qemu_log_unlock(logfile);
4670 tcg_reg_alloc_start(s);
4673 * Reset the buffer pointers when restarting after overflow.
4674 * TODO: Move this into translate-all.c with the rest of the
4675 * buffer management. Having only this done here is confusing.
4677 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4678 s->code_ptr = s->code_buf;
4680 #ifdef TCG_TARGET_NEED_LDST_LABELS
4681 QSIMPLEQ_INIT(&s->ldst_labels);
4683 #ifdef TCG_TARGET_NEED_POOL_LABELS
4684 s->pool_labels = NULL;
4688 QTAILQ_FOREACH(op, &s->ops, link) {
4689 TCGOpcode opc = op->opc;
4691 #ifdef CONFIG_PROFILER
4692 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4696 case INDEX_op_mov_i32:
4697 case INDEX_op_mov_i64:
4698 case INDEX_op_mov_vec:
4699 tcg_reg_alloc_mov(s, op);
4701 case INDEX_op_dup_vec:
4702 tcg_reg_alloc_dup(s, op);
4704 case INDEX_op_insn_start:
4705 if (num_insns >= 0) {
4706 size_t off = tcg_current_code_size(s);
4707 s->gen_insn_end_off[num_insns] = off;
4708 /* Assert that we do not overflow our stored offset. */
4709 assert(s->gen_insn_end_off[num_insns] == off);
4712 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4714 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4715 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4719 s->gen_insn_data[num_insns][i] = a;
4722 case INDEX_op_discard:
4723 temp_dead(s, arg_temp(op->args[0]));
4725 case INDEX_op_set_label:
4726 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4727 tcg_out_label(s, arg_label(op->args[0]));
4730 tcg_reg_alloc_call(s, op);
4732 case INDEX_op_dup2_vec:
4733 if (tcg_reg_alloc_dup2(s, op)) {
4738 /* Sanity check that we've not introduced any unhandled opcodes. */
4739 tcg_debug_assert(tcg_op_supported(opc));
4740             /* Note: it would be much faster to have specialized
4741                register allocator functions for some common argument
4742                patterns. */
4743 tcg_reg_alloc_op(s, op);
4746 #ifdef CONFIG_DEBUG_TCG
4749 /* Test for (pending) buffer overflow. The assumption is that any
4750 one operation beginning below the high water mark cannot overrun
4751 the buffer completely. Thus we can test for overflow after
4752 generating code without having to check during generation. */
4753 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4756 /* Test for TB overflow, as seen by gen_insn_end_off. */
4757 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4761 tcg_debug_assert(num_insns >= 0);
4762 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4764 /* Generate TB finalization at the end of block */
4765 #ifdef TCG_TARGET_NEED_LDST_LABELS
4766 i = tcg_out_ldst_finalize(s);
4771 #ifdef TCG_TARGET_NEED_POOL_LABELS
4772 i = tcg_out_pool_finalize(s);
4777 if (!tcg_resolve_relocs(s)) {
4781 #ifndef CONFIG_TCG_INTERPRETER
4782 /* flush instruction cache */
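/* The code was written through the read-write mapping; pass both the
   RX and RW views so the data cache is cleaned via the address we wrote
   through and the instruction cache is invalidated for the address that
   will actually be executed (the two are identical without split-wx). */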
4783 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4784 (uintptr_t)s->code_buf,
4785 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4788 return tcg_current_code_size(s);
4791 #ifdef CONFIG_PROFILER
4792 void tcg_dump_info(void)
4794 TCGProfile prof = {};
4795 const TCGProfile *s;
4797 int64_t tb_div_count;
4800 tcg_profile_snapshot_counters(&prof);
4802 tb_count = s->tb_count;
4803 tb_div_count = tb_count ? tb_count : 1;
4804 tot = s->interm_time + s->code_time;
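/* Note: the seconds figure below is only a rough estimate; as the
   format string says, the raw counter is scaled by a hard-coded
   nominal 2.4 GHz clock. */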
4806 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4808 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4810 tb_count, s->tb_count1 - tb_count,
4811 (double)(s->tb_count1 - s->tb_count)
4812 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4813 qemu_printf("avg ops/TB %0.1f max=%d\n",
4814 (double)s->op_count / tb_div_count, s->op_count_max);
4815 qemu_printf("deleted ops/TB %0.2f\n",
4816 (double)s->del_op_count / tb_div_count);
4817 qemu_printf("avg temps/TB %0.2f max=%d\n",
4818 (double)s->temp_count / tb_div_count, s->temp_count_max);
4819 qemu_printf("avg host code/TB %0.1f\n",
4820 (double)s->code_out_len / tb_div_count);
4821 qemu_printf("avg search data/TB %0.1f\n",
4822 (double)s->search_out_len / tb_div_count);
4824 qemu_printf("cycles/op %0.1f\n",
4825 s->op_count ? (double)tot / s->op_count : 0);
4826 qemu_printf("cycles/in byte %0.1f\n",
4827 s->code_in_len ? (double)tot / s->code_in_len : 0);
4828 qemu_printf("cycles/out byte %0.1f\n",
4829 s->code_out_len ? (double)tot / s->code_out_len : 0);
4830 qemu_printf("cycles/search byte %0.1f\n",
4831 s->search_out_len ? (double)tot / s->search_out_len : 0);
4835 qemu_printf(" gen_interm time %0.1f%%\n",
4836 (double)s->interm_time / tot * 100.0);
4837 qemu_printf(" gen_code time %0.1f%%\n",
4838 (double)s->code_time / tot * 100.0);
4839 qemu_printf("optim./code time %0.1f%%\n",
4840 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4842 qemu_printf("liveness/code time %0.1f%%\n",
4843 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4844 qemu_printf("cpu_restore count %" PRId64 "\n",
4846 qemu_printf(" avg cycles %0.1f\n",
4847 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4850 void tcg_dump_info(void)
4852 qemu_printf("[TCG profiler not compiled]\n");
4856 #ifdef ELF_HOST_MACHINE
4857 /* In order to use this feature, the backend needs to do three things:
4859 (1) Define ELF_HOST_MACHINE to indicate both the value to put
4860 into the ELF image and that the feature is supported.
4862 (2) Define tcg_register_jit. This should create a buffer containing
4863 the contents of a .debug_frame section that describes the post-
4864 prologue unwind info for the tcg machine.
4866 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
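/* For illustration only: a hypothetical backend's step (2) might look
   like the sketch below.  The actual CIE/FDE payload (register numbers,
   CFA opcodes) is host specific and elided here; "fde_cfi_ops" and its
   size are placeholders, not part of any real backend.

       typedef struct {
           DebugFrameHeader h;
           uint8_t fde_cfi_ops[8];        // host call-frame instructions
       } DebugFrame;

       static const DebugFrame debug_frame = {
           // ... fill in the CIE and FDE header fields plus fde_cfi_ops ...
       };

       void tcg_register_jit(const void *buf, size_t buf_size)
       {
           tcg_register_jit_int(buf, buf_size, &debug_frame,
                                sizeof(debug_frame));
       }

   tcg_register_jit_int() below copies the frame data after the ElfImage,
   patches fde.func_start/func_len with the real buffer address and size,
   and publishes the whole image to GDB. */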
4869 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
4876 struct jit_code_entry {
4877 struct jit_code_entry *next_entry;
4878 struct jit_code_entry *prev_entry;
4879 const void *symfile_addr;
4880 uint64_t symfile_size;
4883 struct jit_descriptor {
4885 uint32_t action_flag;
4886 struct jit_code_entry *relevant_entry;
4887 struct jit_code_entry *first_entry;
4890 void __jit_debug_register_code(void) __attribute__((noinline));
4891 void __jit_debug_register_code(void)
4896 /* Must statically initialize the version, because GDB may check
4897 the version before we can set it. */
4898 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4900 /* End GDB interface. */
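/* Return the offset of @str within the section string table @strtab.
   Per ELF, index 0 of a string table is a NUL byte, which is why the
   search starts at strtab + 1; the result is used to fill in the
   sh_name and st_name fields below. */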
4902 static int find_string(const char *strtab, const char *str)
4904 const char *p = strtab + 1;
4907 if (strcmp(p, str) == 0) {
4914 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4915 const void *debug_frame,
4916 size_t debug_frame_size)
4918 struct __attribute__((packed)) DebugInfo {
4925 uintptr_t cu_low_pc;
4926 uintptr_t cu_high_pc;
4929 uintptr_t fn_low_pc;
4930 uintptr_t fn_high_pc;
4939 struct DebugInfo di;
4944 struct ElfImage *img;
4946 static const struct ElfImage img_template = {
4948 .e_ident[EI_MAG0] = ELFMAG0,
4949 .e_ident[EI_MAG1] = ELFMAG1,
4950 .e_ident[EI_MAG2] = ELFMAG2,
4951 .e_ident[EI_MAG3] = ELFMAG3,
4952 .e_ident[EI_CLASS] = ELF_CLASS,
4953 .e_ident[EI_DATA] = ELF_DATA,
4954 .e_ident[EI_VERSION] = EV_CURRENT,
4956 .e_machine = ELF_HOST_MACHINE,
4957 .e_version = EV_CURRENT,
4958 .e_phoff = offsetof(struct ElfImage, phdr),
4959 .e_shoff = offsetof(struct ElfImage, shdr),
4960 .e_ehsize = sizeof(ElfW(Ehdr)), /* size of the ELF header itself */
4961 .e_phentsize = sizeof(ElfW(Phdr)),
4963 .e_shentsize = sizeof(ElfW(Shdr)),
4964 .e_shnum = ARRAY_SIZE(img->shdr),
4965 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4966 #ifdef ELF_HOST_FLAGS
4967 .e_flags = ELF_HOST_FLAGS,
4970 .e_ident[EI_OSABI] = ELF_OSABI,
4978 [0] = { .sh_type = SHT_NULL },
4979 /* Trick: The contents of code_gen_buffer are not present in
4980 this fake ELF file; that got allocated elsewhere. Therefore
4981 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4982 will not look for contents. We can record any address. */
4984 .sh_type = SHT_NOBITS,
4985 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4987 [2] = { /* .debug_info */
4988 .sh_type = SHT_PROGBITS,
4989 .sh_offset = offsetof(struct ElfImage, di),
4990 .sh_size = sizeof(struct DebugInfo),
4992 [3] = { /* .debug_abbrev */
4993 .sh_type = SHT_PROGBITS,
4994 .sh_offset = offsetof(struct ElfImage, da),
4995 .sh_size = sizeof(img->da),
4997 [4] = { /* .debug_frame */
4998 .sh_type = SHT_PROGBITS,
4999 .sh_offset = sizeof(struct ElfImage),
5001 [5] = { /* .symtab */
5002 .sh_type = SHT_SYMTAB,
5003 .sh_offset = offsetof(struct ElfImage, sym),
5004 .sh_size = sizeof(img->sym),
5006 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5007 .sh_entsize = sizeof(ElfW(Sym)),
5009 [6] = { /* .strtab */
5010 .sh_type = SHT_STRTAB,
5011 .sh_offset = offsetof(struct ElfImage, str),
5012 .sh_size = sizeof(img->str),
5016 [1] = { /* code_gen_buffer */
5017 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5022 .len = sizeof(struct DebugInfo) - 4,
5024 .ptr_size = sizeof(void *),
5026 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
5028 .fn_name = "code_gen_buffer"
5031 1, /* abbrev number (the cu) */
5032 0x11, 1, /* DW_TAG_compile_unit, has children */
5033 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
5034 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5035 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5036 0, 0, /* end of abbrev */
5037 2, /* abbrev number (the fn) */
5038 0x2e, 0, /* DW_TAG_subprogram, no children */
5039 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
5040 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
5041 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
5042 0, 0, /* end of abbrev */
5043 0 /* no more abbrev */
5045 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5046 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5049 /* We only need a single jit entry; statically allocate it. */
5050 static struct jit_code_entry one_entry;
5052 uintptr_t buf = (uintptr_t)buf_ptr;
5053 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5054 DebugFrameHeader *dfh;
5056 img = g_malloc(img_size);
5057 *img = img_template;
5059 img->phdr.p_vaddr = buf;
5060 img->phdr.p_paddr = buf;
5061 img->phdr.p_memsz = buf_size;
5063 img->shdr[1].sh_name = find_string(img->str, ".text");
5064 img->shdr[1].sh_addr = buf;
5065 img->shdr[1].sh_size = buf_size;
5067 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5068 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5070 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5071 img->shdr[4].sh_size = debug_frame_size;
5073 img->shdr[5].sh_name = find_string(img->str, ".symtab");
5074 img->shdr[6].sh_name = find_string(img->str, ".strtab");
5076 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5077 img->sym[1].st_value = buf;
5078 img->sym[1].st_size = buf_size;
5080 img->di.cu_low_pc = buf;
5081 img->di.cu_high_pc = buf + buf_size;
5082 img->di.fn_low_pc = buf;
5083 img->di.fn_high_pc = buf + buf_size;
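/* The single compile unit and its lone "code_gen_buffer" subprogram are
   made to span the whole buffer, so any PC inside generated code
   resolves to that symbol. */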
5085 dfh = (DebugFrameHeader *)(img + 1);
5086 memcpy(dfh, debug_frame, debug_frame_size);
5087 dfh->fde.func_start = buf;
5088 dfh->fde.func_len = buf_size;
5091 /* Enable this block to debug creation of the ELF image file;
5092 the image can then be inspected with readelf, objdump, or other utilities. */
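/* For example (illustrative commands):
       readelf -aW /tmp/qemu.jit
       objdump --dwarf=frames /tmp/qemu.jit */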
5094 FILE *f = fopen("/tmp/qemu.jit", "w+b");
5096 if (fwrite(img, img_size, 1, f) != 1) {
5097 /* With nmemb == 1, fwrite returns 1 on success; this check merely consumes the return value to avoid the unused-result warning. */
5104 one_entry.symfile_addr = img;
5105 one_entry.symfile_size = img_size;
5107 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5108 __jit_debug_descriptor.relevant_entry = &one_entry;
5109 __jit_debug_descriptor.first_entry = &one_entry;
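/* GDB places a breakpoint inside __jit_debug_register_code(); calling it
   after the descriptor has been updated is what prompts the debugger to
   walk the entry list and load the new symbol file. */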
5110 __jit_debug_register_code();
5113 /* No support for the feature. Provide the entry point expected by exec.c,
5114 and implement the internal function we declared earlier. */
5116 static void tcg_register_jit_int(const void *buf, size_t size,
5117 const void *debug_frame,
5118 size_t debug_frame_size)
5122 void tcg_register_jit(const void *buf, size_t buf_size)
5125 #endif /* ELF_HOST_MACHINE */
5127 #if !TCG_TARGET_MAYBE_vec
5128 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5130 g_assert_not_reached();