2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
28 #include "qemu/osdep.h"
30 /* Define to dump the ELF file used to communicate with GDB. */
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41 CPU definitions. Currently they are used for qemu_ld/st
43 #define NO_CPU_IO_DEFS
46 #include "exec/exec-all.h"
48 #if !defined(CONFIG_USER_ONLY)
49 #include "hw/boards.h"
52 #include "tcg/tcg-op.h"
54 #if UINTPTR_MAX == UINT32_MAX
55 # define ELF_CLASS ELFCLASS32
57 # define ELF_CLASS ELFCLASS64
59 #ifdef HOST_WORDS_BIGENDIAN
60 # define ELF_DATA ELFDATA2MSB
62 # define ELF_DATA ELFDATA2LSB
67 #include "sysemu/sysemu.h"
69 /* Forward declarations for functions declared in tcg-target.c.inc and
71 static void tcg_target_init(TCGContext *s);
72 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
73 static void tcg_target_qemu_prologue(TCGContext *s);
74 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
75 intptr_t value, intptr_t addend);
77 /* The CIE and FDE header definitions will be common to all hosts. */
79 uint32_t len __attribute__((aligned((sizeof(void *)))));
85 uint8_t return_column;
88 typedef struct QEMU_PACKED {
89 uint32_t len __attribute__((aligned((sizeof(void *)))));
93 } DebugFrameFDEHeader;
95 typedef struct QEMU_PACKED {
97 DebugFrameFDEHeader fde;
100 static void tcg_register_jit_int(void *buf, size_t size,
101 const void *debug_frame,
102 size_t debug_frame_size)
103 __attribute__((unused));
105 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
106 static const char *target_parse_constraint(TCGArgConstraint *ct,
107 const char *ct_str, TCGType type);
108 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
110 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
111 static void tcg_out_movi(TCGContext *s, TCGType type,
112 TCGReg ret, tcg_target_long arg);
113 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
114 const int *const_args);
115 #if TCG_TARGET_MAYBE_vec
116 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
117 TCGReg dst, TCGReg src);
118 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
119 TCGReg dst, TCGReg base, intptr_t offset);
120 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
121 TCGReg dst, tcg_target_long arg);
122 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
123 unsigned vece, const TCGArg *args,
124 const int *const_args);
126 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
127 TCGReg dst, TCGReg src)
129 g_assert_not_reached();
131 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
132 TCGReg dst, TCGReg base, intptr_t offset)
134 g_assert_not_reached();
136 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
137 TCGReg dst, tcg_target_long arg)
139 g_assert_not_reached();
141 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
142 unsigned vece, const TCGArg *args,
143 const int *const_args)
145 g_assert_not_reached();
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151 TCGReg base, intptr_t ofs);
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
153 static int tcg_target_const_match(tcg_target_long val, TCGType type,
154 const TCGArgConstraint *arg_ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
159 #define TCG_HIGHWATER 1024
161 static TCGContext **tcg_ctxs;
162 static unsigned int n_tcg_ctxs;
163 TCGv_env cpu_env = 0;
164 void *tcg_code_gen_epilogue;
165 uintptr_t tcg_splitwx_diff;
167 #ifndef CONFIG_TCG_INTERPRETER
168 tcg_prologue_fn *tcg_qemu_tb_exec;
171 struct tcg_region_tree {
174 /* padding to avoid false sharing is computed at run-time */
178 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
179 * dynamically allocate from as demand dictates. Given appropriate region
180 * sizing, this minimizes flushes even when some TCG threads generate a lot
181 * more code than others.
183 struct tcg_region_state {
186 /* fields set at init time */
191 size_t size; /* size of one region */
192 size_t stride; /* .size + guard size */
194 /* fields protected by the lock */
195 size_t current; /* current region index */
196 size_t agg_size_full; /* aggregate size of full regions */
199 static struct tcg_region_state region;
201 * This is an array of struct tcg_region_tree's, with padding.
202 * We use void * to simplify the computation of region_trees[i]; each
203 * struct is found every tree_size bytes.
205 static void *region_trees;
206 static size_t tree_size;
207 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
208 static TCGRegSet tcg_target_call_clobber_regs;
210 #if TCG_TARGET_INSN_UNIT_SIZE == 1
211 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
216 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
223 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
224 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
226 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
229 tcg_insn_unit *p = s->code_ptr;
230 memcpy(p, &v, sizeof(v));
231 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
235 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
238 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
241 memcpy(p, &v, sizeof(v));
246 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
247 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
249 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
252 tcg_insn_unit *p = s->code_ptr;
253 memcpy(p, &v, sizeof(v));
254 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
258 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
261 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
264 memcpy(p, &v, sizeof(v));
269 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
270 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
272 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
275 tcg_insn_unit *p = s->code_ptr;
276 memcpy(p, &v, sizeof(v));
277 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
281 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
284 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
287 memcpy(p, &v, sizeof(v));
292 /* label relocation processing */
/*
 * Queue a relocation of kind @type at @code_ptr against label @l, with
 * @addend.  The record is pool-allocated and appended to the label's
 * reloc list; it is applied later via patch_reloc() in tcg_resolve_relocs().
 * NOTE(review): excerpt elides original lines 296/298-301 (braces and the
 * assignments to r's fields); do not infer missing code from this view.
 */
294 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
295 TCGLabel *l, intptr_t addend)
297 TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
302 QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
/*
 * Bind label @l to the current output position @ptr, converting to the
 * execute (rx) view with tcg_splitwx_to_rx.  Asserts the label was not
 * already bound.  NOTE(review): excerpt elides interior lines (306/308/310+).
 */
305 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
307 tcg_debug_assert(!l->has_value);
309 l->u.value_ptr = tcg_splitwx_to_rx(ptr);
/*
 * Allocate a fresh zero-initialized TCGLabel from the context's pool,
 * assign it the next label id, init its reloc list, and queue it on
 * s->labels so tcg_resolve_relocs() can visit it.
 * NOTE(review): excerpt elides some original lines (numbering gaps);
 * the return statement and braces are not visible here.
 */
312 TCGLabel *gen_new_label(void)
314 TCGContext *s = tcg_ctx;
315 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
317 memset(l, 0, sizeof(TCGLabel));
318 l->id = s->nb_labels++;
319 QSIMPLEQ_INIT(&l->relocs);
321 QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
/*
 * Walk every label of the context and apply all pending relocations
 * via patch_reloc().  Returns false if any relocation cannot be
 * patched (value out of range for the encoding), true on success.
 * NOTE(review): excerpt elides some original lines (numbering gaps).
 */
326 static bool tcg_resolve_relocs(TCGContext *s)
330 QSIMPLEQ_FOREACH(l, &s->labels, next) {
332 uintptr_t value = l->u.value;
334 QSIMPLEQ_FOREACH(r, &l->relocs, next) {
335 if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
/*
 * Record the current code offset as the reset point for goto_tb slot
 * @which.  Overflow of the 16-bit field is checked by the caller, as the
 * in-body comment explains.  NOTE(review): excerpt elides braces/lines.
 */
343 static void set_jmp_reset_offset(TCGContext *s, int which)
346 * We will check for overflow at the end of the opcode loop in
347 * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
349 s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
352 #include "tcg-target.c.inc"
354 /* compare a pointer @ptr and a tb_tc @s */
/*
 * Three-way compare of a raw code pointer against a tb_tc range:
 * positive if @ptr is at/after the end, negative if before the start,
 * zero (elided here) if it falls inside [s->ptr, s->ptr + s->size).
 * NOTE(review): excerpt elides the return statements and braces.
 */
355 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
357 if (ptr >= s->ptr + s->size) {
359 } else if (ptr < s->ptr) {
/*
 * GTree comparator for tb_tc keys.  Insert/delete keys have both sizes
 * set and compare by pointer; lookup keys have .size == 0 and are
 * resolved with ptr_cmp_tb_tc() so a pointer anywhere inside a TB's
 * code range matches.  NOTE(review): excerpt elides some return lines.
 */
365 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
367 const struct tb_tc *a = ap;
368 const struct tb_tc *b = bp;
371 * When both sizes are set, we know this isn't a lookup.
372 * This is the most likely case: every TB must be inserted; lookups
373 * are a lot less frequent.
375 if (likely(a->size && b->size)) {
376 if (a->ptr > b->ptr) {
378 } else if (a->ptr < b->ptr) {
381 /* a->ptr == b->ptr should happen only on deletions */
382 g_assert(a->size == b->size);
386 * All lookups have at least one .size field set to 0.
387 * From the glib sources we see that @ap is always the lookup key. However
388 * the docs provide no guarantee, so we just mark this case as likely.
390 if (likely(a->size == 0)) {
391 return ptr_cmp_tb_tc(a->ptr, b);
393 return ptr_cmp_tb_tc(b->ptr, a);
/*
 * Allocate one tcg_region_tree (mutex + GTree keyed by tb_tc_cmp) per
 * region, in a single cache-line-aligned array; tree_size is rounded up
 * to the dcache line size to avoid false sharing between trees.
 * NOTE(review): excerpt elides braces and some lines (numbering gaps).
 */
396 static void tcg_region_trees_init(void)
400 tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
401 region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
402 for (i = 0; i < region.n; i++) {
403 struct tcg_region_tree *rt = region_trees + i * tree_size;
405 qemu_mutex_init(&rt->lock);
406 rt->tree = g_tree_new(tb_tc_cmp);
/*
 * Map a code pointer (possibly in the execute view) to the region tree
 * that covers it.  Converts to the rw view first; any pages past the
 * last full stride are attributed to the final region.
 * NOTE(review): excerpt elides some lines (e.g. what happens when
 * p < region.start_aligned) — do not infer missing code from this view.
 */
410 static struct tcg_region_tree *tc_ptr_to_region_tree(const void *cp)
412 void *p = tcg_splitwx_to_rw(cp);
415 if (p < region.start_aligned) {
418 ptrdiff_t offset = p - region.start_aligned;
420 if (offset > region.stride * (region.n - 1)) {
421 region_idx = region.n - 1;
423 region_idx = offset / region.stride;
426 return region_trees + region_idx * tree_size;
/*
 * Insert @tb into the region tree covering its code pointer, keyed by
 * its tb_tc range, under that tree's lock.
 * NOTE(review): excerpt elides braces (original lines 430/436).
 */
429 void tcg_tb_insert(TranslationBlock *tb)
431 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
433 qemu_mutex_lock(&rt->lock);
434 g_tree_insert(rt->tree, &tb->tc, tb);
435 qemu_mutex_unlock(&rt->lock);
/*
 * Remove @tb from its region tree, under that tree's lock.
 * Mirror of tcg_tb_insert().
 * NOTE(review): excerpt elides braces (original lines 439/445).
 */
438 void tcg_tb_remove(TranslationBlock *tb)
440 struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
442 qemu_mutex_lock(&rt->lock);
443 g_tree_remove(rt->tree, &tb->tc);
444 qemu_mutex_unlock(&rt->lock);
448 * Find the TB 'tb' such that
449 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
450 * Return NULL if not found.
/*
 * Look up the TB whose code range contains @tc_ptr (see the contract in
 * the comment above: tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size).
 * The lookup key has .size == 0, which tb_tc_cmp treats as a range probe.
 * NOTE(review): excerpt elides the return and braces (numbering gaps).
 */
452 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
454 struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
455 TranslationBlock *tb;
456 struct tb_tc s = { .ptr = (void *)tc_ptr };
458 qemu_mutex_lock(&rt->lock);
459 tb = g_tree_lookup(rt->tree, &s);
460 qemu_mutex_unlock(&rt->lock);
/*
 * Acquire every region tree's lock, in index order (0..n-1).  Paired
 * with tcg_region_tree_unlock_all(); consistent ordering avoids
 * deadlock between concurrent callers.
 * NOTE(review): excerpt elides braces and declarations (numbering gaps).
 */
464 static void tcg_region_tree_lock_all(void)
468 for (i = 0; i < region.n; i++) {
469 struct tcg_region_tree *rt = region_trees + i * tree_size;
471 qemu_mutex_lock(&rt->lock);
/*
 * Release every region tree's lock; inverse of tcg_region_tree_lock_all().
 * NOTE(review): excerpt elides braces and declarations (numbering gaps).
 */
475 static void tcg_region_tree_unlock_all(void)
479 for (i = 0; i < region.n; i++) {
480 struct tcg_region_tree *rt = region_trees + i * tree_size;
482 qemu_mutex_unlock(&rt->lock);
/*
 * Apply @func to every TB in every region tree, with all trees locked
 * for the duration so the traversal sees a consistent snapshot.
 * NOTE(review): excerpt elides braces and declarations (numbering gaps).
 */
486 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
490 tcg_region_tree_lock_all();
491 for (i = 0; i < region.n; i++) {
492 struct tcg_region_tree *rt = region_trees + i * tree_size;
494 g_tree_foreach(rt->tree, func, user_data);
496 tcg_region_tree_unlock_all();
/*
 * Count the TBs currently tracked across all region trees, taking all
 * tree locks so the sum is consistent.
 * NOTE(review): excerpt elides the return of nb_tbs and braces.
 */
499 size_t tcg_nb_tbs(void)
504 tcg_region_tree_lock_all();
505 for (i = 0; i < region.n; i++) {
506 struct tcg_region_tree *rt = region_trees + i * tree_size;
508 nb_tbs += g_tree_nnodes(rt->tree);
510 tcg_region_tree_unlock_all();
/*
 * GTraverseFunc used by tcg_region_tree_reset_all(); the body acting on
 * each TB is elided from this excerpt (original lines 517-520 missing).
 */
514 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
516 TranslationBlock *tb = v;
/*
 * Empty every region tree: traverse each tree once with
 * tcg_region_tree_traverse, then use the ref+destroy idiom so
 * g_tree_destroy removes all nodes while the extra reference keeps the
 * GTree object itself alive for reuse (see the in-body comment).
 * NOTE(review): excerpt elides braces (numbering gaps).
 */
522 static void tcg_region_tree_reset_all(void)
526 tcg_region_tree_lock_all();
527 for (i = 0; i < region.n; i++) {
528 struct tcg_region_tree *rt = region_trees + i * tree_size;
530 g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
531 /* Increment the refcount first so that destroy acts as a reset */
532 g_tree_ref(rt->tree);
533 g_tree_destroy(rt->tree);
535 tcg_region_tree_unlock_all();
/*
 * Compute [start, end) for region @curr_region from the global stride.
 * Region 0 is extended backwards to region.start (the unaligned buffer
 * head); the last region's end adjustment is elided from this excerpt
 * (original lines after 548 missing, as are *pstart/*pend stores).
 */
538 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
542 start = region.start_aligned + curr_region * region.stride;
543 end = start + region.size;
545 if (curr_region == 0) {
546 start = region.start;
548 if (curr_region == region.n - 1) {
/*
 * Point context @s at region @curr_region: set its code buffer base,
 * generation pointer, size, and highwater mark (end - TCG_HIGHWATER,
 * the voluntary-flush threshold).
 * NOTE(review): excerpt elides braces/declarations (numbering gaps).
 */
556 static void tcg_region_assign(TCGContext *s, size_t curr_region)
560 tcg_region_bounds(curr_region, &start, &end);
562 s->code_gen_buffer = start;
563 s->code_gen_ptr = start;
564 s->code_gen_buffer_size = end - start;
565 s->code_gen_highwater = end - TCG_HIGHWATER;
/*
 * Assign the next free region to @s; returns true when all regions are
 * exhausted.  Caller must hold region.lock.
 * NOTE(review): excerpt elides the early return, the region.current
 * increment, and braces (numbering gaps).
 */
568 static bool tcg_region_alloc__locked(TCGContext *s)
570 if (region.current == region.n) {
573 tcg_region_assign(s, region.current);
/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 *
 * On success, accounts the just-filled region's usable size into
 * region.agg_size_full (read before alloc__locked overwrites it).
 * NOTE(review): "®ion.lock" below is mojibake for "&region.lock"
 * (an HTML &reg; entity from extraction) — fix in the source of truth.
 * Excerpt also elides braces and the final return (numbering gaps).
 */
582 static bool tcg_region_alloc(TCGContext *s)
585 /* read the region size now; alloc__locked will overwrite it on success */
586 size_t size_full = s->code_gen_buffer_size;
588 qemu_mutex_lock(®ion.lock);
589 err = tcg_region_alloc__locked(s);
591 region.agg_size_full += size_full - TCG_HIGHWATER;
593 qemu_mutex_unlock(®ion.lock);
/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 * Caller must hold region.lock (per the __locked suffix convention).
 */
601 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
603 return tcg_region_alloc__locked(s);
/* Call from a safe-work context */
/*
 * Reset all region state: zero the aggregate-full counter, re-run the
 * initial region allocation for every registered context, then empty
 * all region trees.  Safe only when no vCPU is translating (hence the
 * "safe-work context" requirement above).
 * NOTE(review): "®ion.lock" is mojibake for "&region.lock"; excerpt
 * also elides region.current reset, error handling, and braces.
 */
607 void tcg_region_reset_all(void)
609 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
612 qemu_mutex_lock(®ion.lock);
614 region.agg_size_full = 0;
616 for (i = 0; i < n_ctxs; i++) {
617 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
618 bool err = tcg_region_initial_alloc__locked(s);
622 qemu_mutex_unlock(®ion.lock);
624 tcg_region_tree_reset_all();
/*
 * Decide how many regions to split code_gen_buffer into.  Two variants:
 * the CONFIG_USER_ONLY one (body elided in this excerpt — user-mode uses
 * a single region) and the softmmu one, which aims for >= max_cpus
 * regions of >= 2 MB each, falling back to one region per vCPU thread.
 * NOTE(review): the #else/#endif separating the two variants and several
 * body lines are elided by extraction (numbering gaps) — do not infer.
 */
627 #ifdef CONFIG_USER_ONLY
628 static size_t tcg_n_regions(void)
634 * It is likely that some vCPUs will translate more code than others, so we
635 * first try to set more regions than max_cpus, with those regions being of
636 * reasonable size. If that's not possible we make do by evenly dividing
637 * the code_gen_buffer among the vCPUs.
639 static size_t tcg_n_regions(void)
643 /* Use a single region if all we have is one vCPU thread */
644 #if !defined(CONFIG_USER_ONLY)
645 MachineState *ms = MACHINE(qdev_get_machine());
646 unsigned int max_cpus = ms->smp.max_cpus;
648 if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
652 /* Try to have more regions than max_cpus, with each region being >= 2 MB */
653 for (i = 8; i > 0; i--) {
654 size_t regions_per_thread = i;
657 region_size = tcg_init_ctx.code_gen_buffer_size;
658 region_size /= max_cpus * regions_per_thread;
660 if (region_size >= 2 * 1024u * 1024) {
661 return max_cpus * regions_per_thread;
664 /* If we can't, then just allocate one region per vCPU thread */
670 * Initializes region partitioning.
672 * Called at init time from the parent thread (i.e. the one calling
673 * tcg_context_init), after the target's TCG globals have been set.
675 * Region partitioning works by splitting code_gen_buffer into separate regions,
676 * and then assigning regions to TCG threads so that the threads can translate
677 * code in parallel without synchronization.
679 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
680 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
681 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
682 * must have been parsed before calling this function, since it calls
683 * qemu_tcg_mttcg_enabled().
685 * In user-mode we use a single region. Having multiple regions in user-mode
686 * is not supported, because the number of vCPU threads (recall that each thread
687 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
688 * OS, and usually this number is huge (tens of thousands is not uncommon).
689 * Thus, given this large bound on the number of vCPU threads and the fact
690 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
691 * the availability of at least one region per vCPU thread.
693 * However, this user-mode limitation is unlikely to be a significant problem
694 * in practice. Multi-threaded guests share most if not all of their translated
695 * code, which makes parallel code generation less appealing than in softmmu.
/*
 * Partition code_gen_buffer into page-aligned regions (see the long
 * design comment above this function): compute the per-region size,
 * fill in the global 'region' struct, mprotect a guard page at the end
 * of each region (and its split-wx mirror), build the region trees, and
 * in user-mode perform the single context's initial allocation.
 * NOTE(review): "®ion.lock" below is mojibake for "&region.lock".
 * Excerpt elides declarations, error checks on mprotect rc, and the
 * closing #endif (numbering gaps) — do not infer missing code.
 */
697 void tcg_region_init(void)
699 void *buf = tcg_init_ctx.code_gen_buffer;
701 size_t size = tcg_init_ctx.code_gen_buffer_size;
702 size_t page_size = qemu_real_host_page_size;
706 uintptr_t splitwx_diff;
708 n_regions = tcg_n_regions();
710 /* The first region will be 'aligned - buf' bytes larger than the others */
711 aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
712 g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
714 * Make region_size a multiple of page_size, using aligned as the start.
715 * As a result of this we might end up with a few extra pages at the end of
716 * the buffer; we will assign those to the last region.
718 region_size = (size - (aligned - buf)) / n_regions;
719 region_size = QEMU_ALIGN_DOWN(region_size, page_size);
721 /* A region must have at least 2 pages; one code, one guard */
722 g_assert(region_size >= 2 * page_size);
724 /* init the region struct */
725 qemu_mutex_init(®ion.lock);
726 region.n = n_regions;
727 region.size = region_size - page_size;
728 region.stride = region_size;
730 region.start_aligned = aligned;
731 /* page-align the end, since its last page will be a guard page */
732 region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
733 /* account for that last guard page */
734 region.end -= page_size;
736 /* set guard pages */
737 splitwx_diff = tcg_splitwx_diff;
738 for (i = 0; i < region.n; i++) {
742 tcg_region_bounds(i, &start, &end);
743 rc = qemu_mprotect_none(end, page_size);
746 rc = qemu_mprotect_none(end + splitwx_diff, page_size);
751 tcg_region_trees_init();
753 /* In user-mode we support only one ctx, so do the initial allocation now */
754 #ifdef CONFIG_USER_ONLY
756 bool err = tcg_region_initial_alloc__locked(tcg_ctx);
/*
 * Debug-build checked conversions between the writable (rw) and
 * executable (rx) views of the split-wx code buffer; each asserts the
 * rw-side pointer lies within code_gen_buffer.  Non-debug builds use
 * unchecked inline versions (defined elsewhere, not in this excerpt).
 * NOTE(review): excerpt elides NULL checks, returns, and braces.
 */
763 #ifdef CONFIG_DEBUG_TCG
764 const void *tcg_splitwx_to_rx(void *rw)
766 /* Pass NULL pointers unchanged. */
768 g_assert(in_code_gen_buffer(rw));
769 rw += tcg_splitwx_diff;
774 void *tcg_splitwx_to_rw(const void *rx)
776 /* Pass NULL pointers unchanged. */
778 rx -= tcg_splitwx_diff;
779 /* Assert that we end with a pointer in the rw region. */
780 g_assert(in_code_gen_buffer(rx));
784 #endif /* CONFIG_DEBUG_TCG */
/*
 * Allocate the per-context TCG-plugin scratch state: a qemu_plugin_tb
 * and its instruction array (with per-insn cleanup on free).
 * NOTE(review): presumably guarded by CONFIG_PLUGIN in the full file —
 * the surrounding #ifdef is elided from this excerpt; confirm upstream.
 */
786 static void alloc_tcg_plugin_context(TCGContext *s)
789 s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
790 s->plugin_tb->insns =
791 g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
796 * All TCG threads except the parent (i.e. the one that called tcg_context_init
797 * and registered the target's TCG globals) must register with this function
798 * before initiating translation.
800 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
801 * of tcg_region_init() for the reasoning behind this.
803 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
804 * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
805 * is not used anymore for translation once this function is called.
807 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
808 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
/*
 * Register the calling thread's TCG context (see the contract in the
 * comment block above).  User-mode variant: share the init context.
 * Softmmu variant: clone tcg_init_ctx, re-link the temps' mem_base
 * pointers into the clone, claim a tcg_ctxs[] slot, allocate plugin
 * state, and take this context's first region.
 * NOTE(review): "®ion.lock" is mojibake for "&region.lock".  The #else
 * between the variants, the memcpy of the init context, and error
 * handling are elided by extraction (numbering gaps) — do not infer.
 */
810 #ifdef CONFIG_USER_ONLY
811 void tcg_register_thread(void)
813 tcg_ctx = &tcg_init_ctx;
816 void tcg_register_thread(void)
818 MachineState *ms = MACHINE(qdev_get_machine());
819 TCGContext *s = g_malloc(sizeof(*s));
825 /* Relink mem_base. */
826 for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
827 if (tcg_init_ctx.temps[i].mem_base) {
828 ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
829 tcg_debug_assert(b >= 0 && b < n);
830 s->temps[i].mem_base = &s->temps[b];
834 /* Claim an entry in tcg_ctxs */
835 n = qatomic_fetch_inc(&n_tcg_ctxs);
836 g_assert(n < ms->smp.max_cpus);
837 qatomic_set(&tcg_ctxs[n], s);
840 alloc_tcg_plugin_context(s);
844 qemu_mutex_lock(®ion.lock);
845 err = tcg_region_initial_alloc__locked(tcg_ctx);
847 qemu_mutex_unlock(®ion.lock);
849 #endif /* !CONFIG_USER_ONLY */
852 * Returns the size (in bytes) of all translated code (i.e. from all regions)
853 * currently in the cache.
854 * See also: tcg_code_capacity()
855 * Do not confuse with tcg_current_code_size(); that one applies to a single
/*
 * Total bytes of translated code across all regions (see comment block
 * above): the aggregate of already-full regions plus each context's
 * current fill level, summed under region.lock.
 * NOTE(review): "®ion.lock" is mojibake for "&region.lock"; the
 * total accumulation and return are elided by extraction.
 */
858 size_t tcg_code_size(void)
860 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
864 qemu_mutex_lock(®ion.lock);
865 total = region.agg_size_full;
866 for (i = 0; i < n_ctxs; i++) {
867 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
870 size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
871 g_assert(size <= s->code_gen_buffer_size);
874 qemu_mutex_unlock(®ion.lock);
879 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
881 * See also: tcg_code_size()
/*
 * Total usable code capacity of the cache: whole buffer span minus one
 * guard page and one highwater reserve per region.  Lock-free — all
 * inputs are set once at init time (see in-body comment).
 * NOTE(review): the return of capacity is elided by extraction.
 */
883 size_t tcg_code_capacity(void)
885 size_t guard_size, capacity;
887 /* no need for synchronization; these variables are set at init time */
888 guard_size = region.stride - region.size;
889 capacity = region.end + guard_size - region.start;
890 capacity -= region.n * (guard_size + TCG_HIGHWATER);
/*
 * Sum each registered context's TB physical-invalidation counter.
 * Reads are qatomic but unsynchronized across contexts — a statistics
 * snapshot, not an exact instantaneous value.
 * NOTE(review): the return of total is elided by extraction.
 */
894 size_t tcg_tb_phys_invalidate_count(void)
896 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
900 for (i = 0; i < n_ctxs; i++) {
901 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
903 total += qatomic_read(&s->tb_phys_invalidate_count);
908 /* pool based memory allocation */
/*
 * Pool allocator slow path: oversized requests get a dedicated g_malloc
 * chunk pushed on pool_first_large; otherwise a fixed-size pool chunk is
 * allocated (or reused) and pool_cur/pool_end are advanced into it.
 * Pool memory is reclaimed wholesale by tcg_pool_reset().
 * NOTE(review): excerpt elides the chunk-reuse branch, returns, and
 * braces (large numbering gaps 920-929, 932-943) — do not infer.
 */
909 void *tcg_malloc_internal(TCGContext *s, int size)
914 if (size > TCG_POOL_CHUNK_SIZE) {
915 /* big malloc: insert a new pool (XXX: could optimize) */
916 p = g_malloc(sizeof(TCGPool) + size);
918 p->next = s->pool_first_large;
919 s->pool_first_large = p;
930 pool_size = TCG_POOL_CHUNK_SIZE;
931 p = g_malloc(sizeof(TCGPool) + pool_size);
935 s->pool_current->next = p;
944 s->pool_cur = p->data + size;
945 s->pool_end = p->data + p->size;
/*
 * Free all "large" pool chunks and rewind the normal pool to empty;
 * regular chunks are kept for reuse by tcg_malloc_internal.
 * NOTE(review): the loop body (saving p->next into t and g_free(p)) is
 * elided by extraction, as are braces.
 */
949 void tcg_pool_reset(TCGContext *s)
952 for (p = s->pool_first_large; p; p = t) {
956 s->pool_first_large = NULL;
957 s->pool_cur = s->pool_end = NULL;
958 s->pool_current = NULL;
961 typedef struct TCGHelperInfo {
968 #include "exec/helper-proto.h"
970 static const TCGHelperInfo all_helpers[] = {
971 #include "exec/helper-tcg.h"
973 static GHashTable *helper_table;
975 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
976 static void process_op_defs(TCGContext *s);
977 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
978 TCGReg reg, const char *name);
/*
 * One-time initialization of the master TCG context: zero it, carve the
 * per-opcode TCGArgConstraint arrays out of a single allocation, build
 * the helper hash table (keyed by function pointer), run the target
 * backend init, compute the indirect register allocation order (call-
 * saved registers reversed to the front), set up plugin state and
 * tcg_ctxs[], and create the "env" global register temp (cpu_env).
 * NOTE(review): excerpt elides declarations, the total_args accumulation,
 * args_ct advancing, tcg_target_init/process_op_defs calls, and the
 * #else/#endif around the tcg_ctxs setup (numbering gaps) — do not infer.
 */
980 void tcg_context_init(TCGContext *s)
982 int op, total_args, n, i;
984 TCGArgConstraint *args_ct;
987 memset(s, 0, sizeof(*s));
990 /* Count total number of arguments and allocate the corresponding
993 for(op = 0; op < NB_OPS; op++) {
994 def = &tcg_op_defs[op];
995 n = def->nb_iargs + def->nb_oargs;
999 args_ct = g_new0(TCGArgConstraint, total_args);
1001 for(op = 0; op < NB_OPS; op++) {
1002 def = &tcg_op_defs[op];
1003 def->args_ct = args_ct;
1004 n = def->nb_iargs + def->nb_oargs;
1008 /* Register helpers. */
1009 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
1010 helper_table = g_hash_table_new(NULL, NULL);
1012 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
1013 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
1014 (gpointer)&all_helpers[i]);
1020 /* Reverse the order of the saved registers, assuming they're all at
1021 the start of tcg_target_reg_alloc_order. */
1022 for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
1023 int r = tcg_target_reg_alloc_order[n];
1024 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
1028 for (i = 0; i < n; ++i) {
1029 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
1031 for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
1032 indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
1035 alloc_tcg_plugin_context(s);
1039 * In user-mode we simply share the init context among threads, since we
1040 * use a single region. See the documentation of tcg_region_init() for
1041 * the reasoning behind this.
1042 * In softmmu we will have at most max_cpus TCG threads.
1044 #ifdef CONFIG_USER_ONLY
1045 tcg_ctxs = &tcg_ctx;
1048 MachineState *ms = MACHINE(qdev_get_machine());
1049 unsigned int max_cpus = ms->smp.max_cpus;
1050 tcg_ctxs = g_new(TCGContext *, max_cpus);
1053 tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1054 ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1055 cpu_env = temp_tcgv_ptr(ts);
1059 * Allocate TBs right before their corresponding translated code, making
1060 * sure that TBs and code are on different cache lines.
/*
 * Carve a TranslationBlock header out of the code buffer, icache-line
 * aligned, with the generated code to follow at 'next' (see the comment
 * above: TB and code land on different cache lines).  If the highwater
 * mark would be crossed, try to move to a fresh region first.
 * NOTE(review): excerpt elides the retry/NULL-return paths and the
 * final 'return tb' (numbering gaps) — do not infer missing code.
 */
1062 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1064 uintptr_t align = qemu_icache_linesize;
1065 TranslationBlock *tb;
1069 tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1070 next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1072 if (unlikely(next > s->code_gen_highwater)) {
1073 if (tcg_region_alloc(s)) {
1078 qatomic_set(&s->code_gen_ptr, next);
1079 s->data_gen_ptr = NULL;
/*
 * Emit the host prologue/epilogue at the start of code_gen_buffer, then
 * shrink the buffer so translation begins after it (buf1).  Also seeds
 * region.start/end for split-wx asserts, publishes tcg_qemu_tb_exec,
 * flushes the icache over the emitted range, registers the buffer with
 * the JIT-debug interface, optionally disassembles the prologue to the
 * log, and asserts the goto_ptr epilogue was emitted.
 * NOTE(review): excerpt elides declarations of buf1/i, s->code_ptr/
 * code_buf setup, buf1 assignment, and several braces (numbering gaps)
 * — do not infer missing code from this view.
 */
1083 void tcg_prologue_init(TCGContext *s)
1085 size_t prologue_size, total_size;
1088 /* Put the prologue at the beginning of code_gen_buffer. */
1089 buf0 = s->code_gen_buffer;
1090 total_size = s->code_gen_buffer_size;
1093 s->data_gen_ptr = NULL;
1096 * The region trees are not yet configured, but tcg_splitwx_to_rx
1097 * needs the bounds for an assert.
1099 region.start = buf0;
1100 region.end = buf0 + total_size;
1102 #ifndef CONFIG_TCG_INTERPRETER
1103 tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
1106 /* Compute a high-water mark, at which we voluntarily flush the buffer
1107 and start over. The size here is arbitrary, significantly larger
1108 than we expect the code generation for any one opcode to require. */
1109 s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1111 #ifdef TCG_TARGET_NEED_POOL_LABELS
1112 s->pool_labels = NULL;
1115 /* Generate the prologue. */
1116 tcg_target_qemu_prologue(s);
1118 #ifdef TCG_TARGET_NEED_POOL_LABELS
1119 /* Allow the prologue to put e.g. guest_base into a pool entry. */
1121 int result = tcg_out_pool_finalize(s);
1122 tcg_debug_assert(result == 0);
1127 #ifndef CONFIG_TCG_INTERPRETER
1128 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
1129 tcg_ptr_byte_diff(buf1, buf0));
1132 /* Deduct the prologue from the buffer. */
1133 prologue_size = tcg_current_code_size(s);
1134 s->code_gen_ptr = buf1;
1135 s->code_gen_buffer = buf1;
1137 total_size -= prologue_size;
1138 s->code_gen_buffer_size = total_size;
1140 tcg_register_jit(s->code_gen_buffer, total_size);
1143 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1144 FILE *logfile = qemu_log_lock();
1145 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1146 if (s->data_gen_ptr) {
1147 size_t code_size = s->data_gen_ptr - buf0;
1148 size_t data_size = prologue_size - code_size;
1151 log_disas(buf0, code_size);
1153 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1154 if (sizeof(tcg_target_ulong) == 8) {
1155 qemu_log("0x%08" PRIxPTR ": .quad 0x%016" PRIx64 "\n",
1156 (uintptr_t)s->data_gen_ptr + i,
1157 *(uint64_t *)(s->data_gen_ptr + i));
1159 qemu_log("0x%08" PRIxPTR ": .long 0x%08x\n",
1160 (uintptr_t)s->data_gen_ptr + i,
1161 *(uint32_t *)(s->data_gen_ptr + i));
1165 log_disas(buf0, prologue_size);
1169 qemu_log_unlock(logfile);
1173 /* Assert that goto_ptr is implemented completely. */
1174 if (TCG_TARGET_HAS_goto_ptr) {
1175 tcg_debug_assert(tcg_code_gen_epilogue != NULL);
/*
 * Reset per-translation state before generating a new TB: drop all
 * non-global temps, clear the free-temp bitmaps, rewind the frame
 * offset, reset debug counters, and reinitialize the op/label queues.
 * NOTE(review): excerpt elides some resets and the #endif for the
 * CONFIG_DEBUG_TCG section (numbering gaps).
 */
1179 void tcg_func_start(TCGContext *s)
1182 s->nb_temps = s->nb_globals;
1184 /* No temps have been previously allocated for size or locality. */
1185 memset(s->free_temps, 0, sizeof(s->free_temps));
1189 s->current_frame_offset = s->frame_start;
1191 #ifdef CONFIG_DEBUG_TCG
1192 s->goto_tb_issue_mask = 0;
1195 QTAILQ_INIT(&s->ops);
1196 QTAILQ_INIT(&s->free_ops);
1197 QSIMPLEQ_INIT(&s->labels);
/*
 * Allocate the next temp slot from s->temps, zero-initialized.
 * Asserts TCG_MAX_TEMPS is not exceeded; relies on memset returning
 * its destination pointer.
 */
1200 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1202 int n = s->nb_temps++;
1203 tcg_debug_assert(n < TCG_MAX_TEMPS);
1204 return memset(&s->temps[n], 0, sizeof(TCGTemp));
/*
 * Allocate a temp and mark it global.  Globals must be allocated before
 * any plain temps exist, hence the nb_globals == nb_temps assertion.
 * NOTE(review): the nb_globals increment and 'return ts' are elided by
 * extraction (numbering gaps).
 */
1207 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1211 tcg_debug_assert(s->nb_globals == s->nb_temps);
1213 ts = tcg_temp_alloc(s);
1214 ts->temp_global = 1;
/*
 * Create a global temp that lives in fixed host register @reg (e.g.
 * "env" in TCG_AREG0) and reserve that register from the allocator.
 * NOTE(review): the 32-bit-host/I64 error path, the fixed_reg/reg/name
 * field assignments, and 'return ts' are elided by extraction.
 */
1219 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1220 TCGReg reg, const char *name)
1224 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1228 ts = tcg_global_alloc(s);
1229 ts->base_type = type;
1234 tcg_regset_set_reg(s->reserved_regs, reg);
/*
 * Record the stack-frame window [start, start+size) used for spills and
 * create the "_frame" fixed-register global pointing at it via @reg.
 * NOTE(review): the lvalue receiving the "_frame" temp (s->frame_temp)
 * is elided by extraction (line 1243 missing) — confirm upstream.
 */
1239 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1241 s->frame_start = start;
1242 s->frame_end = start + size;
1244 = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
/*
 * Create a global temp backed by memory at @base + @offset.  If @base
 * is not a fixed register the new temp becomes register-indirect and
 * @base is flagged as an indirect base.  On 32-bit hosts a 64-bit
 * global is split into two adjacent I32 halves named "<name>_0"/
 * "<name>_1", with host-endian-dependent 4-byte offsets; otherwise a
 * single temp of @type is created.  Returns the (first) temp.
 * NOTE(review): bigendian is set under HOST_WORDS_BIGENDIAN (the
 * assignment line is elided), as are indirect_reg=1, ts->name for the
 * plain case, and 'return ts' — numbering gaps; do not infer.
 */
1247 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1248 intptr_t offset, const char *name)
1250 TCGContext *s = tcg_ctx;
1251 TCGTemp *base_ts = tcgv_ptr_temp(base);
1252 TCGTemp *ts = tcg_global_alloc(s);
1253 int indirect_reg = 0, bigendian = 0;
1254 #ifdef HOST_WORDS_BIGENDIAN
1258 if (!base_ts->fixed_reg) {
1259 /* We do not support double-indirect registers. */
1260 tcg_debug_assert(!base_ts->indirect_reg);
1261 base_ts->indirect_base = 1;
1262 s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1267 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1268 TCGTemp *ts2 = tcg_global_alloc(s);
1271 ts->base_type = TCG_TYPE_I64;
1272 ts->type = TCG_TYPE_I32;
1273 ts->indirect_reg = indirect_reg;
1274 ts->mem_allocated = 1;
1275 ts->mem_base = base_ts;
1276 ts->mem_offset = offset + bigendian * 4;
1277 pstrcpy(buf, sizeof(buf), name);
1278 pstrcat(buf, sizeof(buf), "_0");
1279 ts->name = strdup(buf);
1281 tcg_debug_assert(ts2 == ts + 1);
1282 ts2->base_type = TCG_TYPE_I64;
1283 ts2->type = TCG_TYPE_I32;
1284 ts2->indirect_reg = indirect_reg;
1285 ts2->mem_allocated = 1;
1286 ts2->mem_base = base_ts;
1287 ts2->mem_offset = offset + (1 - bigendian) * 4;
1288 pstrcpy(buf, sizeof(buf), name);
1289 pstrcat(buf, sizeof(buf), "_1");
1290 ts2->name = strdup(buf);
1292 ts->base_type = type;
1294 ts->indirect_reg = indirect_reg;
1295 ts->mem_allocated = 1;
1296 ts->mem_base = base_ts;
1297 ts->mem_offset = offset;
/*
 * Allocate (or recycle) a temp of @type; @temp_local selects the
 * call-surviving "local" flavor.  First tries the free-temps bitmap for
 * a previously-freed temp of the same kind (key = type + local offset);
 * otherwise allocates fresh, splitting a 64-bit temp into two adjacent
 * I32 halves on 32-bit hosts.  Pairs with tcg_temp_free_internal().
 * NOTE(review): ts->type assignments in the non-split fresh path, the
 * return, and the temps_in_use++ debug counter body are elided by
 * extraction (numbering gaps) — do not infer.
 */
1303 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1305 TCGContext *s = tcg_ctx;
1309 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1310 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1311 if (idx < TCG_MAX_TEMPS) {
1312 /* There is already an available temp with the right type. */
1313 clear_bit(idx, s->free_temps[k].l);
1315 ts = &s->temps[idx];
1316 ts->temp_allocated = 1;
1317 tcg_debug_assert(ts->base_type == type);
1318 tcg_debug_assert(ts->temp_local == temp_local);
1320 ts = tcg_temp_alloc(s);
1321 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1322 TCGTemp *ts2 = tcg_temp_alloc(s);
1324 ts->base_type = type;
1325 ts->type = TCG_TYPE_I32;
1326 ts->temp_allocated = 1;
1327 ts->temp_local = temp_local;
1329 tcg_debug_assert(ts2 == ts + 1);
1330 ts2->base_type = TCG_TYPE_I64;
1331 ts2->type = TCG_TYPE_I32;
1332 ts2->temp_allocated = 1;
1333 ts2->temp_local = temp_local;
1335 ts->base_type = type;
1337 ts->temp_allocated = 1;
1338 ts->temp_local = temp_local;
1342 #if defined(CONFIG_DEBUG_TCG)
1348 TCGv_vec tcg_temp_new_vec(TCGType type)
1352 #ifdef CONFIG_DEBUG_TCG
1355 assert(TCG_TARGET_HAS_v64);
1358 assert(TCG_TARGET_HAS_v128);
1361 assert(TCG_TARGET_HAS_v256);
1364 g_assert_not_reached();
1368 t = tcg_temp_new_internal(type, 0);
1369 return temp_tcgv_vec(t);
1372 /* Create a new temp of the same type as an existing temp. */
1373 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1375 TCGTemp *t = tcgv_vec_temp(match);
1377 tcg_debug_assert(t->temp_allocated != 0);
1379 t = tcg_temp_new_internal(t->base_type, 0);
1380 return temp_tcgv_vec(t);
1383 void tcg_temp_free_internal(TCGTemp *ts)
1385 TCGContext *s = tcg_ctx;
1388 #if defined(CONFIG_DEBUG_TCG)
1390 if (s->temps_in_use < 0) {
1391 fprintf(stderr, "More temporaries freed than allocated!\n");
1395 tcg_debug_assert(ts->temp_global == 0);
1396 tcg_debug_assert(ts->temp_allocated != 0);
1397 ts->temp_allocated = 0;
1400 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1401 set_bit(idx, s->free_temps[k].l);
1404 TCGv_i32 tcg_const_i32(int32_t val)
1407 t0 = tcg_temp_new_i32();
1408 tcg_gen_movi_i32(t0, val);
1412 TCGv_i64 tcg_const_i64(int64_t val)
1415 t0 = tcg_temp_new_i64();
1416 tcg_gen_movi_i64(t0, val);
1420 TCGv_i32 tcg_const_local_i32(int32_t val)
1423 t0 = tcg_temp_local_new_i32();
1424 tcg_gen_movi_i32(t0, val);
1428 TCGv_i64 tcg_const_local_i64(int64_t val)
1431 t0 = tcg_temp_local_new_i64();
1432 tcg_gen_movi_i64(t0, val);
1436 #if defined(CONFIG_DEBUG_TCG)
1437 void tcg_clear_temp_count(void)
1439 TCGContext *s = tcg_ctx;
1440 s->temps_in_use = 0;
1443 int tcg_check_temp_count(void)
1445 TCGContext *s = tcg_ctx;
1446 if (s->temps_in_use) {
1447 /* Clear the count so that we don't give another
1448 * warning immediately next time around.
1450 s->temps_in_use = 0;
1457 /* Return true if OP may appear in the opcode stream.
1458 Test the runtime variable that controls each opcode. */
1459 bool tcg_op_supported(TCGOpcode op)
1462 = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1465 case INDEX_op_discard:
1466 case INDEX_op_set_label:
1470 case INDEX_op_insn_start:
1471 case INDEX_op_exit_tb:
1472 case INDEX_op_goto_tb:
1473 case INDEX_op_qemu_ld_i32:
1474 case INDEX_op_qemu_st_i32:
1475 case INDEX_op_qemu_ld_i64:
1476 case INDEX_op_qemu_st_i64:
1479 case INDEX_op_qemu_st8_i32:
1480 return TCG_TARGET_HAS_qemu_st8_i32;
1482 case INDEX_op_goto_ptr:
1483 return TCG_TARGET_HAS_goto_ptr;
1485 case INDEX_op_mov_i32:
1486 case INDEX_op_movi_i32:
1487 case INDEX_op_setcond_i32:
1488 case INDEX_op_brcond_i32:
1489 case INDEX_op_ld8u_i32:
1490 case INDEX_op_ld8s_i32:
1491 case INDEX_op_ld16u_i32:
1492 case INDEX_op_ld16s_i32:
1493 case INDEX_op_ld_i32:
1494 case INDEX_op_st8_i32:
1495 case INDEX_op_st16_i32:
1496 case INDEX_op_st_i32:
1497 case INDEX_op_add_i32:
1498 case INDEX_op_sub_i32:
1499 case INDEX_op_mul_i32:
1500 case INDEX_op_and_i32:
1501 case INDEX_op_or_i32:
1502 case INDEX_op_xor_i32:
1503 case INDEX_op_shl_i32:
1504 case INDEX_op_shr_i32:
1505 case INDEX_op_sar_i32:
1508 case INDEX_op_movcond_i32:
1509 return TCG_TARGET_HAS_movcond_i32;
1510 case INDEX_op_div_i32:
1511 case INDEX_op_divu_i32:
1512 return TCG_TARGET_HAS_div_i32;
1513 case INDEX_op_rem_i32:
1514 case INDEX_op_remu_i32:
1515 return TCG_TARGET_HAS_rem_i32;
1516 case INDEX_op_div2_i32:
1517 case INDEX_op_divu2_i32:
1518 return TCG_TARGET_HAS_div2_i32;
1519 case INDEX_op_rotl_i32:
1520 case INDEX_op_rotr_i32:
1521 return TCG_TARGET_HAS_rot_i32;
1522 case INDEX_op_deposit_i32:
1523 return TCG_TARGET_HAS_deposit_i32;
1524 case INDEX_op_extract_i32:
1525 return TCG_TARGET_HAS_extract_i32;
1526 case INDEX_op_sextract_i32:
1527 return TCG_TARGET_HAS_sextract_i32;
1528 case INDEX_op_extract2_i32:
1529 return TCG_TARGET_HAS_extract2_i32;
1530 case INDEX_op_add2_i32:
1531 return TCG_TARGET_HAS_add2_i32;
1532 case INDEX_op_sub2_i32:
1533 return TCG_TARGET_HAS_sub2_i32;
1534 case INDEX_op_mulu2_i32:
1535 return TCG_TARGET_HAS_mulu2_i32;
1536 case INDEX_op_muls2_i32:
1537 return TCG_TARGET_HAS_muls2_i32;
1538 case INDEX_op_muluh_i32:
1539 return TCG_TARGET_HAS_muluh_i32;
1540 case INDEX_op_mulsh_i32:
1541 return TCG_TARGET_HAS_mulsh_i32;
1542 case INDEX_op_ext8s_i32:
1543 return TCG_TARGET_HAS_ext8s_i32;
1544 case INDEX_op_ext16s_i32:
1545 return TCG_TARGET_HAS_ext16s_i32;
1546 case INDEX_op_ext8u_i32:
1547 return TCG_TARGET_HAS_ext8u_i32;
1548 case INDEX_op_ext16u_i32:
1549 return TCG_TARGET_HAS_ext16u_i32;
1550 case INDEX_op_bswap16_i32:
1551 return TCG_TARGET_HAS_bswap16_i32;
1552 case INDEX_op_bswap32_i32:
1553 return TCG_TARGET_HAS_bswap32_i32;
1554 case INDEX_op_not_i32:
1555 return TCG_TARGET_HAS_not_i32;
1556 case INDEX_op_neg_i32:
1557 return TCG_TARGET_HAS_neg_i32;
1558 case INDEX_op_andc_i32:
1559 return TCG_TARGET_HAS_andc_i32;
1560 case INDEX_op_orc_i32:
1561 return TCG_TARGET_HAS_orc_i32;
1562 case INDEX_op_eqv_i32:
1563 return TCG_TARGET_HAS_eqv_i32;
1564 case INDEX_op_nand_i32:
1565 return TCG_TARGET_HAS_nand_i32;
1566 case INDEX_op_nor_i32:
1567 return TCG_TARGET_HAS_nor_i32;
1568 case INDEX_op_clz_i32:
1569 return TCG_TARGET_HAS_clz_i32;
1570 case INDEX_op_ctz_i32:
1571 return TCG_TARGET_HAS_ctz_i32;
1572 case INDEX_op_ctpop_i32:
1573 return TCG_TARGET_HAS_ctpop_i32;
1575 case INDEX_op_brcond2_i32:
1576 case INDEX_op_setcond2_i32:
1577 return TCG_TARGET_REG_BITS == 32;
1579 case INDEX_op_mov_i64:
1580 case INDEX_op_movi_i64:
1581 case INDEX_op_setcond_i64:
1582 case INDEX_op_brcond_i64:
1583 case INDEX_op_ld8u_i64:
1584 case INDEX_op_ld8s_i64:
1585 case INDEX_op_ld16u_i64:
1586 case INDEX_op_ld16s_i64:
1587 case INDEX_op_ld32u_i64:
1588 case INDEX_op_ld32s_i64:
1589 case INDEX_op_ld_i64:
1590 case INDEX_op_st8_i64:
1591 case INDEX_op_st16_i64:
1592 case INDEX_op_st32_i64:
1593 case INDEX_op_st_i64:
1594 case INDEX_op_add_i64:
1595 case INDEX_op_sub_i64:
1596 case INDEX_op_mul_i64:
1597 case INDEX_op_and_i64:
1598 case INDEX_op_or_i64:
1599 case INDEX_op_xor_i64:
1600 case INDEX_op_shl_i64:
1601 case INDEX_op_shr_i64:
1602 case INDEX_op_sar_i64:
1603 case INDEX_op_ext_i32_i64:
1604 case INDEX_op_extu_i32_i64:
1605 return TCG_TARGET_REG_BITS == 64;
1607 case INDEX_op_movcond_i64:
1608 return TCG_TARGET_HAS_movcond_i64;
1609 case INDEX_op_div_i64:
1610 case INDEX_op_divu_i64:
1611 return TCG_TARGET_HAS_div_i64;
1612 case INDEX_op_rem_i64:
1613 case INDEX_op_remu_i64:
1614 return TCG_TARGET_HAS_rem_i64;
1615 case INDEX_op_div2_i64:
1616 case INDEX_op_divu2_i64:
1617 return TCG_TARGET_HAS_div2_i64;
1618 case INDEX_op_rotl_i64:
1619 case INDEX_op_rotr_i64:
1620 return TCG_TARGET_HAS_rot_i64;
1621 case INDEX_op_deposit_i64:
1622 return TCG_TARGET_HAS_deposit_i64;
1623 case INDEX_op_extract_i64:
1624 return TCG_TARGET_HAS_extract_i64;
1625 case INDEX_op_sextract_i64:
1626 return TCG_TARGET_HAS_sextract_i64;
1627 case INDEX_op_extract2_i64:
1628 return TCG_TARGET_HAS_extract2_i64;
1629 case INDEX_op_extrl_i64_i32:
1630 return TCG_TARGET_HAS_extrl_i64_i32;
1631 case INDEX_op_extrh_i64_i32:
1632 return TCG_TARGET_HAS_extrh_i64_i32;
1633 case INDEX_op_ext8s_i64:
1634 return TCG_TARGET_HAS_ext8s_i64;
1635 case INDEX_op_ext16s_i64:
1636 return TCG_TARGET_HAS_ext16s_i64;
1637 case INDEX_op_ext32s_i64:
1638 return TCG_TARGET_HAS_ext32s_i64;
1639 case INDEX_op_ext8u_i64:
1640 return TCG_TARGET_HAS_ext8u_i64;
1641 case INDEX_op_ext16u_i64:
1642 return TCG_TARGET_HAS_ext16u_i64;
1643 case INDEX_op_ext32u_i64:
1644 return TCG_TARGET_HAS_ext32u_i64;
1645 case INDEX_op_bswap16_i64:
1646 return TCG_TARGET_HAS_bswap16_i64;
1647 case INDEX_op_bswap32_i64:
1648 return TCG_TARGET_HAS_bswap32_i64;
1649 case INDEX_op_bswap64_i64:
1650 return TCG_TARGET_HAS_bswap64_i64;
1651 case INDEX_op_not_i64:
1652 return TCG_TARGET_HAS_not_i64;
1653 case INDEX_op_neg_i64:
1654 return TCG_TARGET_HAS_neg_i64;
1655 case INDEX_op_andc_i64:
1656 return TCG_TARGET_HAS_andc_i64;
1657 case INDEX_op_orc_i64:
1658 return TCG_TARGET_HAS_orc_i64;
1659 case INDEX_op_eqv_i64:
1660 return TCG_TARGET_HAS_eqv_i64;
1661 case INDEX_op_nand_i64:
1662 return TCG_TARGET_HAS_nand_i64;
1663 case INDEX_op_nor_i64:
1664 return TCG_TARGET_HAS_nor_i64;
1665 case INDEX_op_clz_i64:
1666 return TCG_TARGET_HAS_clz_i64;
1667 case INDEX_op_ctz_i64:
1668 return TCG_TARGET_HAS_ctz_i64;
1669 case INDEX_op_ctpop_i64:
1670 return TCG_TARGET_HAS_ctpop_i64;
1671 case INDEX_op_add2_i64:
1672 return TCG_TARGET_HAS_add2_i64;
1673 case INDEX_op_sub2_i64:
1674 return TCG_TARGET_HAS_sub2_i64;
1675 case INDEX_op_mulu2_i64:
1676 return TCG_TARGET_HAS_mulu2_i64;
1677 case INDEX_op_muls2_i64:
1678 return TCG_TARGET_HAS_muls2_i64;
1679 case INDEX_op_muluh_i64:
1680 return TCG_TARGET_HAS_muluh_i64;
1681 case INDEX_op_mulsh_i64:
1682 return TCG_TARGET_HAS_mulsh_i64;
1684 case INDEX_op_mov_vec:
1685 case INDEX_op_dup_vec:
1686 case INDEX_op_dupi_vec:
1687 case INDEX_op_dupm_vec:
1688 case INDEX_op_ld_vec:
1689 case INDEX_op_st_vec:
1690 case INDEX_op_add_vec:
1691 case INDEX_op_sub_vec:
1692 case INDEX_op_and_vec:
1693 case INDEX_op_or_vec:
1694 case INDEX_op_xor_vec:
1695 case INDEX_op_cmp_vec:
1697 case INDEX_op_dup2_vec:
1698 return have_vec && TCG_TARGET_REG_BITS == 32;
1699 case INDEX_op_not_vec:
1700 return have_vec && TCG_TARGET_HAS_not_vec;
1701 case INDEX_op_neg_vec:
1702 return have_vec && TCG_TARGET_HAS_neg_vec;
1703 case INDEX_op_abs_vec:
1704 return have_vec && TCG_TARGET_HAS_abs_vec;
1705 case INDEX_op_andc_vec:
1706 return have_vec && TCG_TARGET_HAS_andc_vec;
1707 case INDEX_op_orc_vec:
1708 return have_vec && TCG_TARGET_HAS_orc_vec;
1709 case INDEX_op_mul_vec:
1710 return have_vec && TCG_TARGET_HAS_mul_vec;
1711 case INDEX_op_shli_vec:
1712 case INDEX_op_shri_vec:
1713 case INDEX_op_sari_vec:
1714 return have_vec && TCG_TARGET_HAS_shi_vec;
1715 case INDEX_op_shls_vec:
1716 case INDEX_op_shrs_vec:
1717 case INDEX_op_sars_vec:
1718 return have_vec && TCG_TARGET_HAS_shs_vec;
1719 case INDEX_op_shlv_vec:
1720 case INDEX_op_shrv_vec:
1721 case INDEX_op_sarv_vec:
1722 return have_vec && TCG_TARGET_HAS_shv_vec;
1723 case INDEX_op_rotli_vec:
1724 return have_vec && TCG_TARGET_HAS_roti_vec;
1725 case INDEX_op_rotls_vec:
1726 return have_vec && TCG_TARGET_HAS_rots_vec;
1727 case INDEX_op_rotlv_vec:
1728 case INDEX_op_rotrv_vec:
1729 return have_vec && TCG_TARGET_HAS_rotv_vec;
1730 case INDEX_op_ssadd_vec:
1731 case INDEX_op_usadd_vec:
1732 case INDEX_op_sssub_vec:
1733 case INDEX_op_ussub_vec:
1734 return have_vec && TCG_TARGET_HAS_sat_vec;
1735 case INDEX_op_smin_vec:
1736 case INDEX_op_umin_vec:
1737 case INDEX_op_smax_vec:
1738 case INDEX_op_umax_vec:
1739 return have_vec && TCG_TARGET_HAS_minmax_vec;
1740 case INDEX_op_bitsel_vec:
1741 return have_vec && TCG_TARGET_HAS_bitsel_vec;
1742 case INDEX_op_cmpsel_vec:
1743 return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1746 tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1751 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1752 and endian swap. Maybe it would be better to do the alignment
1753 and endian swap in tcg_reg_alloc_call(). */
1754 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1756 int i, real_args, nb_rets, pi;
1757 unsigned sizemask, flags;
1758 TCGHelperInfo *info;
1761 info = g_hash_table_lookup(helper_table, (gpointer)func);
1762 flags = info->flags;
1763 sizemask = info->sizemask;
1765 #ifdef CONFIG_PLUGIN
1766 /* detect non-plugin helpers */
1767 if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1768 tcg_ctx->plugin_insn->calls_helpers = true;
1772 #if defined(__sparc__) && !defined(__arch64__) \
1773 && !defined(CONFIG_TCG_INTERPRETER)
1774 /* We have 64-bit values in one register, but need to pass as two
1775 separate parameters. Split them. */
1776 int orig_sizemask = sizemask;
1777 int orig_nargs = nargs;
1778 TCGv_i64 retl, reth;
1779 TCGTemp *split_args[MAX_OPC_PARAM];
1783 if (sizemask != 0) {
1784 for (i = real_args = 0; i < nargs; ++i) {
1785 int is_64bit = sizemask & (1 << (i+1)*2);
1787 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1788 TCGv_i32 h = tcg_temp_new_i32();
1789 TCGv_i32 l = tcg_temp_new_i32();
1790 tcg_gen_extr_i64_i32(l, h, orig);
1791 split_args[real_args++] = tcgv_i32_temp(h);
1792 split_args[real_args++] = tcgv_i32_temp(l);
1794 split_args[real_args++] = args[i];
1801 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1802 for (i = 0; i < nargs; ++i) {
1803 int is_64bit = sizemask & (1 << (i+1)*2);
1804 int is_signed = sizemask & (2 << (i+1)*2);
1806 TCGv_i64 temp = tcg_temp_new_i64();
1807 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1809 tcg_gen_ext32s_i64(temp, orig);
1811 tcg_gen_ext32u_i64(temp, orig);
1813 args[i] = tcgv_i64_temp(temp);
1816 #endif /* TCG_TARGET_EXTEND_ARGS */
1818 op = tcg_emit_op(INDEX_op_call);
1822 #if defined(__sparc__) && !defined(__arch64__) \
1823 && !defined(CONFIG_TCG_INTERPRETER)
1824 if (orig_sizemask & 1) {
1825 /* The 32-bit ABI is going to return the 64-bit value in
1826 the %o0/%o1 register pair. Prepare for this by using
1827 two return temporaries, and reassemble below. */
1828 retl = tcg_temp_new_i64();
1829 reth = tcg_temp_new_i64();
1830 op->args[pi++] = tcgv_i64_arg(reth);
1831 op->args[pi++] = tcgv_i64_arg(retl);
1834 op->args[pi++] = temp_arg(ret);
1838 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1839 #ifdef HOST_WORDS_BIGENDIAN
1840 op->args[pi++] = temp_arg(ret + 1);
1841 op->args[pi++] = temp_arg(ret);
1843 op->args[pi++] = temp_arg(ret);
1844 op->args[pi++] = temp_arg(ret + 1);
1848 op->args[pi++] = temp_arg(ret);
1855 TCGOP_CALLO(op) = nb_rets;
1858 for (i = 0; i < nargs; i++) {
1859 int is_64bit = sizemask & (1 << (i+1)*2);
1860 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1861 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1862 /* some targets want aligned 64 bit args */
1863 if (real_args & 1) {
1864 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1868 /* If stack grows up, then we will be placing successive
1869 arguments at lower addresses, which means we need to
1870 reverse the order compared to how we would normally
1871 treat either big or little-endian. For those arguments
1872 that will wind up in registers, this still works for
1873 HPPA (the only current STACK_GROWSUP target) since the
1874 argument registers are *also* allocated in decreasing
1875 order. If another such target is added, this logic may
1876 have to get more complicated to differentiate between
1877 stack arguments and register arguments. */
1878 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1879 op->args[pi++] = temp_arg(args[i] + 1);
1880 op->args[pi++] = temp_arg(args[i]);
1882 op->args[pi++] = temp_arg(args[i]);
1883 op->args[pi++] = temp_arg(args[i] + 1);
1889 op->args[pi++] = temp_arg(args[i]);
1892 op->args[pi++] = (uintptr_t)func;
1893 op->args[pi++] = flags;
1894 TCGOP_CALLI(op) = real_args;
1896 /* Make sure the fields didn't overflow. */
1897 tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1898 tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1900 #if defined(__sparc__) && !defined(__arch64__) \
1901 && !defined(CONFIG_TCG_INTERPRETER)
1902 /* Free all of the parts we allocated above. */
1903 for (i = real_args = 0; i < orig_nargs; ++i) {
1904 int is_64bit = orig_sizemask & (1 << (i+1)*2);
1906 tcg_temp_free_internal(args[real_args++]);
1907 tcg_temp_free_internal(args[real_args++]);
1912 if (orig_sizemask & 1) {
1913 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
1914 Note that describing these as TCGv_i64 eliminates an unnecessary
1915 zero-extension that tcg_gen_concat_i32_i64 would create. */
1916 tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1917 tcg_temp_free_i64(retl);
1918 tcg_temp_free_i64(reth);
1920 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1921 for (i = 0; i < nargs; ++i) {
1922 int is_64bit = sizemask & (1 << (i+1)*2);
1924 tcg_temp_free_internal(args[i]);
1927 #endif /* TCG_TARGET_EXTEND_ARGS */
1930 static void tcg_reg_alloc_start(TCGContext *s)
1935 for (i = 0, n = s->nb_globals; i < n; i++) {
1937 ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1939 for (n = s->nb_temps; i < n; i++) {
1941 ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1942 ts->mem_allocated = 0;
1946 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1949 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1952 int idx = temp_idx(ts);
1954 if (ts->temp_global) {
1955 pstrcpy(buf, buf_size, ts->name);
1956 } else if (ts->temp_local) {
1957 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1959 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1964 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1965 int buf_size, TCGArg arg)
1967 return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1970 /* Find helper name. */
1971 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1973 const char *ret = NULL;
1975 TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1983 static const char * const cond_name[] =
1985 [TCG_COND_NEVER] = "never",
1986 [TCG_COND_ALWAYS] = "always",
1987 [TCG_COND_EQ] = "eq",
1988 [TCG_COND_NE] = "ne",
1989 [TCG_COND_LT] = "lt",
1990 [TCG_COND_GE] = "ge",
1991 [TCG_COND_LE] = "le",
1992 [TCG_COND_GT] = "gt",
1993 [TCG_COND_LTU] = "ltu",
1994 [TCG_COND_GEU] = "geu",
1995 [TCG_COND_LEU] = "leu",
1996 [TCG_COND_GTU] = "gtu"
1999 static const char * const ldst_name[] =
2015 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2016 #ifdef TARGET_ALIGNED_ONLY
2017 [MO_UNALN >> MO_ASHIFT] = "un+",
2018 [MO_ALIGN >> MO_ASHIFT] = "",
2020 [MO_UNALN >> MO_ASHIFT] = "",
2021 [MO_ALIGN >> MO_ASHIFT] = "al+",
2023 [MO_ALIGN_2 >> MO_ASHIFT] = "al2+",
2024 [MO_ALIGN_4 >> MO_ASHIFT] = "al4+",
2025 [MO_ALIGN_8 >> MO_ASHIFT] = "al8+",
2026 [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2027 [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2028 [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2031 static inline bool tcg_regset_single(TCGRegSet d)
2033 return (d & (d - 1)) == 0;
2036 static inline TCGReg tcg_regset_first(TCGRegSet d)
2038 if (TCG_TARGET_NB_REGS <= 32) {
2045 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2050 QTAILQ_FOREACH(op, &s->ops, link) {
2051 int i, k, nb_oargs, nb_iargs, nb_cargs;
2052 const TCGOpDef *def;
2057 def = &tcg_op_defs[c];
2059 if (c == INDEX_op_insn_start) {
2061 col += qemu_log("\n ----");
2063 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2065 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2066 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2070 col += qemu_log(" " TARGET_FMT_lx, a);
2072 } else if (c == INDEX_op_call) {
2073 /* variable number of arguments */
2074 nb_oargs = TCGOP_CALLO(op);
2075 nb_iargs = TCGOP_CALLI(op);
2076 nb_cargs = def->nb_cargs;
2078 /* function name, flags, out args */
2079 col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2080 tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2081 op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2082 for (i = 0; i < nb_oargs; i++) {
2083 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2086 for (i = 0; i < nb_iargs; i++) {
2087 TCGArg arg = op->args[nb_oargs + i];
2088 const char *t = "<dummy>";
2089 if (arg != TCG_CALL_DUMMY_ARG) {
2090 t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2092 col += qemu_log(",%s", t);
2095 col += qemu_log(" %s ", def->name);
2097 nb_oargs = def->nb_oargs;
2098 nb_iargs = def->nb_iargs;
2099 nb_cargs = def->nb_cargs;
2101 if (def->flags & TCG_OPF_VECTOR) {
2102 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2103 8 << TCGOP_VECE(op));
2107 for (i = 0; i < nb_oargs; i++) {
2109 col += qemu_log(",");
2111 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2114 for (i = 0; i < nb_iargs; i++) {
2116 col += qemu_log(",");
2118 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2122 case INDEX_op_brcond_i32:
2123 case INDEX_op_setcond_i32:
2124 case INDEX_op_movcond_i32:
2125 case INDEX_op_brcond2_i32:
2126 case INDEX_op_setcond2_i32:
2127 case INDEX_op_brcond_i64:
2128 case INDEX_op_setcond_i64:
2129 case INDEX_op_movcond_i64:
2130 case INDEX_op_cmp_vec:
2131 case INDEX_op_cmpsel_vec:
2132 if (op->args[k] < ARRAY_SIZE(cond_name)
2133 && cond_name[op->args[k]]) {
2134 col += qemu_log(",%s", cond_name[op->args[k++]]);
2136 col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2140 case INDEX_op_qemu_ld_i32:
2141 case INDEX_op_qemu_st_i32:
2142 case INDEX_op_qemu_st8_i32:
2143 case INDEX_op_qemu_ld_i64:
2144 case INDEX_op_qemu_st_i64:
2146 TCGMemOpIdx oi = op->args[k++];
2147 MemOp op = get_memop(oi);
2148 unsigned ix = get_mmuidx(oi);
2150 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2151 col += qemu_log(",$0x%x,%u", op, ix);
2153 const char *s_al, *s_op;
2154 s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2155 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2156 col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2166 case INDEX_op_set_label:
2168 case INDEX_op_brcond_i32:
2169 case INDEX_op_brcond_i64:
2170 case INDEX_op_brcond2_i32:
2171 col += qemu_log("%s$L%d", k ? "," : "",
2172 arg_label(op->args[k])->id);
2178 for (; i < nb_cargs; i++, k++) {
2179 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2183 if (have_prefs || op->life) {
2185 QemuLogFile *logfile;
2188 logfile = qatomic_rcu_read(&qemu_logfile);
2190 for (; col < 40; ++col) {
2191 putc(' ', logfile->fd);
2198 unsigned life = op->life;
2200 if (life & (SYNC_ARG * 3)) {
2202 for (i = 0; i < 2; ++i) {
2203 if (life & (SYNC_ARG << i)) {
2211 for (i = 0; life; ++i, life >>= 1) {
2220 for (i = 0; i < nb_oargs; ++i) {
2221 TCGRegSet set = op->output_pref[i];
2230 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2232 #ifdef CONFIG_DEBUG_TCG
2233 } else if (tcg_regset_single(set)) {
2234 TCGReg reg = tcg_regset_first(set);
2235 qemu_log("%s", tcg_target_reg_names[reg]);
2237 } else if (TCG_TARGET_NB_REGS <= 32) {
2238 qemu_log("%#x", (uint32_t)set);
2240 qemu_log("%#" PRIx64, (uint64_t)set);
2249 /* we give more priority to constraints with less registers */
2250 static int get_constraint_priority(const TCGOpDef *def, int k)
2252 const TCGArgConstraint *arg_ct = &def->args_ct[k];
2255 if (arg_ct->oalias) {
2256 /* an alias is equivalent to a single register */
2259 n = ctpop64(arg_ct->regs);
2261 return TCG_TARGET_NB_REGS - n + 1;
2264 /* sort from highest priority to lowest */
2265 static void sort_constraints(TCGOpDef *def, int start, int n)
2268 TCGArgConstraint *a = def->args_ct;
2270 for (i = 0; i < n; i++) {
2271 a[start + i].sort_index = start + i;
2276 for (i = 0; i < n - 1; i++) {
2277 for (j = i + 1; j < n; j++) {
2278 int p1 = get_constraint_priority(def, a[start + i].sort_index);
2279 int p2 = get_constraint_priority(def, a[start + j].sort_index);
2281 int tmp = a[start + i].sort_index;
2282 a[start + i].sort_index = a[start + j].sort_index;
2283 a[start + j].sort_index = tmp;
2289 static void process_op_defs(TCGContext *s)
2293 for (op = 0; op < NB_OPS; op++) {
2294 TCGOpDef *def = &tcg_op_defs[op];
2295 const TCGTargetOpDef *tdefs;
2299 if (def->flags & TCG_OPF_NOT_PRESENT) {
2303 nb_args = def->nb_iargs + def->nb_oargs;
2308 tdefs = tcg_target_op_def(op);
2309 /* Missing TCGTargetOpDef entry. */
2310 tcg_debug_assert(tdefs != NULL);
2312 type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2313 for (i = 0; i < nb_args; i++) {
2314 const char *ct_str = tdefs->args_ct_str[i];
2315 /* Incomplete TCGTargetOpDef entry. */
2316 tcg_debug_assert(ct_str != NULL);
2318 while (*ct_str != '\0') {
2322 int oarg = *ct_str - '0';
2323 tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2324 tcg_debug_assert(oarg < def->nb_oargs);
2325 tcg_debug_assert(def->args_ct[oarg].regs != 0);
2326 def->args_ct[i] = def->args_ct[oarg];
2327 /* The output sets oalias. */
2328 def->args_ct[oarg].oalias = true;
2329 def->args_ct[oarg].alias_index = i;
2330 /* The input sets ialias. */
2331 def->args_ct[i].ialias = true;
2332 def->args_ct[i].alias_index = oarg;
2337 def->args_ct[i].newreg = true;
2341 def->args_ct[i].ct |= TCG_CT_CONST;
2345 ct_str = target_parse_constraint(&def->args_ct[i],
2347 /* Typo in TCGTargetOpDef constraint. */
2348 tcg_debug_assert(ct_str != NULL);
2353 /* TCGTargetOpDef entry with too much information? */
2354 tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2356 /* sort the constraints (XXX: this is just an heuristic) */
2357 sort_constraints(def, 0, def->nb_oargs);
2358 sort_constraints(def, def->nb_oargs, def->nb_iargs);
2362 void tcg_op_remove(TCGContext *s, TCGOp *op)
2368 label = arg_label(op->args[0]);
2371 case INDEX_op_brcond_i32:
2372 case INDEX_op_brcond_i64:
2373 label = arg_label(op->args[3]);
2376 case INDEX_op_brcond2_i32:
2377 label = arg_label(op->args[5]);
2384 QTAILQ_REMOVE(&s->ops, op, link);
2385 QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2388 #ifdef CONFIG_PROFILER
2389 qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2393 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2395 TCGContext *s = tcg_ctx;
2398 if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2399 op = tcg_malloc(sizeof(TCGOp));
2401 op = QTAILQ_FIRST(&s->free_ops);
2402 QTAILQ_REMOVE(&s->free_ops, op, link);
2404 memset(op, 0, offsetof(TCGOp, link));
2411 TCGOp *tcg_emit_op(TCGOpcode opc)
2413 TCGOp *op = tcg_op_alloc(opc);
2414 QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2418 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2420 TCGOp *new_op = tcg_op_alloc(opc);
2421 QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2425 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2427 TCGOp *new_op = tcg_op_alloc(opc);
2428 QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2432 /* Reachable analysis : remove unreachable code. */
2433 static void reachable_code_pass(TCGContext *s)
2435 TCGOp *op, *op_next;
2438 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2444 case INDEX_op_set_label:
2445 label = arg_label(op->args[0]);
2446 if (label->refs == 0) {
2448 * While there is an occasional backward branch, virtually
2449 * all branches generated by the translators are forward.
2450 * Which means that generally we will have already removed
2451 * all references to the label that will be, and there is
2452 * little to be gained by iterating.
2456 /* Once we see a label, insns become live again. */
2461 * Optimization can fold conditional branches to unconditional.
2462 * If we find a label with one reference which is preceded by
2463 * an unconditional branch to it, remove both. This needed to
2464 * wait until the dead code in between them was removed.
2466 if (label->refs == 1) {
2467 TCGOp *op_prev = QTAILQ_PREV(op, link);
2468 if (op_prev->opc == INDEX_op_br &&
2469 label == arg_label(op_prev->args[0])) {
2470 tcg_op_remove(s, op_prev);
2478 case INDEX_op_exit_tb:
2479 case INDEX_op_goto_ptr:
2480 /* Unconditional branches; everything following is dead. */
2485 /* Notice noreturn helper calls, raising exceptions. */
2486 call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2487 if (call_flags & TCG_CALL_NO_RETURN) {
2492 case INDEX_op_insn_start:
2493 /* Never remove -- we need to keep these for unwind. */
2502 tcg_op_remove(s, op);
2510 #define IS_DEAD_ARG(n) (arg_life & (DEAD_ARG << (n)))
2511 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2513 /* For liveness_pass_1, the register preferences for a given temp. */
2514 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2516 return ts->state_ptr;
2519 /* For liveness_pass_1, reset the preferences for a given temp to the
2520 * maximal regset for its type.
2522 static inline void la_reset_pref(TCGTemp *ts)
2525 = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2528 /* liveness analysis: end of function: all temps are dead, and globals
2529 should be in memory. */
2530 static void la_func_end(TCGContext *s, int ng, int nt)
2534 for (i = 0; i < ng; ++i) {
2535 s->temps[i].state = TS_DEAD | TS_MEM;
2536 la_reset_pref(&s->temps[i]);
2538 for (i = ng; i < nt; ++i) {
2539 s->temps[i].state = TS_DEAD;
2540 la_reset_pref(&s->temps[i]);
2544 /* liveness analysis: end of basic block: all temps are dead, globals
2545 and local temps should be in memory. */
2546 static void la_bb_end(TCGContext *s, int ng, int nt)
2550 for (i = 0; i < ng; ++i) {
2551 s->temps[i].state = TS_DEAD | TS_MEM;
2552 la_reset_pref(&s->temps[i]);
2554 for (i = ng; i < nt; ++i) {
2555 s->temps[i].state = (s->temps[i].temp_local
2558 la_reset_pref(&s->temps[i]);
2562 /* liveness analysis: sync globals back to memory. */
2563 static void la_global_sync(TCGContext *s, int ng)
2567 for (i = 0; i < ng; ++i) {
2568 int state = s->temps[i].state;
2569 s->temps[i].state = state | TS_MEM;
2570 if (state == TS_DEAD) {
2571 /* If the global was previously dead, reset prefs. */
2572 la_reset_pref(&s->temps[i]);
2578 * liveness analysis: conditional branch: all temps are dead,
2579 * globals and local temps should be synced.
2581 static void la_bb_sync(TCGContext *s, int ng, int nt)
2583 la_global_sync(s, ng);
2585 for (int i = ng; i < nt; ++i) {
2586 if (s->temps[i].temp_local) {
2587 int state = s->temps[i].state;
2588 s->temps[i].state = state | TS_MEM;
2589 if (state != TS_DEAD) {
2593 s->temps[i].state = TS_DEAD;
2595 la_reset_pref(&s->temps[i]);
2599 /* liveness analysis: sync globals back to memory and kill. */
2600 static void la_global_kill(TCGContext *s, int ng)
2604 for (i = 0; i < ng; i++) {
2605 s->temps[i].state = TS_DEAD | TS_MEM;
2606 la_reset_pref(&s->temps[i]);
2610 /* liveness analysis: note live globals crossing calls. */
2611 static void la_cross_call(TCGContext *s, int nt)
2613 TCGRegSet mask = ~tcg_target_call_clobber_regs;
2616 for (i = 0; i < nt; i++) {
2617 TCGTemp *ts = &s->temps[i];
2618 if (!(ts->state & TS_DEAD)) {
2619 TCGRegSet *pset = la_temp_pref(ts);
2620 TCGRegSet set = *pset;
2623 /* If the combination is not possible, restart. */
2625 set = tcg_target_available_regs[ts->type] & mask;
2632 /* Liveness analysis : update the opc_arg_life array to tell if a
2633 given input arguments is dead. Instructions updating dead
2634 temporaries are removed. */
2635 static void liveness_pass_1(TCGContext *s)
/* Reverse walk (QTAILQ_FOREACH_REVERSE_SAFE below): while an op is being
   examined, each temp's ->state describes its liveness *after* that op. */
2637 int nb_globals = s->nb_globals;
2638 int nb_temps = s->nb_temps;
2639 TCGOp *op, *op_prev;
/* One register-preference set per temp, reachable via state_ptr. */
2643 prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2644 for (i = 0; i < nb_temps; ++i) {
2645 s->temps[i].state_ptr = prefs + i;
2648 /* ??? Should be redundant with the exit_tb that ends the TB. */
2649 la_func_end(s, nb_globals, nb_temps);
2651 QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2652 int nb_iargs, nb_oargs;
2653 TCGOpcode opc_new, opc_new2;
2655 TCGLifeData arg_life = 0;
2657 TCGOpcode opc = op->opc;
2658 const TCGOpDef *def = &tcg_op_defs[opc];
2666 nb_oargs = TCGOP_CALLO(op);
2667 nb_iargs = TCGOP_CALLI(op);
2668 call_flags = op->args[nb_oargs + nb_iargs + 1];
2670 /* pure functions can be removed if their result is unused */
2671 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2672 for (i = 0; i < nb_oargs; i++) {
2673 ts = arg_temp(op->args[i]);
2674 if (ts->state != TS_DEAD) {
2675 goto do_not_remove_call;
2682 /* Output args are dead. */
2683 for (i = 0; i < nb_oargs; i++) {
2684 ts = arg_temp(op->args[i]);
2685 if (ts->state & TS_DEAD) {
2686 arg_life |= DEAD_ARG << i;
2688 if (ts->state & TS_MEM) {
2689 arg_life |= SYNC_ARG << i;
2691 ts->state = TS_DEAD;
2694 /* Not used -- it will be tcg_target_call_oarg_regs[i]. */
2695 op->output_pref[i] = 0;
2698 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2699 TCG_CALL_NO_READ_GLOBALS))) {
2700 la_global_kill(s, nb_globals);
2701 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2702 la_global_sync(s, nb_globals);
2705 /* Record arguments that die in this helper. */
2706 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2707 ts = arg_temp(op->args[i]);
2708 if (ts && ts->state & TS_DEAD) {
2709 arg_life |= DEAD_ARG << i;
2713 /* For all live registers, remove call-clobbered prefs. */
2714 la_cross_call(s, nb_temps);
2716 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2718 /* Input arguments are live for preceding opcodes. */
2719 for (i = 0; i < nb_iargs; i++) {
2720 ts = arg_temp(op->args[i + nb_oargs]);
2721 if (ts && ts->state & TS_DEAD) {
2722 /* For those arguments that die, and will be allocated
2723 * in registers, clear the register set for that arg,
2724 * to be filled in below. For args that will be on
2725 * the stack, reset to any available reg.
2728 = (i < nb_call_regs ? 0 :
2729 tcg_target_available_regs[ts->type]);
2730 ts->state &= ~TS_DEAD;
2734 /* For each input argument, add its input register to prefs.
2735 If a temp is used once, this produces a single set bit. */
2736 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2737 ts = arg_temp(op->args[i + nb_oargs]);
2739 tcg_regset_set_reg(*la_temp_pref(ts),
2740 tcg_target_call_iarg_regs[i]);
2745 case INDEX_op_insn_start:
2747 case INDEX_op_discard:
2748 /* mark the temporary as dead */
2749 ts = arg_temp(op->args[0]);
2750 ts->state = TS_DEAD;
/* add2/sub2: when only part of the double-word result is live, the op
   can degrade to the single-word opcode selected into opc_new. */
2754 case INDEX_op_add2_i32:
2755 opc_new = INDEX_op_add_i32;
2757 case INDEX_op_sub2_i32:
2758 opc_new = INDEX_op_sub_i32;
2760 case INDEX_op_add2_i64:
2761 opc_new = INDEX_op_add_i64;
2763 case INDEX_op_sub2_i64:
2764 opc_new = INDEX_op_sub_i64;
2768 /* Test if the high part of the operation is dead, but not
2769 the low part. The result can be optimized to a simple
2770 add or sub. This happens often for x86_64 guest when the
2771 cpu mode is set to 32 bit. */
2772 if (arg_temp(op->args[1])->state == TS_DEAD) {
2773 if (arg_temp(op->args[0])->state == TS_DEAD) {
2776 /* Replace the opcode and adjust the args in place,
2777 leaving 3 unused args at the end. */
2778 op->opc = opc = opc_new;
2779 op->args[1] = op->args[2];
2780 op->args[2] = op->args[4];
2781 /* Fall through and mark the single-word operation live. */
/* mulu2/muls2: the low half alone degrades to mul; the high half alone
   degrades to mul[us]h when the target provides it (have_opc_new2). */
2787 case INDEX_op_mulu2_i32:
2788 opc_new = INDEX_op_mul_i32;
2789 opc_new2 = INDEX_op_muluh_i32;
2790 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2792 case INDEX_op_muls2_i32:
2793 opc_new = INDEX_op_mul_i32;
2794 opc_new2 = INDEX_op_mulsh_i32;
2795 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2797 case INDEX_op_mulu2_i64:
2798 opc_new = INDEX_op_mul_i64;
2799 opc_new2 = INDEX_op_muluh_i64;
2800 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2802 case INDEX_op_muls2_i64:
2803 opc_new = INDEX_op_mul_i64;
2804 opc_new2 = INDEX_op_mulsh_i64;
2805 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2810 if (arg_temp(op->args[1])->state == TS_DEAD) {
2811 if (arg_temp(op->args[0])->state == TS_DEAD) {
2812 /* Both parts of the operation are dead. */
2815 /* The high part of the operation is dead; generate the low. */
2816 op->opc = opc = opc_new;
2817 op->args[1] = op->args[2];
2818 op->args[2] = op->args[3];
2819 } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2820 /* The low part of the operation is dead; generate the high. */
2821 op->opc = opc = opc_new2;
2822 op->args[0] = op->args[1];
2823 op->args[1] = op->args[2];
2824 op->args[2] = op->args[3];
2828 /* Mark the single-word operation live. */
2833 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2834 nb_iargs = def->nb_iargs;
2835 nb_oargs = def->nb_oargs;
2837 /* Test if the operation can be removed because all
2838 its outputs are dead. We assume that nb_oargs == 0
2839 implies side effects */
2840 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2841 for (i = 0; i < nb_oargs; i++) {
2842 if (arg_temp(op->args[i])->state != TS_DEAD) {
2851 tcg_op_remove(s, op);
2855 for (i = 0; i < nb_oargs; i++) {
2856 ts = arg_temp(op->args[i]);
2858 /* Remember the preference of the uses that followed. */
2859 op->output_pref[i] = *la_temp_pref(ts);
2861 /* Output args are dead. */
2862 if (ts->state & TS_DEAD) {
2863 arg_life |= DEAD_ARG << i;
2865 if (ts->state & TS_MEM) {
2866 arg_life |= SYNC_ARG << i;
2868 ts->state = TS_DEAD;
2872 /* If end of basic block, update. */
2873 if (def->flags & TCG_OPF_BB_EXIT) {
2874 la_func_end(s, nb_globals, nb_temps);
2875 } else if (def->flags & TCG_OPF_COND_BRANCH) {
2876 la_bb_sync(s, nb_globals, nb_temps);
2877 } else if (def->flags & TCG_OPF_BB_END) {
2878 la_bb_end(s, nb_globals, nb_temps);
2879 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2880 la_global_sync(s, nb_globals);
2881 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2882 la_cross_call(s, nb_temps);
2886 /* Record arguments that die in this opcode. */
2887 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2888 ts = arg_temp(op->args[i]);
2889 if (ts->state & TS_DEAD) {
2890 arg_life |= DEAD_ARG << i;
2894 /* Input arguments are live for preceding opcodes. */
2895 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2896 ts = arg_temp(op->args[i]);
2897 if (ts->state & TS_DEAD) {
2898 /* For operands that were dead, initially allow
2899 all regs for the type. */
2900 *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2901 ts->state &= ~TS_DEAD;
2905 /* Incorporate constraints for this operand. */
2907 case INDEX_op_mov_i32:
2908 case INDEX_op_mov_i64:
2909 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2910 have proper constraints. That said, special case
2911 moves to propagate preferences backward. */
2912 if (IS_DEAD_ARG(1)) {
2913 *la_temp_pref(arg_temp(op->args[0]))
2914 = *la_temp_pref(arg_temp(op->args[1]));
2919 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2920 const TCGArgConstraint *ct = &def->args_ct[i];
2921 TCGRegSet set, *pset;
2923 ts = arg_temp(op->args[i]);
2924 pset = la_temp_pref(ts);
2929 set &= op->output_pref[ct->alias_index];
2931 /* If the combination is not possible, restart. */
/* Publish the DEAD_ARG/SYNC_ARG bits for this op; consumed later by
   the register allocator via IS_DEAD_ARG/NEED_SYNC_ARG. */
2941 op->life = arg_life;
2945 /* Liveness analysis: Convert indirect regs to direct temporaries. */
2946 static bool liveness_pass_2(TCGContext *s)
/* For each indirect global (indirect_reg set), allocate a shadow "direct"
   temp and rewrite ops to use it, inserting explicit ld/st ops at the
   required points.  Presumably returns 'changes' (the return statement is
   outside this view) -- confirm against the full source. */
2948 int nb_globals = s->nb_globals;
2950 bool changes = false;
2951 TCGOp *op, *op_next;
2953 /* Create a temporary for each indirect global. */
2954 for (i = 0; i < nb_globals; ++i) {
2955 TCGTemp *its = &s->temps[i];
2956 if (its->indirect_reg) {
2957 TCGTemp *dts = tcg_temp_alloc(s);
2958 dts->type = its->type;
2959 dts->base_type = its->base_type;
2960 its->state_ptr = dts;
2962 its->state_ptr = NULL;
2964 /* All globals begin dead. */
2965 its->state = TS_DEAD;
2967 for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2968 TCGTemp *its = &s->temps[i];
2969 its->state_ptr = NULL;
2970 its->state = TS_DEAD;
2973 QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2974 TCGOpcode opc = op->opc;
2975 const TCGOpDef *def = &tcg_op_defs[opc];
2976 TCGLifeData arg_life = op->life;
2977 int nb_iargs, nb_oargs, call_flags;
2978 TCGTemp *arg_ts, *dir_ts;
2980 if (opc == INDEX_op_call) {
2981 nb_oargs = TCGOP_CALLO(op);
2982 nb_iargs = TCGOP_CALLI(op);
2983 call_flags = op->args[nb_oargs + nb_iargs + 1];
2985 nb_iargs = def->nb_iargs;
2986 nb_oargs = def->nb_oargs;
2988 /* Set flags similar to how calls require. */
2989 if (def->flags & TCG_OPF_COND_BRANCH) {
2990 /* Like reading globals: sync_globals */
2991 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2992 } else if (def->flags & TCG_OPF_BB_END) {
2993 /* Like writing globals: save_globals */
2995 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2996 /* Like reading globals: sync_globals */
2997 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2999 /* No effect on globals. */
3000 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3001 TCG_CALL_NO_WRITE_GLOBALS);
3005 /* Make sure that input arguments are available. */
3006 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3007 arg_ts = arg_temp(op->args[i]);
3009 dir_ts = arg_ts->state_ptr;
3010 if (dir_ts && arg_ts->state == TS_DEAD) {
3011 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3014 TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3016 lop->args[0] = temp_arg(dir_ts);
3017 lop->args[1] = temp_arg(arg_ts->mem_base);
3018 lop->args[2] = arg_ts->mem_offset;
3020 /* Loaded, but synced with memory. */
3021 arg_ts->state = TS_MEM;
3026 /* Perform input replacement, and mark inputs that became dead.
3027 No action is required except keeping temp_state up to date
3028 so that we reload when needed. */
3029 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3030 arg_ts = arg_temp(op->args[i]);
3032 dir_ts = arg_ts->state_ptr;
3034 op->args[i] = temp_arg(dir_ts);
3036 if (IS_DEAD_ARG(i)) {
3037 arg_ts->state = TS_DEAD;
3043 /* Liveness analysis should ensure that the following are
3044 all correct, for call sites and basic block end points. */
3045 if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3047 } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3048 for (i = 0; i < nb_globals; ++i) {
3049 /* Liveness should see that globals are synced back,
3050 that is, either TS_DEAD or TS_MEM. */
3051 arg_ts = &s->temps[i];
3052 tcg_debug_assert(arg_ts->state_ptr == 0
3053 || arg_ts->state != 0);
3056 for (i = 0; i < nb_globals; ++i) {
3057 /* Liveness should see that globals are saved back,
3058 that is, TS_DEAD, waiting to be reloaded. */
3059 arg_ts = &s->temps[i];
3060 tcg_debug_assert(arg_ts->state_ptr == 0
3061 || arg_ts->state == TS_DEAD);
3065 /* Outputs become available. */
/* Special-case mov to a synced output: when the destination is also dead,
   the inserted store takes the mov's source directly and the mov itself
   is deleted (tcg_op_remove below). */
3066 if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3067 arg_ts = arg_temp(op->args[0]);
3068 dir_ts = arg_ts->state_ptr;
3070 op->args[0] = temp_arg(dir_ts);
3073 /* The output is now live and modified. */
3076 if (NEED_SYNC_ARG(0)) {
3077 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3080 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3081 TCGTemp *out_ts = dir_ts;
3083 if (IS_DEAD_ARG(0)) {
3084 out_ts = arg_temp(op->args[1]);
3085 arg_ts->state = TS_DEAD;
3086 tcg_op_remove(s, op);
3088 arg_ts->state = TS_MEM;
3091 sop->args[0] = temp_arg(out_ts);
3092 sop->args[1] = temp_arg(arg_ts->mem_base);
3093 sop->args[2] = arg_ts->mem_offset;
3095 tcg_debug_assert(!IS_DEAD_ARG(0));
3099 for (i = 0; i < nb_oargs; i++) {
3100 arg_ts = arg_temp(op->args[i]);
3101 dir_ts = arg_ts->state_ptr;
3105 op->args[i] = temp_arg(dir_ts);
3108 /* The output is now live and modified. */
3111 /* Sync outputs upon their last write. */
3112 if (NEED_SYNC_ARG(i)) {
3113 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3116 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3118 sop->args[0] = temp_arg(dir_ts);
3119 sop->args[1] = temp_arg(arg_ts->mem_base);
3120 sop->args[2] = arg_ts->mem_offset;
3122 arg_ts->state = TS_MEM;
3124 /* Drop outputs that are dead. */
3125 if (IS_DEAD_ARG(i)) {
3126 arg_ts->state = TS_DEAD;
3135 #ifdef CONFIG_DEBUG_TCG
3136 static void dump_regs(TCGContext *s)
/* Debug helper: print each temp's current location (host register,
   memory slot, or constant), then the reg -> temp map, to stdout. */
3142 for(i = 0; i < s->nb_temps; i++) {
3144 printf(" %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3145 switch(ts->val_type) {
3147 printf("%s", tcg_target_reg_names[ts->reg]);
3150 printf("%d(%s)", (int)ts->mem_offset,
3151 tcg_target_reg_names[ts->mem_base->reg]);
3153 case TEMP_VAL_CONST:
3154 printf("$0x%" TCG_PRIlx, ts->val);
/* Second table: which temp currently owns each host register. */
3166 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3167 if (s->reg_to_temp[i] != NULL) {
3169 tcg_target_reg_names[i],
3170 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3175 static void check_regs(TCGContext *s)
/* Debug helper: cross-check reg_to_temp[] against each temp's
   val_type/reg, printing any inconsistency found. */
3182 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3183 ts = s->reg_to_temp[reg];
3185 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3186 printf("Inconsistency for register %s:\n",
3187 tcg_target_reg_names[reg]);
/* Reverse direction: every reg-resident temp must be the owner of
   its register (fixed-reg temps are exempt). */
3192 for (k = 0; k < s->nb_temps; k++) {
3194 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3195 && s->reg_to_temp[ts->reg] != ts) {
3196 printf("Inconsistency for temp %s:\n",
3197 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3199 printf("reg state:\n");
3207 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
/* Assign TS a spill slot in the TB frame: align current_frame_offset to
   tcg_target_long, record mem_base/mem_offset, and advance the offset. */
3209 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3210 /* Sparc64 stack is accessed with offset of 2047 */
3211 s->current_frame_offset = (s->current_frame_offset +
3212 (tcg_target_long)sizeof(tcg_target_long) - 1) &
3213 ~(sizeof(tcg_target_long) - 1);
/* Overflow check against the end of the frame (error path elided). */
3215 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3219 ts->mem_offset = s->current_frame_offset;
3220 ts->mem_base = s->frame_temp;
3221 ts->mem_allocated = 1;
3222 s->current_frame_offset += sizeof(tcg_target_long);
3225 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3227 /* Mark a temporary as free or dead. If 'free_or_dead' is negative,
3228 mark it free; otherwise mark it dead. */
3229 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
/* Fixed-register temps (e.g. env) are special-cased here -- branch body
   not visible in this view; confirm against the full source. */
3231 if (ts->fixed_reg) {
3234 if (ts->val_type == TEMP_VAL_REG) {
3235 s->reg_to_temp[ts->reg] = NULL;
/* Free -> value remains valid in its memory slot (TEMP_VAL_MEM);
   dead -> value discarded entirely (TEMP_VAL_DEAD). */
3237 ts->val_type = (free_or_dead < 0
3240 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3243 /* Mark a temporary as dead. */
3244 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
/* Positive argument => dead (value discarded); see temp_free_or_dead. */
3246 temp_free_or_dead(s, ts, 1);
3249 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3250 registers needs to be allocated to store a constant. If 'free_or_dead'
3251 is non-zero, subsequently release the temporary; if it is positive, the
3252 temp is dead; if it is negative, the temp is free. */
3253 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3254 TCGRegSet preferred_regs, int free_or_dead)
/* Fixed-register temps are never spilled -- special-cased here. */
3256 if (ts->fixed_reg) {
3259 if (!ts->mem_coherent) {
3260 if (!ts->mem_allocated) {
3261 temp_allocate_frame(s, ts);
3263 switch (ts->val_type) {
3264 case TEMP_VAL_CONST:
3265 /* If we're going to free the temp immediately, then we won't
3266 require it later in a register, so attempt to store the
3267 constant to memory directly. */
3269 && tcg_out_sti(s, ts->type, ts->val,
3270 ts->mem_base->reg, ts->mem_offset)) {
/* Direct store not possible: materialize into a register first,
   then fall through to the TEMP_VAL_REG store below. */
3273 temp_load(s, ts, tcg_target_available_regs[ts->type],
3274 allocated_regs, preferred_regs);
3278 tcg_out_st(s, ts->type, ts->reg,
3279 ts->mem_base->reg, ts->mem_offset);
/* The memory slot now holds the current value. */
3289 ts->mem_coherent = 1;
3292 temp_free_or_dead(s, ts, free_or_dead);
3296 /* free register 'reg' by spilling the corresponding temporary if necessary */
3297 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3299 TCGTemp *ts = s->reg_to_temp[reg];
/* Sync-and-free (-1): the value survives in its memory slot.
   Presumably guarded by ts != NULL in the elided line above -- confirm. */
3301 temp_sync(s, ts, allocated_regs, 0, -1);
3307 * @required_regs: Set of registers in which we must allocate.
3308 * @allocated_regs: Set of registers which must be avoided.
3309 * @preferred_regs: Set of registers we should prefer.
3310 * @rev: True if we search the registers in "indirect" order.
3312 * The allocated register must be in @required_regs & ~@allocated_regs,
3313 * but if we can put it in @preferred_regs we may save a move later.
3315 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3316 TCGRegSet allocated_regs,
3317 TCGRegSet preferred_regs, bool rev)
3319 int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3320 TCGRegSet reg_ct[2];
/* reg_ct[1] = legal set, reg_ct[0] = preferred subset of it. */
3323 reg_ct[1] = required_regs & ~allocated_regs;
3324 tcg_debug_assert(reg_ct[1] != 0);
3325 reg_ct[0] = reg_ct[1] & preferred_regs;
3327 /* Skip the preferred_regs option if it cannot be satisfied,
3328 or if the preference made no difference. */
3329 f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
/* f == 1 skips reg_ct[0]; both loops below scan reg_ct[f..1]. */
3331 order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3333 /* Try free registers, preferences first. */
3334 for (j = f; j < 2; j++) {
3335 TCGRegSet set = reg_ct[j];
3337 if (tcg_regset_single(set)) {
3338 /* One register in the set. */
3339 TCGReg reg = tcg_regset_first(set);
3340 if (s->reg_to_temp[reg] == NULL) {
3344 for (i = 0; i < n; i++) {
3345 TCGReg reg = order[i];
3346 if (s->reg_to_temp[reg] == NULL &&
3347 tcg_regset_test_reg(set, reg)) {
3354 /* We must spill something. */
3355 for (j = f; j < 2; j++) {
3356 TCGRegSet set = reg_ct[j];
3358 if (tcg_regset_single(set)) {
3359 /* One register in the set. */
3360 TCGReg reg = tcg_regset_first(set);
3361 tcg_reg_free(s, reg, allocated_regs);
3364 for (i = 0; i < n; i++) {
3365 TCGReg reg = order[i];
3366 if (tcg_regset_test_reg(set, reg)) {
3367 tcg_reg_free(s, reg, allocated_regs);
3377 /* Make sure the temporary is in a register. If needed, allocate the register
3378 from DESIRED while avoiding ALLOCATED. */
3379 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3380 TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3384 switch (ts->val_type) {
3387 case TEMP_VAL_CONST:
3388 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3389 preferred_regs, ts->indirect_base);
3390 tcg_out_movi(s, ts->type, reg, ts->val);
/* Constant is only in the register now; memory slot (if any) is stale. */
3391 ts->mem_coherent = 0;
3394 reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3395 preferred_regs, ts->indirect_base);
3396 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
/* Loaded from memory: register and slot hold the same value. */
3397 ts->mem_coherent = 1;
/* Record the temp's new residence. */
3404 ts->val_type = TEMP_VAL_REG;
3405 s->reg_to_temp[reg] = ts;
3408 /* Save a temporary to memory. 'allocated_regs' is used in case a
3409 temporary registers needs to be allocated to store a constant. */
3410 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3412 /* The liveness analysis already ensures that globals are back
3413 in memory. Keep an tcg_debug_assert for safety. */
/* Per the assert: a global is either already in its memory slot or
   lives permanently in a fixed register. */
3414 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3417 /* save globals to their canonical location and assume they can be
3418 modified be the following code. 'allocated_regs' is used in case a
3419 temporary registers needs to be allocated to store a constant. */
3420 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
/* Globals occupy temp indices [0, nb_globals). */
3424 for (i = 0, n = s->nb_globals; i < n; i++) {
3425 temp_save(s, &s->temps[i], allocated_regs);
3429 /* sync globals to their canonical location and assume they can be
3430 read by the following code. 'allocated_regs' is used in case a
3431 temporary registers needs to be allocated to store a constant. */
3432 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3436 for (i = 0, n = s->nb_globals; i < n; i++) {
3437 TCGTemp *ts = &s->temps[i];
/* Per the assert: a global may stay register-resident only if its
   memory copy is up to date (mem_coherent). */
3438 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3440 || ts->mem_coherent);
3444 /* at the end of a basic block, we assume all temporaries are dead and
3445 all globals are stored at their canonical location. */
3446 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
/* Local temps are spilled to their slots; ordinary temps must already be
   dead (asserted below). */
3450 for (i = s->nb_globals; i < s->nb_temps; i++) {
3451 TCGTemp *ts = &s->temps[i];
3452 if (ts->temp_local) {
3453 temp_save(s, ts, allocated_regs);
3455 /* The liveness analysis already ensures that temps are dead.
3456 Keep an tcg_debug_assert for safety. */
3457 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3461 save_globals(s, allocated_regs);
3465 * At a conditional branch, we assume all temporaries are dead and
3466 * all globals and local temps are synced to their location.
3468 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
/* Unlike tcg_reg_alloc_bb_end, values may stay register-resident: globals
   are only synced (sync_globals), not saved and killed. */
3470 sync_globals(s, allocated_regs);
3472 for (int i = s->nb_globals; i < s->nb_temps; i++) {
3473 TCGTemp *ts = &s->temps[i];
3475 * The liveness analysis already ensures that temps are dead.
3476 * Keep tcg_debug_asserts for safety.
3478 if (ts->temp_local) {
3479 tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3481 tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3487 * Specialized code generation for INDEX_op_movi_*.
3489 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3490 tcg_target_ulong val, TCGLifeData arg_life,
3491 TCGRegSet preferred_regs)
/* Record OTS as holding the constant VAL; no host code is emitted
   unless a required sync forces the value out (temp_sync below). */
3493 /* ENV should not be modified. */
3494 tcg_debug_assert(!ots->fixed_reg);
3496 /* The movi is not explicitly generated here. */
3497 if (ots->val_type == TEMP_VAL_REG) {
3498 s->reg_to_temp[ots->reg] = NULL;
3500 ots->val_type = TEMP_VAL_CONST;
3502 ots->mem_coherent = 0;
3503 if (NEED_SYNC_ARG(0)) {
3504 temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3505 } else if (IS_DEAD_ARG(0)) {
3510 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
/* Thin wrapper: unpack the movi op's destination and immediate,
   then delegate to tcg_reg_alloc_do_movi. */
3512 TCGTemp *ots = arg_temp(op->args[0]);
3513 tcg_target_ulong val = op->args[1];
3515 tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3519 * Specialized code generation for INDEX_op_mov_*.
3521 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3523 const TCGLifeData arg_life = op->life;
3524 TCGRegSet allocated_regs, preferred_regs;
3526 TCGType otype, itype;
3528 allocated_regs = s->reserved_regs;
3529 preferred_regs = op->output_pref[0];
3530 ots = arg_temp(op->args[0]);
3531 ts = arg_temp(op->args[1]);
3533 /* ENV should not be modified. */
3534 tcg_debug_assert(!ots->fixed_reg);
3536 /* Note that otype != itype for no-op truncation. */
3540 if (ts->val_type == TEMP_VAL_CONST) {
3541 /* propagate constant or generate sti */
3542 tcg_target_ulong val = ts->val;
3543 if (IS_DEAD_ARG(1)) {
3546 tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3550 /* If the source value is in memory we're going to be forced
3551 to have it in a register in order to perform the copy. Copy
3552 the SOURCE value into its own register first, that way we
3553 don't have to reload SOURCE the next time it is used. */
3554 if (ts->val_type == TEMP_VAL_MEM) {
3555 temp_load(s, ts, tcg_target_available_regs[itype],
3556 allocated_regs, preferred_regs);
3559 tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3560 if (IS_DEAD_ARG(0)) {
3561 /* mov to a non-saved dead register makes no sense (even with
3562 liveness analysis disabled). */
3563 tcg_debug_assert(NEED_SYNC_ARG(0));
3564 if (!ots->mem_allocated) {
3565 temp_allocate_frame(s, ots);
3567 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3568 if (IS_DEAD_ARG(1)) {
/* Source dies here and is not pinned: steal its register for the
   destination instead of emitting a move. */
3573 if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3574 /* the mov can be suppressed */
3575 if (ots->val_type == TEMP_VAL_REG) {
3576 s->reg_to_temp[ots->reg] = NULL;
3581 if (ots->val_type != TEMP_VAL_REG) {
3582 /* When allocating a new register, make sure to not spill the
3584 tcg_regset_set_reg(allocated_regs, ts->reg);
3585 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3586 allocated_regs, preferred_regs,
3587 ots->indirect_base);
3589 if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3591 * Cross register class move not supported.
3592 * Store the source register into the destination slot
3593 * and leave the destination temp as TEMP_VAL_MEM.
3595 assert(!ots->fixed_reg);
/* NOTE(review): this tests ts->mem_allocated but allocates a frame
   for ots; it looks like it should test ots->mem_allocated --
   verify against upstream QEMU. */
3596 if (!ts->mem_allocated) {
3597 temp_allocate_frame(s, ots);
3599 tcg_out_st(s, ts->type, ts->reg,
3600 ots->mem_base->reg, ots->mem_offset);
3601 ots->mem_coherent = 1;
3602 temp_free_or_dead(s, ots, -1);
3606 ots->val_type = TEMP_VAL_REG;
3607 ots->mem_coherent = 0;
3608 s->reg_to_temp[ots->reg] = ots;
3609 if (NEED_SYNC_ARG(0)) {
3610 temp_sync(s, ots, allocated_regs, 0, 0);
3616 * Specialized code generation for INDEX_op_dup_vec.
3618 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3620 const TCGLifeData arg_life = op->life;
3621 TCGRegSet dup_out_regs, dup_in_regs;
3623 TCGType itype, vtype;
3624 intptr_t endian_fixup;
3628 ots = arg_temp(op->args[0]);
3629 its = arg_temp(op->args[1]);
3631 /* ENV should not be modified. */
3632 tcg_debug_assert(!ots->fixed_reg);
3635 vece = TCGOP_VECE(op);
3636 vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3638 if (its->val_type == TEMP_VAL_CONST) {
3639 /* Propagate constant via movi -> dupi. */
3640 tcg_target_ulong val = its->val;
3641 if (IS_DEAD_ARG(1)) {
3644 tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3648 dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3649 dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3651 /* Allocate the output register now. */
3652 if (ots->val_type != TEMP_VAL_REG) {
3653 TCGRegSet allocated_regs = s->reserved_regs;
3655 if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3656 /* Make sure to not spill the input register. */
3657 tcg_regset_set_reg(allocated_regs, its->reg);
3659 ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3660 op->output_pref[0], ots->indirect_base);
3661 ots->val_type = TEMP_VAL_REG;
3662 ots->mem_coherent = 0;
3663 s->reg_to_temp[ots->reg] = ots;
3666 switch (its->val_type) {
3669 * The dup constraints must be broad, covering all possible VECE.
3670 * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3671 * to fail, indicating that extra moves are required for that case.
3673 if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3674 if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3677 /* Try again from memory or a vector input register. */
3679 if (!its->mem_coherent) {
3681 * The input register is not synced, and so an extra store
3682 * would be required to use memory. Attempt an integer-vector
3683 * register move first. We do not have a TCGRegSet for this.
3685 if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3688 /* Sync the temp back to its slot and load from there. */
3689 temp_sync(s, its, s->reserved_regs, 0, 0);
/* Big-endian hosts keep the interesting subword at the far end of the
   slot; bias the load offset by (slot size - element size). */
3694 #ifdef HOST_WORDS_BIGENDIAN
3695 endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3696 endian_fixup -= 1 << vece;
3700 if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3701 its->mem_offset + endian_fixup)) {
3704 tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3708 g_assert_not_reached();
3711 /* We now have a vector input register, so dup must succeed. */
3712 ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3713 tcg_debug_assert(ok);
3716 if (IS_DEAD_ARG(1)) {
3719 if (NEED_SYNC_ARG(0)) {
3720 temp_sync(s, ots, s->reserved_regs, 0, 0);
3722 if (IS_DEAD_ARG(0)) {
3727 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
/* Allocate registers and emit host code for one generic (non-call) op,
   honoring the constraint set recorded in tcg_op_defs. */
3729 const TCGLifeData arg_life = op->life;
3730 const TCGOpDef * const def = &tcg_op_defs[op->opc];
3731 TCGRegSet i_allocated_regs;
3732 TCGRegSet o_allocated_regs;
3733 int i, k, nb_iargs, nb_oargs;
3736 const TCGArgConstraint *arg_ct;
3738 TCGArg new_args[TCG_MAX_OP_ARGS];
3739 int const_args[TCG_MAX_OP_ARGS];
3741 nb_oargs = def->nb_oargs;
3742 nb_iargs = def->nb_iargs;
3744 /* copy constants */
3745 memcpy(new_args + nb_oargs + nb_iargs,
3746 op->args + nb_oargs + nb_iargs,
3747 sizeof(TCGArg) * def->nb_cargs);
3749 i_allocated_regs = s->reserved_regs;
3750 o_allocated_regs = s->reserved_regs;
3752 /* satisfy input constraints */
/* Inputs are visited in constraint-sorted order (sort_index). */
3753 for (k = 0; k < nb_iargs; k++) {
3754 TCGRegSet i_preferred_regs, o_preferred_regs;
3756 i = def->args_ct[nb_oargs + k].sort_index;
3758 arg_ct = &def->args_ct[i];
3761 if (ts->val_type == TEMP_VAL_CONST
3762 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3763 /* constant is OK for instruction */
3765 new_args[i] = ts->val;
3769 i_preferred_regs = o_preferred_regs = 0;
3770 if (arg_ct->ialias) {
3771 o_preferred_regs = op->output_pref[arg_ct->alias_index];
3772 if (ts->fixed_reg) {
3773 /* if fixed register, we must allocate a new register
3774 if the alias is not the same register */
3775 if (arg != op->args[arg_ct->alias_index]) {
3776 goto allocate_in_reg;
3779 /* if the input is aliased to an output and if it is
3780 not dead after the instruction, we must allocate
3781 a new register and move it */
3782 if (!IS_DEAD_ARG(i)) {
3783 goto allocate_in_reg;
3786 /* check if the current register has already been allocated
3787 for another input aliased to an output */
3788 if (ts->val_type == TEMP_VAL_REG) {
3791 for (k2 = 0 ; k2 < k ; k2++) {
3792 i2 = def->args_ct[nb_oargs + k2].sort_index;
3793 if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3794 goto allocate_in_reg;
3798 i_preferred_regs = o_preferred_regs;
3802 temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3805 if (tcg_regset_test_reg(arg_ct->regs, reg)) {
3806 /* nothing to do : the constraint is satisfied */
3809 /* allocate a new register matching the constraint
3810 and move the temporary register into it */
3811 temp_load(s, ts, tcg_target_available_regs[ts->type],
3812 i_allocated_regs, 0);
3813 reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3814 o_preferred_regs, ts->indirect_base);
3815 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3817 * Cross register class move not supported. Sync the
3818 * temp back to its slot and load from there.
3820 temp_sync(s, ts, i_allocated_regs, 0, 0);
3821 tcg_out_ld(s, ts->type, reg,
3822 ts->mem_base->reg, ts->mem_offset);
3827 tcg_regset_set_reg(i_allocated_regs, reg);
3830 /* mark dead temporaries and free the associated registers */
3831 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3832 if (IS_DEAD_ARG(i)) {
3833 temp_dead(s, arg_temp(op->args[i]));
3837 if (def->flags & TCG_OPF_COND_BRANCH) {
3838 tcg_reg_alloc_cbranch(s, i_allocated_regs);
3839 } else if (def->flags & TCG_OPF_BB_END) {
3840 tcg_reg_alloc_bb_end(s, i_allocated_regs);
3842 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3843 /* XXX: permit generic clobber register list ? */
3844 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3845 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3846 tcg_reg_free(s, i, i_allocated_regs);
3850 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3851 /* sync globals if the op has side effects and might trigger
3853 sync_globals(s, i_allocated_regs);
3856 /* satisfy the output constraints */
3857 for(k = 0; k < nb_oargs; k++) {
3858 i = def->args_ct[k].sort_index;
3860 arg_ct = &def->args_ct[i];
3863 /* ENV should not be modified. */
3864 tcg_debug_assert(!ts->fixed_reg);
3866 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3867 reg = new_args[arg_ct->alias_index];
/* 'newreg' outputs must not overlap any input register, hence the
   union of both allocated sets below. */
3868 } else if (arg_ct->newreg) {
3869 reg = tcg_reg_alloc(s, arg_ct->regs,
3870 i_allocated_regs | o_allocated_regs,
3871 op->output_pref[k], ts->indirect_base);
3873 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3874 op->output_pref[k], ts->indirect_base);
3876 tcg_regset_set_reg(o_allocated_regs, reg);
3877 if (ts->val_type == TEMP_VAL_REG) {
3878 s->reg_to_temp[ts->reg] = NULL;
3880 ts->val_type = TEMP_VAL_REG;
3883 * Temp value is modified, so the value kept in memory is
3884 * potentially not the same.
3886 ts->mem_coherent = 0;
3887 s->reg_to_temp[reg] = ts;
3892 /* emit instruction */
3893 if (def->flags & TCG_OPF_VECTOR) {
3894 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3895 new_args, const_args);
3897 tcg_out_op(s, op->opc, new_args, const_args);
3900 /* move the outputs in the correct register if needed */
3901 for(i = 0; i < nb_oargs; i++) {
3902 ts = arg_temp(op->args[i]);
3904 /* ENV should not be modified. */
3905 tcg_debug_assert(!ts->fixed_reg);
3907 if (NEED_SYNC_ARG(i)) {
3908 temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3909 } else if (IS_DEAD_ARG(i)) {
/* Flip stack-slot offsets on hosts whose stack grows upward. */
3915 #ifdef TCG_TARGET_STACK_GROWSUP
3916 #define STACK_DIR(x) (-(x))
3918 #define STACK_DIR(x) (x)
3921 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3923 const int nb_oargs = TCGOP_CALLO(op);
3924 const int nb_iargs = TCGOP_CALLI(op);
3925 const TCGLifeData arg_life = op->life;
3926 int flags, nb_regs, i;
3930 intptr_t stack_offset;
3931 size_t call_stack_size;
3932 tcg_insn_unit *func_addr;
3934 TCGRegSet allocated_regs;
3936 func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3937 flags = op->args[nb_oargs + nb_iargs + 1];
3939 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3940 if (nb_regs > nb_iargs) {
3944 /* assign stack slots first */
3945 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3946 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3947 ~(TCG_TARGET_STACK_ALIGN - 1);
3948 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3949 if (allocate_args) {
3950 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3951 preallocate call stack */
3955 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3956 for (i = nb_regs; i < nb_iargs; i++) {
3957 arg = op->args[nb_oargs + i];
3958 #ifdef TCG_TARGET_STACK_GROWSUP
3959 stack_offset -= sizeof(tcg_target_long);
3961 if (arg != TCG_CALL_DUMMY_ARG) {
3963 temp_load(s, ts, tcg_target_available_regs[ts->type],
3964 s->reserved_regs, 0);
3965 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3967 #ifndef TCG_TARGET_STACK_GROWSUP
3968 stack_offset += sizeof(tcg_target_long);
3972 /* assign input registers */
3973 allocated_regs = s->reserved_regs;
3974 for (i = 0; i < nb_regs; i++) {
3975 arg = op->args[nb_oargs + i];
3976 if (arg != TCG_CALL_DUMMY_ARG) {
3978 reg = tcg_target_call_iarg_regs[i];
3980 if (ts->val_type == TEMP_VAL_REG) {
3981 if (ts->reg != reg) {
3982 tcg_reg_free(s, reg, allocated_regs);
3983 if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3985 * Cross register class move not supported. Sync the
3986 * temp back to its slot and load from there.
3988 temp_sync(s, ts, allocated_regs, 0, 0);
3989 tcg_out_ld(s, ts->type, reg,
3990 ts->mem_base->reg, ts->mem_offset);
3994 TCGRegSet arg_set = 0;
3996 tcg_reg_free(s, reg, allocated_regs);
3997 tcg_regset_set_reg(arg_set, reg);
3998 temp_load(s, ts, arg_set, allocated_regs, 0);
4001 tcg_regset_set_reg(allocated_regs, reg);
4005 /* mark dead temporaries and free the associated registers */
4006 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4007 if (IS_DEAD_ARG(i)) {
4008 temp_dead(s, arg_temp(op->args[i]));
4012 /* clobber call registers */
4013 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4014 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4015 tcg_reg_free(s, i, allocated_regs);
4019 /* Save globals if they might be written by the helper, sync them if
4020 they might be read. */
4021 if (flags & TCG_CALL_NO_READ_GLOBALS) {
4023 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4024 sync_globals(s, allocated_regs);
4026 save_globals(s, allocated_regs);
4029 tcg_out_call(s, func_addr);
4031 /* assign output registers and emit moves if needed */
4032 for(i = 0; i < nb_oargs; i++) {
4036 /* ENV should not be modified. */
4037 tcg_debug_assert(!ts->fixed_reg);
4039 reg = tcg_target_call_oarg_regs[i];
4040 tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4041 if (ts->val_type == TEMP_VAL_REG) {
4042 s->reg_to_temp[ts->reg] = NULL;
4044 ts->val_type = TEMP_VAL_REG;
4046 ts->mem_coherent = 0;
4047 s->reg_to_temp[reg] = ts;
4048 if (NEED_SYNC_ARG(i)) {
4049 temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4050 } else if (IS_DEAD_ARG(i)) {
4056 #ifdef CONFIG_PROFILER
4058 /* avoid copy/paste errors */
/* Accumulate one profiler field from @from into @to, reading the
   source atomically (per-vCPU TCGContexts update it concurrently). */
4059 #define PROF_ADD(to, from, field) \
4061 (to)->field += qatomic_read(&((from)->field)); \
/* Like PROF_ADD but keeps the maximum instead of the sum. */
4064 #define PROF_MAX(to, from, field) \
4066 typeof((from)->field) val__ = qatomic_read(&((from)->field)); \
4067 if (val__ > (to)->field) { \
4068 (to)->field = val__; \
4072 /* Pass in a zero'ed @prof */
/*
 * Aggregate the profiling counters of every live TCGContext into
 * @prof (which the caller must have zero-initialized).  @counters
 * selects the scalar counters; @table presumably selects the
 * per-opcode table -- the guarding conditionals are elided in this
 * excerpt, so confirm against the full source.
 */
4074 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4076 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4079 for (i = 0; i < n_ctxs; i++) {
4080 TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4081 const TCGProfile *orig = &s->prof;
4084 PROF_ADD(prof, orig, cpu_exec_time);
4085 PROF_ADD(prof, orig, tb_count1);
4086 PROF_ADD(prof, orig, tb_count);
4087 PROF_ADD(prof, orig, op_count);
4088 PROF_MAX(prof, orig, op_count_max);
4089 PROF_ADD(prof, orig, temp_count);
4090 PROF_MAX(prof, orig, temp_count_max);
4091 PROF_ADD(prof, orig, del_op_count);
4092 PROF_ADD(prof, orig, code_in_len);
4093 PROF_ADD(prof, orig, code_out_len);
4094 PROF_ADD(prof, orig, search_out_len);
4095 PROF_ADD(prof, orig, interm_time);
4096 PROF_ADD(prof, orig, code_time);
4097 PROF_ADD(prof, orig, la_time);
4098 PROF_ADD(prof, orig, opt_time);
4099 PROF_ADD(prof, orig, restore_count);
4100 PROF_ADD(prof, orig, restore_time);
/* Per-opcode execution counts. */
4105 for (i = 0; i < NB_OPS; i++) {
4106 PROF_ADD(prof, orig, table_op_count[i]);
/* Snapshot only the scalar counters (no per-opcode table). */
4115 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4117 tcg_profile_snapshot(prof, true, false);
/* Snapshot only the per-opcode table (no scalar counters). */
4120 static void tcg_profile_snapshot_table(TCGProfile *prof)
4122 tcg_profile_snapshot(prof, false, true);
/* Print the aggregated per-opcode execution counts, one line per
   TCG opcode (profiler build). */
4125 void tcg_dump_op_count(void)
4127 TCGProfile prof = {};
4130 tcg_profile_snapshot_table(&prof);
4131 for (i = 0; i < NB_OPS; i++) {
4132 qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4133 prof.table_op_count[i]);
/* Sum cpu_exec_time across all live TCGContexts (profiler build).
   NOTE(review): the declaration/return of the accumulator is elided
   in this excerpt. */
4137 int64_t tcg_cpu_exec_time(void)
4139 unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4143 for (i = 0; i < n_ctxs; i++) {
4144 const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4145 const TCGProfile *prof = &s->prof;
4147 ret += qatomic_read(&prof->cpu_exec_time);
/* Non-profiler build: report that the data is unavailable. */
4152 void tcg_dump_op_count(void)
4154 qemu_printf("[TCG profiler not compiled]\n");
/* Non-profiler build: no timing data is collected. */
4157 int64_t tcg_cpu_exec_time(void)
4159 error_report("%s: TCG profiler not compiled", __func__);
/*
 * Translate the pending op list of @s into host code for @tb.
 * Pipeline: optional logging/stats -> optimizer -> liveness ->
 * optional indirect-temp lowering -> per-op register allocation and
 * host code emission -> TB finalization (ldst/pool labels, reloc
 * resolution) -> icache flush.  Returns the generated code size.
 *
 * NOTE(review): many lines (error-return paths, #endif lines, switch
 * scaffolding) are elided in this excerpt; comments describe only the
 * visible code.
 */
4165 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4167 #ifdef CONFIG_PROFILER
4168 TCGProfile *prof = &s->prof;
4173 #ifdef CONFIG_PROFILER
/* Count ops and temps for this TB; track the per-TB maxima. */
4177 QTAILQ_FOREACH(op, &s->ops, link) {
4180 qatomic_set(&prof->op_count, prof->op_count + n);
4181 if (n > prof->op_count_max) {
4182 qatomic_set(&prof->op_count_max, n);
4186 qatomic_set(&prof->temp_count, prof->temp_count + n);
4187 if (n > prof->temp_count_max) {
4188 qatomic_set(&prof->temp_count_max, n);
/* Dump the op list before optimization if -d op is in effect
   for this guest address. */
4194 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4195 && qemu_log_in_addr_range(tb->pc))) {
4196 FILE *logfile = qemu_log_lock();
4198 tcg_dump_ops(s, false);
4200 qemu_log_unlock(logfile);
4204 #ifdef CONFIG_DEBUG_TCG
4205 /* Ensure all labels referenced have been emitted. */
4210 QSIMPLEQ_FOREACH(l, &s->labels, next) {
4211 if (unlikely(!l->present) && l->refs) {
4212 qemu_log_mask(CPU_LOG_TB_OP,
4213 "$L%d referenced but not present.\n", l->id);
4221 #ifdef CONFIG_PROFILER
4222 qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4225 #ifdef USE_TCG_OPTIMIZATIONS
4229 #ifdef CONFIG_PROFILER
4230 qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4231 qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4234 reachable_code_pass(s);
/* Lower indirect temps, if any were created. */
4237 if (s->nb_indirects > 0) {
4239 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4240 && qemu_log_in_addr_range(tb->pc))) {
4241 FILE *logfile = qemu_log_lock();
4242 qemu_log("OP before indirect lowering:\n");
4243 tcg_dump_ops(s, false);
4245 qemu_log_unlock(logfile);
4248 /* Replace indirect temps with direct temps. */
4249 if (liveness_pass_2(s)) {
4250 /* If changes were made, re-run liveness. */
4255 #ifdef CONFIG_PROFILER
4256 qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4260 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4261 && qemu_log_in_addr_range(tb->pc))) {
4262 FILE *logfile = qemu_log_lock();
4263 qemu_log("OP after optimization and liveness analysis:\n");
4264 tcg_dump_ops(s, true);
4266 qemu_log_unlock(logfile);
4270 tcg_reg_alloc_start(s);
4273 * Reset the buffer pointers when restarting after overflow.
4274 * TODO: Move this into translate-all.c with the rest of the
4275 * buffer management. Having only this done here is confusing.
4277 s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4278 s->code_ptr = s->code_buf;
4280 #ifdef TCG_TARGET_NEED_LDST_LABELS
4281 QSIMPLEQ_INIT(&s->ldst_labels);
4283 #ifdef TCG_TARGET_NEED_POOL_LABELS
4284 s->pool_labels = NULL;
/* Main emission loop: allocate registers and emit host code for
   each op in order. */
4288 QTAILQ_FOREACH(op, &s->ops, link) {
4289 TCGOpcode opc = op->opc;
4291 #ifdef CONFIG_PROFILER
4292 qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4296 case INDEX_op_mov_i32:
4297 case INDEX_op_mov_i64:
4298 case INDEX_op_mov_vec:
4299 tcg_reg_alloc_mov(s, op);
4301 case INDEX_op_movi_i32:
4302 case INDEX_op_movi_i64:
4303 case INDEX_op_dupi_vec:
4304 tcg_reg_alloc_movi(s, op);
4306 case INDEX_op_dup_vec:
4307 tcg_reg_alloc_dup(s, op);
4309 case INDEX_op_insn_start:
/* Close out the previous guest insn: record where its host
   code ended. */
4310 if (num_insns >= 0) {
4311 size_t off = tcg_current_code_size(s);
4312 s->gen_insn_end_off[num_insns] = off;
4313 /* Assert that we do not overflow our stored offset. */
4314 assert(s->gen_insn_end_off[num_insns] == off);
4317 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4319 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
/* 64-bit guest addresses are split across two 32-bit args. */
4320 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4324 s->gen_insn_data[num_insns][i] = a;
4327 case INDEX_op_discard:
4328 temp_dead(s, arg_temp(op->args[0]));
4330 case INDEX_op_set_label:
4331 tcg_reg_alloc_bb_end(s, s->reserved_regs);
4332 tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4335 tcg_reg_alloc_call(s, op);
4338 /* Sanity check that we've not introduced any unhandled opcodes. */
4339 tcg_debug_assert(tcg_op_supported(opc));
4340 /* Note: in order to speed up the code, it would be much
4341 faster to have specialized register allocator functions for
4342 some common argument patterns */
4343 tcg_reg_alloc_op(s, op);
4346 #ifdef CONFIG_DEBUG_TCG
4349 /* Test for (pending) buffer overflow. The assumption is that any
4350 one operation beginning below the high water mark cannot overrun
4351 the buffer completely. Thus we can test for overflow after
4352 generating code without having to check during generation. */
4353 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4356 /* Test for TB overflow, as seen by gen_insn_end_off. */
4357 if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4361 tcg_debug_assert(num_insns >= 0);
4362 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4364 /* Generate TB finalization at the end of block */
4365 #ifdef TCG_TARGET_NEED_LDST_LABELS
4366 i = tcg_out_ldst_finalize(s);
4371 #ifdef TCG_TARGET_NEED_POOL_LABELS
4372 i = tcg_out_pool_finalize(s);
4377 if (!tcg_resolve_relocs(s)) {
4381 #ifndef CONFIG_TCG_INTERPRETER
4382 /* flush instruction cache */
/* With split RW/RX mappings the icache flush targets the RX alias
   of the buffer we just wrote through the RW alias. */
4383 flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4384 (uintptr_t)s->code_buf,
4385 tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4388 return tcg_current_code_size(s);
4391 #ifdef CONFIG_PROFILER
/*
 * Print a human-readable summary of the aggregated TCG profiling
 * counters: TB counts, per-TB averages, and cycle breakdowns.
 * NOTE(review): the line binding `s` to &prof is elided in this
 * excerpt; from the uses below, s reads the snapshot taken into prof.
 */
4392 void tcg_dump_info(void)
4394 TCGProfile prof = {};
4395 const TCGProfile *s;
4397 int64_t tb_div_count;
4400 tcg_profile_snapshot_counters(&prof);
4402 tb_count = s->tb_count;
/* Guard against division by zero when no TBs were translated. */
4403 tb_div_count = tb_count ? tb_count : 1;
4404 tot = s->interm_time + s->code_time;
4406 qemu_printf("JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4408 qemu_printf("translated TBs %" PRId64 " (aborted=%" PRId64
4410 tb_count, s->tb_count1 - tb_count,
4411 (double)(s->tb_count1 - s->tb_count)
4412 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4413 qemu_printf("avg ops/TB %0.1f max=%d\n",
4414 (double)s->op_count / tb_div_count, s->op_count_max);
4415 qemu_printf("deleted ops/TB %0.2f\n",
4416 (double)s->del_op_count / tb_div_count);
4417 qemu_printf("avg temps/TB %0.2f max=%d\n",
4418 (double)s->temp_count / tb_div_count, s->temp_count_max);
4419 qemu_printf("avg host code/TB %0.1f\n",
4420 (double)s->code_out_len / tb_div_count);
4421 qemu_printf("avg search data/TB %0.1f\n",
4422 (double)s->search_out_len / tb_div_count);
4424 qemu_printf("cycles/op %0.1f\n",
4425 s->op_count ? (double)tot / s->op_count : 0);
4426 qemu_printf("cycles/in byte %0.1f\n",
4427 s->code_in_len ? (double)tot / s->code_in_len : 0);
4428 qemu_printf("cycles/out byte %0.1f\n",
4429 s->code_out_len ? (double)tot / s->code_out_len : 0);
4430 qemu_printf("cycles/search byte %0.1f\n",
4431 s->search_out_len ? (double)tot / s->search_out_len : 0);
4435 qemu_printf(" gen_interm time %0.1f%%\n",
4436 (double)s->interm_time / tot * 100.0);
4437 qemu_printf(" gen_code time %0.1f%%\n",
4438 (double)s->code_time / tot * 100.0);
4439 qemu_printf("optim./code time %0.1f%%\n",
4440 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4442 qemu_printf("liveness/code time %0.1f%%\n",
4443 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4444 qemu_printf("cpu_restore count %" PRId64 "\n",
4446 qemu_printf(" avg cycles %0.1f\n",
4447 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
/* Non-profiler build: report that profiling data is unavailable. */
4450 void tcg_dump_info(void)
4452 qemu_printf("[TCG profiler not compiled]\n");
4456 #ifdef ELF_HOST_MACHINE
4457 /* In order to use this feature, the backend needs to do three things:
4459 (1) Define ELF_HOST_MACHINE to indicate both what value to
4460 put into the ELF image and to indicate support for the feature.
4462 (2) Define tcg_register_jit. This should create a buffer containing
4463 the contents of a .debug_frame section that describes the post-
4464 prologue unwind info for the tcg machine.
4466 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4469 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* One node in GDB's doubly-linked list of in-memory symbol files.
   Layout is fixed by the GDB JIT interface; do not change. */
4476 struct jit_code_entry {
4477 struct jit_code_entry *next_entry;
4478 struct jit_code_entry *prev_entry;
4479 const void *symfile_addr;
4480 uint64_t symfile_size;
/* Global descriptor GDB inspects to find registered JIT code.
   NOTE(review): the version field declaration is elided here; the
   initializer below sets it to 1. */
4483 struct jit_descriptor {
4485 uint32_t action_flag;
4486 struct jit_code_entry *relevant_entry;
4487 struct jit_code_entry *first_entry;
/* GDB plants a breakpoint on this function; it must not be inlined
   or optimized away.  The body is intentionally empty. */
4490 void __jit_debug_register_code(void) __attribute__((noinline));
4491 void __jit_debug_register_code(void)
4496 /* Must statically initialize the version, because GDB may check
4497 the version before we can set it. */
4498 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
/* Return the offset of @str within ELF string table @strtab.
   Starts at offset 1 to skip the mandatory leading NUL byte.
   NOTE(review): the loop advance and return lines are elided in this
   excerpt. */
4502 static int find_string(const char *strtab, const char *str)
4504 const char *p = strtab + 1;
4507 if (strcmp(p, str) == 0) {
/*
 * Build an in-memory ELF image describing the code_gen_buffer (a
 * fake .text plus minimal DWARF .debug_info/.debug_abbrev and the
 * caller-supplied .debug_frame) and register it with GDB through the
 * JIT interface, so generated code gets symbolized backtraces.
 *
 * NOTE(review): numerous lines (several struct fields, the phdr/shdr
 * array declarations, some initializers) are elided in this excerpt.
 */
4514 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4515 const void *debug_frame,
4516 size_t debug_frame_size)
/* Minimal hand-rolled .debug_info: one compile unit containing one
   subprogram covering the whole buffer.  Must stay packed so the
   on-disk DWARF layout is exact. */
4518 struct __attribute__((packed)) DebugInfo {
4525 uintptr_t cu_low_pc;
4526 uintptr_t cu_high_pc;
4529 uintptr_t fn_low_pc;
4530 uintptr_t fn_high_pc;
4539 struct DebugInfo di;
4544 struct ElfImage *img;
/* Template for the fake ELF file; per-buffer fields are patched in
   after the memcpy below. */
4546 static const struct ElfImage img_template = {
4548 .e_ident[EI_MAG0] = ELFMAG0,
4549 .e_ident[EI_MAG1] = ELFMAG1,
4550 .e_ident[EI_MAG2] = ELFMAG2,
4551 .e_ident[EI_MAG3] = ELFMAG3,
4552 .e_ident[EI_CLASS] = ELF_CLASS,
4553 .e_ident[EI_DATA] = ELF_DATA,
4554 .e_ident[EI_VERSION] = EV_CURRENT,
4556 .e_machine = ELF_HOST_MACHINE,
4557 .e_version = EV_CURRENT,
4558 .e_phoff = offsetof(struct ElfImage, phdr),
4559 .e_shoff = offsetof(struct ElfImage, shdr),
4560 .e_ehsize = sizeof(ElfW(Shdr)),
4561 .e_phentsize = sizeof(ElfW(Phdr)),
4563 .e_shentsize = sizeof(ElfW(Shdr)),
4564 .e_shnum = ARRAY_SIZE(img->shdr),
4565 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4566 #ifdef ELF_HOST_FLAGS
4567 .e_flags = ELF_HOST_FLAGS,
4570 .e_ident[EI_OSABI] = ELF_OSABI,
4578 [0] = { .sh_type = SHT_NULL },
4579 /* Trick: The contents of code_gen_buffer are not present in
4580 this fake ELF file; that got allocated elsewhere. Therefore
4581 we mark .text as SHT_NOBITS (similar to .bss) so that readers
4582 will not look for contents. We can record any address. */
4584 .sh_type = SHT_NOBITS,
4585 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4587 [2] = { /* .debug_info */
4588 .sh_type = SHT_PROGBITS,
4589 .sh_offset = offsetof(struct ElfImage, di),
4590 .sh_size = sizeof(struct DebugInfo),
4592 [3] = { /* .debug_abbrev */
4593 .sh_type = SHT_PROGBITS,
4594 .sh_offset = offsetof(struct ElfImage, da),
4595 .sh_size = sizeof(img->da),
4597 [4] = { /* .debug_frame */
4598 .sh_type = SHT_PROGBITS,
4599 .sh_offset = sizeof(struct ElfImage),
4601 [5] = { /* .symtab */
4602 .sh_type = SHT_SYMTAB,
4603 .sh_offset = offsetof(struct ElfImage, sym),
4604 .sh_size = sizeof(img->sym),
4606 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4607 .sh_entsize = sizeof(ElfW(Sym)),
4609 [6] = { /* .strtab */
4610 .sh_type = SHT_STRTAB,
4611 .sh_offset = offsetof(struct ElfImage, str),
4612 .sh_size = sizeof(img->str),
4616 [1] = { /* code_gen_buffer */
4617 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4622 .len = sizeof(struct DebugInfo) - 4,
4624 .ptr_size = sizeof(void *),
4626 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
4628 .fn_name = "code_gen_buffer"
/* DWARF abbreviation table matching the DebugInfo layout above. */
4631 1, /* abbrev number (the cu) */
4632 0x11, 1, /* DW_TAG_compile_unit, has children */
4633 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
4634 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4635 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4636 0, 0, /* end of abbrev */
4637 2, /* abbrev number (the fn) */
4638 0x2e, 0, /* DW_TAG_subprogram, no children */
4639 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
4640 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
4641 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
4642 0, 0, /* end of abbrev */
4643 0 /* no more abbrev */
4645 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4646 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4649 /* We only need a single jit entry; statically allocate it. */
4650 static struct jit_code_entry one_entry;
4652 uintptr_t buf = (uintptr_t)buf_ptr;
4653 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4654 DebugFrameHeader *dfh;
/* The debug_frame payload is appended after the fixed image. */
4656 img = g_malloc(img_size);
4657 *img = img_template;
4659 img->phdr.p_vaddr = buf;
4660 img->phdr.p_paddr = buf;
4661 img->phdr.p_memsz = buf_size;
/* Patch per-buffer addresses, sizes and string-table offsets. */
4663 img->shdr[1].sh_name = find_string(img->str, ".text");
4664 img->shdr[1].sh_addr = buf;
4665 img->shdr[1].sh_size = buf_size;
4667 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4668 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4670 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4671 img->shdr[4].sh_size = debug_frame_size;
4673 img->shdr[5].sh_name = find_string(img->str, ".symtab");
4674 img->shdr[6].sh_name = find_string(img->str, ".strtab");
4676 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4677 img->sym[1].st_value = buf;
4678 img->sym[1].st_size = buf_size;
4680 img->di.cu_low_pc = buf;
4681 img->di.cu_high_pc = buf + buf_size;
4682 img->di.fn_low_pc = buf;
4683 img->di.fn_high_pc = buf + buf_size;
4685 dfh = (DebugFrameHeader *)(img + 1);
4686 memcpy(dfh, debug_frame, debug_frame_size);
4687 dfh->fde.func_start = buf;
4688 dfh->fde.func_len = buf_size;
4691 /* Enable this block to be able to debug the ELF image file creation.
4692 One can use readelf, objdump, or other inspection utilities. */
4694 FILE *f = fopen("/tmp/qemu.jit", "w+b");
4696 if (fwrite(img, img_size, 1, f) != img_size) {
4697 /* Avoid stupid unused return value warning for fwrite. */
/* Hand the image to GDB: fill the single static entry, point the
   descriptor at it, and hit the registration breakpoint. */
4704 one_entry.symfile_addr = img;
4705 one_entry.symfile_size = img_size;
4707 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4708 __jit_debug_descriptor.relevant_entry = &one_entry;
4709 __jit_debug_descriptor.first_entry = &one_entry;
4710 __jit_debug_register_code();
4713 /* No support for the feature. Provide the entry point expected by exec.c,
4714 and implement the internal function we declared earlier. */
/* No-op: GDB JIT registration is unsupported on this host. */
4716 static void tcg_register_jit_int(void *buf, size_t size,
4717 const void *debug_frame,
4718 size_t debug_frame_size)
/* Public entry point; intentionally does nothing without
   ELF_HOST_MACHINE. */
4722 void tcg_register_jit(void *buf, size_t buf_size)
4725 #endif /* ELF_HOST_MACHINE */
4727 #if !TCG_TARGET_MAYBE_vec
4728 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4730 g_assert_not_reached();