/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* define it to use liveness analysis (better code) */
#define USE_LIVENESS_ANALYSIS
#define USE_TCG_OPTIMIZATIONS

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
/* define it to suppress various consistency checks (faster) */
#define NDEBUG
#endif

#include "qemu-common.h"
#include "qemu/cache-utils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
/* Forward declarations for functions declared in tcg-target.c and used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c. */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
static void tcg_out_tb_init(TCGContext *s);
static void tcg_out_tb_finalize(TCGContext *s);

TCGOpDef tcg_op_defs[] = {
#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
#include "tcg-opc.h"
#undef DEF
};
const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
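
/* Added illustration (not in the original): with the DEF macro above, a
   hypothetical entry such as DEF(add_i32, 1, 2, 0, 0) in tcg-opc.h would
   expand to { "add_i32", 1, 2, 0, 3, 0 }, i.e. one output, two inputs,
   no constant args, 3 total args, no flags. */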
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static inline void tcg_patch8(tcg_insn_unit *p, uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static inline void tcg_patch16(tcg_insn_unit *p, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static inline void tcg_patch32(tcg_insn_unit *p, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static inline void tcg_patch64(tcg_insn_unit *p, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
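
/* Added summary (not in the original): each tcg_outN helper appends an
   N-bit value to the generated code stream.  When the value is exactly one
   instruction unit it is stored directly; otherwise it is memcpy'd and
   code_ptr advances by (N/8) / TCG_TARGET_INSN_UNIT_SIZE units.  The
   tcg_patchN helpers rewrite a value in place without moving code_ptr. */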
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          int label_index, intptr_t addend)
{
    TCGLabel *l;
    TCGRelocation *r;

    l = &s->labels[label_index];
    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}
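
/* Added note (not in the original): a relocation against a label that
   already has a value can be patched immediately; otherwise it is queued
   on the label and resolved later by tcg_out_label() below. */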
static void tcg_out_label(TCGContext *s, int label_index, tcg_insn_unit *ptr)
{
    TCGLabel *l = &s->labels[label_index];
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

int gen_new_label(void)
{
    TCGContext *s = &tcg_ctx;
    int idx;
    TCGLabel *l;

    if (s->nb_labels >= TCG_MAX_LABELS)
        tcg_abort();
    idx = s->nb_labels++;
    l = &s->labels[idx];
    l->has_value = 0;
    l->u.first_reloc = NULL;
    return idx;
}
#include "tcg-target.c"

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        /* ... reuse an existing chunk if possible, else chain a new one ... */
        pool_size = TCG_POOL_CHUNK_SIZE;
        p = g_malloc(sizeof(TCGPool) + pool_size);
        p->size = pool_size;
        if (s->pool_current) {
            s->pool_current->next = p;
        }
        /* ... */
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return s->pool_cur;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;

    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
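
/* Added note (not in the original): this pool allocator backs tcg_malloc().
   Small allocations are carved out of TCG_POOL_CHUNK_SIZE chunks that are
   kept and reused across translations; oversized allocations get their own
   chunk on the pool_first_large list and are freed by tcg_pool_reset(). */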
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    GHashTable *helper_table;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    s->helpers = helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
}
void tcg_prologue_init(TCGContext *s)
{
    /* init global prologue and epilogue */
    s->code_buf = s->code_gen_prologue;
    s->code_ptr = s->code_buf;
    tcg_target_qemu_prologue(s);
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        size_t size = tcg_current_code_size(s);
        qemu_log("PROLOGUE: [size=%zu]\n", size);
        log_disas(s->code_buf, size);
        qemu_log("\n");
        qemu_log_flush();
    }
#endif
}
void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_reg = reg;
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    s->gen_opc_ptr = s->gen_opc_buf;
    s->gen_opparam_ptr = s->gen_opparam_buf;

    s->be = tcg_malloc(sizeof(TCGBackendData));
}
static inline void tcg_temp_alloc(TCGContext *s, int n)
{
    if (n > TCG_MAX_TEMPS)
        tcg_abort();
}

static inline int tcg_global_reg_new_internal(TCGType type, int reg,
                                              const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx;

#if TCG_TARGET_REG_BITS == 32
    if (type != TCG_TYPE_I32)
        tcg_abort();
#endif
    if (tcg_regset_test_reg(s->reserved_regs, reg))
        tcg_abort();
    idx = s->nb_globals;
    tcg_temp_alloc(s, s->nb_globals + 1);
    ts = &s->temps[s->nb_globals];
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    s->nb_globals++;
    tcg_regset_set_reg(s->reserved_regs, reg);
    return idx;
}

TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name)
{
    int idx;

    idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name)
{
    int idx;

    idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name);
    return MAKE_TCGV_I64(idx);
}

static inline int tcg_global_mem_new_internal(TCGType type, int reg,
                                              intptr_t offset,
                                              const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx;

    idx = s->nb_globals;
#if TCG_TARGET_REG_BITS == 32
    if (type == TCG_TYPE_I64) {
        char buf[64];
        tcg_temp_alloc(s, s->nb_globals + 2);
        ts = &s->temps[s->nb_globals];
        ts->base_type = type;
        ts->type = TCG_TYPE_I32;
        ts->fixed_reg = 0;
        ts->mem_allocated = 1;
        ts->mem_reg = reg;
#ifdef HOST_WORDS_BIGENDIAN
        ts->mem_offset = offset + 4;
#else
        ts->mem_offset = offset;
#endif
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);
        ts++;

        ts->base_type = type;
        ts->type = TCG_TYPE_I32;
        ts->fixed_reg = 0;
        ts->mem_allocated = 1;
        ts->mem_reg = reg;
#ifdef HOST_WORDS_BIGENDIAN
        ts->mem_offset = offset;
#else
        ts->mem_offset = offset + 4;
#endif
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts->name = strdup(buf);

        s->nb_globals += 2;
    } else
#endif
    {
        tcg_temp_alloc(s, s->nb_globals + 1);
        ts = &s->temps[s->nb_globals];
        ts->base_type = type;
        ts->type = type;
        ts->fixed_reg = 0;
        ts->mem_allocated = 1;
        ts->mem_reg = reg;
        ts->mem_offset = offset;
        ts->name = name;
        s->nb_globals++;
    }
    return idx;
}

TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name)
{
    int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name)
{
    int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
    return MAKE_TCGV_I64(idx);
}
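
/* Added note (not in the original): on a 32-bit host, a 64-bit global is
   represented as two consecutive 32-bit temps named "<name>_0" (low half)
   and "<name>_1" (high half); the mem_offset of each half is swapped on
   big-endian hosts so that _0 always names the low part of the value. */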
static inline int tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        assert(ts->base_type == type);
        assert(ts->temp_local == temp_local);
    } else {
        idx = s->nb_temps;
#if TCG_TARGET_REG_BITS == 32
        if (type == TCG_TYPE_I64) {
            tcg_temp_alloc(s, s->nb_temps + 2);
            ts = &s->temps[s->nb_temps];
            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
            ts->name = NULL;
            ts++;
            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
            ts->name = NULL;
            s->nb_temps += 2;
        } else
#endif
        {
            tcg_temp_alloc(s, s->nb_temps + 1);
            ts = &s->temps[s->nb_temps];
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
            ts->name = NULL;
            s->nb_temps++;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return idx;
}

TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
    return MAKE_TCGV_I64(idx);
}

static void tcg_temp_free_internal(int idx)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int k;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    assert(idx >= s->nb_globals && idx < s->nb_temps);
    ts = &s->temps[idx];
    assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}
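
/* Added note (not in the original): freed temps are not returned to a
   heap; they are parked in the free_temps bitmap, indexed by type plus
   locality, so tcg_temp_new_internal() can recycle one of matching kind. */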
void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(GET_TCGV_I32(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(GET_TCGV_I64(arg));
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                   int nargs, TCGArg *args)
{
    int i, real_args, nb_rets;
    unsigned sizemask, flags;
    TCGArg *nparam;
    TCGHelperInfo *info;

    info = g_hash_table_lookup(s->helpers, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = GET_TCGV_I32(h);
                split_args[real_args++] = GET_TCGV_I32(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = GET_TCGV_I64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
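
    /* Added note (not in the original): sizemask packs two bits per value.
       Bit 0 is set when the helper returns a 64-bit result; for argument i,
       bit (i+1)*2 is set when the argument is 64-bit and bit (i+1)*2 + 1
       when it is signed.  E.g. a helper returning i64 and taking one signed
       i32 argument has sizemask = 1 | (2 << 2) = 9. */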
    *s->gen_opc_ptr++ = INDEX_op_call;
    nparam = s->gen_opparam_ptr++;
    if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            *s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
            *s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
            nb_rets = 2;
        } else {
            *s->gen_opparam_ptr++ = ret;
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            *s->gen_opparam_ptr++ = ret + 1;
            *s->gen_opparam_ptr++ = ret;
#else
            *s->gen_opparam_ptr++ = ret;
            *s->gen_opparam_ptr++ = ret + 1;
#endif
            nb_rets = 2;
        } else {
            *s->gen_opparam_ptr++ = ret;
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            *s->gen_opparam_ptr++ = args[i] + 1;
            *s->gen_opparam_ptr++ = args[i];
#else
            *s->gen_opparam_ptr++ = args[i];
            *s->gen_opparam_ptr++ = args[i] + 1;
#endif
            real_args += 2;
            continue;
        }

        *s->gen_opparam_ptr++ = args[i];
        real_args++;
    }
    *s->gen_opparam_ptr++ = (uintptr_t)func;
    *s->gen_opparam_ptr++ = flags;

    *nparam = (nb_rets << 16) | real_args;

    /* total parameters, needed to go backward in the instruction stream */
    *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
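
    /* Added note (not in the original): a call op's parameter stream is
       laid out as [nb_rets<<16 | real_args] [return args] [input args]
       [func] [flags] [total-length word]; the final word is what lets
       consumers walk the stream backwards. */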
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
            tcg_temp_free_i32(h);
            tcg_temp_free_i32(l);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
            tcg_temp_free_i64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
#if TCG_TARGET_REG_BITS == 32
void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
                        int c, int right, int arith)
{
    if (c == 0) {
        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
    } else if (c >= 32) {
        c -= 32;
        if (right) {
            if (arith) {
                tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
                tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
            } else {
                tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            }
        } else {
            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
            tcg_gen_movi_i32(TCGV_LOW(ret), 0);
        }
    } else {
        TCGv_i32 t0, t1;

        t0 = tcg_temp_new_i32();
        t1 = tcg_temp_new_i32();
        if (right) {
            tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
            if (arith) {
                tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
            } else {
                tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
            }
            tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
            tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
        } else {
            tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
            /* Note: ret can be the same as arg1, so we use t1 */
            tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
            tcg_gen_mov_i32(TCGV_LOW(ret), t1);
        }
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}
#endif
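
/* Added example (not in the original): for a left shift by c < 32 the
   composition above computes
       high = (high << c) | (low >> (32 - c));
       low  = low << c;
   using t0/t1 so that ret may alias arg1. */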
static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
{
    switch (op & MO_SIZE) {
    /* ... per-size sign/width canonicalization elided ... */
    }
    return op;
}

static const TCGOpcode old_ld_opc[8] = {
    [MO_UB] = INDEX_op_qemu_ld8u,
    [MO_SB] = INDEX_op_qemu_ld8s,
    [MO_UW] = INDEX_op_qemu_ld16u,
    [MO_SW] = INDEX_op_qemu_ld16s,
#if TCG_TARGET_REG_BITS == 32
    [MO_UL] = INDEX_op_qemu_ld32,
    [MO_SL] = INDEX_op_qemu_ld32,
#else
    [MO_UL] = INDEX_op_qemu_ld32u,
    [MO_SL] = INDEX_op_qemu_ld32s,
#endif
    [MO_Q]  = INDEX_op_qemu_ld64,
};

static const TCGOpcode old_st_opc[4] = {
    [MO_UB] = INDEX_op_qemu_st8,
    [MO_UW] = INDEX_op_qemu_st16,
    [MO_UL] = INDEX_op_qemu_st32,
    [MO_Q]  = INDEX_op_qemu_st64,
};
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 0, 0);

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations.  */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_ld_opc[memop & MO_SSIZE] != 0);

    if (TCG_TARGET_REG_BITS == 32) {
        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;
    } else {
        TCGv_i64 val64 = tcg_temp_new_i64();

        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
        tcg_add_param_i64(val64);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;

        tcg_gen_trunc_i64_i32(val, val64);
        tcg_temp_free_i64(val64);
    }
}

void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 0, 1);

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations.  */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_st_opc[memop & MO_SIZE] != 0);

    if (TCG_TARGET_REG_BITS == 32) {
        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;
    } else {
        TCGv_i64 val64 = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(val64, val);

        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
        tcg_add_param_i64(val64);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;

        tcg_temp_free_i64(val64);
    }
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

#if TCG_TARGET_REG_BITS == 32
    if ((memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }
#endif

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
        tcg_add_param_i64(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations.  */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_ld_opc[memop & MO_SSIZE] != 0);

    *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
    tcg_add_param_i64(val);
    tcg_add_param_tl(addr);
    *tcg_ctx.gen_opparam_ptr++ = idx;
}

void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 1, 1);

#if TCG_TARGET_REG_BITS == 32
    if ((memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
        return;
    }
#endif

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
        tcg_add_param_i64(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations.  */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_st_opc[memop & MO_SIZE] != 0);

    *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
    tcg_add_param_i64(val);
    tcg_add_param_tl(addr);
    *tcg_ctx.gen_opparam_ptr++ = idx;
}
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i;
    TCGTemp *ts;

    for(i = 0; i < s->nb_globals; i++) {
        ts = &s->temps[i];
        if (ts->fixed_reg) {
            ts->val_type = TEMP_VAL_REG;
        } else {
            ts->val_type = TEMP_VAL_MEM;
        }
    }
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }
    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        s->reg_to_temp[i] = -1;
    }
}

static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
                                 int idx)
{
    TCGTemp *ts;

    assert(idx >= 0 && idx < s->nb_temps);
    ts = &s->temps[idx];
    if (idx < s->nb_globals) {
        pstrcpy(buf, buf_size, ts->name);
    } else {
        if (ts->temp_local) {
            snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        } else {
            snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        }
    }
    return buf;
}

char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg)
{
    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg));
}

char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg)
{
    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
}
/* Find helper name.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (s->helpers) {
        TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    /* ... MO_* size/sign/endian mnemonics elided ... */
};
void tcg_dump_ops(TCGContext *s)
{
    const uint16_t *opc_ptr;
    const TCGArg *args;
    TCGArg arg;
    TCGOpcode c;
    int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
    const TCGOpDef *def;
    char buf[128];

    first_insn = 1;
    opc_ptr = s->gen_opc_buf;
    args = s->gen_opparam_buf;
    while (opc_ptr < s->gen_opc_ptr) {
        c = *opc_ptr++;
        def = &tcg_op_defs[c];
        if (c == INDEX_op_debug_insn_start) {
            uint64_t pc;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
            pc = ((uint64_t)args[1] << 32) | args[0];
#else
            pc = args[0];
#endif
            if (!first_insn) {
                qemu_log("\n");
            }
            qemu_log(" ---- 0x%" PRIx64, pc);
            first_insn = 0;
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            arg = *args++;
            nb_oargs = arg >> 16;
            nb_iargs = arg & 0xffff;
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                     tcg_find_helper(s, args[nb_oargs + nb_iargs]),
                     args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                    args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
                }
                qemu_log(",%s", t);
            }
        } else {
            qemu_log(" %s ", def->name);
            if (c == INDEX_op_nopn) {
                /* variable number of arguments */
                nb_cargs = *args;
                nb_oargs = 0;
                nb_iargs = 0;
            } else {
                nb_oargs = def->nb_oargs;
                nb_iargs = def->nb_iargs;
                nb_cargs = def->nb_cargs;
            }

            k = 0;
            for(i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    qemu_log(",");
                }
                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                   args[k++]));
            }
            for(i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    qemu_log(",");
                }
                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                   args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                    qemu_log(",%s", cond_name[args[k++]]);
                } else {
                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                if (args[k] < ARRAY_SIZE(ldst_name) && ldst_name[args[k]]) {
                    qemu_log(",%s", ldst_name[args[k++]]);
                } else {
                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
                }
                i = 1;
                break;
            default:
                i = 0;
                break;
            }
            for(; i < nb_cargs; i++) {
                if (k != 0) {
                    qemu_log(",");
                }
                arg = args[k++];
                qemu_log("$0x%" TCG_PRIlx, arg);
            }
        }
        qemu_log("\n");
        args += nb_iargs + nb_oargs + nb_cargs;
    }
}
/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct;
    int i, n;

    arg_ct = &def->args_ct[k];
    if (arg_ct->ct & TCG_CT_ALIAS) {
        /* an alias is equivalent to a single register */
        n = 1;
    } else {
        if (!(arg_ct->ct & TCG_CT_REG))
            return 0;
        n = 0;
        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
            if (tcg_regset_test_reg(arg_ct->u.regs, i))
                n++;
        }
    }
    return TCG_TARGET_NB_REGS - n + 1;
}
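
/* Added note (not in the original): the returned priority grows as the
   number of registers a constraint accepts shrinks, so the pickiest
   operands get their registers assigned first. */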
/* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j, p1, p2, tmp;

    for(i = 0; i < n; i++)
        def->sorted_args[start + i] = start + i;
    if (n <= 1)
        return;
    for(i = 0; i < n - 1; i++) {
        for(j = i + 1; j < n; j++) {
            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
            if (p1 < p2) {
                tmp = def->sorted_args[start + i];
                def->sorted_args[start + i] = def->sorted_args[start + j];
                def->sorted_args[start + j] = tmp;
            }
        }
    }
}
void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
{
    TCGOpcode op;
    TCGOpDef *def;
    const char *ct_str;
    int i, nb_args;

    for(;;) {
        if (tdefs->op == (TCGOpcode)-1)
            break;
        op = tdefs->op;
        assert((unsigned)op < NB_OPS);
        def = &tcg_op_defs[op];
#if defined(CONFIG_DEBUG_TCG)
        /* Duplicate entry in op definitions? */
        assert(!def->used);
        def->used = 1;
#endif
        nb_args = def->nb_iargs + def->nb_oargs;
        for(i = 0; i < nb_args; i++) {
            ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry? */
            assert(ct_str != NULL);
            tcg_regset_clear(def->args_ct[i].u.regs);
            def->args_ct[i].ct = 0;
            if (ct_str[0] >= '0' && ct_str[0] <= '9') {
                int oarg;
                oarg = ct_str[0] - '0';
                assert(oarg < def->nb_oargs);
                assert(def->args_ct[oarg].ct & TCG_CT_REG);
                /* TCG_CT_ALIAS is for the output arguments. The input
                   argument is tagged with TCG_CT_IALIAS. */
                def->args_ct[i] = def->args_ct[oarg];
                def->args_ct[oarg].ct = TCG_CT_ALIAS;
                def->args_ct[oarg].alias_index = i;
                def->args_ct[i].ct |= TCG_CT_IALIAS;
                def->args_ct[i].alias_index = oarg;
            } else {
                for(;;) {
                    if (*ct_str == '\0')
                        break;
                    switch(*ct_str) {
                    case 'i':
                        def->args_ct[i].ct |= TCG_CT_CONST;
                        ct_str++;
                        break;
                    default:
                        if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
                            fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
                                    ct_str, i, def->name);
                            exit(1);
                        }
                    }
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);

#if 0
        {
            int i;

            printf("%s: sorted=", def->name);
            for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
                printf(" %d", def->sorted_args[i]);
            printf("\n");
        }
#endif
        tdefs++;
    }
#if defined(CONFIG_DEBUG_TCG)
    i = 0;
    for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
        const TCGOpDef *def = &tcg_op_defs[op];
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            /* Wrong entry in op definitions? */
            if (def->used) {
                fprintf(stderr, "Invalid op definition for %s\n", def->name);
                i = 1;
            }
        } else {
            /* Missing entry in op definitions? */
            if (!def->used) {
                fprintf(stderr, "Missing op definition for %s\n", def->name);
                i = 1;
            }
        }
    }
    if (i == 1) {
        tcg_abort();
    }
#endif
}
#ifdef USE_LIVENESS_ANALYSIS

/* set a nop for an operation using 'nb_args' */
static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
                               TCGArg *args, int nb_args)
{
    if (nb_args == 0) {
        *opc_ptr = INDEX_op_nop;
    } else {
        *opc_ptr = INDEX_op_nopn;
        args[0] = nb_args;
        args[nb_args - 1] = nb_args;
    }
}
/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
                                   uint8_t *mem_temps)
{
    memset(dead_temps, 1, s->nb_temps);
    memset(mem_temps, 1, s->nb_globals);
    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
                                 uint8_t *mem_temps)
{
    int i;

    memset(dead_temps, 1, s->nb_temps);
    memset(mem_temps, 1, s->nb_globals);
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        mem_temps[i] = s->temps[i].temp_local;
    }
}

/* Liveness analysis: update the opc_dead_args array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
{
    int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
    TCGOpcode op, op_new, op_new2;
    TCGArg *args, arg;
    const TCGOpDef *def;
    uint8_t *dead_temps, *mem_temps;
    uint16_t dead_args;
    uint8_t sync_args;
    bool have_op_new2;

    s->gen_opc_ptr++; /* skip end */

    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;

    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));

    dead_temps = tcg_malloc(s->nb_temps);
    mem_temps = tcg_malloc(s->nb_temps);
    tcg_la_func_end(s, dead_temps, mem_temps);

    args = s->gen_opparam_ptr;
    op_index = nb_ops - 1;
    while (op_index >= 0) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        switch(op) {
        case INDEX_op_call:
            {
                int call_flags;

                nb_args = args[-1];
                args -= nb_args;
                arg = *args;
                nb_iargs = arg & 0xffff;
                nb_oargs = arg >> 16;
                args++;
                call_flags = args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is not
                   used */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (!dead_temps[arg] || mem_temps[arg]) {
                            goto do_not_remove_call;
                        }
                    }
                    tcg_set_nop(s, s->gen_opc_buf + op_index,
                                args - 1, nb_args);
                } else {
                do_not_remove_call:

                    /* output args are dead */
                    dead_args = 0;
                    sync_args = 0;
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (dead_temps[arg]) {
                            dead_args |= (1 << i);
                        }
                        if (mem_temps[arg]) {
                            sync_args |= (1 << i);
                        }
                        dead_temps[arg] = 1;
                        mem_temps[arg] = 0;
                    }

                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                        /* globals should be synced to memory */
                        memset(mem_temps, 1, s->nb_globals);
                    }
                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                        TCG_CALL_NO_READ_GLOBALS))) {
                        /* globals should go back to memory */
                        memset(dead_temps, 1, s->nb_globals);
                    }

                    /* input args are live */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg = args[i];
                        if (arg != TCG_CALL_DUMMY_ARG) {
                            if (dead_temps[arg]) {
                                dead_args |= (1 << i);
                            }
                            dead_temps[arg] = 0;
                        }
                    }
                    s->op_dead_args[op_index] = dead_args;
                    s->op_sync_args[op_index] = sync_args;
                }
                args--;
            }
            break;
        case INDEX_op_debug_insn_start:
            args -= def->nb_args;
            break;
        case INDEX_op_nopn:
            nb_args = args[-1];
            args -= nb_args;
            break;
        case INDEX_op_discard:
            args--;
            /* mark the temporary as dead */
            dead_temps[args[0]] = 1;
            mem_temps[args[0]] = 0;
            break;
        case INDEX_op_end:
            break;

        case INDEX_op_add2_i32:
            op_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            op_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            op_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            op_new = INDEX_op_sub_i64;
        do_addsub2:
            args -= 6;
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                    goto do_remove;
                }
                /* Create the single operation plus nop.  */
                s->gen_opc_buf[op_index] = op = op_new;
                args[1] = args[2];
                args[2] = args[4];
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;
        case INDEX_op_mulu2_i32:
            op_new = INDEX_op_mul_i32;
            op_new2 = INDEX_op_muluh_i32;
            have_op_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            op_new = INDEX_op_mul_i32;
            op_new2 = INDEX_op_mulsh_i32;
            have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            op_new = INDEX_op_mul_i64;
            op_new2 = INDEX_op_muluh_i64;
            have_op_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            op_new = INDEX_op_mul_i64;
            op_new2 = INDEX_op_mulsh_i64;
            have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
        do_mul2:
            args -= 4;
            nb_iargs = 2;
            nb_oargs = 2;
            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                s->gen_opc_buf[op_index] = op = op_new;
                args[1] = args[2];
                args[2] = args[3];
            } else if (have_op_new2 && dead_temps[args[0]]
                       && !mem_temps[args[0]]) {
                /* The low part of the operation is dead; generate the high. */
                s->gen_opc_buf[op_index] = op = op_new2;
                args[0] = args[1];
                args[1] = args[2];
                args[2] = args[3];
            } else {
                goto do_not_remove;
            }
            assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
            tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;
        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            args -= def->nb_args;
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for(i = 0; i < nb_oargs; i++) {
                    arg = args[i];
                    if (!dead_temps[arg] || mem_temps[arg]) {
                        goto do_not_remove;
                    }
                }
            do_remove:
                tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
#ifdef CONFIG_PROFILER
                s->del_op_count++;
#endif
            } else {
            do_not_remove:

                /* output args are dead */
                dead_args = 0;
                sync_args = 0;
                for(i = 0; i < nb_oargs; i++) {
                    arg = args[i];
                    if (dead_temps[arg]) {
                        dead_args |= (1 << i);
                    }
                    if (mem_temps[arg]) {
                        sync_args |= (1 << i);
                    }
                    dead_temps[arg] = 1;
                    mem_temps[arg] = 0;
                }

                /* if end of basic block, update */
                if (def->flags & TCG_OPF_BB_END) {
                    tcg_la_bb_end(s, dead_temps, mem_temps);
                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                    /* globals should be synced to memory */
                    memset(mem_temps, 1, s->nb_globals);
                }

                /* input args are live */
                for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    arg = args[i];
                    if (dead_temps[arg]) {
                        dead_args |= (1 << i);
                    }
                    dead_temps[arg] = 0;
                }
                s->op_dead_args[op_index] = dead_args;
                s->op_sync_args[op_index] = sync_args;
            }
            break;
        }
        op_index--;
    }

    if (args != s->gen_opparam_buf) {
        tcg_abort();
    }
}
#else
/* dummy liveness analysis */
static void tcg_liveness_analysis(TCGContext *s)
{
    int nb_ops;
    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;

    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
    memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
    memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
}
#endif
#ifndef NDEBUG
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf("  %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" TCG_PRIlx, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] >= 0) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}

static void check_regs(TCGContext *s)
{
    int reg, k;
    TCGTemp *ts;
    char buf[64];

    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        k = s->reg_to_temp[reg];
        if (k >= 0) {
            ts = &s->temps[k];
            if (ts->val_type != TEMP_VAL_REG ||
                ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for(k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG &&
            !ts->fixed_reg &&
            s->reg_to_temp[ts->reg] != k) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif
static void temp_allocate_frame(TCGContext *s, int temp)
{
    TCGTemp *ts;
    ts = &s->temps[temp];
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_reg = s->frame_reg;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
/* sync register 'reg' by saving it to the corresponding temporary */
static inline void tcg_reg_sync(TCGContext *s, int reg)
{
    TCGTemp *ts;
    int temp;

    temp = s->reg_to_temp[reg];
    ts = &s->temps[temp];
    assert(ts->val_type == TEMP_VAL_REG);
    if (!ts->mem_coherent && !ts->fixed_reg) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, temp);
        }
        tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
    }
    ts->mem_coherent = 1;
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, int reg)
{
    int temp;

    temp = s->reg_to_temp[reg];
    if (temp != -1) {
        tcg_reg_sync(s, reg);
        s->temps[temp].val_type = TEMP_VAL_MEM;
        s->reg_to_temp[reg] = -1;
    }
}
/* Allocate a register belonging to reg1 & ~reg2 */
static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
{
    int i, reg;
    TCGRegSet reg_ct;

    tcg_regset_andnot(reg_ct, reg1, reg2);

    /* first try free registers */
    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
        reg = tcg_target_reg_alloc_order[i];
        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
            return reg;
    }

    /* XXX: do better spill choice */
    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
        reg = tcg_target_reg_alloc_order[i];
        if (tcg_regset_test_reg(reg_ct, reg)) {
            tcg_reg_free(s, reg);
            return reg;
        }
    }

    tcg_abort();
}

/* mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, int temp)
{
    TCGTemp *ts;

    ts = &s->temps[temp];
    if (!ts->fixed_reg) {
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = -1;
        }
        if (temp < s->nb_globals || ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
    }
}
/* sync a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
{
    TCGTemp *ts;

    ts = &s->temps[temp];
    if (!ts->fixed_reg) {
        switch(ts->val_type) {
        case TEMP_VAL_CONST:
            ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                    allocated_regs);
            ts->val_type = TEMP_VAL_REG;
            s->reg_to_temp[ts->reg] = temp;
            ts->mem_coherent = 0;
            tcg_out_movi(s, ts->type, ts->reg, ts->val);
            /* fallthrough */
        case TEMP_VAL_REG:
            tcg_reg_sync(s, ts->reg);
            break;
        case TEMP_VAL_DEAD:
        case TEMP_VAL_MEM:
            break;
        default:
            tcg_abort();
        }
    }
}

/* save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
{
#ifdef USE_LIVENESS_ANALYSIS
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an assert for safety. */
    assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
#else
    temp_sync(s, temp, allocated_regs);
    temp_dead(s, temp);
#endif
}

/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for(i = 0; i < s->nb_globals; i++) {
        temp_save(s, i, allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = 0; i < s->nb_globals; i++) {
#ifdef USE_LIVENESS_ANALYSIS
        assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
               s->temps[i].mem_coherent);
#else
        temp_sync(s, i, allocated_regs);
#endif
    }
}
/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    TCGTemp *ts;
    int i;

    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            temp_save(s, i, allocated_regs);
        } else {
#ifdef USE_LIVENESS_ANALYSIS
            /* The liveness analysis already ensures that temps are dead.
               Keep an assert for safety. */
            assert(ts->val_type == TEMP_VAL_DEAD);
#else
            temp_dead(s, i);
#endif
        }
    }

    save_globals(s, allocated_regs);
}

#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
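
/* Added note (not in the original): dead_args and sync_args are the
   per-op bitmasks computed by tcg_liveness_analysis(); bit n of dead_args
   means argument n dies at this op, and bit n of sync_args means output n
   must also be written back to its canonical memory slot. */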
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
                               uint16_t dead_args, uint8_t sync_args)
{
    TCGTemp *ots;
    tcg_target_ulong val;

    ots = &s->temps[args[0]];
    val = args[1];

    if (ots->fixed_reg) {
        /* for fixed registers, we do not do any constant
           propagation */
        tcg_out_movi(s, ots->type, ots->reg, val);
    } else {
        /* The movi is not explicitly generated here */
        if (ots->val_type == TEMP_VAL_REG)
            s->reg_to_temp[ots->reg] = -1;
        ots->val_type = TEMP_VAL_CONST;
        ots->val = val;
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, args[0], s->reserved_regs);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, args[0]);
    }
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                              const TCGArg *args, uint16_t dead_args,
                              uint8_t sync_args)
{
    TCGRegSet allocated_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    tcg_regset_set(allocated_regs, s->reserved_regs);
    ots = &s->temps[args[0]];
    ts = &s->temps[args[1]];

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    /* If the source value is not in a register, and we're going to be
       forced to have it in a register in order to perform the copy,
       then copy the SOURCE value into its own register first.  That way
       we don't have to reload SOURCE the next time it is used. */
    if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
        || ts->val_type == TEMP_VAL_MEM) {
        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
                                allocated_regs);
        if (ts->val_type == TEMP_VAL_MEM) {
            tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset);
            ts->mem_coherent = 1;
        } else if (ts->val_type == TEMP_VAL_CONST) {
            tcg_out_movi(s, itype, ts->reg, ts->val);
        }
        s->reg_to_temp[ts->reg] = args[1];
        ts->val_type = TEMP_VAL_REG;
    }

    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        assert(NEED_SYNC_ARG(0));
        /* The code above should have moved the temp to a register. */
        assert(ts->val_type == TEMP_VAL_REG);
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, args[0]);
        }
        tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, args[1]);
        }
        temp_dead(s, args[0]);
    } else if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant */
        if (ots->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ots->reg] = -1;
        }
        ots->val_type = TEMP_VAL_CONST;
        ots->val = ts->val;
    } else {
        /* The code in the first if block should have moved the
           temp to a register. */
        assert(ts->val_type == TEMP_VAL_REG);
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = -1;
            }
            ots->reg = ts->reg;
            temp_dead(s, args[1]);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = args[0];
        if (NEED_SYNC_ARG(0)) {
            tcg_reg_sync(s, ots->reg);
        }
    }
}
static void tcg_reg_alloc_op(TCGContext *s,
                             const TCGOpDef *def, TCGOpcode opc,
                             const TCGArg *args, uint16_t dead_args,
                             uint8_t sync_args)
{
    TCGRegSet allocated_regs;
    int i, k, nb_iargs, nb_oargs, reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* satisfy input constraints */
    tcg_regset_set(allocated_regs, s->reserved_regs);
    for(k = 0; k < nb_iargs; k++) {
        i = def->sorted_args[nb_oargs + k];
        arg = args[i];
        arg_ct = &def->args_ct[i];
        ts = &s->temps[arg];
        if (ts->val_type == TEMP_VAL_MEM) {
            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
            tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 1;
            s->reg_to_temp[reg] = arg;
        } else if (ts->val_type == TEMP_VAL_CONST) {
            if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
                /* constant is OK for instruction */
                const_args[i] = 1;
                new_args[i] = ts->val;
                goto iarg_end;
            } else {
                /* need to move to a register */
                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
                tcg_out_movi(s, ts->type, reg, ts->val);
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = arg;
            }
        }
        assert(ts->val_type == TEMP_VAL_REG);
        if (arg_ct->ct & TCG_CT_IALIAS) {
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != args[arg_ct->alias_index])
                    goto allocate_in_reg;
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }
            }
        }
        reg = ts->reg;
        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(allocated_regs, reg);
    iarg_end: ;
    }
    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, args[i]);
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
                    tcg_reg_free(s, reg);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, allocated_regs);
        }

        /* satisfy the output constraints */
        tcg_regset_set(allocated_regs, s->reserved_regs);
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = args[i];
            arg_ct = &def->args_ct[i];
            ts = &s->temps[arg];
            if (arg_ct->ct & TCG_CT_ALIAS) {
                reg = new_args[arg_ct->alias_index];
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
            }
            tcg_regset_set_reg(allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = -1;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = arg;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    tcg_out_op(s, opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = &s->temps[args[i]];
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            tcg_reg_sync(s, reg);
        }
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, args[i]);
        }
    }
}
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif

static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                              TCGOpcode opc, const TCGArg *args,
                              uint16_t dead_args, uint8_t sync_args)
{
    int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    arg = *args++;

    nb_oargs = arg >> 16;
    nb_iargs = arg & 0xffff;
    nb_params = nb_iargs;

    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
    flags = args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_params) {
        nb_regs = nb_params;
    }

    /* assign stack slots first */
    call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
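
    /* Added example (not in the original): the mask arithmetic above rounds
       up to the alignment, e.g. with TCG_TARGET_STACK_ALIGN = 16 a raw size
       of 24 becomes (24 + 15) & ~15 = 32. */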
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for(i = nb_regs; i < nb_params; i++) {
        arg = args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            if (ts->val_type == TEMP_VAL_REG) {
                tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
            } else if (ts->val_type == TEMP_VAL_MEM) {
                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                    s->reserved_regs);
                /* XXX: not correct if reading values from the stack */
                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
            } else if (ts->val_type == TEMP_VAL_CONST) {
                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                    s->reserved_regs);
                /* XXX: sign extend may be needed on some targets */
                tcg_out_movi(s, ts->type, reg, ts->val);
                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
            } else {
                tcg_abort();
            }
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    tcg_regset_set(allocated_regs, s->reserved_regs);
    for(i = 0; i < nb_regs; i++) {
        arg = args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            reg = tcg_target_call_iarg_regs[i];
            tcg_reg_free(s, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else if (ts->val_type == TEMP_VAL_MEM) {
                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
            } else if (ts->val_type == TEMP_VAL_CONST) {
                /* XXX: sign extend ? */
                tcg_out_movi(s, ts->type, reg, ts->val);
            } else {
                tcg_abort();
            }
            tcg_regset_set_reg(allocated_regs, reg);
        }
    }
    /* mark dead temporaries and free the associated registers */
    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, args[i]);
        }
    }

    /* clobber call registers */
    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
            tcg_reg_free(s, reg);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = args[i];
        ts = &s->temps[arg];
        reg = tcg_target_call_oarg_regs[i];
        assert(s->reg_to_temp[reg] == -1);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = -1;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = arg;
            if (NEED_SYNC_ARG(i)) {
                tcg_reg_sync(s, reg);
            }
            if (IS_DEAD_ARG(i)) {
                temp_dead(s, args[i]);
            }
        }
    }

    return nb_iargs + nb_oargs + def->nb_cargs + 1;
}
#ifdef CONFIG_PROFILER

static int64_t tcg_table_op_count[NB_OPS];

static void dump_op_count(void)
{
    int i;
    FILE *f;
    f = fopen("/tmp/op.log", "w");
    for(i = INDEX_op_end; i < NB_OPS; i++) {
        fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]);
    }
    fclose(f);
}
#endif
static inline int tcg_gen_code_common(TCGContext *s,
                                      tcg_insn_unit *gen_code_buf,
                                      long search_pc)
{
    TCGOpcode opc;
    int op_index;
    const TCGOpDef *def;
    const TCGArg *args;

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
        qemu_log("OP:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
    }
#endif

#ifdef CONFIG_PROFILER
    s->opt_time -= profile_getclock();
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    s->gen_opparam_ptr =
        tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
#endif

#ifdef CONFIG_PROFILER
    s->opt_time += profile_getclock();
    s->la_time -= profile_getclock();
#endif

    tcg_liveness_analysis(s);

#ifdef CONFIG_PROFILER
    s->la_time += profile_getclock();
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = gen_code_buf;
    s->code_ptr = gen_code_buf;

    tcg_out_tb_init(s);

    args = s->gen_opparam_buf;
    op_index = 0;

    for(;;) {
        opc = s->gen_opc_buf[op_index];
#ifdef CONFIG_PROFILER
        tcg_table_op_count[opc]++;
#endif
        def = &tcg_op_defs[opc];
#if 0
        printf("%s: %d %d %d\n", def->name,
               def->nb_oargs, def->nb_iargs, def->nb_cargs);
#endif
        switch(opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
            tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
                              s->op_sync_args[op_index]);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
            tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
                               s->op_sync_args[op_index]);
            break;
        case INDEX_op_debug_insn_start:
            /* debug instruction */
            break;
        case INDEX_op_nop:
        case INDEX_op_nop1:
        case INDEX_op_nop2:
        case INDEX_op_nop3:
            break;
        case INDEX_op_nopn:
            args += args[0];
            goto next;
        case INDEX_op_discard:
            temp_dead(s, args[0]);
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, args[0], s->code_ptr);
            break;
        case INDEX_op_call:
            args += tcg_reg_alloc_call(s, def, opc, args,
                                       s->op_dead_args[op_index],
                                       s->op_sync_args[op_index]);
            goto next;
        case INDEX_op_end:
            goto the_end;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            if (def->flags & TCG_OPF_NOT_PRESENT) {
                tcg_abort();
            }
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
                             s->op_sync_args[op_index]);
            break;
        }
        args += def->nb_args;
    next:
        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
            return op_index;
        }
        op_index++;
#ifndef NDEBUG
        check_regs(s);
#endif
    }
 the_end:
    /* Generate TB finalization at the end of block */
    tcg_out_tb_finalize(s);
    return -1;
}
int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
{
#ifdef CONFIG_PROFILER
    {
        int n = s->gen_opc_ptr - s->gen_opc_buf;

        s->op_count += n;
        if (n > s->op_count_max) {
            s->op_count_max = n;
        }
        s->temp_count += s->nb_temps;
        if (s->nb_temps > s->temp_count_max) {
            s->temp_count_max = s->nb_temps;
        }
    }
#endif

    tcg_gen_code_common(s, gen_code_buf, -1);

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
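
/* Usage sketch (an assumption about the surrounding tree, not code from
   this file): translate-all.c drives the two phases roughly as

       gen_intermediate_code(env, tb);                  // fill gen_opc_buf
       size = tcg_gen_code(&tcg_ctx, (tcg_insn_unit *)tb->tc_ptr);

   i.e. the op and parameter buffers must already be populated when
   tcg_gen_code runs. */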
/* Return the index of the micro operation whose emitted host code covers
   the point 'offset' bytes from the start of the TB.  The contents of
   gen_code_buf must not be changed, though writing the same values is ok.
   Return -1 if not found. */
int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
                           long offset)
{
    return tcg_gen_code_common(s, gen_code_buf, offset);
}
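
/* This entry point supports precise state restoration: on a fault from
   translated code, the caller re-runs code generation for the TB with
   search_pc set to the host-pc offset of the fault, then maps the
   returned op index back to guest CPU state.  (Caller behaviour assumed
   from the surrounding tree; details vary by version.) */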
#ifdef CONFIG_PROFILER
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGContext *s = &tcg_ctx;
    int64_t tot;

    tot = s->interm_time + s->code_time;
    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64
                " (aborted=%" PRId64 " %0.1f%%)\n",
                s->tb_count,
                s->tb_count1 - s->tb_count,
                s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) /
                               s->tb_count1 * 100.0 : 0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                s->tb_count ? (double)s->op_count / s->tb_count : 0,
                s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                s->tb_count ?
                (double)s->del_op_count / s->tb_count : 0);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                s->tb_count ?
                (double)s->temp_count / s->tb_count : 0,
                s->temp_count_max);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ?
                (double)s->restore_time / s->restore_count : 0);

    dump_op_count();
}
#else
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.

   An illustrative (non-normative) sketch of step (2) follows.  */
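
/* Illustrative sketch only, not from the original source: roughly what a
   backend's tcg_register_jit looks like.  It builds a static DebugFrame,
   i.e. the CIE/FDE headers defined earlier plus raw DWARF call-frame
   instruction bytes, and forwards it to tcg_register_jit_int.  The opcode
   bytes, the return_column, and the cie_offset field name are assumptions
   here, not a real unwind program for any host. */
#if 0
typedef struct {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
    uint8_t fde_ops[4];                     /* DWARF call-frame instructions */
} DebugFrame;

static const DebugFrame debug_frame = {
    .cie.len = sizeof(DebugFrameCIE) - 4,   /* length after the .len member */
    .cie.id = -1,                           /* -1 marks a CIE, not an FDE */
    .cie.version = 1,
    .cie.code_align = 1,
    .cie.data_align = 0x78,                 /* sleb128 -8, assuming 8-byte slots */
    .cie.return_column = 16,                /* placeholder RA column */
    /* Total FDE size does not include the .len member; cie_offset is left
       0 because the CIE sits at the start of the section. */
    .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
    .fde_ops = {
        12, 7, 8,                           /* DW_CFA_def_cfa r7, offset 8 */
        0,                                  /* DW_CFA_nop padding */
    },
};

void tcg_register_jit(void *buf, size_t buf_size)
{
    /* tcg_register_jit_int patches func_start/func_len in the FDE copy. */
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif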
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
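
/* How the interface is consumed: GDB plants a breakpoint inside
   __jit_debug_register_code and, each time it fires, walks the list at
   __jit_debug_descriptor.first_entry, treating every symfile_addr/
   symfile_size pair as an in-memory object file. */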
/* Find STR in the section-name string table STRTAB and return its byte
   offset.  The caller guarantees that the string is present. */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            },
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            },
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };
    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    /* Relocate the .debug_frame copy so its FDE covers the actual
       runtime address range of code_gen_buffer. */
    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;
#if 0
    /* Enable this block to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Silence unused-result warnings for fwrite.  */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else /* !ELF_HOST_MACHINE */
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */