/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
/* define it to use liveness analysis (better code) */
#define USE_LIVENESS_ANALYSIS
#define USE_TCG_OPTIMIZATIONS

#include "config.h"

/* Define to dump the ELF file used to communicate with GDB. */
#undef DEBUG_JIT

#if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
/* define it to suppress various consistency checks (faster) */
#define NDEBUG
#endif
#include "qemu-common.h"
#include "qemu/cache-utils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "tcg-op.h"
#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
/* Forward declarations for functions declared in tcg-target.c and used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);
/* The CIE and FDE header definitions will be common to all hosts. */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
static void tcg_register_jit_int(void *buf, size_t size,
                                 void *debug_frame, size_t debug_frame_size)
    __attribute__((unused));
/* Forward declarations for functions declared and used in tcg-target.c. */
static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
static void tcg_out_tb_init(TCGContext *s);
static void tcg_out_tb_finalize(TCGContext *s);
TCGOpDef tcg_op_defs[] = {
#define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
#include "tcg-opc.h"
#undef DEF
};
const size_t tcg_op_defs_max = ARRAY_SIZE(tcg_op_defs);
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static inline void tcg_patch8(tcg_insn_unit *p, uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static inline void tcg_patch16(tcg_insn_unit *p, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static inline void tcg_patch32(tcg_insn_unit *p, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static inline void tcg_patch64(tcg_insn_unit *p, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          int label_index, intptr_t addend)
{
    TCGLabel *l;
    TCGRelocation *r;

    l = &s->labels[label_index];
    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place. The caller may not have
           written the initial value. */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}
static void tcg_out_label(TCGContext *s, int label_index, tcg_insn_unit *ptr)
{
    TCGLabel *l = &s->labels[label_index];
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}
int gen_new_label(void)
{
    TCGContext *s = &tcg_ctx;
    int idx;
    TCGLabel *l;

    if (s->nb_labels >= TCG_MAX_LABELS)
        tcg_abort();
    idx = s->nb_labels++;
    l = &s->labels[idx];
    l->has_value = 0;
    l->u.first_reloc = NULL;
    return idx;
}
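
/*
 * Illustration (not part of the build) of how the label machinery above
 * cooperates for a forward branch; the relocation type name below is
 * hypothetical, each backend defines its own:
 *
 *     int lbl = gen_new_label();
 *     // The backend emits a branch whose target field cannot be
 *     // resolved yet and records the site with
 *     //     tcg_out_reloc(s, s->code_ptr, R_MY_BRANCH, lbl, 0);
 *     // which queues it on labels[lbl].u.first_reloc.
 *     // ... more code is emitted ...
 *     tcg_out_label(s, lbl, s->code_ptr);
 *     // Every queued site is now patched via patch_reloc() with the
 *     // label's final address; later uses patch immediately.
 */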
#include "tcg-target.c"
/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current)
                    s->pool_current->next = p;
                else
                    s->pool_first = p;
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}
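
/*
 * Sketch of the intended usage (illustrative only): translation-time
 * metadata is carved out of pool chunks via the tcg_malloc() fast path
 * in tcg.h and released wholesale, once per translation, rather than
 * object by object:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation)); // pool alloc
 *     // ... use r while translating this TB ...
 *     tcg_pool_reset(s);  // frees large allocs, recycles small chunks
 */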
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    GHashTable *helper_table;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers. */
    /* Use g_direct_hash/equal for direct pointer comparisons on func. */
    s->helpers = helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)all_helpers[i].name);
    }

    tcg_target_init(s);
}
void tcg_prologue_init(TCGContext *s)
{
    /* init global prologue and epilogue */
    s->code_buf = s->code_gen_prologue;
    s->code_ptr = s->code_buf;
    tcg_target_qemu_prologue(s);
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        size_t size = tcg_current_code_size(s);
        qemu_log("PROLOGUE: [size=%zu]\n", size);
        log_disas(s->code_buf, size);
        qemu_log("\n");
        qemu_log_flush();
    }
#endif
}
void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_reg = reg;
}
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality. */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    s->gen_opc_ptr = s->gen_opc_buf;
    s->gen_opparam_ptr = s->gen_opparam_buf;

    s->be = tcg_malloc(sizeof(TCGBackendData));
}
static inline void tcg_temp_alloc(TCGContext *s, int n)
{
    if (n > TCG_MAX_TEMPS)
        tcg_abort();
}
static inline int tcg_global_reg_new_internal(TCGType type, int reg,
                                              const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx;

#if TCG_TARGET_REG_BITS == 32
    if (type != TCG_TYPE_I32)
        tcg_abort();
#endif
    if (tcg_regset_test_reg(s->reserved_regs, reg))
        tcg_abort();
    idx = s->nb_globals;
    tcg_temp_alloc(s, s->nb_globals + 1);
    ts = &s->temps[s->nb_globals];
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    s->nb_globals++;
    tcg_regset_set_reg(s->reserved_regs, reg);
    return idx;
}
TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name)
{
    int idx;

    idx = tcg_global_reg_new_internal(TCG_TYPE_I32, reg, name);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name)
{
    int idx;

    idx = tcg_global_reg_new_internal(TCG_TYPE_I64, reg, name);
    return MAKE_TCGV_I64(idx);
}
static inline int tcg_global_mem_new_internal(TCGType type, int reg,
                                              intptr_t offset,
                                              const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx;

    idx = s->nb_globals;
#if TCG_TARGET_REG_BITS == 32
    if (type == TCG_TYPE_I64) {
        char buf[64];
        tcg_temp_alloc(s, s->nb_globals + 2);
        ts = &s->temps[s->nb_globals];
        ts->base_type = type;
        ts->type = TCG_TYPE_I32;
        ts->fixed_reg = 0;
        ts->mem_allocated = 1;
        ts->mem_reg = reg;
#ifdef HOST_WORDS_BIGENDIAN
        ts->mem_offset = offset + 4;
#else
        ts->mem_offset = offset;
#endif
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);
        ts++;

        ts->base_type = type;
        ts->type = TCG_TYPE_I32;
        ts->fixed_reg = 0;
        ts->mem_allocated = 1;
        ts->mem_reg = reg;
#ifdef HOST_WORDS_BIGENDIAN
        ts->mem_offset = offset;
#else
        ts->mem_offset = offset + 4;
#endif
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts->name = strdup(buf);

        s->nb_globals += 2;
    } else
#endif
    {
        tcg_temp_alloc(s, s->nb_globals + 1);
        ts = &s->temps[s->nb_globals];
        ts->base_type = type;
        ts->type = type;
        ts->fixed_reg = 0;
        ts->mem_allocated = 1;
        ts->mem_reg = reg;
        ts->mem_offset = offset;
        ts->name = name;
        s->nb_globals++;
    }
    return idx;
}
TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name)
{
    int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name)
{
    int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
    return MAKE_TCGV_I64(idx);
}
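
/*
 * Illustrative use from a target front end (the CPU state type and
 * register name below are hypothetical; offsets come from the target's
 * CPUState layout):
 *
 *     cpu_reg = tcg_global_mem_new_i32(TCG_AREG0,
 *                                      offsetof(CPUXYZState, regs[0]),
 *                                      "r0");
 *
 * On a 32-bit host a 64-bit global is split into two I32 halves named
 * "name_0"/"name_1", laid out according to HOST_WORDS_BIGENDIAN, as
 * done in tcg_global_mem_new_internal() above.
 */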
static inline int tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type. */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        assert(ts->base_type == type);
        assert(ts->temp_local == temp_local);
    } else {
        idx = s->nb_temps;
#if TCG_TARGET_REG_BITS == 32
        if (type == TCG_TYPE_I64) {
            tcg_temp_alloc(s, s->nb_temps + 2);
            ts = &s->temps[s->nb_temps];
            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
            ts->name = NULL;
            ts++;
            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
            ts->name = NULL;
            s->nb_temps += 2;
        } else
#endif
        {
            tcg_temp_alloc(s, s->nb_temps + 1);
            ts = &s->temps[s->nb_temps];
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
            ts->name = NULL;
            s->nb_temps++;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return idx;
}
TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
    return MAKE_TCGV_I64(idx);
}
static void tcg_temp_free_internal(int idx)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int k;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    assert(idx >= s->nb_globals && idx < s->nb_temps);
    ts = &s->temps[idx];
    assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}
void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(GET_TCGV_I32(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(GET_TCGV_I64(arg));
}
TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(TCGContext *s, void *func, unsigned int flags,
                   int sizemask, TCGArg ret, int nargs, TCGArg *args)
{
    int i;
    int real_args;
    int nb_rets;
    TCGArg *nparam;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters. Split them. */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = GET_TCGV_I32(h);
                split_args[real_args++] = GET_TCGV_I32(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = GET_TCGV_I64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
    *s->gen_opc_ptr++ = INDEX_op_call;
    nparam = s->gen_opparam_ptr++;
    if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair. Prepare for this by using
               two return temporaries, and reassemble below. */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            *s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
            *s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
            nb_rets = 2;
        } else {
            *s->gen_opparam_ptr++ = ret;
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            *s->gen_opparam_ptr++ = ret + 1;
            *s->gen_opparam_ptr++ = ret;
#else
            *s->gen_opparam_ptr++ = ret;
            *s->gen_opparam_ptr++ = ret + 1;
#endif
            nb_rets = 2;
        } else {
            *s->gen_opparam_ptr++ = ret;
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    real_args = 0;
    for (i = 0; i < nargs; i++) {
#if TCG_TARGET_REG_BITS < 64
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian. For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order. If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments. */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            *s->gen_opparam_ptr++ = args[i] + 1;
            *s->gen_opparam_ptr++ = args[i];
#else
            *s->gen_opparam_ptr++ = args[i];
            *s->gen_opparam_ptr++ = args[i] + 1;
#endif
            real_args += 2;
            continue;
        }
#endif /* TCG_TARGET_REG_BITS < 64 */

        *s->gen_opparam_ptr++ = args[i];
        real_args++;
    }
    *s->gen_opparam_ptr++ = (uintptr_t)func;
    *s->gen_opparam_ptr++ = flags;

    *nparam = (nb_rets << 16) | real_args;

    /* total parameters, needed to go backward in the instruction stream */
    *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above. */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
            tcg_temp_free_i32(h);
            tcg_temp_free_i32(l);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create. */
        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
            tcg_temp_free_i64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
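
/*
 * Worked example of the sizemask encoding consumed above, as implied by
 * the bit tests (1 << (i+1)*2) and (2 << (i+1)*2): two bits per slot,
 * slot 0 describes the return value, slot i+1 describes argument i;
 * the low bit of a slot means "64-bit", the high bit means "signed".
 *
 * For a helper with a 64-bit result, a signed 32-bit arg0 and a 64-bit
 * arg1:
 *
 *     ret  -> bit 0        = 1    (64-bit)        -> 0x01
 *     a0   -> bits 2..3    = 10   (32-bit signed) -> 0x08
 *     a1   -> bit 4        = 1    (64-bit)        -> 0x10
 *     sizemask = 0x19
 */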
#if TCG_TARGET_REG_BITS == 32
void tcg_gen_shifti_i64(TCGv_i64 ret, TCGv_i64 arg1,
                        int c, int right, int arith)
{
    if (c == 0) {
        tcg_gen_mov_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
        tcg_gen_mov_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
    } else if (c >= 32) {
        c -= 32;
        if (right) {
            if (arith) {
                tcg_gen_sari_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
                tcg_gen_sari_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), 31);
            } else {
                tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_HIGH(arg1), c);
                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
            }
        } else {
            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_LOW(arg1), c);
            tcg_gen_movi_i32(TCGV_LOW(ret), 0);
        }
    } else {
        TCGv_i32 t0, t1;

        t0 = tcg_temp_new_i32();
        t1 = tcg_temp_new_i32();
        if (right) {
            tcg_gen_shli_i32(t0, TCGV_HIGH(arg1), 32 - c);
            if (arith)
                tcg_gen_sari_i32(t1, TCGV_HIGH(arg1), c);
            else
                tcg_gen_shri_i32(t1, TCGV_HIGH(arg1), c);
            tcg_gen_shri_i32(TCGV_LOW(ret), TCGV_LOW(arg1), c);
            tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t0);
            tcg_gen_mov_i32(TCGV_HIGH(ret), t1);
        } else {
            tcg_gen_shri_i32(t0, TCGV_LOW(arg1), 32 - c);
            /* Note: ret can be the same as arg1, so we use t1 */
            tcg_gen_shli_i32(t1, TCGV_LOW(arg1), c);
            tcg_gen_shli_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1), c);
            tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t0);
            tcg_gen_mov_i32(TCGV_LOW(ret), t1);
        }
        tcg_temp_free_i32(t0);
        tcg_temp_free_i32(t1);
    }
}
#endif
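
/*
 * Worked example for the 0 < c < 32 case above: a logical right shift
 * of the 64-bit pair {hi,lo} by c bits must carry the low c bits of hi
 * into the top of lo:
 *
 *     lo' = (lo >> c) | (hi << (32 - c))
 *     hi' = hi >> c        (or an arithmetic shift for the signed form)
 *
 * e.g. {hi=0x00000001, lo=0x00000000} >> 4 yields
 *      {hi'=0x00000000, lo'=0x10000000}.
 */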
static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
{
    switch (op & MO_SIZE) {
    case MO_8:
        op &= ~MO_BSWAP;
        break;
    case MO_16:
        break;
    case MO_32:
        if (!is64) {
            op &= ~MO_SIGN;
        }
        break;
    case MO_64:
        if (!is64) {
            tcg_abort();
        }
        break;
    }
    if (st) {
        op &= ~MO_SIGN;
    }
    return op;
}
static const TCGOpcode old_ld_opc[8] = {
    [MO_UB] = INDEX_op_qemu_ld8u,
    [MO_SB] = INDEX_op_qemu_ld8s,
    [MO_UW] = INDEX_op_qemu_ld16u,
    [MO_SW] = INDEX_op_qemu_ld16s,
#if TCG_TARGET_REG_BITS == 32
    [MO_UL] = INDEX_op_qemu_ld32,
    [MO_SL] = INDEX_op_qemu_ld32,
#else
    [MO_UL] = INDEX_op_qemu_ld32u,
    [MO_SL] = INDEX_op_qemu_ld32s,
#endif
    [MO_Q]  = INDEX_op_qemu_ld64,
};

static const TCGOpcode old_st_opc[4] = {
    [MO_UB] = INDEX_op_qemu_st8,
    [MO_UW] = INDEX_op_qemu_st16,
    [MO_UL] = INDEX_op_qemu_st32,
    [MO_Q]  = INDEX_op_qemu_st64,
};
void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 0, 0);

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations. */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_ld_opc[memop & MO_SSIZE] != 0);

    if (TCG_TARGET_REG_BITS == 32) {
        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;
    } else {
        TCGv_i64 val64 = tcg_temp_new_i64();

        *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
        tcg_add_param_i64(val64);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;

        tcg_gen_trunc_i64_i32(val, val64);
        tcg_temp_free_i64(val64);
    }
}
void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 0, 1);

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations. */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_st_opc[memop & MO_SIZE] != 0);

    if (TCG_TARGET_REG_BITS == 32) {
        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
        tcg_add_param_i32(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;
    } else {
        TCGv_i64 val64 = tcg_temp_new_i64();

        tcg_gen_extu_i32_i64(val64, val);

        *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
        tcg_add_param_i64(val64);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = idx;

        tcg_temp_free_i64(val64);
    }
}
void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 1, 0);

#if TCG_TARGET_REG_BITS == 32
    if ((memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
        if (memop & MO_SIGN) {
            tcg_gen_sari_i32(TCGV_HIGH(val), TCGV_LOW(val), 31);
        } else {
            tcg_gen_movi_i32(TCGV_HIGH(val), 0);
        }
        return;
    }
#endif

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
        tcg_add_param_i64(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations. */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_ld_opc[memop & MO_SSIZE] != 0);

    *tcg_ctx.gen_opc_ptr++ = old_ld_opc[memop & MO_SSIZE];
    tcg_add_param_i64(val);
    tcg_add_param_tl(addr);
    *tcg_ctx.gen_opparam_ptr++ = idx;
}
void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
{
    memop = tcg_canonicalize_memop(memop, 1, 1);

#if TCG_TARGET_REG_BITS == 32
    if ((memop & MO_SIZE) < MO_64) {
        tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
        return;
    }
#endif

    if (TCG_TARGET_HAS_new_ldst) {
        *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
        tcg_add_param_i64(val);
        tcg_add_param_tl(addr);
        *tcg_ctx.gen_opparam_ptr++ = memop;
        *tcg_ctx.gen_opparam_ptr++ = idx;
        return;
    }

    /* The old opcodes only support target-endian memory operations. */
    assert((memop & MO_BSWAP) == MO_TE || (memop & MO_SIZE) == MO_8);
    assert(old_st_opc[memop & MO_SIZE] != 0);

    *tcg_ctx.gen_opc_ptr++ = old_st_opc[memop & MO_SIZE];
    tcg_add_param_i64(val);
    tcg_add_param_tl(addr);
    *tcg_ctx.gen_opparam_ptr++ = idx;
}
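
/*
 * Illustrative front-end use of the qemu_ld/st generators above (MO_*
 * flags as defined in tcg.h): a little-endian signed 16-bit guest load
 * into a 32-bit value is emitted as
 *
 *     tcg_gen_qemu_ld_i32(dest, addr, mem_index, MO_LESW);
 *
 * where MO_LESW == MO_LE | MO_SW, i.e. byte order, signedness and size
 * all travel in the single TCGMemOp operand that
 * tcg_canonicalize_memop() normalizes.
 */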
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i;
    TCGTemp *ts;
    for(i = 0; i < s->nb_globals; i++) {
        ts = &s->temps[i];
        if (ts->fixed_reg) {
            ts->val_type = TEMP_VAL_REG;
        } else {
            ts->val_type = TEMP_VAL_MEM;
        }
    }
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }
    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        s->reg_to_temp[i] = -1;
    }
}
static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
                                 int idx)
{
    TCGTemp *ts;

    assert(idx >= 0 && idx < s->nb_temps);
    ts = &s->temps[idx];
    if (idx < s->nb_globals) {
        pstrcpy(buf, buf_size, ts->name);
    } else {
        if (ts->temp_local)
            snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        else
            snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg)
{
    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I32(arg));
}

char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg)
{
    return tcg_get_arg_str_idx(s, buf, buf_size, GET_TCGV_I64(arg));
}
/* Find helper name. */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (s->helpers) {
        ret = g_hash_table_lookup(s->helpers, (gpointer)val);
    }
    return ret;
}
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
void tcg_dump_ops(TCGContext *s)
{
    const uint16_t *opc_ptr;
    const TCGArg *args;
    TCGArg arg;
    TCGOpcode c;
    int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
    const TCGOpDef *def;
    char buf[128];

    first_insn = 1;
    opc_ptr = s->gen_opc_buf;
    args = s->gen_opparam_buf;
    while (opc_ptr < s->gen_opc_ptr) {
        c = *opc_ptr++;
        def = &tcg_op_defs[c];
        if (c == INDEX_op_debug_insn_start) {
            uint64_t pc;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
            pc = ((uint64_t)args[1] << 32) | args[0];
#else
            pc = args[0];
#endif
            if (!first_insn) {
                qemu_log("\n");
            }
            qemu_log(" ---- 0x%" PRIx64, pc);
            first_insn = 0;
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;
        } else if (c == INDEX_op_call) {
            TCGArg arg;

            /* variable number of arguments */
            arg = *args++;
            nb_oargs = arg >> 16;
            nb_iargs = arg & 0xffff;
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                     tcg_find_helper(s, args[nb_oargs + nb_iargs]),
                     args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                    args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
                }
                qemu_log(",%s", t);
            }
        } else {
            qemu_log(" %s ", def->name);
            if (c == INDEX_op_nopn) {
                /* variable number of arguments */
                nb_cargs = *args;
                nb_oargs = 0;
                nb_iargs = 0;
            } else {
                nb_oargs = def->nb_oargs;
                nb_iargs = def->nb_iargs;
                nb_cargs = def->nb_cargs;
            }

            k = 0;
            for(i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    qemu_log(",");
                }
                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                   args[k++]));
            }
            for(i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    qemu_log(",");
                }
                qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                   args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                    qemu_log(",%s", cond_name[args[k++]]);
                } else {
                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                if (args[k] < ARRAY_SIZE(ldst_name) && ldst_name[args[k]]) {
                    qemu_log(",%s", ldst_name[args[k++]]);
                } else {
                    qemu_log(",$0x%" TCG_PRIlx, args[k++]);
                }
                i = 1;
                break;
            default:
                i = 0;
                break;
            }
            for(; i < nb_cargs; i++) {
                if (k != 0) {
                    qemu_log(",");
                }
                arg = args[k++];
                qemu_log("$0x%" TCG_PRIlx, arg);
            }
        }
        qemu_log("\n");
        args += nb_iargs + nb_oargs + nb_cargs;
    }
}
/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct;

    int i, n;
    arg_ct = &def->args_ct[k];
    if (arg_ct->ct & TCG_CT_ALIAS) {
        /* an alias is equivalent to a single register */
        n = 1;
    } else {
        if (!(arg_ct->ct & TCG_CT_REG))
            return 0;
        n = 0;
        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
            if (tcg_regset_test_reg(arg_ct->u.regs, i))
                n++;
        }
    }
    return TCG_TARGET_NB_REGS - n + 1;
}
/* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j, p1, p2, tmp;

    for(i = 0; i < n; i++)
        def->sorted_args[start + i] = start + i;
    if (n <= 1)
        return;
    for(i = 0; i < n - 1; i++) {
        for(j = i + 1; j < n; j++) {
            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
            if (p1 < p2) {
                tmp = def->sorted_args[start + i];
                def->sorted_args[start + i] = def->sorted_args[start + j];
                def->sorted_args[start + j] = tmp;
            }
        }
    }
}
void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
{
    TCGOpcode op;
    TCGOpDef *def;
    const char *ct_str;
    int i, nb_args;

    for(;;) {
        if (tdefs->op == (TCGOpcode)-1)
            break;
        op = tdefs->op;
        assert((unsigned)op < NB_OPS);
        def = &tcg_op_defs[op];
#if defined(CONFIG_DEBUG_TCG)
        /* Duplicate entry in op definitions? */
        assert(!def->used);
        def->used = 1;
#endif
        nb_args = def->nb_iargs + def->nb_oargs;
        for(i = 0; i < nb_args; i++) {
            ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry? */
            assert(ct_str != NULL);
            tcg_regset_clear(def->args_ct[i].u.regs);
            def->args_ct[i].ct = 0;
            if (ct_str[0] >= '0' && ct_str[0] <= '9') {
                int oarg;
                oarg = ct_str[0] - '0';
                assert(oarg < def->nb_oargs);
                assert(def->args_ct[oarg].ct & TCG_CT_REG);
                /* TCG_CT_ALIAS is for the output arguments. The input
                   argument is tagged with TCG_CT_IALIAS. */
                def->args_ct[i] = def->args_ct[oarg];
                def->args_ct[oarg].ct = TCG_CT_ALIAS;
                def->args_ct[oarg].alias_index = i;
                def->args_ct[i].ct |= TCG_CT_IALIAS;
                def->args_ct[i].alias_index = oarg;
            } else {
                for(;;) {
                    if (*ct_str == '\0')
                        break;
                    switch(*ct_str) {
                    case 'i':
                        def->args_ct[i].ct |= TCG_CT_CONST;
                        ct_str++;
                        break;
                    default:
                        if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
                            fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
                                    ct_str, i, def->name);
                            exit(1);
                        }
                    }
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);

#if 0
        {
            int i;

            printf("%s: sorted=", def->name);
            for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
                printf(" %d", def->sorted_args[i]);
            printf("\n");
        }
#endif
        tdefs++;
    }

#if defined(CONFIG_DEBUG_TCG)
    i = 0;
    for (op = 0; op < ARRAY_SIZE(tcg_op_defs); op++) {
        const TCGOpDef *def = &tcg_op_defs[op];
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            /* Wrong entry in op definitions? */
            if (def->used) {
                fprintf(stderr, "Invalid op definition for %s\n", def->name);
                i = 1;
            }
        } else {
            /* Missing entry in op definitions? */
            if (!def->used) {
                fprintf(stderr, "Missing op definition for %s\n", def->name);
                i = 1;
            }
        }
    }
    if (i == 1) {
        tcg_abort();
    }
#endif
}
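
/*
 * Illustrative shape of the per-backend constraint tables parsed above
 * (the exact strings are backend-specific; these mirror the i386 style):
 *
 *     static const TCGTargetOpDef my_op_defs[] = {
 *         { INDEX_op_add_i32, { "r", "0", "ri" } }, // output aliases in 0
 *         { INDEX_op_qemu_ld_i32, { "r", "L" } },
 *         { -1 },                                   // terminator
 *     };
 *     tcg_add_target_add_op_defs(my_op_defs);
 *
 * "i" allows an immediate, a digit aliases an input onto that output,
 * and any other letter (such as "r" or "L") is handed to the backend's
 * target_parse_constraint() to interpret.
 */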
#ifdef USE_LIVENESS_ANALYSIS

/* set a nop for an operation using 'nb_args' */
static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
                               TCGArg *args, int nb_args)
{
    if (nb_args == 0) {
        *opc_ptr = INDEX_op_nop;
    } else {
        *opc_ptr = INDEX_op_nopn;
        args[0] = nb_args;
        args[nb_args - 1] = nb_args;
    }
}
/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
                                   uint8_t *mem_temps)
{
    memset(dead_temps, 1, s->nb_temps);
    memset(mem_temps, 1, s->nb_globals);
    memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
}
/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
                                 uint8_t *mem_temps)
{
    int i;

    memset(dead_temps, 1, s->nb_temps);
    memset(mem_temps, 1, s->nb_globals);
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        mem_temps[i] = s->temps[i].temp_local;
    }
}
/* Liveness analysis : update the opc_dead_args array to tell if a
   given input argument is dead. Instructions updating dead
   temporaries are removed. */
static void tcg_liveness_analysis(TCGContext *s)
{
    int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
    TCGOpcode op, op_new, op_new2;
    TCGArg *args, arg;
    const TCGOpDef *def;
    uint8_t *dead_temps, *mem_temps;
    uint16_t dead_args;
    uint8_t sync_args;
    bool have_op_new2;

    s->gen_opc_ptr++; /* skip end */

    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;

    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));

    dead_temps = tcg_malloc(s->nb_temps);
    mem_temps = tcg_malloc(s->nb_temps);
    tcg_la_func_end(s, dead_temps, mem_temps);

    args = s->gen_opparam_ptr;
    op_index = nb_ops - 1;
    while (op_index >= 0) {
        op = s->gen_opc_buf[op_index];
        def = &tcg_op_defs[op];
        switch(op) {
        case INDEX_op_call:
            {
                int call_flags;

                nb_args = args[-1];
                args -= nb_args;
                arg = *args++;
                nb_iargs = arg & 0xffff;
                nb_oargs = arg >> 16;
                call_flags = args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is not
                   used */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (!dead_temps[arg] || mem_temps[arg]) {
                            goto do_not_remove_call;
                        }
                    }
                    tcg_set_nop(s, s->gen_opc_buf + op_index,
                                args - 1, nb_args);
                } else {
                do_not_remove_call:

                    /* output args are dead */
                    dead_args = 0;
                    sync_args = 0;
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (dead_temps[arg]) {
                            dead_args |= (1 << i);
                        }
                        if (mem_temps[arg]) {
                            sync_args |= (1 << i);
                        }
                        dead_temps[arg] = 1;
                        mem_temps[arg] = 0;
                    }

                    if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                        /* globals should be synced to memory */
                        memset(mem_temps, 1, s->nb_globals);
                    }
                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                        TCG_CALL_NO_READ_GLOBALS))) {
                        /* globals should go back to memory */
                        memset(dead_temps, 1, s->nb_globals);
                    }

                    /* input args are live */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg = args[i];
                        if (arg != TCG_CALL_DUMMY_ARG) {
                            if (dead_temps[arg]) {
                                dead_args |= (1 << i);
                            }
                            dead_temps[arg] = 0;
                        }
                    }
                    s->op_dead_args[op_index] = dead_args;
                    s->op_sync_args[op_index] = sync_args;
                }
                args--;
            }
            break;
        case INDEX_op_debug_insn_start:
            args -= def->nb_args;
            break;
        case INDEX_op_nopn:
            nb_args = args[-1];
            args -= nb_args;
            break;
        case INDEX_op_discard:
            args--;
            /* mark the temporary as dead */
            dead_temps[args[0]] = 1;
            mem_temps[args[0]] = 0;
            break;
        case INDEX_op_end:
            break;
        case INDEX_op_add2_i32:
            op_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            op_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            op_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            op_new = INDEX_op_sub_i64;
        do_addsub2:
            args -= 6;
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part. The result can be optimized to a simple
               add or sub. This happens often for x86_64 guest when the
               cpu mode is set to 32 bit. */
            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                    goto do_remove;
                }
                /* Create the single operation plus nop. */
                s->gen_opc_buf[op_index] = op = op_new;
                args[1] = args[2];
                args[2] = args[4];
                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
                /* Fall through and mark the single-word operation live. */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;
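
            /*
             * Example of the rewrite performed above (operand order:
             * lo_out, hi_out, a_lo, a_hi, b_lo, b_hi). When hi_out is
             * dead but lo_out is not,
             *
             *     add2_i32 t0,t1,a0,a1,b0,b1  -->  add_i32 t0,a0,b0 ; nop
             *
             * i.e. the double-word add collapses to the low-word add,
             * which is common when an x86_64 guest runs in 32-bit mode.
             */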
        case INDEX_op_mulu2_i32:
            op_new = INDEX_op_mul_i32;
            op_new2 = INDEX_op_muluh_i32;
            have_op_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            op_new = INDEX_op_mul_i32;
            op_new2 = INDEX_op_mulsh_i32;
            have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            op_new = INDEX_op_mul_i64;
            op_new2 = INDEX_op_muluh_i64;
            have_op_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            op_new = INDEX_op_mul_i64;
            op_new2 = INDEX_op_mulsh_i64;
            have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            args -= 4;
            nb_iargs = 2;
            nb_oargs = 2;
            if (dead_temps[args[1]] && !mem_temps[args[1]]) {
                if (dead_temps[args[0]] && !mem_temps[args[0]]) {
                    /* Both parts of the operation are dead. */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                s->gen_opc_buf[op_index] = op = op_new;
                args[1] = args[2];
                args[2] = args[3];
            } else if (have_op_new2 && dead_temps[args[0]]
                       && !mem_temps[args[0]]) {
                /* The low part of the operation is dead; generate the high. */
                s->gen_opc_buf[op_index] = op = op_new2;
                args[0] = args[1];
                args[1] = args[2];
                args[2] = args[3];
            } else {
                goto do_not_remove;
            }
            assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
            tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
            /* Mark the single-word operation live. */
            nb_oargs = 1;
            goto do_not_remove;
        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            args -= def->nb_args;
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for(i = 0; i < nb_oargs; i++) {
                    arg = args[i];
                    if (!dead_temps[arg] || mem_temps[arg]) {
                        goto do_not_remove;
                    }
                }
            do_remove:
                tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
#ifdef CONFIG_PROFILER
                s->del_op_count++;
#endif
            } else {
            do_not_remove:

                /* output args are dead */
                dead_args = 0;
                sync_args = 0;
                for(i = 0; i < nb_oargs; i++) {
                    arg = args[i];
                    if (dead_temps[arg]) {
                        dead_args |= (1 << i);
                    }
                    if (mem_temps[arg]) {
                        sync_args |= (1 << i);
                    }
                    dead_temps[arg] = 1;
                    mem_temps[arg] = 0;
                }

                /* if end of basic block, update */
                if (def->flags & TCG_OPF_BB_END) {
                    tcg_la_bb_end(s, dead_temps, mem_temps);
                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                    /* globals should be synced to memory */
                    memset(mem_temps, 1, s->nb_globals);
                }

                /* input args are live */
                for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    arg = args[i];
                    if (dead_temps[arg]) {
                        dead_args |= (1 << i);
                    }
                    dead_temps[arg] = 0;
                }
                s->op_dead_args[op_index] = dead_args;
                s->op_sync_args[op_index] = sync_args;
            }
            break;
        }
        op_index--;
    }
    if (args != s->gen_opparam_buf) {
        tcg_abort();
    }
}
#else
/* dummy liveness analysis */
static void tcg_liveness_analysis(TCGContext *s)
{
    int nb_ops;
    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;

    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
    memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
    memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
}
#endif
#ifndef NDEBUG
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf("  %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset, tcg_target_reg_names[ts->mem_reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" TCG_PRIlx, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] >= 0) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}
static void check_regs(TCGContext *s)
{
    int reg, k;
    TCGTemp *ts;
    char buf[64];

    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        k = s->reg_to_temp[reg];
        if (k >= 0) {
            ts = &s->temps[k];
            if (ts->val_type != TEMP_VAL_REG ||
                ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for(k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG &&
            !ts->fixed_reg &&
            s->reg_to_temp[ts->reg] != k) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif
static void temp_allocate_frame(TCGContext *s, int temp)
{
    TCGTemp *ts;
    ts = &s->temps[temp];
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_reg = s->frame_reg;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
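
/*
 * Worked example of the rounding above on a 64-bit host, where
 * sizeof(tcg_target_long) == 8: a current offset of 20 becomes
 * (20 + 7) & ~7 = 24, so every spill slot starts on a natural 8-byte
 * boundary within the TB frame.
 */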
/* sync register 'reg' by saving it to the corresponding temporary */
static inline void tcg_reg_sync(TCGContext *s, int reg)
{
    TCGTemp *ts;
    int temp;

    temp = s->reg_to_temp[reg];
    ts = &s->temps[temp];
    assert(ts->val_type == TEMP_VAL_REG);
    if (!ts->mem_coherent && !ts->fixed_reg) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, temp);
        }
        tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
    }
    ts->mem_coherent = 1;
}
/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, int reg)
{
    int temp;

    temp = s->reg_to_temp[reg];
    if (temp != -1) {
        tcg_reg_sync(s, reg);
        s->temps[temp].val_type = TEMP_VAL_MEM;
        s->reg_to_temp[reg] = -1;
    }
}
/* Allocate a register belonging to reg1 & ~reg2 */
static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
{
    int i, reg;
    TCGRegSet reg_ct;

    tcg_regset_andnot(reg_ct, reg1, reg2);

    /* first try free registers */
    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
        reg = tcg_target_reg_alloc_order[i];
        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
            return reg;
    }

    /* XXX: do better spill choice */
    for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
        reg = tcg_target_reg_alloc_order[i];
        if (tcg_regset_test_reg(reg_ct, reg)) {
            tcg_reg_free(s, reg);
            return reg;
        }
    }

    tcg_abort();
}
/* mark a temporary as dead. */
static inline void temp_dead(TCGContext *s, int temp)
{
    TCGTemp *ts;

    ts = &s->temps[temp];
    if (!ts->fixed_reg) {
        if (ts->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ts->reg] = -1;
        }
        if (temp < s->nb_globals || ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
    }
}
/* sync a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
{
    TCGTemp *ts;

    ts = &s->temps[temp];
    if (!ts->fixed_reg) {
        switch(ts->val_type) {
        case TEMP_VAL_CONST:
            ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                    allocated_regs);
            ts->val_type = TEMP_VAL_REG;
            s->reg_to_temp[ts->reg] = temp;
            ts->mem_coherent = 0;
            tcg_out_movi(s, ts->type, ts->reg, ts->val);
            /* fallthrough */
        case TEMP_VAL_REG:
            tcg_reg_sync(s, ts->reg);
            break;
        case TEMP_VAL_DEAD:
        case TEMP_VAL_MEM:
            break;
        default:
            tcg_abort();
        }
    }
}
/* save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
{
#ifdef USE_LIVENESS_ANALYSIS
    /* The liveness analysis already ensures that globals are back
       in memory. Keep an assert for safety. */
    assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
#else
    temp_sync(s, temp, allocated_regs);
    temp_dead(s, temp);
#endif
}
/* save globals to their canonical location and assume they can be
   modified by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for(i = 0; i < s->nb_globals; i++) {
        temp_save(s, i, allocated_regs);
    }
}
/* sync globals to their canonical location and assume they can be
   read by the following code. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant. */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = 0; i < s->nb_globals; i++) {
#ifdef USE_LIVENESS_ANALYSIS
        assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
               s->temps[i].mem_coherent);
#else
        temp_sync(s, i, allocated_regs);
#endif
    }
}
/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    TCGTemp *ts;
    int i;

    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            temp_save(s, i, allocated_regs);
        } else {
#ifdef USE_LIVENESS_ANALYSIS
            /* The liveness analysis already ensures that temps are dead.
               Keep an assert for safety. */
            assert(ts->val_type == TEMP_VAL_DEAD);
#else
            temp_dead(s, i);
#endif
        }
    }

    save_globals(s, allocated_regs);
}
#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
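
/*
 * Example of how the per-op liveness masks are consumed (values are
 * illustrative): outputs occupy argument indices 0..nb_oargs-1 and
 * inputs follow, so with dead_args = 0b0101 and sync_args = 0b0001:
 *
 *     IS_DEAD_ARG(0)   -> 1  output 0 is never used again
 *     NEED_SYNC_ARG(0) -> 1  ...but its memory copy must be written back
 *     IS_DEAD_ARG(2)   -> 1  the argument at index 2 dies here
 */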
static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
                               uint16_t dead_args, uint8_t sync_args)
{
    TCGTemp *ots;
    tcg_target_ulong val;

    ots = &s->temps[args[0]];
    val = args[1];

    if (ots->fixed_reg) {
        /* for fixed registers, we do not do any constant
           propagation */
        tcg_out_movi(s, ots->type, ots->reg, val);
    } else {
        /* The movi is not explicitly generated here */
        if (ots->val_type == TEMP_VAL_REG)
            s->reg_to_temp[ots->reg] = -1;
        ots->val_type = TEMP_VAL_CONST;
        ots->val = val;
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, args[0], s->reserved_regs);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, args[0]);
    }
}
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                              const TCGArg *args, uint16_t dead_args,
                              uint8_t sync_args)
{
    TCGRegSet allocated_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    tcg_regset_set(allocated_regs, s->reserved_regs);
    ots = &s->temps[args[0]];
    ts = &s->temps[args[1]];

    /* Note that otype != itype for no-op truncation. */
    otype = ots->type;
    itype = ts->type;

    /* If the source value is not in a register, and we're going to be
       forced to have it in a register in order to perform the copy,
       then copy the SOURCE value into its own register first. That way
       we don't have to reload SOURCE the next time it is used. */
    if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
        || ts->val_type == TEMP_VAL_MEM) {
        ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
                                allocated_regs);
        if (ts->val_type == TEMP_VAL_MEM) {
            tcg_out_ld(s, itype, ts->reg, ts->mem_reg, ts->mem_offset);
            ts->mem_coherent = 1;
        } else if (ts->val_type == TEMP_VAL_CONST) {
            tcg_out_movi(s, itype, ts->reg, ts->val);
        }
        s->reg_to_temp[ts->reg] = args[1];
        ts->val_type = TEMP_VAL_REG;
    }

    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        assert(NEED_SYNC_ARG(0));
        /* The code above should have moved the temp to a register. */
        assert(ts->val_type == TEMP_VAL_REG);
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, args[0]);
        }
        tcg_out_st(s, otype, ts->reg, ots->mem_reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, args[1]);
        }
        temp_dead(s, args[0]);
    } else if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant */
        if (ots->val_type == TEMP_VAL_REG) {
            s->reg_to_temp[ots->reg] = -1;
        }
        ots->val_type = TEMP_VAL_CONST;
        ots->val = ts->val;
    } else {
        /* The code in the first if block should have moved the
           temp to a register. */
        assert(ts->val_type == TEMP_VAL_REG);
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = -1;
            }
            ots->reg = ts->reg;
            temp_dead(s, args[1]);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = args[0];
        if (NEED_SYNC_ARG(0)) {
            tcg_reg_sync(s, ots->reg);
        }
    }
}
static void tcg_reg_alloc_op(TCGContext *s,
                             const TCGOpDef *def, TCGOpcode opc,
                             const TCGArg *args, uint16_t dead_args,
                             uint8_t sync_args)
{
    TCGRegSet allocated_regs;
    int i, k, nb_iargs, nb_oargs, reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    /* satisfy input constraints */
    tcg_regset_set(allocated_regs, s->reserved_regs);
    for(k = 0; k < nb_iargs; k++) {
        i = def->sorted_args[nb_oargs + k];
        arg = args[i];
        arg_ct = &def->args_ct[i];
        ts = &s->temps[arg];
        if (ts->val_type == TEMP_VAL_MEM) {
            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
            tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 1;
            s->reg_to_temp[reg] = arg;
        } else if (ts->val_type == TEMP_VAL_CONST) {
            if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
                /* constant is OK for instruction */
                const_args[i] = 1;
                new_args[i] = ts->val;
                goto iarg_end;
            } else {
                /* need to move to a register */
                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
                tcg_out_movi(s, ts->type, reg, ts->val);
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = arg;
            }
        }
        assert(ts->val_type == TEMP_VAL_REG);
        if (arg_ct->ct & TCG_CT_IALIAS) {
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != args[arg_ct->alias_index])
                    goto allocate_in_reg;
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }
            }
        }
        reg = ts->reg;
        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(allocated_regs, reg);
    iarg_end: ;
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, args[i]);
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
                    tcg_reg_free(s, reg);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, allocated_regs);
        }

        /* satisfy the output constraints */
        tcg_regset_set(allocated_regs, s->reserved_regs);
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = args[i];
            arg_ct = &def->args_ct[i];
            ts = &s->temps[arg];
            if (arg_ct->ct & TCG_CT_ALIAS) {
                reg = new_args[arg_ct->alias_index];
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
            }
            tcg_regset_set_reg(allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = -1;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = arg;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    tcg_out_op(s, opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = &s->temps[args[i]];
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            tcg_reg_sync(s, reg);
        }
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, args[i]);
        }
    }
}
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                              TCGOpcode opc, const TCGArg *args,
                              uint16_t dead_args, uint8_t sync_args)
{
    int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    arg = *args++;

    nb_oargs = arg >> 16;
    nb_iargs = arg & 0xffff;
    nb_params = nb_iargs;

    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
    flags = args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_params) {
        nb_regs = nb_params;
    }

    /* assign stack slots first */
    call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for(i = nb_regs; i < nb_params; i++) {
        arg = args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            if (ts->val_type == TEMP_VAL_REG) {
                tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
            } else if (ts->val_type == TEMP_VAL_MEM) {
                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                    s->reserved_regs);
                /* XXX: not correct if reading values from the stack */
                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
            } else if (ts->val_type == TEMP_VAL_CONST) {
                reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                    s->reserved_regs);
                /* XXX: sign extend may be needed on some targets */
                tcg_out_movi(s, ts->type, reg, ts->val);
                tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
            } else {
                tcg_abort();
            }
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    tcg_regset_set(allocated_regs, s->reserved_regs);
    for(i = 0; i < nb_regs; i++) {
        arg = args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            reg = tcg_target_call_iarg_regs[i];
            tcg_reg_free(s, reg);
            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else if (ts->val_type == TEMP_VAL_MEM) {
                tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset);
            } else if (ts->val_type == TEMP_VAL_CONST) {
                /* XXX: sign extend ? */
                tcg_out_movi(s, ts->type, reg, ts->val);
            } else {
                tcg_abort();
            }
            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, args[i]);
        }
    }

    /* clobber call registers */
    for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, reg)) {
            tcg_reg_free(s, reg);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = args[i];
        ts = &s->temps[arg];
        reg = tcg_target_call_oarg_regs[i];
        assert(s->reg_to_temp[reg] == -1);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = -1;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = arg;
            if (NEED_SYNC_ARG(i)) {
                tcg_reg_sync(s, reg);
            }
            if (IS_DEAD_ARG(i)) {
                temp_dead(s, args[i]);
            }
        }
    }

    return nb_iargs + nb_oargs + def->nb_cargs + 1;
}
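
/*
 * Note on the value returned above: it advances the caller's opparam
 * pointer past this call's parameter words in this opparam encoding:
 * 1 for the descriptor word consumed via *args++, plus the in/out
 * arguments, plus nb_cargs covering the function pointer, the flags
 * word, and the trailing "total parameters" word that tcg_gen_callN()
 * emitted.
 */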
#ifdef CONFIG_PROFILER

static int64_t tcg_table_op_count[NB_OPS];

static void dump_op_count(void)
{
    int i;
    FILE *f;
    f = fopen("/tmp/op.log", "w");
    for(i = INDEX_op_end; i < NB_OPS; i++) {
        fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name, tcg_table_op_count[i]);
    }
    fclose(f);
}
#endif
static inline int tcg_gen_code_common(TCGContext *s,
                                      tcg_insn_unit *gen_code_buf,
                                      long search_pc)
{
    TCGOpcode opc;
    int op_index;
    const TCGOpDef *def;
    const TCGArg *args;

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
        qemu_log("OP:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
    }
#endif

#ifdef CONFIG_PROFILER
    s->opt_time -= profile_getclock();
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    s->gen_opparam_ptr =
        tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
#endif

#ifdef CONFIG_PROFILER
    s->opt_time += profile_getclock();
    s->la_time -= profile_getclock();
#endif

    tcg_liveness_analysis(s);

#ifdef CONFIG_PROFILER
    s->la_time += profile_getclock();
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = gen_code_buf;
    s->code_ptr = gen_code_buf;

    tcg_out_tb_init(s);

    args = s->gen_opparam_buf;
    op_index = 0;

    for(;;) {
        opc = s->gen_opc_buf[op_index];
#ifdef CONFIG_PROFILER
        tcg_table_op_count[opc]++;
#endif
        def = &tcg_op_defs[opc];
#if 0
        printf("%s: %d %d %d\n", def->name,
               def->nb_oargs, def->nb_iargs, def->nb_cargs);
        //        dump_regs(s);
#endif
        switch(opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
            tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
                              s->op_sync_args[op_index]);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
            tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
                               s->op_sync_args[op_index]);
            break;
        case INDEX_op_debug_insn_start:
            /* debug instruction */
            break;
        case INDEX_op_nop:
        case INDEX_op_nop1:
        case INDEX_op_nop2:
        case INDEX_op_nop3:
            break;
        case INDEX_op_nopn:
            args += args[0];
            goto next;
        case INDEX_op_discard:
            temp_dead(s, args[0]);
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, args[0], s->code_ptr);
            break;
        case INDEX_op_call:
            args += tcg_reg_alloc_call(s, def, opc, args,
                                       s->op_dead_args[op_index],
                                       s->op_sync_args[op_index]);
            goto next;
        case INDEX_op_end:
            goto the_end;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            if (def->flags & TCG_OPF_NOT_PRESENT) {
                tcg_abort();
            }
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
                             s->op_sync_args[op_index]);
            break;
        }
        args += def->nb_args;
    next:
        if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
            return op_index;
        }
        op_index++;
#ifndef NDEBUG
        check_regs(s);
#endif
    }
 the_end:
    /* Generate TB finalization at the end of block */
    tcg_out_tb_finalize(s);
    return -1;
}
int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
{
#ifdef CONFIG_PROFILER
    {
        int n;

        n = (s->gen_opc_ptr - s->gen_opc_buf);
        s->op_count += n;
        if (n > s->op_count_max) {
            s->op_count_max = n;
        }

        s->temp_count += s->nb_temps;
        if (s->nb_temps > s->temp_count_max) {
            s->temp_count_max = s->nb_temps;
        }
    }
#endif

    tcg_gen_code_common(s, gen_code_buf, -1);

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
/* Return the index of the first micro operation for which the code emitted
   so far extends past 'offset' bytes from the start of the TB, i.e. the op
   that generated the host pc at that offset.  The contents of gen_code_buf
   must not be changed, though writing the same values is ok.
   Return -1 if not found. */
int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
                           long offset)
{
    return tcg_gen_code_common(s, gen_code_buf, offset);
}
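/* Illustrative caller (a sketch of the restore-state path, not code from
   this file): after an exception, the translator regenerates the TB and
   asks which op produced the faulting host pc:

       j = tcg_gen_code_search_pc(s, tb->tc_ptr,
                                  searched_pc - (uintptr_t)tb->tc_ptr);
       if (j < 0) {
           return -1;   (host pc not within this TB's code)
       }

   'tb->tc_ptr' and 'searched_pc' are names assumed from the caller's
   context. */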
#ifdef CONFIG_PROFILER
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGContext *s = &tcg_ctx;
    int64_t tot;

    tot = s->interm_time + s->code_time;
    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                s->tb_count,
                s->tb_count1 - s->tb_count,
                s->tb_count1 ? (double)(s->tb_count1 - s->tb_count) / s->tb_count1 * 100.0 : 0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                s->tb_count ? (double)s->op_count / s->tb_count : 0, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                s->tb_count ?
                (double)s->del_op_count / s->tb_count : 0);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                s->tb_count ?
                (double)s->temp_count / s->tb_count : 0,
                s->temp_count_max);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);

    dump_op_count();
}
#else
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both support for the feature
       and the value to put into the ELF image's e_machine field.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.

   A hedged sketch of these steps follows. */
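/* A minimal sketch of what steps (1)-(3) might look like in a
   hypothetical backend's tcg-target.c; the 'DebugFrame' layout and the
   EM_X86_64 value are illustrative only, and the frame must really carry
   the host's CIE/FDE unwind opcodes:

       #define ELF_HOST_MACHINE  EM_X86_64               step (1)

       void tcg_register_jit(void *buf, size_t buf_size)  step (2)
       {
           static DebugFrame debug_frame = { ... };
           debug_frame.fde.func_start = (uintptr_t)buf;
           debug_frame.fde.func_len = buf_size;
           tcg_register_jit_int(buf, buf_size,            step (3)
                                &debug_frame, sizeof(debug_frame));
       }

   'DebugFrame' and its 'fde' member are assumed backend-local types,
   typically built from the DebugFrameCIE/DebugFrameFDEHeader structures
   declared earlier in this file. */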
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */

typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    /* The empty asm keeps the function from being optimized away, so the
       breakpoint GDB plants here is always reachable. */
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */
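/* How the handshake works (summarized from the GDB JIT interface docs):
   GDB plants a breakpoint inside __jit_debug_register_code.  Each time we
   update __jit_debug_descriptor and call that function, GDB wakes up,
   follows relevant_entry, and reads the in-memory ELF image at
   symfile_addr/symfile_size that tcg_register_jit_int builds below. */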
/* Return the offset of 'str' within the string table 'strtab'.  The
   caller guarantees the string is present, so the scan never falls off
   the end of the table. */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 void *debug_frame, size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;
    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            },
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            },
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };
    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;

    img = g_malloc(img_size);
    *img = img_template;
    memcpy(img + 1, debug_frame, debug_frame_size);

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;
#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            /* fwrite returns the number of complete items written (0 or 1
               here); checking it also silences the unused-result warning.
               The dump is a best-effort debugging aid, so a short write
               is simply ignored. */
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Ignore a short write. */
            }
            fclose(f);
        }
    }
#endif
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
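/* Once registered, an attached debugger can symbolize JITed frames:
   host-level backtraces that pass through generated code then show the
   code_gen_buffer symbol rather than a bare address. */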
#else /* !ELF_HOST_MACHINE */
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(void *buf, size_t size,
                                 void *debug_frame, size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */