/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* define it to use liveness analysis (better code) */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/timer.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions.  Currently they are used for qemu_ld/st
   instructions.  */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/cpu-common.h"
#include "exec/exec-all.h"

#include "tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"

/* Forward declarations for functions declared in tcg-target.inc.c and
   used here.  */
static void tcg_target_init(TCGContext *s);
static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
static void tcg_target_qemu_prologue(TCGContext *s);
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.inc.c. */
static const char *target_parse_constraint(TCGArgConstraint *ct,
                                           const char *ct_str, TCGType type);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
static void tcg_out_tb_init(TCGContext *s);
static bool tcg_out_tb_finalize(TCGContext *s);

static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

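/*
 * Example (hypothetical backend code): on a byte-stream host such as
 * x86, TCG_TARGET_INSN_UNIT_SIZE is 1 and a 5-byte "mov eax, imm32"
 * would be emitted as
 *
 *     tcg_out8(s, 0xb8);      // opcode byte: one insn unit
 *     tcg_out32(s, imm);      // memcpy path: code_ptr advances 4 units
 *
 * A fixed-width host with a 4-byte insn unit instead takes the
 * single-unit store in tcg_out32 and never calls the narrower helpers,
 * which is why each helper is marked __attribute__((unused)).
 */
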
/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        patch_reloc(code_ptr, type, l->u.value, addend);
    } else {
        /* add a new relocation entry */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}

static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
{
    intptr_t value = (intptr_t)ptr;
    TCGRelocation *r;

    tcg_debug_assert(!l->has_value);

    for (r = l->u.first_reloc; r != NULL; r = r->next) {
        patch_reloc(r->ptr, r->type, value, r->addend);
    }

    l->has_value = 1;
    l->u.value_ptr = ptr;
}

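/*
 * Typical use (sketch; the relocation type is target-specific): a
 * backend emitting a branch to a label that is not yet bound queues a
 * relocation, and binding the label patches every queued site:
 *
 *     tcg_out_reloc(s, s->code_ptr, R_SOME_RELOC, l, 0);  // forward ref
 *     ...
 *     tcg_out_label(s, l, s->code_ptr);  // patch all recorded sites
 *
 * Once has_value is set, a later tcg_out_reloc patches the site
 * immediately instead of queueing it.
 */
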
TCGLabel *gen_new_label(void)
{
    TCGContext *s = &tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    *l = (TCGLabel){
        .id = s->nb_labels++
    };

    return l;
}

#include "tcg-target.inc.c"

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return s->pool_cur;
}

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;

    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

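/*
 * The pool allocator never frees individual objects: tcg_malloc() bumps
 * pool_cur inside the current chunk, oversized requests get a dedicated
 * "large" pool, and tcg_pool_reset() releases only the large pools while
 * keeping the chunk chain for reuse by the next translation.
 */
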
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    GHashTable *helper_table;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    s->helpers = helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }
}

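/*
 * Because the helper table is built with g_direct_hash/equal semantics,
 * lookup keys are the raw function pointers themselves: tcg_gen_callN()
 * below recovers a helper's TCGHelperInfo (name, flags, sizemask) with
 * g_hash_table_lookup(s->helpers, func), so the registration and the
 * call site must refer to the same symbol.
 */
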
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->code_gen_prologue = buf0;

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);
    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size = s->code_gen_buffer_size - prologue_size;
    s->code_gen_buffer_size = total_size;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        log_disas(buf0, prologue_size);
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}

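/*
 * The 1024-byte margin encodes an assumption: no single op expands to
 * more code than that, so tcg_gen_code() can compare code_ptr against
 * code_gen_highwater once per op, after emission, and report a full
 * buffer to the caller (which flushes all TBs and retranslates) rather
 * than bounds-checking every store.
 */
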
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    s->gen_op_buf[0].next = 1;
    s->gen_op_buf[0].prev = 0;
    s->gen_next_op_idx = 1;
    s->gen_next_parm_idx = 0;

    s->be = tcg_malloc(sizeof(TCGBackendData));
}

static inline int temp_idx(TCGContext *s, TCGTemp *ts)
{
    ptrdiff_t n = ts - s->temps;
    tcg_debug_assert(n >= 0 && n < s->nb_temps);
    return n;
}

static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;
    tcg_debug_assert(n < TCG_MAX_TEMPS);
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    return tcg_temp_alloc(s);
}

static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                       TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return temp_idx(s, ts);
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    int idx;
    s->frame_start = start;
    s->frame_end = start + size;
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
    s->frame_temp = &s->temps[idx];
}

TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
{
    TCGContext *s = &tcg_ctx;
    int idx;

    if (tcg_regset_test_reg(s->reserved_regs, reg)) {
        tcg_abort();
    }
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
{
    TCGContext *s = &tcg_ctx;
    int idx;

    if (tcg_regset_test_reg(s->reserved_regs, reg)) {
        tcg_abort();
    }
    idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
    return MAKE_TCGV_I64(idx);
}

int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                intptr_t offset, const char *name)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return temp_idx(s, ts);
}

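/*
 * Example: on a 32-bit host, a 64-bit guest global named "x" becomes two
 * I32 temps "x_0" (low half) and "x_1" (high half).  The bigendian
 * correction keeps both halves addressable with plain 32-bit accesses:
 * the low half lives at offset + 4 on a big-endian host and at offset
 * on a little-endian one.
 */
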
static int tcg_temp_new_internal(TCGType type, int temp_local)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
        idx = temp_idx(s, ts);
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return idx;
}

TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
    return MAKE_TCGV_I32(idx);
}

TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
{
    int idx;

    idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
    return MAKE_TCGV_I64(idx);
}

static void tcg_temp_free_internal(int idx)
{
    TCGContext *s = &tcg_ctx;
    TCGTemp *ts;
    int k;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(idx >= s->nb_globals && idx < s->nb_temps);
    ts = &s->temps[idx];
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}

void tcg_temp_free_i32(TCGv_i32 arg)
{
    tcg_temp_free_internal(GET_TCGV_I32(arg));
}

void tcg_temp_free_i64(TCGv_i64 arg)
{
    tcg_temp_free_internal(GET_TCGV_I64(arg));
}

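/*
 * Temps are recycled per (base type, locality) class via the free_temps
 * bitmaps rather than returned to an allocator, so the usual pattern
 *
 *     TCGv_i32 t = tcg_temp_new_i32();   // may reuse a freed slot
 *     tcg_gen_movi_i32(t, 0);
 *     tcg_temp_free_i32(t);              // marks the slot in free_temps
 *
 * costs little more than a bitmap scan per allocation.
 */
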
TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = &tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
                   int nargs, TCGArg *args)
{
    int i, real_args, nb_rets, pi, pi_first;
    unsigned sizemask, flags;
    TCGHelperInfo *info;

    info = g_hash_table_lookup(s->helpers, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;

    TCGV_UNUSED_I64(retl);
    TCGV_UNUSED_I64(reth);
    if (sizemask != 0) {
        TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
        for (i = real_args = 0; i < nargs; ++i) {
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = GET_TCGV_I32(h);
                split_args[real_args++] = GET_TCGV_I32(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = GET_TCGV_I64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    pi_first = pi = s->gen_next_parm_idx;
    if (ret != TCG_CALL_DUMMY_ARG) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
            nb_rets = 2;
        } else {
            s->gen_opparam_buf[pi++] = ret;
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
#ifdef HOST_WORDS_BIGENDIAN
            s->gen_opparam_buf[pi++] = ret + 1;
            s->gen_opparam_buf[pi++] = ret;
#else
            s->gen_opparam_buf[pi++] = ret;
            s->gen_opparam_buf[pi++] = ret + 1;
#endif
            nb_rets = 2;
        } else {
            s->gen_opparam_buf[pi++] = ret;
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
            /* If stack grows up, then we will be placing successive
               arguments at lower addresses, which means we need to
               reverse the order compared to how we would normally
               treat either big or little-endian.  For those arguments
               that will wind up in registers, this still works for
               HPPA (the only current STACK_GROWSUP target) since the
               argument registers are *also* allocated in decreasing
               order.  If another such target is added, this logic may
               have to get more complicated to differentiate between
               stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            s->gen_opparam_buf[pi++] = args[i] + 1;
            s->gen_opparam_buf[pi++] = args[i];
#else
            s->gen_opparam_buf[pi++] = args[i];
            s->gen_opparam_buf[pi++] = args[i] + 1;
#endif
            real_args += 2;
            continue;
        }

        s->gen_opparam_buf[pi++] = args[i];
        real_args++;
    }
    s->gen_opparam_buf[pi++] = (uintptr_t)func;
    s->gen_opparam_buf[pi++] = flags;

    i = s->gen_next_op_idx;
    tcg_debug_assert(i < OPC_BUF_SIZE);
    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);

    /* Set links for sequential allocation during translation.  */
    s->gen_op_buf[i] = (TCGOp){
        .opc = INDEX_op_call,
        .callo = nb_rets,
        .calli = real_args,
        .args = pi_first,
        .prev = i - 1,
        .next = i + 1
    };

    /* Make sure the calli field didn't overflow.  */
    tcg_debug_assert(s->gen_op_buf[i].calli == real_args);

    s->gen_op_buf[0].prev = i;
    s->gen_next_op_idx = i + 1;
    s->gen_next_parm_idx = pi;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
            TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
            tcg_temp_free_i32(h);
            tcg_temp_free_i32(l);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
            tcg_temp_free_i64(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}

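/*
 * sizemask layout, as produced by the helper glue headers: bit 0 is set
 * for a 64-bit return value, and each argument i contributes two bits
 * starting at position (i+1)*2 -- bit (i+1)*2 for "is 64-bit" and bit
 * (i+1)*2 + 1 for "is signed".  E.g. a helper returning i64 and taking
 * one unsigned i32 argument has bit 0 set and bits 2-3 clear.
 */
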
static void tcg_reg_alloc_start(TCGContext *s)
{
    int i;
    TCGTemp *ts;

    for(i = 0; i < s->nb_globals; i++) {
        ts = &s->temps[i];
        if (ts->fixed_reg) {
            ts->val_type = TEMP_VAL_REG;
        } else {
            ts->val_type = TEMP_VAL_MEM;
        }
    }
    for(i = s->nb_globals; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        if (ts->temp_local) {
            ts->val_type = TEMP_VAL_MEM;
        } else {
            ts->val_type = TEMP_VAL_DEAD;
        }
        ts->mem_allocated = 0;
        ts->fixed_reg = 0;
    }

    memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
}

static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(s, ts);

    if (idx < s->nb_globals) {
        pstrcpy(buf, buf_size, ts->name);
    } else if (ts->temp_local) {
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
    } else {
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
    }
    return buf;
}

static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
                                 int buf_size, int idx)
{
    tcg_debug_assert(idx >= 0 && idx < s->nb_temps);
    return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
}

/* Find helper name.  */
static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
{
    const char *ret = NULL;
    if (s->helpers) {
        TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
        if (info) {
            ret = info->name;
        }
    }
    return ret;
}

static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};

static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};

static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};

void tcg_dump_ops(TCGContext *s)
{
    char buf[128];
    TCGOp *op;
    int oi;

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = op->next) {
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        const TCGArg *args;
        TCGOpcode c;
        int col = 0;

        op = &s->gen_op_buf[oi];
        c = op->opc;
        def = &tcg_op_defs[c];
        args = &s->gen_opparam_buf[op->args];

        if (c == INDEX_op_insn_start) {
            col += qemu_log("%s ----", oi != s->gen_op_buf[0].next ? "\n" : "");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
#else
                a = args[i];
#endif
                col += qemu_log(" " TARGET_FMT_lx, a);
            }
        } else if (c == INDEX_op_call) {
            /* variable number of arguments */
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            nb_cargs = def->nb_cargs;

            /* function name, flags, out args */
            col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
                            tcg_find_helper(s, args[nb_oargs + nb_iargs]),
                            args[nb_oargs + nb_iargs + 1], nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                           args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = args[nb_oargs + i];
                const char *t = "<dummy>";
                if (arg != TCG_CALL_DUMMY_ARG) {
                    t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
                }
                col += qemu_log(",%s", t);
            }
        } else {
            col += qemu_log(" %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                          args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                if (k != 0) {
                    col += qemu_log(",");
                }
                col += qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
                                                          args[k++]));
            }
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
                if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                    col += qemu_log(",%s", cond_name[args[k++]]);
                } else {
                    col += qemu_log(",$0x%" TCG_PRIlx, args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_i32:
            case INDEX_op_qemu_st_i32:
            case INDEX_op_qemu_ld_i64:
            case INDEX_op_qemu_st_i64:
                {
                    TCGMemOpIdx oi = args[k++];
                    TCGMemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
                        col += qemu_log(",$0x%x,%u", op, ix);
                    } else {
                        const char *s_al, *s_op;
                        s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                        s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                        col += qemu_log(",%s%s,%u", s_al, s_op, ix);
                    }
                    i = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
                i++, k++;
                break;
            default:
                break;
            }
            for (; i < nb_cargs; i++, k++) {
                col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
            }
        }
        if (op->life) {
            unsigned life = op->life;

            for (; col < 48; ++col) {
                putc(' ', qemu_logfile);
            }

            if (life & (SYNC_ARG * 3)) {
                qemu_log("  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        qemu_log(" %d", i);
                    }
                }
            }
            life /= DEAD_ARG;
            if (life) {
                qemu_log("  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        qemu_log(" %d", i);
                    }
                }
            }
        }
        qemu_log("\n");
    }
}

/* we give more priority to constraints with fewer registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
    const TCGArgConstraint *arg_ct;

    int i, n;
    arg_ct = &def->args_ct[k];
    if (arg_ct->ct & TCG_CT_ALIAS) {
        /* an alias is equivalent to a single register */
        n = 1;
    } else {
        if (!(arg_ct->ct & TCG_CT_REG))
            return 0;
        n = 0;
        for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
            if (tcg_regset_test_reg(arg_ct->u.regs, i))
                n++;
        }
    }
    return TCG_TARGET_NB_REGS - n + 1;
}

/* sort from highest priority to lowest */
static void sort_constraints(TCGOpDef *def, int start, int n)
{
    int i, j, p1, p2, tmp;

    for(i = 0; i < n; i++)
        def->sorted_args[start + i] = start + i;
    if (n <= 1)
        return;
    for(i = 0; i < n - 1; i++) {
        for(j = i + 1; j < n; j++) {
            p1 = get_constraint_priority(def, def->sorted_args[start + i]);
            p2 = get_constraint_priority(def, def->sorted_args[start + j]);
            if (p1 < p2) {
                tmp = def->sorted_args[start + i];
                def->sorted_args[start + i] = def->sorted_args[start + j];
                def->sorted_args[start + j] = tmp;
            }
        }
    }
}

static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            tcg_regset_clear(def->args_ct[i].u.regs);
            def->args_ct[i].ct = 0;
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
                        /* TCG_CT_ALIAS is for the output arguments.
                           The input is tagged with TCG_CT_IALIAS. */
                        def->args_ct[i] = def->args_ct[oarg];
                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
                        def->args_ct[oarg].alias_index = i;
                        def->args_ct[i].ct |= TCG_CT_IALIAS;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    def->args_ct[i].ct |= TCG_CT_NEWREG;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                    break;
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}

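/*
 * For reference, a backend entry looks like (illustrative values):
 *
 *     { INDEX_op_add_i32, { "r", "r", "ri" } }
 *
 * i.e. output in any register, first input in any register, second
 * input register-or-immediate.  A digit such as "0" ties an input to
 * the numbered output, and "&" asks for an output register distinct
 * from every input (TCG_CT_NEWREG).
 */
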
void tcg_op_remove(TCGContext *s, TCGOp *op)
{
    int next = op->next;
    int prev = op->prev;

    /* We should never attempt to remove the list terminator.  */
    tcg_debug_assert(op != &s->gen_op_buf[0]);

    s->gen_op_buf[next].prev = prev;
    s->gen_op_buf[prev].next = next;

    memset(op, 0, sizeof(*op));

#ifdef CONFIG_PROFILER
    s->del_op_count++;
#endif
}

TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
                            TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op->prev;
    int next = old_op - s->gen_op_buf;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    s->gen_op_buf[prev].next = oi;
    old_op->prev = oi;

    return new_op;
}

TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
                           TCGOpcode opc, int nargs)
{
    int oi = s->gen_next_op_idx;
    int pi = s->gen_next_parm_idx;
    int prev = old_op - s->gen_op_buf;
    int next = old_op->next;
    TCGOp *new_op;

    tcg_debug_assert(oi < OPC_BUF_SIZE);
    tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
    s->gen_next_op_idx = oi + 1;
    s->gen_next_parm_idx = pi + nargs;

    new_op = &s->gen_op_buf[oi];
    *new_op = (TCGOp){
        .opc = opc,
        .args = pi,
        .prev = prev,
        .next = next
    };
    s->gen_op_buf[next].prev = oi;
    old_op->next = oi;

    return new_op;
}

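/*
 * gen_op_buf is a doubly linked list threaded through integer indices
 * instead of pointers; element 0 is the terminator, whose next/prev
 * fields name the first and last real ops.  An insertion therefore only
 * rewires two indices, which liveness_pass_2 below relies on to slip
 * loads in front of, and stores behind, an existing op.
 */
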
#define TS_DEAD  1
#define TS_MEM   2

#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))

/* liveness analysis: end of function: all temps are dead, and globals
   should be in memory. */
static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
{
    memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
    memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
}

/* liveness analysis: end of basic block: all temps are dead, globals
   and local temps should be in memory. */
static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
{
    int i, n;

    tcg_la_func_end(s, temp_state);
    for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
        if (s->temps[i].temp_local) {
            temp_state[i] |= TS_MEM;
        }
    }
}

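/*
 * temp_state[] holds one small bitmask per temp during the backward
 * walk: TS_DEAD means no later op reads the value, TS_MEM means the
 * in-memory copy must be valid.  TS_DEAD | TS_MEM for a global thus
 * reads "not needed in a register, but must be saved to memory", the
 * required state at function end and across most calls.
 */
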
/* Liveness analysis: update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed. */
static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
{
    int nb_globals = s->nb_globals;
    int oi, oi_prev;

    tcg_la_func_end(s, temp_state);

    for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
        int i, nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGArg arg;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_prev = op->prev;

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;

                nb_oargs = op->callo;
                nb_iargs = op->calli;
                call_flags = args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (temp_state[arg] != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                } else {
                do_not_remove_call:

                    /* output args are dead */
                    for (i = 0; i < nb_oargs; i++) {
                        arg = args[i];
                        if (temp_state[arg] & TS_DEAD) {
                            arg_life |= DEAD_ARG << i;
                        }
                        if (temp_state[arg] & TS_MEM) {
                            arg_life |= SYNC_ARG << i;
                        }
                        temp_state[arg] = TS_DEAD;
                    }

                    if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                        TCG_CALL_NO_READ_GLOBALS))) {
                        /* globals should go back to memory */
                        memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
                    } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                        /* globals should be synced to memory */
                        for (i = 0; i < nb_globals; i++) {
                            temp_state[i] |= TS_MEM;
                        }
                    }

                    /* record arguments that die in this helper */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg = args[i];
                        if (arg != TCG_CALL_DUMMY_ARG) {
                            if (temp_state[arg] & TS_DEAD) {
                                arg_life |= DEAD_ARG << i;
                            }
                        }
                    }
                    /* input arguments are live for preceding opcodes */
                    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                        arg = args[i];
                        if (arg != TCG_CALL_DUMMY_ARG) {
                            temp_state[arg] &= ~TS_DEAD;
                        }
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            temp_state[args[0]] = TS_DEAD;
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part. The result can be optimized to a simple
               add or sub. This happens often for x86_64 guest when the
               cpu mode is set to 32 bit. */
            if (temp_state[args[1]] == TS_DEAD) {
                if (temp_state[args[0]] == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end. */
                op->opc = opc = opc_new;
                args[1] = args[2];
                args[2] = args[4];
                /* Fall through and mark the single-word operation live. */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (temp_state[args[1]] == TS_DEAD) {
                if (temp_state[args[0]] == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                args[1] = args[2];
                args[2] = args[3];
            } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                args[0] = args[1];
                args[1] = args[2];
                args[2] = args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (temp_state[args[i]] != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
            do_remove:
                tcg_op_remove(s, op);
            } else {
            do_not_remove:
                /* output args are dead */
                for (i = 0; i < nb_oargs; i++) {
                    arg = args[i];
                    if (temp_state[arg] & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (temp_state[arg] & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    temp_state[arg] = TS_DEAD;
                }

                /* if end of basic block, update */
                if (def->flags & TCG_OPF_BB_END) {
                    tcg_la_bb_end(s, temp_state);
                } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                    /* globals should be synced to memory */
                    for (i = 0; i < nb_globals; i++) {
                        temp_state[i] |= TS_MEM;
                    }
                }

                /* record arguments that die in this opcode */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    arg = args[i];
                    if (temp_state[arg] & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }
                /* input arguments are live for preceding opcodes */
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    temp_state[args[i]] &= ~TS_DEAD;
                }
            }
            break;
        }
        op->life = arg_life;
    }
}

/* Liveness analysis: Convert indirect regs to direct temporaries.  */
static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
{
    int nb_globals = s->nb_globals;
    int16_t *dir_temps;
    int i, oi, oi_next;
    bool changes = false;

    dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
    memset(dir_temps, 0, nb_globals * sizeof(int16_t));

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dir_temps[i] = temp_idx(s, dts);
        }
    }

    memset(temp_state, TS_DEAD, nb_globals);

    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp *op = &s->gen_op_buf[oi];
        TCGArg *args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGArg arg, dir;

        oi_next = op->next;

        if (opc == INDEX_op_call) {
            nb_oargs = op->callo;
            nb_iargs = op->calli;
            call_flags = args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg = args[i];
            /* Note this unsigned test catches TCG_CALL_ARG_DUMMY too.  */
            if (arg < nb_globals) {
                dir = dir_temps[arg];
                if (dir != 0 && temp_state[arg] == TS_DEAD) {
                    TCGTemp *its = &s->temps[arg];
                    TCGOpcode lopc = (its->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
                    TCGArg *largs = &s->gen_opparam_buf[lop->args];

                    largs[0] = dir;
                    largs[1] = temp_idx(s, its->mem_base);
                    largs[2] = its->mem_offset;

                    /* Loaded, but synced with memory.  */
                    temp_state[arg] = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg = args[i];
            if (arg < nb_globals) {
                dir = dir_temps[arg];
                if (dir != 0) {
                    args[i] = dir;
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        temp_state[arg] = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                tcg_debug_assert(dir_temps[i] == 0
                                 || temp_state[i] != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                tcg_debug_assert(dir_temps[i] == 0
                                 || temp_state[i] == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        for (i = 0; i < nb_oargs; i++) {
            arg = args[i];
            if (arg >= nb_globals) {
                continue;
            }
            dir = dir_temps[arg];
            if (dir == 0) {
                continue;
            }
            args[i] = dir;
            changes = true;

            /* The output is now live and modified.  */
            temp_state[arg] = 0;

            /* Sync outputs upon their last write.  */
            if (NEED_SYNC_ARG(i)) {
                TCGTemp *its = &s->temps[arg];
                TCGOpcode sopc = (its->type == TCG_TYPE_I32
                                  ? INDEX_op_st_i32
                                  : INDEX_op_st_i64);
                TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                TCGArg *sargs = &s->gen_opparam_buf[sop->args];

                sargs[0] = dir;
                sargs[1] = temp_idx(s, its->mem_base);
                sargs[2] = its->mem_offset;

                temp_state[arg] = TS_MEM;
            }
            /* Drop outputs that are dead.  */
            if (IS_DEAD_ARG(i)) {
                temp_state[arg] = TS_DEAD;
            }
        }
    }

    return changes;
}

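/*
 * Net effect of pass 2 (sketch, with "x" an indirect global and "tmp"
 * its direct temp):
 *
 *     add_i32 x, x, y
 *
 * becomes
 *
 *     ld_i32 tmp, env, $off(x)    // inserted before the first use
 *     add_i32 tmp, tmp, y         // args rewritten to the direct temp
 *     st_i32 tmp, env, $off(x)    // inserted after, when NEED_SYNC_ARG
 *
 * so the register allocator below only ever sees direct temps.
 */
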
#ifdef CONFIG_DEBUG_TCG
static void dump_regs(TCGContext *s)
{
    TCGTemp *ts;
    int i;
    char buf[64];

    for(i = 0; i < s->nb_temps; i++) {
        ts = &s->temps[i];
        printf("  %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
        switch(ts->val_type) {
        case TEMP_VAL_REG:
            printf("%s", tcg_target_reg_names[ts->reg]);
            break;
        case TEMP_VAL_MEM:
            printf("%d(%s)", (int)ts->mem_offset,
                   tcg_target_reg_names[ts->mem_base->reg]);
            break;
        case TEMP_VAL_CONST:
            printf("$0x%" TCG_PRIlx, ts->val);
            break;
        case TEMP_VAL_DEAD:
            printf("D");
            break;
        default:
            printf("???");
            break;
        }
        printf("\n");
    }

    for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (s->reg_to_temp[i] != NULL) {
            printf("%s: %s\n",
                   tcg_target_reg_names[i],
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
        }
    }
}

static void check_regs(TCGContext *s)
{
    int reg;
    int k;
    TCGTemp *ts;
    char buf[64];

    for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
        ts = s->reg_to_temp[reg];
        if (ts != NULL) {
            if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
                printf("Inconsistency for register %s:\n",
                       tcg_target_reg_names[reg]);
                goto fail;
            }
        }
    }
    for (k = 0; k < s->nb_temps; k++) {
        ts = &s->temps[k];
        if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
            && s->reg_to_temp[ts->reg] != ts) {
            printf("Inconsistency for temp %s:\n",
                   tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
        fail:
            printf("reg state:\n");
            dump_regs(s);
            tcg_abort();
        }
    }
}
#endif

static void temp_allocate_frame(TCGContext *s, int temp)
{
    TCGTemp *ts;
    ts = &s->temps[temp];
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}

static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet);

/* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
   mark it free; otherwise mark it dead.  */
static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (ts->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ts->reg] = NULL;
    }
    ts->val_type = (free_or_dead < 0
                    || ts->temp_local
                    || temp_idx(s, ts) < s->nb_globals
                    ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
}

/* Mark a temporary as dead.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}

/* Sync a temporary to memory.  'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts,
                      TCGRegSet allocated_regs, int free_or_dead)
{
    if (ts->fixed_reg) {
        return;
    }
    if (!ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, temp_idx(s, ts));
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}

/* free register 'reg' by spilling the corresponding temporary if necessary */
static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
{
    TCGTemp *ts = s->reg_to_temp[reg];
    if (ts != NULL) {
        temp_sync(s, ts, allocated_regs, -1);
    }
}

/* Allocate a register belonging to desired_regs & ~allocated_regs */
static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet desired_regs,
                            TCGRegSet allocated_regs, bool rev)
{
    int i, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    const int *order;
    TCGReg reg;
    TCGRegSet reg_ct;

    tcg_regset_andnot(reg_ct, desired_regs, allocated_regs);
    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /* first try free registers */
    for(i = 0; i < n; i++) {
        reg = order[i];
        if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
            return reg;
    }

    /* XXX: do better spill choice */
    for(i = 0; i < n; i++) {
        reg = order[i];
        if (tcg_regset_test_reg(reg_ct, reg)) {
            tcg_reg_free(s, reg, allocated_regs);
            return reg;
        }
    }

    tcg_abort();
}

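/*
 * The 'rev' flag matters for indirect bases: they tend to stay live
 * across an entire TB, so handing them registers from the reversed
 * order (the far end of the call-saved set) keeps them from colliding
 * with the registers preferred for short-lived values.
 */
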
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_movi(s, ts->type, reg, ts->val);
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        tcg_abort();
    }
    ts->reg = reg;
    ts->val_type = TEMP_VAL_REG;
    s->reg_to_temp[reg] = ts;
}

/* Save a temporary to memory.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory.  Keep a tcg_debug_assert for safety.  */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}

/* save globals to their canonical location and assume they can be
   modified by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = 0; i < s->nb_globals; i++) {
        temp_save(s, &s->temps[i], allocated_regs);
    }
}

/* sync globals to their canonical location and assume they can be
   read by the following code.  'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = 0; i < s->nb_globals; i++) {
        TCGTemp *ts = &s->temps[i];
        tcg_debug_assert(ts->val_type != TEMP_VAL_REG
                         || ts->fixed_reg
                         || ts->mem_coherent);
    }
}

/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];
        if (ts->temp_local) {
            temp_save(s, ts, allocated_regs);
        } else {
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety.  */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
        }
    }

    save_globals(s, allocated_regs);
}

static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life)
{
    if (ots->fixed_reg) {
        /* For fixed registers, we do not do any constant propagation.  */
        tcg_out_movi(s, ots->type, ots->reg, val);
        return;
    }

    /* The movi is not explicitly generated here.  */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}

static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
                               TCGLifeData arg_life)
{
    TCGTemp *ots = &s->temps[args[0]];
    tcg_target_ulong val = args[1];

    tcg_reg_alloc_do_movi(s, ots, val, arg_life);
}

static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                              const TCGArg *args, TCGLifeData arg_life)
{
    TCGRegSet allocated_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    tcg_regset_set(allocated_regs, s->reserved_regs);
    ots = &s->temps[args[0]];
    ts = &s->temps[args[1]];

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
    }

    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, args[0]);
        }
        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
    } else {
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = NULL;
            }
            ots->reg = ts->reg;
            temp_dead(s, ts);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs, ots->indirect_base);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
        if (NEED_SYNC_ARG(0)) {
            temp_sync(s, ots, allocated_regs, 0);
        }
    }
}

static void tcg_reg_alloc_op(TCGContext *s,
                             const TCGOpDef *def, TCGOpcode opc,
                             const TCGArg *args, TCGLifeData arg_life)
{
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    tcg_regset_set(i_allocated_regs, s->reserved_regs);
    tcg_regset_set(o_allocated_regs, s->reserved_regs);

    /* satisfy input constraints */
    for(k = 0; k < nb_iargs; k++) {
        i = def->sorted_args[nb_oargs + k];
        arg = args[i];
        arg_ct = &def->args_ct[i];
        ts = &s->temps[arg];

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            goto iarg_end;
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs);

        if (arg_ct->ct & TCG_CT_IALIAS) {
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != args[arg_ct->alias_index])
                    goto allocate_in_reg;
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                int k2, i2;
                for (k2 = 0 ; k2 < k ; k2++) {
                    i2 = def->sorted_args[nb_oargs + k2];
                    if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                        (new_args[i2] == ts->reg)) {
                        goto allocate_in_reg;
                    }
                }
            }
        }
        reg = ts->reg;
        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do: the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    iarg_end: ;
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, &s->temps[args[i]]);
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = args[i];
            arg_ct = &def->args_ct[i];
            ts = &s->temps[arg];
            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    ts->indirect_base);
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = NULL;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = ts;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    tcg_out_op(s, opc, new_args, const_args);

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = &s->temps[args[i]];
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}

#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif

static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
                               const TCGArg * const args, TCGLifeData arg_life)
{
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
    flags = args[nb_oargs + nb_iargs + 1];

    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for(i = nb_regs; i < nb_iargs; i++) {
        arg = args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    tcg_regset_set(allocated_regs, s->reserved_regs);
    for(i = 0; i < nb_regs; i++) {
        arg = args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = &s->temps[arg];
            reg = tcg_target_call_iarg_regs[i];
            tcg_reg_free(s, reg, allocated_regs);

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                TCGRegSet arg_set;

                tcg_regset_clear(arg_set);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, &s->temps[args[i]]);
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = args[i];
        ts = &s->temps[arg];
        reg = tcg_target_call_oarg_regs[i];
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}
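
/* A hedged illustration of the save/sync decision above.  The call
   flags originate in the helper's DEF_HELPER_FLAGS_* declaration; the
   helper below is hypothetical, not a real QEMU helper:

       DEF_HELPER_FLAGS_2(example_pure, TCG_CALL_NO_RWG, i32, env, i32)

   TCG_CALL_NO_RWG expands to TCG_CALL_NO_READ_GLOBALS (which implies no
   writes either), so for such a call neither save_globals nor
   sync_globals is needed.  A helper flagged only TCG_CALL_NO_WG
   (TCG_CALL_NO_WRITE_GLOBALS) may read globals, so they are synced to
   their memory slots but may stay cached in registers; an unflagged
   helper forces a full save_globals. */
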
#ifdef CONFIG_PROFILER

static int64_t tcg_table_op_count[NB_OPS];

void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    int i;

    for (i = 0; i < NB_OPS; i++) {
        cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
                    tcg_table_op_count[i]);
    }
}
#else
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif
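
/* These statistics are reachable at runtime from the QEMU monitor.
   Assuming a build configured with --enable-profiler, something like:

       (qemu) info opcount

   prints the per-opcode table above, while "info jit" includes the
   summary produced by tcg_dump_info below.  (The monitor command names
   are as of this writing and may differ between versions.) */
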
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
    int i, oi, oi_next, num_insns;

#ifdef CONFIG_PROFILER
    {
        int n;

        n = s->gen_op_buf[0].prev + 1;
        s->op_count += n;
        if (n > s->op_count_max) {
            s->op_count_max = n;
        }

        n = s->nb_temps;
        s->temp_count += n;
        if (n > s->temp_count_max) {
            s->temp_count_max = n;
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_PROFILER
    s->opt_time -= profile_getclock();
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    s->opt_time += profile_getclock();
    s->la_time -= profile_getclock();
#endif

    {
        uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);

        liveness_pass_1(s, temp_state);

        if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
            if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                         && qemu_log_in_addr_range(tb->pc))) {
                qemu_log_lock();
                qemu_log("OP before indirect lowering:\n");
                tcg_dump_ops(s);
                qemu_log("\n");
                qemu_log_unlock();
            }
#endif
            /* Replace indirect temps with direct temps. */
            if (liveness_pass_2(s, temp_state)) {
                /* If changes were made, re-run liveness. */
                liveness_pass_1(s, temp_state);
            }
        }
    }

#ifdef CONFIG_PROFILER
    s->la_time += profile_getclock();
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = tb->tc_ptr;
    s->code_ptr = tb->tc_ptr;

    tcg_out_tb_init(s);

    num_insns = -1;
    for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;

        oi_next = op->next;
#ifdef CONFIG_PROFILER
        tcg_table_op_count[opc]++;
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
            tcg_reg_alloc_mov(s, def, args, arg_life);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
            tcg_reg_alloc_movi(s, args, arg_life);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
#else
                a = args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, &s->temps[args[0]]);
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            if (def->flags & TCG_OPF_NOT_PRESENT) {
                tcg_abort();
            }
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, def, opc, args, arg_life);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation. */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
    if (!tcg_out_tb_finalize(s)) {
        return -1;
    }

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
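
/* A worked note on the high-water test above (the constant is hedged:
   see TCG_HIGHWATER in translate-all.c at the time of writing).  The
   translation cache is sized so that code_gen_highwater sits a fixed
   margin before the true end of the buffer.  Provided no single op,
   plus the TB finalization, can emit more than that margin, an op that
   begins below the mark cannot run past the buffer end.  One pointer
   comparison per op therefore suffices; on the -1 return the caller
   restarts translation with a flushed cache. */
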
#ifdef CONFIG_PROFILER
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGContext *s = &tcg_ctx;
    int64_t tb_count = s->tb_count;
    int64_t tb_div_count = tb_count ? tb_count : 1;
    int64_t tot = s->interm_time + s->code_time;

    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte  %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
#else
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
#endif

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
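
/* A schematic sketch of steps (2) and (3), loosely modeled on the
   existing backends.  It is disabled: the real tcg_register_jit lives
   in tcg-target.inc.c, and the CFI values below are host-specific
   placeholders rather than valid unwind info for any machine. */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_body[8];        /* host-specific DW_CFA_* opcodes */
} DebugFrame;

void tcg_register_jit(void *buf, size_t buf_size)
{
    static const DebugFrame debug_frame = {
        .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len */
        .h.cie.id = -1,                         /* marks this as a CIE */
        .h.cie.version = 1,
        .h.cie.code_align = 1,
        .h.cie.return_column = 0,               /* host-specific */
        .h.fde.len = sizeof(DebugFrame)
                   - offsetof(DebugFrame, h.fde.cie_offset),
    };
    /* fde.func_start and fde.func_len are patched in by
       tcg_register_jit_int when it appends this to the ELF image.  */
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif
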
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS. */

typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it. */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface. */
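
/* For reference (sketched from the GDB documentation, not exercised by
   QEMU itself): GDB plants a breakpoint inside __jit_debug_register_code
   and, each time it fires, reads __jit_debug_descriptor to find the
   entry being registered or unregistered.  QEMU registers exactly one
   image and never unregisters it; per the GDB docs, the unused
   unregister half of the protocol would look like:

       unlink the entry from the first_entry list;
       __jit_debug_descriptor.relevant_entry = entry;
       __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
       __jit_debug_register_code();
*/
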
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}
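
/* For example, given the string table assembled in tcg_register_jit_int
   below ("\0" ".text\0" ".debug_info\0" ...), find_string(img->str,
   ".text") returns 1 -- the offset just past the leading NUL -- which
   is precisely the form of index that sh_name and st_name expect.
   Note the function assumes the string is present; it does not
   terminate otherwise. */
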
static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address. */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it. */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities. */
    {
        FILE *f = fopen("/tmp/qemu.jit", "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite. */
            }
            fclose(f);
        }
    }
#endif

    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier. */

static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */