2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_LIVENESS_ANALYSIS
27 #define USE_TCG_OPTIMIZATIONS
29 #include "qemu/osdep.h"
31 /* Define to dump the ELF file used to communicate with GDB. */
34 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
35 /* define it to suppress various consistency checks (faster) */
39 #include "qemu-common.h"
40 #include "qemu/host-utils.h"
41 #include "qemu/timer.h"
43 /* Note: the long term plan is to reduce the dependencies on the QEMU
44 CPU definitions. Currently they are used for qemu_ld/st
46 #define NO_CPU_IO_DEFS
51 #if UINTPTR_MAX == UINT32_MAX
52 # define ELF_CLASS ELFCLASS32
54 # define ELF_CLASS ELFCLASS64
56 #ifdef HOST_WORDS_BIGENDIAN
57 # define ELF_DATA ELFDATA2MSB
59 # define ELF_DATA ELFDATA2LSB
65 /* Forward declarations for functions declared in tcg-target.c and used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69 intptr_t value, intptr_t addend);
71 /* The CIE and FDE header definitions will be common to all hosts. */
73 uint32_t len __attribute__((aligned((sizeof(void *)))));
79 uint8_t return_column;
82 typedef struct QEMU_PACKED {
83 uint32_t len __attribute__((aligned((sizeof(void *)))));
87 } DebugFrameFDEHeader;
89 typedef struct QEMU_PACKED {
91 DebugFrameFDEHeader fde;
94 static void tcg_register_jit_int(void *buf, size_t size,
95 const void *debug_frame,
96 size_t debug_frame_size)
97 __attribute__((unused));
99 /* Forward declarations for functions declared and used in tcg-target.c. */
100 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105 TCGReg ret, tcg_target_long arg);
106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
107 const int *const_args);
108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
111 static int tcg_target_const_match(tcg_target_long val, TCGType type,
112 const TCGArgConstraint *arg_ct);
113 static void tcg_out_tb_init(TCGContext *s);
114 static bool tcg_out_tb_finalize(TCGContext *s);
118 static TCGRegSet tcg_target_available_regs[2];
119 static TCGRegSet tcg_target_call_clobber_regs;
121 #if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Byte-sized emitter; only built when TCG_TARGET_INSN_UNIT_SIZE == 1.
   NOTE(review): the body is not visible in this view; presumably it stores v
   at s->code_ptr like the wider tcg_outNN helpers below (confirm). */
122 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
/* Byte-sized patch helper operating on an explicit insn pointer p.
   NOTE(review): the remaining parameters and the body are not visible here. */
127 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
134 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit the 16-bit value v at s->code_ptr.  The insn-unit-size == 2 case is
   tested first; the statements shown after that test copy v into the buffer
   with memcpy and advance code_ptr by 2 / TCG_TARGET_INSN_UNIT_SIZE units. */
135 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
137 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
140 tcg_insn_unit *p = s->code_ptr;
141 memcpy(p, &v, sizeof(v));
142 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
/* Overwrite 16 bits at insn pointer p.  After the insn-unit-size == 2 test,
   the visible statement stores v (a value declared on a line not shown here)
   at p with memcpy. */
146 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
149 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
152 memcpy(p, &v, sizeof(v));
157 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit the 32-bit value v at s->code_ptr.  The insn-unit-size == 4 case is
   tested first; the statements shown after that test copy v into the buffer
   with memcpy and advance code_ptr by 4 / TCG_TARGET_INSN_UNIT_SIZE units. */
158 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
160 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
163 tcg_insn_unit *p = s->code_ptr;
164 memcpy(p, &v, sizeof(v));
165 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
/* Overwrite 32 bits at insn pointer p.  After the insn-unit-size == 4 test,
   the visible statement stores v (a value declared on a line not shown here)
   at p with memcpy. */
169 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
172 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
175 memcpy(p, &v, sizeof(v));
180 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit the 64-bit value v at s->code_ptr.  The insn-unit-size == 8 case is
   tested first; the statements shown after that test copy v into the buffer
   with memcpy and advance code_ptr by 8 / TCG_TARGET_INSN_UNIT_SIZE units. */
181 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
183 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
186 tcg_insn_unit *p = s->code_ptr;
187 memcpy(p, &v, sizeof(v));
188 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
/* Overwrite 64 bits at insn pointer p.  After the insn-unit-size == 8 test,
   the visible statement stores v (a value declared on a line not shown here)
   at p with memcpy. */
192 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
195 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
198 memcpy(p, &v, sizeof(v));
203 /* label relocation processing */
/* Apply or record a relocation of kind `type` at code_ptr against label l.
   One path patches immediately through patch_reloc() using l->u.value; the
   other allocates a TCGRelocation from the tcg_malloc pool and pushes it on
   the front of the label's l->u.first_reloc list.
   NOTE(review): the condition selecting between the two paths is on a line
   not visible here; presumably it tests l->has_value (confirm). */
205 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
206 TCGLabel *l, intptr_t addend)
211 /* FIXME: This may break relocations on RISC targets that
212 modify instruction fields in place. The caller may not have
213 written the initial value. */
214 patch_reloc(code_ptr, type, l->u.value, addend);
216 /* add a new relocation entry */
217 r = tcg_malloc(sizeof(TCGRelocation));
221 r->next = l->u.first_reloc;
222 l->u.first_reloc = r;
/* Bind label l to the code address ptr.  Asserts that the label has not been
   given a value yet, applies every relocation queued on l->u.first_reloc via
   patch_reloc() with the address as the value, and stores ptr in
   l->u.value_ptr. */
226 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
228 intptr_t value = (intptr_t)ptr;
231 assert(!l->has_value);
/* Resolve all references that were emitted before the label was placed. */
233 for (r = l->u.first_reloc; r != NULL; r = r->next) {
234 patch_reloc(r->ptr, r->type, value, r->addend);
238 l->u.value_ptr = ptr;
/* Allocate a new TCGLabel from the tcg_malloc pool, working on the global
   context tcg_ctx.  Initialisation and the return statement are on lines not
   visible here. */
241 TCGLabel *gen_new_label(void)
243 TCGContext *s = &tcg_ctx;
244 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
253 #include "tcg-target.c"
255 /* pool based memory allocation */
/* Out-of-line part of the pool allocator.
   Requests larger than TCG_POOL_CHUNK_SIZE get their own g_malloc'ed TCGPool,
   linked at the head of s->pool_first_large.  Otherwise a pool of
   TCG_POOL_CHUNK_SIZE bytes is used; the visible path allocates a fresh one
   and links it after s->pool_current.  Finally pool_cur is set just past the
   `size` bytes handed out and pool_end to the end of the pool's data. */
256 void *tcg_malloc_internal(TCGContext *s, int size)
261 if (size > TCG_POOL_CHUNK_SIZE) {
262 /* big malloc: insert a new pool (XXX: could optimize) */
263 p = g_malloc(sizeof(TCGPool) + size);
265 p->next = s->pool_first_large;
266 s->pool_first_large = p;
277 pool_size = TCG_POOL_CHUNK_SIZE;
278 p = g_malloc(sizeof(TCGPool) + pool_size);
282 s->pool_current->next = p;
291 s->pool_cur = p->data + size;
292 s->pool_end = p->data + p->size;
/* Reset the pool allocator state of s.  Walks the list of large pools
   (loop body not visible here; presumably frees each one, confirm), then
   clears pool_first_large, pool_cur, pool_end and pool_current. */
296 void tcg_pool_reset(TCGContext *s)
299 for (p = s->pool_first_large; p; p = t) {
303 s->pool_first_large = NULL;
304 s->pool_cur = s->pool_end = NULL;
305 s->pool_current = NULL;
308 typedef struct TCGHelperInfo {
315 #include "exec/helper-proto.h"
/* Table of helper descriptors; its entries are produced by including
   exec/helper-tcg.h inside the initializer.  tcg_context_init() inserts each
   entry into a hash table keyed by the entry's func pointer. */
317 static const TCGHelperInfo all_helpers[] = {
318 #include "exec/helper-tcg.h"
/* One-time initialisation of the TCG context s.
   - Zeroes the whole context.
   - Sums nb_iargs + nb_oargs over all NB_OPS opcode definitions, allocates
     one TCGArgConstraint and one sorted_args int per argument, and points
     each tcg_op_defs[op] at its slice of those arrays.
   - Creates the helper hash table (stored in s->helpers) and inserts every
     all_helpers[] entry keyed by its func pointer; per the comment below,
     passing NULL hash/equal functions gives direct pointer comparison. */
321 void tcg_context_init(TCGContext *s)
323 int op, total_args, n, i;
325 TCGArgConstraint *args_ct;
327 GHashTable *helper_table;
329 memset(s, 0, sizeof(*s));
332 /* Count total number of arguments and allocate the corresponding
335 for(op = 0; op < NB_OPS; op++) {
336 def = &tcg_op_defs[op];
337 n = def->nb_iargs + def->nb_oargs;
341 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
342 sorted_args = g_malloc(sizeof(int) * total_args);
344 for(op = 0; op < NB_OPS; op++) {
345 def = &tcg_op_defs[op];
346 def->args_ct = args_ct;
347 def->sorted_args = sorted_args;
348 n = def->nb_iargs + def->nb_oargs;
353 /* Register helpers. */
354 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
355 s->helpers = helper_table = g_hash_table_new(NULL, NULL);
357 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
358 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
359 (gpointer)&all_helpers[i]);
/* Generate the host prologue at the start of s->code_gen_buffer and carve it
   out of the buffer used for translated code.
   After tcg_target_qemu_prologue() has emitted the code, the icache is
   flushed for [buf0, buf1), code_gen_ptr/code_gen_buffer are moved to buf1,
   and code_gen_buffer_size is reduced by the prologue size.  The high-water
   mark is placed 1024 bytes before the end of the remaining buffer, and the
   remaining buffer is handed to tcg_register_jit().  When CPU_LOG_TB_OUT_ASM
   logging is enabled the prologue is disassembled to the log. */
365 void tcg_prologue_init(TCGContext *s)
367 size_t prologue_size, total_size;
370 /* Put the prologue at the beginning of code_gen_buffer. */
371 buf0 = s->code_gen_buffer;
374 s->code_gen_prologue = buf0;
376 /* Generate the prologue. */
377 tcg_target_qemu_prologue(s);
379 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
381 /* Deduct the prologue from the buffer. */
382 prologue_size = tcg_current_code_size(s);
383 s->code_gen_ptr = buf1;
384 s->code_gen_buffer = buf1;
386 total_size = s->code_gen_buffer_size - prologue_size;
387 s->code_gen_buffer_size = total_size;
389 /* Compute a high-water mark, at which we voluntarily flush the buffer
390 and start over. The size here is arbitrary, significantly larger
391 than we expect the code generation for any one opcode to require. */
392 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
394 tcg_register_jit(s->code_gen_buffer, total_size);
397 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
398 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
399 log_disas(buf0, prologue_size);
/* Reset per-translation state in s before generating a new op stream:
   drop all non-global temps (nb_temps = nb_globals), clear the free-temp
   bitmaps, rewind the frame allocation offset to frame_start, reset the
   op/parameter buffer indices (last op index -1 means "no op yet"), and
   allocate a fresh TCGBackendData from the pool into s->be. */
406 void tcg_func_start(TCGContext *s)
409 s->nb_temps = s->nb_globals;
411 /* No temps have been previously allocated for size or locality. */
412 memset(s->free_temps, 0, sizeof(s->free_temps));
415 s->current_frame_offset = s->frame_start;
417 #ifdef CONFIG_DEBUG_TCG
418 s->goto_tb_issue_mask = 0;
421 s->gen_first_op_idx = 0;
422 s->gen_last_op_idx = -1;
423 s->gen_next_op_idx = 0;
424 s->gen_next_parm_idx = 0;
426 s->be = tcg_malloc(sizeof(TCGBackendData));
/* Convert a TCGTemp pointer into its index within s->temps, with a debug
   assertion that the index lies in [0, s->nb_temps). */
429 static inline int temp_idx(TCGContext *s, TCGTemp *ts)
431 ptrdiff_t n = ts - s->temps;
432 tcg_debug_assert(n >= 0 && n < s->nb_temps);
/* Take the next slot of s->temps (post-incrementing s->nb_temps), zero it
   and return a pointer to it.  The TCG_MAX_TEMPS bound is only checked by a
   debug assertion. */
436 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
438 int n = s->nb_temps++;
439 tcg_debug_assert(n < TCG_MAX_TEMPS);
440 return memset(&s->temps[n], 0, sizeof(TCGTemp));
/* Allocate a temp slot for a global.  The debug assertion requires that no
   non-global temp exists yet (nb_globals == nb_temps), so globals stay
   contiguous at the start of s->temps. */
443 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
445 tcg_debug_assert(s->nb_globals == s->nb_temps);
447 return tcg_temp_alloc(s);
/* Create a global temp of the given type that is tied to host register reg,
   add reg to s->reserved_regs, and return the temp's index.
   NOTE(review): the body of the "32-bit host with non-I32 type" check is not
   visible here; presumably it rejects that combination (confirm). */
450 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
451 TCGReg reg, const char *name)
455 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
459 ts = tcg_global_alloc(s);
460 ts->base_type = type;
465 tcg_regset_set_reg(s->reserved_regs, reg);
467 return temp_idx(s, ts);
/* Describe the spill frame: it spans [start, start + size) relative to
   register reg.  A pointer-typed global named "_frame" is created for reg
   and remembered in s->frame_temp. */
470 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
473 s->frame_start = start;
474 s->frame_end = start + size;
475 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
476 s->frame_temp = &s->temps[idx];
/* Public wrapper: create a 32-bit global bound to host register reg in the
   global context and return it as a TCGv_i32.
   NOTE(review): the action taken when reg is already in reserved_regs is on
   a line not visible here. */
479 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
481 TCGContext *s = &tcg_ctx;
484 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
487 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
488 return MAKE_TCGV_I32(idx);
/* Public wrapper: create a 64-bit global bound to host register reg in the
   global context and return it as a TCGv_i64.
   NOTE(review): the action taken when reg is already in reserved_regs is on
   a line not visible here. */
491 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
493 TCGContext *s = &tcg_ctx;
496 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
499 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
500 return MAKE_TCGV_I64(idx);
/* Create a global temp that lives in memory at `offset` from the temp `base`
   and return the index of its (first) TCGTemp.

   On a 32-bit host a TCG_TYPE_I64 global is split into two consecutive
   TCG_TYPE_I32 temps, ts (named "<name>_0") and ts2 (named "<name>_1").
   Their memory offsets are offset + bigendian * 4 and
   offset + (1 - bigendian) * 4 respectively, so the pair maps onto the two
   host words of the 64-bit field in host byte order.  Otherwise a single
   temp of the requested type is created at `offset`.

   The per-half names are heap copies made with strdup().  The "_1" name
   belongs to the second half, ts2; assigning it to ts (as the code used to)
   left ts2 unnamed, mislabelled the first half and leaked the "_0" copy. */
503 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
504 intptr_t offset, const char *name)
506 TCGContext *s = &tcg_ctx;
507 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
508 TCGTemp *ts = tcg_global_alloc(s);
510 #ifdef HOST_WORDS_BIGENDIAN
514 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
515 TCGTemp *ts2 = tcg_global_alloc(s);
/* First half of the 64-bit value. */
518 ts->base_type = TCG_TYPE_I64;
519 ts->type = TCG_TYPE_I32;
520 ts->mem_allocated = 1;
521 ts->mem_base = base_ts;
522 ts->mem_offset = offset + bigendian * 4;
523 pstrcpy(buf, sizeof(buf), name);
524 pstrcat(buf, sizeof(buf), "_0");
525 ts->name = strdup(buf);
/* Second half; it must occupy the slot right after the first. */
527 tcg_debug_assert(ts2 == ts + 1);
528 ts2->base_type = TCG_TYPE_I64;
529 ts2->type = TCG_TYPE_I32;
530 ts2->mem_allocated = 1;
531 ts2->mem_base = base_ts;
532 ts2->mem_offset = offset + (1 - bigendian) * 4;
533 pstrcpy(buf, sizeof(buf), name);
534 pstrcat(buf, sizeof(buf), "_1");
535 ts2->name = strdup(buf);
537 ts->base_type = type;
539 ts->mem_allocated = 1;
540 ts->mem_base = base_ts;
541 ts->mem_offset = offset;
544 return temp_idx(s, ts);
/* Allocate a temporary of the given type (temp_local selects a local temp)
   and return its index.
   Free temps are tracked in one bitmap per (type, locality) pair, selected
   by k = type, plus TCG_TYPE_COUNT when temp_local is set.  If that bitmap
   has a set bit, the temp is reused and re-marked as allocated.  Otherwise
   new slots are taken with tcg_temp_alloc(); an I64 on a 32-bit host takes
   two consecutive I32-typed slots whose base_type is I64. */
547 static int tcg_temp_new_internal(TCGType type, int temp_local)
549 TCGContext *s = &tcg_ctx;
553 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
554 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
555 if (idx < TCG_MAX_TEMPS) {
556 /* There is already an available temp with the right type. */
557 clear_bit(idx, s->free_temps[k].l);
560 ts->temp_allocated = 1;
561 tcg_debug_assert(ts->base_type == type);
562 tcg_debug_assert(ts->temp_local == temp_local);
564 ts = tcg_temp_alloc(s);
565 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
566 TCGTemp *ts2 = tcg_temp_alloc(s);
568 ts->base_type = type;
569 ts->type = TCG_TYPE_I32;
570 ts->temp_allocated = 1;
571 ts->temp_local = temp_local;
/* The two halves must be adjacent in s->temps. */
573 tcg_debug_assert(ts2 == ts + 1);
574 ts2->base_type = TCG_TYPE_I64;
575 ts2->type = TCG_TYPE_I32;
576 ts2->temp_allocated = 1;
577 ts2->temp_local = temp_local;
579 ts->base_type = type;
581 ts->temp_allocated = 1;
582 ts->temp_local = temp_local;
584 idx = temp_idx(s, ts);
587 #if defined(CONFIG_DEBUG_TCG)
/* Typed wrapper: allocate a TCG_TYPE_I32 temp (local if temp_local is
   non-zero) and return it as a TCGv_i32. */
593 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
597 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
598 return MAKE_TCGV_I32(idx);
/* Typed wrapper: allocate a TCG_TYPE_I64 temp (local if temp_local is
   non-zero) and return it as a TCGv_i64. */
601 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
605 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
606 return MAKE_TCGV_I64(idx);
/* Release the temp with index idx so that tcg_temp_new_internal() can reuse
   it.  Asserts that idx names a non-global temp that is currently allocated,
   clears temp_allocated and sets the temp's bit in the free bitmap for its
   (base_type, locality) pair.  In CONFIG_DEBUG_TCG builds a negative
   temps_in_use count is reported on stderr. */
609 static void tcg_temp_free_internal(int idx)
611 TCGContext *s = &tcg_ctx;
615 #if defined(CONFIG_DEBUG_TCG)
617 if (s->temps_in_use < 0) {
618 fprintf(stderr, "More temporaries freed than allocated!\n");
622 assert(idx >= s->nb_globals && idx < s->nb_temps);
624 assert(ts->temp_allocated != 0);
625 ts->temp_allocated = 0;
/* Same bitmap selection as in tcg_temp_new_internal(). */
627 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
628 set_bit(idx, s->free_temps[k].l);
/* Free a 32-bit temporary by index via tcg_temp_free_internal(). */
631 void tcg_temp_free_i32(TCGv_i32 arg)
633 tcg_temp_free_internal(GET_TCGV_I32(arg));
/* Free a 64-bit temporary by index via tcg_temp_free_internal(). */
636 void tcg_temp_free_i64(TCGv_i64 arg)
638 tcg_temp_free_internal(GET_TCGV_I64(arg));
/* Allocate a new 32-bit temp and emit a movi that loads val into it. */
641 TCGv_i32 tcg_const_i32(int32_t val)
644 t0 = tcg_temp_new_i32();
645 tcg_gen_movi_i32(t0, val);
/* Allocate a new 64-bit temp and emit a movi that loads val into it. */
649 TCGv_i64 tcg_const_i64(int64_t val)
652 t0 = tcg_temp_new_i64();
653 tcg_gen_movi_i64(t0, val);
/* Allocate a new 32-bit local temp and emit a movi that loads val into it. */
657 TCGv_i32 tcg_const_local_i32(int32_t val)
660 t0 = tcg_temp_local_new_i32();
661 tcg_gen_movi_i32(t0, val);
/* Allocate a new 64-bit local temp and emit a movi that loads val into it. */
665 TCGv_i64 tcg_const_local_i64(int64_t val)
668 t0 = tcg_temp_local_new_i64();
669 tcg_gen_movi_i64(t0, val);
673 #if defined(CONFIG_DEBUG_TCG)
/* Debug-only helper (built under CONFIG_DEBUG_TCG) operating on the global
   context.  NOTE(review): the statement itself is not visible here;
   presumably it resets s->temps_in_use (confirm). */
674 void tcg_clear_temp_count(void)
676 TCGContext *s = &tcg_ctx;
/* Debug-only helper (built under CONFIG_DEBUG_TCG): tests whether the global
   context still has temporaries in use (s->temps_in_use non-zero).  Per the
   comment below, the counter is cleared in that case so the warning is not
   repeated immediately.  Return values are on lines not visible here. */
680 int tcg_check_temp_count(void)
682 TCGContext *s = &tcg_ctx;
683 if (s->temps_in_use) {
684 /* Clear the count so that we don't give another
685 * warning immediately next time around.
694 /* Note: we convert the 64 bit args to 32 bit and do some alignment
695 and endian swap. Maybe it would be better to do the alignment
696 and endian swap in tcg_reg_alloc_call(). */
/* Emit an INDEX_op_call op that calls helper `func` with `nargs` arguments
   and return temp `ret` (TCG_CALL_DUMMY_ARG when there is none).

   The helper's TCGHelperInfo is looked up in s->helpers by function pointer.
   Its sizemask is read as: bit 0 -> 64-bit return value; for argument i,
   bit (i+1)*2 -> 64-bit argument and the bit above it -> signed.

   Parameters are appended to s->gen_opparam_buf starting at
   s->gen_next_parm_idx in this order: return temp(s), argument temps,
   the function pointer, the flags.  The op itself is then written at
   s->gen_next_op_idx and the next-op/next-parm indices are advanced.

   Host-specific preprocessing (mutually exclusive #if branches):
   - 32-bit sparc (non-interpreter): 64-bit arguments are split into two
     fresh i32 temps, and a 64-bit return is received in two temps and
     re-assembled afterwards with tcg_gen_concat32_i64.
   - TCG_TARGET_EXTEND_ARGS on 64-bit hosts: arguments are sign- or
     zero-extended into fresh i64 temps (the guarding condition is on a line
     not visible here; presumably only non-64-bit arguments).
   The temps created by either branch are freed after the op is emitted. */
697 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
698 int nargs, TCGArg *args)
700 int i, real_args, nb_rets, pi, pi_first;
701 unsigned sizemask, flags;
704 info = g_hash_table_lookup(s->helpers, (gpointer)func);
706 sizemask = info->sizemask;
708 #if defined(__sparc__) && !defined(__arch64__) \
709 && !defined(CONFIG_TCG_INTERPRETER)
710 /* We have 64-bit values in one register, but need to pass as two
711 separate parameters. Split them. */
712 int orig_sizemask = sizemask;
713 int orig_nargs = nargs;
716 TCGV_UNUSED_I64(retl);
717 TCGV_UNUSED_I64(reth);
719 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
720 for (i = real_args = 0; i < nargs; ++i) {
721 int is_64bit = sizemask & (1 << (i+1)*2);
723 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
724 TCGv_i32 h = tcg_temp_new_i32();
725 TCGv_i32 l = tcg_temp_new_i32();
726 tcg_gen_extr_i64_i32(l, h, orig);
727 split_args[real_args++] = GET_TCGV_I32(h);
728 split_args[real_args++] = GET_TCGV_I32(l);
730 split_args[real_args++] = args[i];
737 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
738 for (i = 0; i < nargs; ++i) {
739 int is_64bit = sizemask & (1 << (i+1)*2);
740 int is_signed = sizemask & (2 << (i+1)*2);
742 TCGv_i64 temp = tcg_temp_new_i64();
743 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
745 tcg_gen_ext32s_i64(temp, orig);
747 tcg_gen_ext32u_i64(temp, orig);
749 args[i] = GET_TCGV_I64(temp);
752 #endif /* TCG_TARGET_EXTEND_ARGS */
/* Output (return value) parameters come first in the parameter buffer. */
754 pi_first = pi = s->gen_next_parm_idx;
755 if (ret != TCG_CALL_DUMMY_ARG) {
756 #if defined(__sparc__) && !defined(__arch64__) \
757 && !defined(CONFIG_TCG_INTERPRETER)
758 if (orig_sizemask & 1) {
759 /* The 32-bit ABI is going to return the 64-bit value in
760 the %o0/%o1 register pair. Prepare for this by using
761 two return temporaries, and reassemble below. */
762 retl = tcg_temp_new_i64();
763 reth = tcg_temp_new_i64();
764 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
765 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
768 s->gen_opparam_buf[pi++] = ret;
772 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
773 #ifdef HOST_WORDS_BIGENDIAN
774 s->gen_opparam_buf[pi++] = ret + 1;
775 s->gen_opparam_buf[pi++] = ret;
777 s->gen_opparam_buf[pi++] = ret;
778 s->gen_opparam_buf[pi++] = ret + 1;
782 s->gen_opparam_buf[pi++] = ret;
/* Input parameters; a 64-bit argument on a narrower host takes two slots. */
790 for (i = 0; i < nargs; i++) {
791 int is_64bit = sizemask & (1 << (i+1)*2);
792 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
793 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
794 /* some targets want aligned 64 bit args */
796 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
800 /* If stack grows up, then we will be placing successive
801 arguments at lower addresses, which means we need to
802 reverse the order compared to how we would normally
803 treat either big or little-endian. For those arguments
804 that will wind up in registers, this still works for
805 HPPA (the only current STACK_GROWSUP target) since the
806 argument registers are *also* allocated in decreasing
807 order. If another such target is added, this logic may
808 have to get more complicated to differentiate between
809 stack arguments and register arguments. */
810 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
811 s->gen_opparam_buf[pi++] = args[i] + 1;
812 s->gen_opparam_buf[pi++] = args[i];
814 s->gen_opparam_buf[pi++] = args[i];
815 s->gen_opparam_buf[pi++] = args[i] + 1;
821 s->gen_opparam_buf[pi++] = args[i];
/* Trailing constant parameters: callee address, then call flags. */
824 s->gen_opparam_buf[pi++] = (uintptr_t)func;
825 s->gen_opparam_buf[pi++] = flags;
827 i = s->gen_next_op_idx;
828 tcg_debug_assert(i < OPC_BUF_SIZE);
829 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
831 /* Set links for sequential allocation during translation. */
832 s->gen_op_buf[i] = (TCGOp){
833 .opc = INDEX_op_call,
841 /* Make sure the calli field didn't overflow. */
842 tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
844 s->gen_last_op_idx = i;
845 s->gen_next_op_idx = i + 1;
846 s->gen_next_parm_idx = pi;
848 #if defined(__sparc__) && !defined(__arch64__) \
849 && !defined(CONFIG_TCG_INTERPRETER)
850 /* Free all of the parts we allocated above. */
851 for (i = real_args = 0; i < orig_nargs; ++i) {
852 int is_64bit = orig_sizemask & (1 << (i+1)*2);
854 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
855 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
856 tcg_temp_free_i32(h);
857 tcg_temp_free_i32(l);
862 if (orig_sizemask & 1) {
863 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
864 Note that describing these as TCGv_i64 eliminates an unnecessary
865 zero-extension that tcg_gen_concat_i32_i64 would create. */
866 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
867 tcg_temp_free_i64(retl);
868 tcg_temp_free_i64(reth);
870 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
871 for (i = 0; i < nargs; ++i) {
872 int is_64bit = sizemask & (1 << (i+1)*2);
874 TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
875 tcg_temp_free_i64(temp);
878 #endif /* TCG_TARGET_EXTEND_ARGS */
/* Reset register-allocator state before allocating a new op stream.
   Globals start as TEMP_VAL_REG or TEMP_VAL_MEM (the selecting condition is
   on a line not visible here).  Non-global temps start as TEMP_VAL_MEM when
   local and TEMP_VAL_DEAD otherwise, with mem_allocated cleared.  Every
   entry of s->reg_to_temp is set to -1. */
881 static void tcg_reg_alloc_start(TCGContext *s)
885 for(i = 0; i < s->nb_globals; i++) {
888 ts->val_type = TEMP_VAL_REG;
890 ts->val_type = TEMP_VAL_MEM;
893 for(i = s->nb_globals; i < s->nb_temps; i++) {
895 if (ts->temp_local) {
896 ts->val_type = TEMP_VAL_MEM;
898 ts->val_type = TEMP_VAL_DEAD;
900 ts->mem_allocated = 0;
903 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
904 s->reg_to_temp[i] = -1;
/* Format a printable name for the temp with index idx into buf (at most
   buf_size bytes).  Globals use their stored name; other temps are rendered
   as "loc<N>" or "tmp<N>", where N is the index relative to the first
   non-global temp.  The condition choosing between "loc" and "tmp" is on a
   line not visible here (presumably ts->temp_local). */
908 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size,
913 assert(idx >= 0 && idx < s->nb_temps);
915 if (idx < s->nb_globals) {
916 pstrcpy(buf, buf_size, ts->name);
919 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
921 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
926 /* Find helper name. */
/* Look up the helper whose function address is val in the s->helpers hash
   table.  `ret` starts as NULL; the lines that fill it from the found
   TCGHelperInfo and return it are not visible here. */
927 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
929 const char *ret = NULL;
931 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
/* Printable mnemonics for TCG comparison conditions, indexed by TCGCond
   value.  tcg_dump_ops() bounds-checks the index and also tests the entry
   for NULL before using it. */
939 static const char * const cond_name[] =
941 [TCG_COND_NEVER] = "never",
942 [TCG_COND_ALWAYS] = "always",
943 [TCG_COND_EQ] = "eq",
944 [TCG_COND_NE] = "ne",
945 [TCG_COND_LT] = "lt",
946 [TCG_COND_GE] = "ge",
947 [TCG_COND_LE] = "le",
948 [TCG_COND_GT] = "gt",
949 [TCG_COND_LTU] = "ltu",
950 [TCG_COND_GEU] = "geu",
951 [TCG_COND_LEU] = "leu",
952 [TCG_COND_GTU] = "gtu"
/* Printable names for memory operations; tcg_dump_ops() indexes this table
   with (memop & (MO_BSWAP | MO_SSIZE)).  Entries are not visible here. */
955 static const char * const ldst_name[] =
/* Write a textual listing of the current op stream to the QEMU log.
   Ops are visited by following op->next from s->gen_first_op_idx until the
   index becomes negative.  Per op:
   - insn_start: a " ----" separator (preceded by a newline except for the
     first op) followed by the TARGET_INSN_START_WORDS start words; when the
     target word is wider than a host register each word is rebuilt from two
     consecutive args.
   - call: op name, helper name (via tcg_find_helper on the function-pointer
     arg), flags in hex, number of outputs, then output and input temps;
     dummy inputs print as "<dummy>".
   - anything else: op name, output temps, input temps, then an
     opcode-specific rendering of the next constant arg (condition name,
     memop description, or label id) and finally the remaining constant args
     in hex. */
971 void tcg_dump_ops(TCGContext *s)
977 for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
978 int i, k, nb_oargs, nb_iargs, nb_cargs;
983 op = &s->gen_op_buf[oi];
985 def = &tcg_op_defs[c];
986 args = &s->gen_opparam_buf[op->args];
988 if (c == INDEX_op_insn_start) {
989 qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
991 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
993 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
994 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
998 qemu_log(" " TARGET_FMT_lx, a);
1000 } else if (c == INDEX_op_call) {
1001 /* variable number of arguments */
1002 nb_oargs = op->callo;
1003 nb_iargs = op->calli;
1004 nb_cargs = def->nb_cargs;
1006 /* function name, flags, out args */
1007 qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1008 tcg_find_helper(s, args[nb_oargs + nb_iargs]),
1009 args[nb_oargs + nb_iargs + 1], nb_oargs);
1010 for (i = 0; i < nb_oargs; i++) {
1011 qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1014 for (i = 0; i < nb_iargs; i++) {
1015 TCGArg arg = args[nb_oargs + i];
1016 const char *t = "<dummy>";
1017 if (arg != TCG_CALL_DUMMY_ARG) {
1018 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
1023 qemu_log(" %s ", def->name);
1025 nb_oargs = def->nb_oargs;
1026 nb_iargs = def->nb_iargs;
1027 nb_cargs = def->nb_cargs;
1030 for (i = 0; i < nb_oargs; i++) {
1034 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1037 for (i = 0; i < nb_iargs; i++) {
1041 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
/* Ops whose first constant arg is a comparison condition. */
1045 case INDEX_op_brcond_i32:
1046 case INDEX_op_setcond_i32:
1047 case INDEX_op_movcond_i32:
1048 case INDEX_op_brcond2_i32:
1049 case INDEX_op_setcond2_i32:
1050 case INDEX_op_brcond_i64:
1051 case INDEX_op_setcond_i64:
1052 case INDEX_op_movcond_i64:
1053 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
1054 qemu_log(",%s", cond_name[args[k++]]);
1056 qemu_log(",$0x%" TCG_PRIlx, args[k++]);
/* Guest memory ops: the constant arg packs a memop and an mmu index. */
1060 case INDEX_op_qemu_ld_i32:
1061 case INDEX_op_qemu_st_i32:
1062 case INDEX_op_qemu_ld_i64:
1063 case INDEX_op_qemu_st_i64:
1065 TCGMemOpIdx oi = args[k++];
1066 TCGMemOp op = get_memop(oi);
1067 unsigned ix = get_mmuidx(oi);
1069 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1070 qemu_log(",$0x%x,%u", op, ix);
1072 const char *s_al = "", *s_op;
1073 if (op & MO_AMASK) {
1074 if ((op & MO_AMASK) == MO_ALIGN) {
1080 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1081 qemu_log(",%s%s,%u", s_al, s_op, ix);
/* Ops that reference a label print it as $L<id>. */
1091 case INDEX_op_set_label:
1093 case INDEX_op_brcond_i32:
1094 case INDEX_op_brcond_i64:
1095 case INDEX_op_brcond2_i32:
1096 qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
1102 for (; i < nb_cargs; i++, k++) {
1103 qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
1110 /* we give more priority to constraints with fewer registers */
/* Compute the sort priority of argument k of `def`.
   An aliased argument is treated as if it allowed a single register.  For a
   register constraint the allowed registers are counted into n, and the
   priority is TCG_TARGET_NB_REGS - n + 1, so the fewer registers allowed,
   the higher the priority.  The results of the alias and non-register
   branches are on lines not visible here. */
1111 static int get_constraint_priority(const TCGOpDef *def, int k)
1113 const TCGArgConstraint *arg_ct;
1116 arg_ct = &def->args_ct[k];
1117 if (arg_ct->ct & TCG_CT_ALIAS) {
1118 /* an alias is equivalent to a single register */
1121 if (!(arg_ct->ct & TCG_CT_REG))
1124 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1125 if (tcg_regset_test_reg(arg_ct->u.regs, i))
1129 return TCG_TARGET_NB_REGS - n + 1;
1132 /* sort from highest priority to lowest */
/* Fill def->sorted_args[start .. start+n-1] with the argument indices
   start .. start+n-1, then reorder them by constraint priority using a
   simple pairwise compare-and-swap over all i < j.  The comparison that
   guards the swap is on a line not visible here; per the comment preceding
   this function the result is ordered from highest priority to lowest. */
1133 static void sort_constraints(TCGOpDef *def, int start, int n)
1135 int i, j, p1, p2, tmp;
1137 for(i = 0; i < n; i++)
1138 def->sorted_args[start + i] = start + i;
1141 for(i = 0; i < n - 1; i++) {
1142 for(j = i + 1; j < n; j++) {
1143 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1144 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1146 tmp = def->sorted_args[start + i];
1147 def->sorted_args[start + i] = def->sorted_args[start + j];
1148 def->sorted_args[start + j] = tmp;
/* Install the backend's argument constraints into tcg_op_defs.
   tdefs is an array terminated by an entry whose op is (TCGOpcode)-1.  For
   each entry, every argument's constraint string is parsed into
   def->args_ct[i]:
   - a leading digit aliases the input to that output argument: the input
     copies the output's constraint and is tagged TCG_CT_IALIAS, and the
     output becomes TCG_CT_ALIAS; both record each other in alias_index;
   - otherwise the string is scanned; one branch sets TCG_CT_CONST and the
     remaining letters go to target_parse_constraint(), whose failure is
     reported on stderr.
   Afterwards the output and input constraints are sorted separately with
   sort_constraints().  Under CONFIG_DEBUG_TCG a final pass over all op
   definitions reports invalid or missing entries on stderr. */
1154 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
1162 if (tdefs->op == (TCGOpcode)-1)
1165 assert((unsigned)op < NB_OPS);
1166 def = &tcg_op_defs[op];
1167 #if defined(CONFIG_DEBUG_TCG)
1168 /* Duplicate entry in op definitions? */
1172 nb_args = def->nb_iargs + def->nb_oargs;
1173 for(i = 0; i < nb_args; i++) {
1174 ct_str = tdefs->args_ct_str[i];
1175 /* Incomplete TCGTargetOpDef entry? */
1176 assert(ct_str != NULL);
1177 tcg_regset_clear(def->args_ct[i].u.regs);
1178 def->args_ct[i].ct = 0;
1179 if (ct_str[0] >= '0' && ct_str[0] <= '9') {
1181 oarg = ct_str[0] - '0';
1182 assert(oarg < def->nb_oargs);
1183 assert(def->args_ct[oarg].ct & TCG_CT_REG);
1184 /* TCG_CT_ALIAS is for the output arguments. The input
1185 argument is tagged with TCG_CT_IALIAS. */
1186 def->args_ct[i] = def->args_ct[oarg];
1187 def->args_ct[oarg].ct = TCG_CT_ALIAS;
1188 def->args_ct[oarg].alias_index = i;
1189 def->args_ct[i].ct |= TCG_CT_IALIAS;
1190 def->args_ct[i].alias_index = oarg;
1193 if (*ct_str == '\0')
1197 def->args_ct[i].ct |= TCG_CT_CONST;
1201 if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
1202 fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
1203 ct_str, i, def->name);
1211 /* TCGTargetOpDef entry with too much information? */
1212 assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1214 /* sort the constraints (XXX: this is just an heuristic) */
1215 sort_constraints(def, 0, def->nb_oargs);
1216 sort_constraints(def, def->nb_oargs, def->nb_iargs);
1222 printf("%s: sorted=", def->name);
1223 for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
1224 printf(" %d", def->sorted_args[i]);
1231 #if defined(CONFIG_DEBUG_TCG)
1233 for (op = 0; op < tcg_op_defs_max; op++) {
1234 const TCGOpDef *def = &tcg_op_defs[op];
1235 if (def->flags & TCG_OPF_NOT_PRESENT) {
1236 /* Wrong entry in op definitions? */
1238 fprintf(stderr, "Invalid op definition for %s\n", def->name);
1242 /* Missing entry in op definitions? */
1244 fprintf(stderr, "Missing op definition for %s\n", def->name);
/* Unlink op from the doubly linked op list kept as indices in
   s->gen_op_buf.  The successor's prev and the predecessor's next are
   rewired; when op was the last or first element, gen_last_op_idx or
   gen_first_op_idx is updated instead (the tests choosing between these are
   on lines not visible here).  The removed slot is then filled with 0xff
   bytes by memset(op, -1, ...). */
1255 void tcg_op_remove(TCGContext *s, TCGOp *op)
1257 int next = op->next;
1258 int prev = op->prev;
1261 s->gen_op_buf[next].prev = prev;
1263 s->gen_last_op_idx = prev;
1266 s->gen_op_buf[prev].next = next;
1268 s->gen_first_op_idx = next;
1271 memset(op, -1, sizeof(*op));
1273 #ifdef CONFIG_PROFILER
1278 #ifdef USE_LIVENESS_ANALYSIS
1279 /* liveness analysis: end of function: all temps are dead, and globals
1280 should be in memory. */
/* Liveness state at the end of the function: every temp is marked dead
   (dead_temps[i] = 1 for all nb_temps), every global is marked as needing to
   be in memory (mem_temps[i] = 1), and every non-global temp is marked as
   not needing memory (mem_temps[i] = 0). */
1281 static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
1284 memset(dead_temps, 1, s->nb_temps);
1285 memset(mem_temps, 1, s->nb_globals);
1286 memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
1289 /* liveness analysis: end of basic block: all temps are dead, globals
1290 and local temps should be in memory. */
/* Liveness state at the end of a basic block: every temp is marked dead,
   every global must be in memory, and a non-global temp must be in memory
   exactly when it is a local temp (mem_temps[i] = temp_local). */
1291 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
1296 memset(dead_temps, 1, s->nb_temps);
1297 memset(mem_temps, 1, s->nb_globals);
1298 for(i = s->nb_globals; i < s->nb_temps; i++) {
1299 mem_temps[i] = s->temps[i].temp_local;
1303 /* Liveness analysis: update the op_dead_args array to tell whether a
1304 given input argument is dead. Instructions that only update dead
1305 temporaries are removed. */
/* Backward liveness pass over the op stream.
   Allocates s->op_dead_args (uint16_t per op) and s->op_sync_args (uint8_t
   per op), plus two byte arrays indexed by temp: dead_temps[t] != 0 means
   the value of t is not needed by any later op, mem_temps[t] != 0 means t
   must be present in memory afterwards.  Both start from the end-of-function
   state, and ops are visited from s->gen_last_op_idx backwards.

   For each op, bit i of dead_args is set when argument i is dead after the
   op and bit i of sync_args when output i must be synced to memory.  Outputs
   are then marked dead and inputs live for the preceding ops.

   Special cases visible below:
   - call: a helper flagged TCG_CALL_NO_SIDE_EFFECTS whose outputs are all
     dead and not required in memory can be dropped; otherwise globals are
     marked for sync/death according to the NO_READ/NO_WRITE_GLOBALS flags.
   - discard: the temp becomes dead and is no longer required in memory.
   - add2/sub2: when the high result is dead the op is narrowed to a single
     add/sub (or removed when both halves are dead).
   - mulu2/muls2: narrowed to mul when the high half is dead, or to
     muluh/mulsh when the low half is dead and the host has that op.
   - default: an op without side effects and with at least one output is
     removed when all its outputs are dead and not required in memory; at the
     end of a basic block the state is reset with tcg_la_bb_end(); an op with
     side effects forces globals to be synced. */
1306 static void tcg_liveness_analysis(TCGContext *s)
1308 uint8_t *dead_temps, *mem_temps;
1309 int oi, oi_prev, nb_ops;
1311 nb_ops = s->gen_next_op_idx;
1312 s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1313 s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1315 dead_temps = tcg_malloc(s->nb_temps);
1316 mem_temps = tcg_malloc(s->nb_temps);
1317 tcg_la_func_end(s, dead_temps, mem_temps);
1319 for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
1320 int i, nb_iargs, nb_oargs;
1321 TCGOpcode opc_new, opc_new2;
1327 TCGOp * const op = &s->gen_op_buf[oi];
1328 TCGArg * const args = &s->gen_opparam_buf[op->args];
1329 TCGOpcode opc = op->opc;
1330 const TCGOpDef *def = &tcg_op_defs[opc];
1339 nb_oargs = op->callo;
1340 nb_iargs = op->calli;
1341 call_flags = args[nb_oargs + nb_iargs + 1];
1343 /* pure functions can be removed if their result is unused */
1344 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
1345 for (i = 0; i < nb_oargs; i++) {
1347 if (!dead_temps[arg] || mem_temps[arg]) {
1348 goto do_not_remove_call;
1355 /* output args are dead */
1358 for (i = 0; i < nb_oargs; i++) {
1360 if (dead_temps[arg]) {
1361 dead_args |= (1 << i);
1363 if (mem_temps[arg]) {
1364 sync_args |= (1 << i);
1366 dead_temps[arg] = 1;
1370 if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
1371 /* globals should be synced to memory */
1372 memset(mem_temps, 1, s->nb_globals);
1374 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
1375 TCG_CALL_NO_READ_GLOBALS))) {
1376 /* globals should go back to memory */
1377 memset(dead_temps, 1, s->nb_globals);
1380 /* record arguments that die in this helper */
1381 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1383 if (arg != TCG_CALL_DUMMY_ARG) {
1384 if (dead_temps[arg]) {
1385 dead_args |= (1 << i);
1389 /* input arguments are live for preceding opcodes */
1390 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1392 dead_temps[arg] = 0;
1394 s->op_dead_args[oi] = dead_args;
1395 s->op_sync_args[oi] = sync_args;
1399 case INDEX_op_insn_start:
1401 case INDEX_op_discard:
1402 /* mark the temporary as dead */
1403 dead_temps[args[0]] = 1;
1404 mem_temps[args[0]] = 0;
1407 case INDEX_op_add2_i32:
1408 opc_new = INDEX_op_add_i32;
1410 case INDEX_op_sub2_i32:
1411 opc_new = INDEX_op_sub_i32;
1413 case INDEX_op_add2_i64:
1414 opc_new = INDEX_op_add_i64;
1416 case INDEX_op_sub2_i64:
1417 opc_new = INDEX_op_sub_i64;
1421 /* Test if the high part of the operation is dead, but not
1422 the low part. The result can be optimized to a simple
1423 add or sub. This happens often for x86_64 guest when the
1424 cpu mode is set to 32 bit. */
1425 if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1426 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1429 /* Replace the opcode and adjust the args in place,
1430 leaving 3 unused args at the end. */
1431 op->opc = opc = opc_new;
1434 /* Fall through and mark the single-word operation live. */
1440 case INDEX_op_mulu2_i32:
1441 opc_new = INDEX_op_mul_i32;
1442 opc_new2 = INDEX_op_muluh_i32;
1443 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
1445 case INDEX_op_muls2_i32:
1446 opc_new = INDEX_op_mul_i32;
1447 opc_new2 = INDEX_op_mulsh_i32;
1448 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
1450 case INDEX_op_mulu2_i64:
1451 opc_new = INDEX_op_mul_i64;
1452 opc_new2 = INDEX_op_muluh_i64;
1453 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
1455 case INDEX_op_muls2_i64:
1456 opc_new = INDEX_op_mul_i64;
1457 opc_new2 = INDEX_op_mulsh_i64;
1458 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
1463 if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1464 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1465 /* Both parts of the operation are dead. */
1468 /* The high part of the operation is dead; generate the low. */
1469 op->opc = opc = opc_new;
1472 } else if (have_opc_new2 && dead_temps[args[0]]
1473 && !mem_temps[args[0]]) {
1474 /* The low part of the operation is dead; generate the high. */
1475 op->opc = opc = opc_new2;
1482 /* Mark the single-word operation live. */
1487 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1488 nb_iargs = def->nb_iargs;
1489 nb_oargs = def->nb_oargs;
1491 /* Test if the operation can be removed because all
1492 its outputs are dead. We assume that nb_oargs == 0
1493 implies side effects */
1494 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
1495 for (i = 0; i < nb_oargs; i++) {
1497 if (!dead_temps[arg] || mem_temps[arg]) {
1502 tcg_op_remove(s, op);
1505 /* output args are dead */
1508 for (i = 0; i < nb_oargs; i++) {
1510 if (dead_temps[arg]) {
1511 dead_args |= (1 << i);
1513 if (mem_temps[arg]) {
1514 sync_args |= (1 << i);
1516 dead_temps[arg] = 1;
1520 /* if end of basic block, update */
1521 if (def->flags & TCG_OPF_BB_END) {
1522 tcg_la_bb_end(s, dead_temps, mem_temps);
1523 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1524 /* globals should be synced to memory */
1525 memset(mem_temps, 1, s->nb_globals);
1528 /* record arguments that die in this opcode */
1529 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1531 if (dead_temps[arg]) {
1532 dead_args |= (1 << i);
1535 /* input arguments are live for preceding opcodes */
1536 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1538 dead_temps[arg] = 0;
1540 s->op_dead_args[oi] = dead_args;
1541 s->op_sync_args[oi] = sync_args;
/* Stand-in for the liveness pass: allocates one dead-args word (uint16_t)
   and one sync-args byte (uint8_t) per op, sized by s->gen_next_op_idx, and
   zeroes both arrays, so no argument is flagged as dead or as needing a sync.
   NOTE(review): presumably the variant built when USE_LIVENESS_ANALYSIS is
   not defined; the preprocessor lines are not visible in this excerpt. */
1548 /* dummy liveness analysis */
1549 static void tcg_liveness_analysis(TCGContext *s)
1551 int nb_ops = s->gen_next_op_idx;
1553 s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1554 memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
1555 s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1556 memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
/* Debug dump of the allocator state to stdout.  First loop: one line per
   temp, printing its name and a location chosen by ts->val_type (host
   register name, offset(base register) for a memory slot, or the constant
   value).  Second loop: walks the host registers and prints those whose
   reg_to_temp[] entry is >= 0 together with the name of the mapped temp. */
1561 static void dump_regs(TCGContext *s)
1567 for(i = 0; i < s->nb_temps; i++) {
1569 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
1570 switch(ts->val_type) {
1572 printf("%s", tcg_target_reg_names[ts->reg]);
1575 printf("%d(%s)", (int)ts->mem_offset,
1576 tcg_target_reg_names[ts->mem_base->reg]);
1578 case TEMP_VAL_CONST:
1579 printf("$0x%" TCG_PRIlx, ts->val);
/* host registers that currently hold a temp */
1591 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1592 if (s->reg_to_temp[i] >= 0) {
1594 tcg_target_reg_names[i],
1595 tcg_get_arg_str_idx(s, buf, sizeof(buf), s->reg_to_temp[i]));
/* Debug cross-check of reg_to_temp[] against the per-temp state.
   Register pass: for each host register, the temp index k found in
   reg_to_temp[reg] must describe a temp in TEMP_VAL_REG state, otherwise an
   "Inconsistency for register" message is printed (the second half of that
   condition is not visible in this excerpt).
   Temp pass: a temp in TEMP_VAL_REG state whose register does not map back
   to it (reg_to_temp[ts->reg] != k) triggers "Inconsistency for temp",
   followed by a "reg state:" heading. */
1600 static void check_regs(TCGContext *s)
1607 for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
1608 k = s->reg_to_temp[reg];
1611 if (ts->val_type != TEMP_VAL_REG ||
1613 printf("Inconsistency for register %s:\n",
1614 tcg_target_reg_names[reg]);
1619 for(k = 0; k < s->nb_temps; k++) {
1621 if (ts->val_type == TEMP_VAL_REG &&
1623 s->reg_to_temp[ts->reg] != k) {
1624 printf("Inconsistency for temp %s:\n",
1625 tcg_get_arg_str_idx(s, buf, sizeof(buf), k));
1627 printf("reg state:\n");
/* Give temp number 'temp' a slot in the frame.  Unless building for 64-bit
   sparc, s->current_frame_offset is first rounded up to a multiple of
   sizeof(tcg_target_long).  The slot is then recorded in the temp
   (mem_offset, mem_base = s->frame_temp, mem_allocated = 1) and the frame
   offset advances by one tcg_target_long.
   NOTE(review): the limit compared against in the overflow test, and what
   happens when it is exceeded, are not visible in this excerpt. */
1635 static void temp_allocate_frame(TCGContext *s, int temp)
1638 ts = &s->temps[temp];
1639 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1640 /* Sparc64 stack is accessed with offset of 2047 */
1641 s->current_frame_offset = (s->current_frame_offset +
1642 (tcg_target_long)sizeof(tcg_target_long) - 1) &
1643 ~(sizeof(tcg_target_long) - 1);
1645 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
1649 ts->mem_offset = s->current_frame_offset;
1650 ts->mem_base = s->frame_temp;
1651 ts->mem_allocated = 1;
1652 s->current_frame_offset += sizeof(tcg_target_long);
/* Write the value held in host register 'reg' back to the memory slot of the
   temp that s->reg_to_temp[reg] names.  The temp must be in TEMP_VAL_REG
   state (asserted).  The store is emitted only when the temp is neither
   mem_coherent nor a fixed register; a frame slot is allocated on demand
   with temp_allocate_frame().  mem_coherent is set to 1 at the end. */
1655 /* sync register 'reg' by saving it to the corresponding temporary */
1656 static inline void tcg_reg_sync(TCGContext *s, TCGReg reg)
1661 temp = s->reg_to_temp[reg];
1662 ts = &s->temps[temp];
1663 assert(ts->val_type == TEMP_VAL_REG);
1664 if (!ts->mem_coherent && !ts->fixed_reg) {
1665 if (!ts->mem_allocated) {
1666 temp_allocate_frame(s, temp);
1668 tcg_out_st(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
1670 ts->mem_coherent = 1;
/* Release host register 'reg': the temp recorded in s->reg_to_temp[reg] is
   written back with tcg_reg_sync(), switched to TEMP_VAL_MEM, and the
   register's reg_to_temp[] entry is reset to -1.
   NOTE(review): the guard for a register that holds no temp is not visible
   in this excerpt. */
1673 /* free register 'reg' by spilling the corresponding temporary if necessary */
1674 static void tcg_reg_free(TCGContext *s, TCGReg reg)
1678 temp = s->reg_to_temp[reg];
1680 tcg_reg_sync(s, reg);
1681 s->temps[temp].val_type = TEMP_VAL_MEM;
1682 s->reg_to_temp[reg] = -1;
/* Choose a host register from the candidate set reg1 & ~reg2 (computed into
   reg_ct with tcg_regset_andnot).  Candidates are scanned in the order given
   by tcg_target_reg_alloc_order[]:
     pass 1 looks for a candidate whose reg_to_temp[] entry is -1 (free);
     pass 2 takes the first candidate in the set and frees it with
     tcg_reg_free(), spilling whatever temp it held.
   NOTE(review): the return statements and the no-candidate path are not
   visible in this excerpt. */
1686 /* Allocate a register belonging to reg1 & ~reg2 */
1687 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
1693 tcg_regset_andnot(reg_ct, reg1, reg2);
1695 /* first try free registers */
1696 for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1697 reg = tcg_target_reg_alloc_order[i];
1698 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == -1)
1702 /* XXX: do better spill choice */
1703 for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1704 reg = tcg_target_reg_alloc_order[i];
1705 if (tcg_regset_test_reg(reg_ct, reg)) {
1706 tcg_reg_free(s, reg);
/* Nothing is changed for a fixed-register temp.  Otherwise, if the temp is
   currently in a register, that register's reg_to_temp[] entry is cleared to
   -1; the temp then becomes TEMP_VAL_MEM when it is a global
   (temp < s->nb_globals) or a local temp, and TEMP_VAL_DEAD in the other
   case (presumably an else branch; the line is not visible here). */
1714 /* mark a temporary as dead. */
1715 static inline void temp_dead(TCGContext *s, int temp)
1719 ts = &s->temps[temp];
1720 if (!ts->fixed_reg) {
1721 if (ts->val_type == TEMP_VAL_REG) {
1722 s->reg_to_temp[ts->reg] = -1;
1724 if (temp < s->nb_globals || ts->temp_local) {
1725 ts->val_type = TEMP_VAL_MEM;
1727 ts->val_type = TEMP_VAL_DEAD;
/* Does nothing for a fixed-register temp.  A constant temp is first
   materialized: a register is taken with tcg_reg_alloc(), the temp switches
   to TEMP_VAL_REG with mem_coherent cleared, and the constant is loaded with
   tcg_out_movi().  The register is then written back with tcg_reg_sync().
   NOTE(review): the remaining switch cases are not visible in this
   excerpt. */
1732 /* sync a temporary to memory. 'allocated_regs' is used in case a
1733 temporary register needs to be allocated to store a constant. */
1734 static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs)
1738 ts = &s->temps[temp];
1739 if (!ts->fixed_reg) {
1740 switch(ts->val_type) {
1741 case TEMP_VAL_CONST:
1742 ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
1744 ts->val_type = TEMP_VAL_REG;
1745 s->reg_to_temp[ts->reg] = temp;
1746 ts->mem_coherent = 0;
1747 tcg_out_movi(s, ts->type, ts->reg, ts->val);
1750 tcg_reg_sync(s, ts->reg);
/* With USE_LIVENESS_ANALYSIS the function only asserts that the temp is
   already in TEMP_VAL_MEM state or is a fixed register.  The other visible
   statement calls temp_sync() for the temp.
   NOTE(review): the #else/#endif lines and anything following temp_sync()
   are not visible in this excerpt. */
1761 /* save a temporary to memory. 'allocated_regs' is used in case a
1762 temporary register needs to be allocated to store a constant. */
1763 static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
1765 #ifdef USE_LIVENESS_ANALYSIS
1766 /* The liveness analysis already ensures that globals are back
1767 in memory. Keep an assert for safety. */
1768 assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
1770 temp_sync(s, temp, allocated_regs);
/* Calls temp_save() on every global temp, i.e. indices 0 .. nb_globals - 1. */
1775 /* save globals to their canonical location and assume they can be
1776 modified by the following code. 'allocated_regs' is used in case a
1777 temporary register needs to be allocated to store a constant. */
1778 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
1782 for(i = 0; i < s->nb_globals; i++) {
1783 temp_save(s, i, allocated_regs);
/* Iterates over the global temps (indices 0 .. nb_globals - 1).  With
   USE_LIVENESS_ANALYSIS each global is only checked: it must not be in a
   register unless it is a fixed register or mem_coherent.  The other visible
   statement calls temp_sync() on the global.
   NOTE(review): the #else/#endif lines are not visible in this excerpt. */
1787 /* sync globals to their canonical location and assume they can be
1788 read by the following code. 'allocated_regs' is used in case a
1789 temporary register needs to be allocated to store a constant. */
1790 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
1794 for (i = 0; i < s->nb_globals; i++) {
1795 #ifdef USE_LIVENESS_ANALYSIS
1796 assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
1797 s->temps[i].mem_coherent);
1799 temp_sync(s, i, allocated_regs);
/* Walks the non-global temps (indices nb_globals .. nb_temps - 1): local
   temps are passed to temp_save(); for the others, with
   USE_LIVENESS_ANALYSIS, the code asserts they are already TEMP_VAL_DEAD.
   Finishes by calling save_globals().
   NOTE(review): the branch taken for non-local temps without liveness
   analysis is not visible in this excerpt. */
1804 /* at the end of a basic block, we assume all temporaries are dead and
1805 all globals are stored at their canonical location. */
1806 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
1811 for(i = s->nb_globals; i < s->nb_temps; i++) {
1813 if (ts->temp_local) {
1814 temp_save(s, i, allocated_regs);
1816 #ifdef USE_LIVENESS_ANALYSIS
1817 /* The liveness analysis already ensures that temps are dead.
1818 Keep an assert for safety. */
1819 assert(ts->val_type == TEMP_VAL_DEAD);
1826 save_globals(s, allocated_regs);
/* Bit tests on the per-op masks.  Each macro reads a variable named
   dead_args (resp. sync_args) from the scope where it is expanded and yields
   bit n of it, n being the argument index. */
1829 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
1830 #define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
/* Register allocation for a move-immediate op; args[0] is the output temp.
   For a fixed-register output the constant is loaded right away with
   tcg_out_movi().  Otherwise no code is emitted: the output temp just
   becomes TEMP_VAL_CONST, after releasing the register it may have held.
   Afterwards bit 0 of sync_args requests temp_sync() and bit 0 of dead_args
   requests temp_dead() on the output.
   NOTE(review): where 'val' is read from and stored to is not visible in
   this excerpt. */
1832 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
1833 uint16_t dead_args, uint8_t sync_args)
1836 tcg_target_ulong val;
1838 ots = &s->temps[args[0]];
1841 if (ots->fixed_reg) {
1842 /* for fixed registers, we do not do any constant
1844 tcg_out_movi(s, ots->type, ots->reg, val);
1846 /* The movi is not explicitly generated here */
1847 if (ots->val_type == TEMP_VAL_REG)
1848 s->reg_to_temp[ots->reg] = -1;
1849 ots->val_type = TEMP_VAL_CONST;
1852 if (NEED_SYNC_ARG(0)) {
1853 temp_sync(s, args[0], s->reserved_regs);
1855 if (IS_DEAD_ARG(0)) {
1856 temp_dead(s, args[0]);
/* Register allocation for a mov op: args[0] is the destination temp (ots),
   args[1] the source temp (ts).  Visible cases, in order:
     - the source is loaded into a register of its own first when it sits in
       memory, or when it is not in a register and the destination must be
       synced or is a fixed register;
     - destination dead and not fixed: the source register is stored
       straight into the destination's memory slot (allocating the slot if
       needed), then both temps are marked dead as requested;
     - source is a constant: the destination becomes TEMP_VAL_CONST;
     - source dies here and neither temp is fixed: the mov is suppressed
       (NOTE(review): the lines handing the register over to the destination
       are not visible in this excerpt);
     - otherwise a register is allocated for the destination if it has none
       and tcg_out_mov() is emitted.
   In the register cases the destination ends as TEMP_VAL_REG with
   mem_coherent cleared, and is synced when bit 0 of sync_args is set. */
1860 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
1861 const TCGArg *args, uint16_t dead_args,
1864 TCGRegSet allocated_regs;
1866 TCGType otype, itype;
1868 tcg_regset_set(allocated_regs, s->reserved_regs);
1869 ots = &s->temps[args[0]];
1870 ts = &s->temps[args[1]];
1872 /* Note that otype != itype for no-op truncation. */
1876 /* If the source value is not in a register, and we're going to be
1877 forced to have it in a register in order to perform the copy,
1878 then copy the SOURCE value into its own register first. That way
1879 we don't have to reload SOURCE the next time it is used. */
1880 if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
1881 || ts->val_type == TEMP_VAL_MEM) {
1882 ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
1884 if (ts->val_type == TEMP_VAL_MEM) {
1885 tcg_out_ld(s, itype, ts->reg, ts->mem_base->reg, ts->mem_offset);
1886 ts->mem_coherent = 1;
1887 } else if (ts->val_type == TEMP_VAL_CONST) {
1888 tcg_out_movi(s, itype, ts->reg, ts->val);
1889 ts->mem_coherent = 0;
1891 s->reg_to_temp[ts->reg] = args[1];
1892 ts->val_type = TEMP_VAL_REG;
1895 if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
1896 /* mov to a non-saved dead register makes no sense (even with
1897 liveness analysis disabled). */
1898 assert(NEED_SYNC_ARG(0));
1899 /* The code above should have moved the temp to a register. */
1900 assert(ts->val_type == TEMP_VAL_REG);
1901 if (!ots->mem_allocated) {
1902 temp_allocate_frame(s, args[0]);
1904 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
1905 if (IS_DEAD_ARG(1)) {
1906 temp_dead(s, args[1]);
1908 temp_dead(s, args[0]);
1909 } else if (ts->val_type == TEMP_VAL_CONST) {
1910 /* propagate constant */
1911 if (ots->val_type == TEMP_VAL_REG) {
1912 s->reg_to_temp[ots->reg] = -1;
1914 ots->val_type = TEMP_VAL_CONST;
1916 if (IS_DEAD_ARG(1)) {
1917 temp_dead(s, args[1]);
1920 /* The code in the first if block should have moved the
1921 temp to a register. */
1922 assert(ts->val_type == TEMP_VAL_REG);
1923 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
1924 /* the mov can be suppressed */
1925 if (ots->val_type == TEMP_VAL_REG) {
1926 s->reg_to_temp[ots->reg] = -1;
1929 temp_dead(s, args[1]);
1931 if (ots->val_type != TEMP_VAL_REG) {
1932 /* When allocating a new register, make sure to not spill the
1934 tcg_regset_set_reg(allocated_regs, ts->reg);
1935 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
1938 tcg_out_mov(s, otype, ots->reg, ts->reg);
1940 ots->val_type = TEMP_VAL_REG;
1941 ots->mem_coherent = 0;
1942 s->reg_to_temp[ots->reg] = args[0];
1943 if (NEED_SYNC_ARG(0)) {
1944 tcg_reg_sync(s, ots->reg);
/* Generic register allocation for one op, followed by emission of the op
   with tcg_out_op().  Phases, as marked by the comments below:
     1. constant arguments are copied unchanged into new_args[];
     2. inputs are visited in def->sorted_args order: values in memory are
        loaded, constants are either passed through when
        tcg_target_const_match() accepts them or loaded into a register, and
        inputs aliased to an output or failing their register constraint are
        moved to a freshly allocated register;
     3. inputs flagged in dead_args are released with temp_dead();
     4. a TCG_OPF_BB_END op ends the basic block via tcg_reg_alloc_bb_end();
        otherwise call-clobbered registers are handled when
        TCG_OPF_CALL_CLOBBER is set and globals are synced when
        TCG_OPF_SIDE_EFFECTS is set, then outputs get registers (the aliased
        input register, the fixed register, or a newly allocated one);
     5. after emission, fixed-register outputs are copied into place and
        each output is synced and/or marked dead per sync_args/dead_args.
   NOTE(review): several short statements of this function are not visible
   in this excerpt; the summary follows the visible lines and comments. */
1949 static void tcg_reg_alloc_op(TCGContext *s,
1950 const TCGOpDef *def, TCGOpcode opc,
1951 const TCGArg *args, uint16_t dead_args,
1954 TCGRegSet allocated_regs;
1955 int i, k, nb_iargs, nb_oargs;
1958 const TCGArgConstraint *arg_ct;
1960 TCGArg new_args[TCG_MAX_OP_ARGS];
1961 int const_args[TCG_MAX_OP_ARGS];
1963 nb_oargs = def->nb_oargs;
1964 nb_iargs = def->nb_iargs;
1966 /* copy constants */
1967 memcpy(new_args + nb_oargs + nb_iargs,
1968 args + nb_oargs + nb_iargs,
1969 sizeof(TCGArg) * def->nb_cargs);
1971 /* satisfy input constraints */
1972 tcg_regset_set(allocated_regs, s->reserved_regs);
1973 for(k = 0; k < nb_iargs; k++) {
1974 i = def->sorted_args[nb_oargs + k];
1976 arg_ct = &def->args_ct[i];
1977 ts = &s->temps[arg];
1978 if (ts->val_type == TEMP_VAL_MEM) {
1979 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
1980 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
1981 ts->val_type = TEMP_VAL_REG;
1983 ts->mem_coherent = 1;
1984 s->reg_to_temp[reg] = arg;
1985 } else if (ts->val_type == TEMP_VAL_CONST) {
1986 if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
1987 /* constant is OK for instruction */
1989 new_args[i] = ts->val;
1992 /* need to move to a register */
1993 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
1994 tcg_out_movi(s, ts->type, reg, ts->val);
1995 ts->val_type = TEMP_VAL_REG;
1997 ts->mem_coherent = 0;
1998 s->reg_to_temp[reg] = arg;
2001 assert(ts->val_type == TEMP_VAL_REG);
2002 if (arg_ct->ct & TCG_CT_IALIAS) {
2003 if (ts->fixed_reg) {
2004 /* if fixed register, we must allocate a new register
2005 if the alias is not the same register */
2006 if (arg != args[arg_ct->alias_index])
2007 goto allocate_in_reg;
2009 /* if the input is aliased to an output and if it is
2010 not dead after the instruction, we must allocate
2011 a new register and move it */
2012 if (!IS_DEAD_ARG(i)) {
2013 goto allocate_in_reg;
2015 /* check if the current register has already been allocated
2016 for another input aliased to an output */
2018 for (k2 = 0 ; k2 < k ; k2++) {
2019 i2 = def->sorted_args[nb_oargs + k2];
2020 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2021 (new_args[i2] == ts->reg)) {
2022 goto allocate_in_reg;
2028 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2029 /* nothing to do : the constraint is satisfied */
2032 /* allocate a new register matching the constraint
2033 and move the temporary register into it */
2034 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2035 tcg_out_mov(s, ts->type, reg, ts->reg);
2039 tcg_regset_set_reg(allocated_regs, reg);
2043 /* mark dead temporaries and free the associated registers */
2044 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2045 if (IS_DEAD_ARG(i)) {
2046 temp_dead(s, args[i]);
2050 if (def->flags & TCG_OPF_BB_END) {
2051 tcg_reg_alloc_bb_end(s, allocated_regs);
2053 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2054 /* XXX: permit generic clobber register list ? */
2055 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2056 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2061 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2062 /* sync globals if the op has side effects and might trigger
2064 sync_globals(s, allocated_regs);
2067 /* satisfy the output constraints */
2068 tcg_regset_set(allocated_regs, s->reserved_regs);
2069 for(k = 0; k < nb_oargs; k++) {
2070 i = def->sorted_args[k];
2072 arg_ct = &def->args_ct[i];
2073 ts = &s->temps[arg];
2074 if (arg_ct->ct & TCG_CT_ALIAS) {
2075 reg = new_args[arg_ct->alias_index];
2077 /* if fixed register, we try to use it */
2079 if (ts->fixed_reg &&
2080 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2083 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2085 tcg_regset_set_reg(allocated_regs, reg);
2086 /* if a fixed register is used, then a move will be done afterwards */
2087 if (!ts->fixed_reg) {
2088 if (ts->val_type == TEMP_VAL_REG) {
2089 s->reg_to_temp[ts->reg] = -1;
2091 ts->val_type = TEMP_VAL_REG;
2093 /* temp value is modified, so the value kept in memory is
2094 potentially not the same */
2095 ts->mem_coherent = 0;
2096 s->reg_to_temp[reg] = arg;
2103 /* emit instruction */
2104 tcg_out_op(s, opc, new_args, const_args);
2106 /* move the outputs in the correct register if needed */
2107 for(i = 0; i < nb_oargs; i++) {
2108 ts = &s->temps[args[i]];
2110 if (ts->fixed_reg && ts->reg != reg) {
2111 tcg_out_mov(s, ts->type, ts->reg, reg);
2113 if (NEED_SYNC_ARG(i)) {
2114 tcg_reg_sync(s, reg);
2116 if (IS_DEAD_ARG(i)) {
2117 temp_dead(s, args[i]);
/* STACK_DIR(x) gives a stack displacement its sign: it negates x when
   TCG_TARGET_STACK_GROWSUP is defined, while the second definition passes x
   through unchanged (presumably the #else branch; the preprocessor lines
   between the two definitions are not visible in this excerpt). */
2122 #ifdef TCG_TARGET_STACK_GROWSUP
2123 #define STACK_DIR(x) (-(x))
2125 #define STACK_DIR(x) (x)
/* Register allocation and code emission for a helper call.
   args[] holds nb_oargs outputs, then nb_iargs inputs, then the function
   address and the call flags.  Visible steps:
     - at most ARRAY_SIZE(tcg_target_call_iarg_regs) inputs travel in
       registers; the rest get stack slots starting at
       TCG_TARGET_CALL_STACK_OFFSET, each stored from its register, or via a
       scratch register for memory and constant temps;
     - each register input is placed in tcg_target_call_iarg_regs[i] after
       that register has been freed with tcg_reg_free();
     - inputs flagged in dead_args are released, call-clobbered registers
       are handled, and globals are saved or synced according to the
       TCG_CALL_NO_READ_GLOBALS / TCG_CALL_NO_WRITE_GLOBALS flags;
     - after tcg_out_call(), each output is bound to
       tcg_target_call_oarg_regs[i] (copied out for fixed-register temps),
       then synced and/or marked dead per sync_args/dead_args.
   TCG_CALL_DUMMY_ARG input entries are skipped.
   NOTE(review): several short statements of this function are not visible
   in this excerpt. */
2128 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
2129 const TCGArg * const args, uint16_t dead_args,
2132 int flags, nb_regs, i;
2136 intptr_t stack_offset;
2137 size_t call_stack_size;
2138 tcg_insn_unit *func_addr;
2140 TCGRegSet allocated_regs;
2142 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
2143 flags = args[nb_oargs + nb_iargs + 1];
2145 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2146 if (nb_regs > nb_iargs) {
2150 /* assign stack slots first */
2151 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
2152 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2153 ~(TCG_TARGET_STACK_ALIGN - 1);
2154 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2155 if (allocate_args) {
2156 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2157 preallocate call stack */
2161 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2162 for(i = nb_regs; i < nb_iargs; i++) {
2163 arg = args[nb_oargs + i];
2164 #ifdef TCG_TARGET_STACK_GROWSUP
2165 stack_offset -= sizeof(tcg_target_long);
2167 if (arg != TCG_CALL_DUMMY_ARG) {
2168 ts = &s->temps[arg];
2169 if (ts->val_type == TEMP_VAL_REG) {
2170 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2171 } else if (ts->val_type == TEMP_VAL_MEM) {
2172 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
2174 /* XXX: not correct if reading values from the stack */
2175 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2176 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
2177 } else if (ts->val_type == TEMP_VAL_CONST) {
2178 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
2180 /* XXX: sign extend may be needed on some targets */
2181 tcg_out_movi(s, ts->type, reg, ts->val);
2182 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
2187 #ifndef TCG_TARGET_STACK_GROWSUP
2188 stack_offset += sizeof(tcg_target_long);
2192 /* assign input registers */
2193 tcg_regset_set(allocated_regs, s->reserved_regs);
2194 for(i = 0; i < nb_regs; i++) {
2195 arg = args[nb_oargs + i];
2196 if (arg != TCG_CALL_DUMMY_ARG) {
2197 ts = &s->temps[arg];
2198 reg = tcg_target_call_iarg_regs[i];
2199 tcg_reg_free(s, reg);
2200 if (ts->val_type == TEMP_VAL_REG) {
2201 if (ts->reg != reg) {
2202 tcg_out_mov(s, ts->type, reg, ts->reg);
2204 } else if (ts->val_type == TEMP_VAL_MEM) {
2205 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2206 } else if (ts->val_type == TEMP_VAL_CONST) {
2207 /* XXX: sign extend ? */
2208 tcg_out_movi(s, ts->type, reg, ts->val);
2212 tcg_regset_set_reg(allocated_regs, reg);
2216 /* mark dead temporaries and free the associated registers */
2217 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2218 if (IS_DEAD_ARG(i)) {
2219 temp_dead(s, args[i]);
2223 /* clobber call registers */
2224 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2225 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2230 /* Save globals if they might be written by the helper, sync them if
2231 they might be read. */
2232 if (flags & TCG_CALL_NO_READ_GLOBALS) {
2234 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
2235 sync_globals(s, allocated_regs);
2237 save_globals(s, allocated_regs);
2240 tcg_out_call(s, func_addr);
2242 /* assign output registers and emit moves if needed */
2243 for(i = 0; i < nb_oargs; i++) {
2245 ts = &s->temps[arg];
2246 reg = tcg_target_call_oarg_regs[i];
2247 assert(s->reg_to_temp[reg] == -1);
2249 if (ts->fixed_reg) {
2250 if (ts->reg != reg) {
2251 tcg_out_mov(s, ts->type, ts->reg, reg);
2254 if (ts->val_type == TEMP_VAL_REG) {
2255 s->reg_to_temp[ts->reg] = -1;
2257 ts->val_type = TEMP_VAL_REG;
2259 ts->mem_coherent = 0;
2260 s->reg_to_temp[reg] = arg;
2261 if (NEED_SYNC_ARG(i)) {
2262 tcg_reg_sync(s, reg);
2264 if (IS_DEAD_ARG(i)) {
2265 temp_dead(s, args[i]);
/* Per-opcode execution statistics.  With CONFIG_PROFILER, the table
   tcg_table_op_count[] holds one counter per opcode and tcg_dump_op_count()
   prints "name count" for each of the NB_OPS entries through cpu_fprintf.
   The second definition only prints a notice that the profiler is not
   compiled in (presumably the #else branch; that line is not visible in
   this excerpt). */
2271 #ifdef CONFIG_PROFILER
2273 static int64_t tcg_table_op_count[NB_OPS];
2275 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2279 for (i = 0; i < NB_OPS; i++) {
2280 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
2281 tcg_table_op_count[i]);
2285 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2287 cpu_fprintf(f, "[TCG profiler not compiled]\n");
/* Translate the op list held in 's' into host code written at gen_code_buf.
   Visible steps:
     - with CONFIG_PROFILER, op/temp maxima and optimization/liveness timers
       are updated;
     - tcg_liveness_analysis() fills the per-op dead/sync masks;
     - tcg_reg_alloc_start() resets the allocator and code_buf/code_ptr are
       pointed at gen_code_buf;
     - the ops are walked from s->gen_first_op_idx until the index goes
       negative, dispatching on the opcode: mov and movi have dedicated
       allocators, insn_start records the end offset of the previous guest
       instruction and the start words of the new one, discard kills its
       temp, set_label ends the basic block and binds the label, calls go to
       tcg_reg_alloc_call(), everything else to tcg_reg_alloc_op();
     - after each op, code_ptr is compared against s->code_gen_highwater;
     - tcg_out_tb_finalize() is called and the generated range is flushed
       from the instruction cache.
   Returns tcg_current_code_size(s) at the end of the visible path.
   NOTE(review): the values returned on buffer overflow or finalize failure
   are not visible in this excerpt. */
2292 int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
2294 int i, oi, oi_next, num_insns;
2296 #ifdef CONFIG_PROFILER
2300 n = s->gen_last_op_idx + 1;
2302 if (n > s->op_count_max) {
2303 s->op_count_max = n;
2308 if (n > s->temp_count_max) {
2309 s->temp_count_max = n;
2315 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
2322 #ifdef CONFIG_PROFILER
2323 s->opt_time -= profile_getclock();
2326 #ifdef USE_TCG_OPTIMIZATIONS
2330 #ifdef CONFIG_PROFILER
2331 s->opt_time += profile_getclock();
2332 s->la_time -= profile_getclock();
2335 tcg_liveness_analysis(s);
2337 #ifdef CONFIG_PROFILER
2338 s->la_time += profile_getclock();
2342 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
2343 qemu_log("OP after optimization and liveness analysis:\n");
2349 tcg_reg_alloc_start(s);
2351 s->code_buf = gen_code_buf;
2352 s->code_ptr = gen_code_buf;
2357 for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
2358 TCGOp * const op = &s->gen_op_buf[oi];
2359 TCGArg * const args = &s->gen_opparam_buf[op->args];
2360 TCGOpcode opc = op->opc;
2361 const TCGOpDef *def = &tcg_op_defs[opc];
2362 uint16_t dead_args = s->op_dead_args[oi];
2363 uint8_t sync_args = s->op_sync_args[oi];
2366 #ifdef CONFIG_PROFILER
2367 tcg_table_op_count[opc]++;
2371 case INDEX_op_mov_i32:
2372 case INDEX_op_mov_i64:
2373 tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
2375 case INDEX_op_movi_i32:
2376 case INDEX_op_movi_i64:
2377 tcg_reg_alloc_movi(s, args, dead_args, sync_args);
2379 case INDEX_op_insn_start:
2380 if (num_insns >= 0) {
2381 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2384 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2386 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2387 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
2391 s->gen_insn_data[num_insns][i] = a;
2394 case INDEX_op_discard:
2395 temp_dead(s, args[0]);
2397 case INDEX_op_set_label:
2398 tcg_reg_alloc_bb_end(s, s->reserved_regs);
2399 tcg_out_label(s, arg_label(args[0]), s->code_ptr);
2402 tcg_reg_alloc_call(s, op->callo, op->calli, args,
2403 dead_args, sync_args);
2406 /* Sanity check that we've not introduced any unhandled opcodes. */
2407 if (def->flags & TCG_OPF_NOT_PRESENT) {
2410 /* Note: in order to speed up the code, it would be much
2411 faster to have specialized register allocator functions for
2412 some common argument patterns */
2413 tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
2419 /* Test for (pending) buffer overflow. The assumption is that any
2420 one operation beginning below the high water mark cannot overrun
2421 the buffer completely. Thus we can test for overflow after
2422 generating code without having to check during generation. */
2423 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
2427 tcg_debug_assert(num_insns >= 0);
2428 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2430 /* Generate TB finalization at the end of block */
2431 if (!tcg_out_tb_finalize(s)) {
2435 /* flush instruction cache */
2436 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
2438 return tcg_current_code_size(s);
/* Profiler report.  With CONFIG_PROFILER, tcg_dump_info() prints the
   counters accumulated in tcg_ctx through cpu_fprintf: total JIT cycles
   (tot = interm_time + code_time), translated and aborted TB counts,
   per-TB averages (ops, deleted ops, temps, host code bytes, search data
   bytes), cycles per op and per byte, the share of time spent in each phase,
   and cpu_restore statistics.  Per-TB averages divide by tb_div_count, which
   is tb_count or 1 when tb_count is zero; the per-op and per-byte ratios
   print 0 when their divisor is zero.
   The second definition only prints a notice that the profiler is not
   compiled in (presumably the #else branch; that line is not visible in
   this excerpt). */
2441 #ifdef CONFIG_PROFILER
2442 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2444 TCGContext *s = &tcg_ctx;
2445 int64_t tb_count = s->tb_count;
2446 int64_t tb_div_count = tb_count ? tb_count : 1;
2447 int64_t tot = s->interm_time + s->code_time;
2449 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
2451 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
2452 tb_count, s->tb_count1 - tb_count,
2453 (double)(s->tb_count1 - s->tb_count)
2454 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
2455 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
2456 (double)s->op_count / tb_div_count, s->op_count_max);
2457 cpu_fprintf(f, "deleted ops/TB %0.2f\n",
2458 (double)s->del_op_count / tb_div_count);
2459 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
2460 (double)s->temp_count / tb_div_count, s->temp_count_max);
2461 cpu_fprintf(f, "avg host code/TB %0.1f\n",
2462 (double)s->code_out_len / tb_div_count);
2463 cpu_fprintf(f, "avg search data/TB %0.1f\n",
2464 (double)s->search_out_len / tb_div_count);
2466 cpu_fprintf(f, "cycles/op %0.1f\n",
2467 s->op_count ? (double)tot / s->op_count : 0);
2468 cpu_fprintf(f, "cycles/in byte %0.1f\n",
2469 s->code_in_len ? (double)tot / s->code_in_len : 0);
2470 cpu_fprintf(f, "cycles/out byte %0.1f\n",
2471 s->code_out_len ? (double)tot / s->code_out_len : 0);
2472 cpu_fprintf(f, "cycles/search byte %0.1f\n",
2473 s->search_out_len ? (double)tot / s->search_out_len : 0);
2477 cpu_fprintf(f, " gen_interm time %0.1f%%\n",
2478 (double)s->interm_time / tot * 100.0);
2479 cpu_fprintf(f, " gen_code time %0.1f%%\n",
2480 (double)s->code_time / tot * 100.0);
2481 cpu_fprintf(f, "optim./code time %0.1f%%\n",
2482 (double)s->opt_time / (s->code_time ? s->code_time : 1)
2484 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
2485 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2486 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
2488 cpu_fprintf(f, " avg cycles %0.1f\n",
2489 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
2492 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2494 cpu_fprintf(f, "[TCG profiler not compiled]\n");
2498 #ifdef ELF_HOST_MACHINE
2499 /* In order to use this feature, the backend needs to do three things:
2501 (1) Define ELF_HOST_MACHINE to indicate both what value to
2502 put into the ELF image and to indicate support for the feature.
2504 (2) Define tcg_register_jit. This should create a buffer containing
2505 the contents of a .debug_frame section that describes the post-
2506 prologue unwind info for the tcg machine.
2508 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
2511 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* One registered symbol file in the list shared with the debugger: links to
   the neighbouring entries plus the address and size in bytes of the
   in-memory image.  The layout is part of the GDB interface and must not be
   changed. */
2518 struct jit_code_entry {
2519 struct jit_code_entry *next_entry;
2520 struct jit_code_entry *prev_entry;
2521 const void *symfile_addr;
2522 uint64_t symfile_size;
/* Descriptor inspected by the debugger: the action being signalled, the
   entry that action concerns, and the head of the entry list.  The layout is
   part of the GDB interface and must not be changed.
   NOTE(review): the first member (initialised to 1 as the version in
   __jit_debug_descriptor) is not visible in this excerpt. */
2525 struct jit_descriptor {
2527 uint32_t action_flag;
2528 struct jit_code_entry *relevant_entry;
2529 struct jit_code_entry *first_entry;
/* Notification hook called after __jit_debug_descriptor has been updated.
   Declared noinline so that the call is not folded into its caller.
   NOTE(review): the function body is not visible in this excerpt. */
2532 void __jit_debug_register_code(void) __attribute__((noinline));
2533 void __jit_debug_register_code(void)
/* The single descriptor instance: first member (the version) is 1, the
   action flag and both entry pointers start out as zero. */
2538 /* Must statically initialize the version, because GDB may check
2539 the version before we can set it. */
2540 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
2542 /* End GDB interface. */
/*
 * Look up a NUL-terminated name in an ELF-style string table and return its
 * byte offset from the start of the table.
 *
 * strtab: consecutive NUL-terminated strings.  Entry 0 is skipped (the
 *         search starts at strtab + 1).  The entries must be followed by an
 *         empty string, e.g. zero padding, which marks the end of the table.
 * str:    name to look for.
 *
 * Returns the offset of the first entry equal to str, or 0 when the end of
 * the table is reached without a match.  Stopping at the terminating empty
 * entry keeps a lookup of an absent name from walking past the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Empty entry: end of table, name not present. */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
/* Build a minimal in-memory ELF image describing the code buffer and
   register it with the debugger through the GDB JIT interface.
     buf_ptr, buf_size:  address and size of the generated-code buffer; they
                         are written into the program header, the .text
                         section header, the code_gen_buffer symbol and the
                         DWARF low/high pc fields.
     debug_frame, debug_frame_size:  .debug_frame contents supplied by the
                         backend; copied right behind the ElfImage, after
                         which the FDE start/length are patched to cover the
                         buffer.
   The image is allocated with g_malloc() and handed to the debugger via the
   statically allocated one_entry and __jit_debug_register_code().
   e_ehsize is the size of the ELF header (ElfW(Ehdr)); the section header
   size is only equal to it on 64-bit hosts. */
2556 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
2557 const void *debug_frame,
2558 size_t debug_frame_size)
2560 struct __attribute__((packed)) DebugInfo {
2567 uintptr_t cu_low_pc;
2568 uintptr_t cu_high_pc;
2571 uintptr_t fn_low_pc;
2572 uintptr_t fn_high_pc;
2581 struct DebugInfo di;
2586 struct ElfImage *img;
2588 static const struct ElfImage img_template = {
2590 .e_ident[EI_MAG0] = ELFMAG0,
2591 .e_ident[EI_MAG1] = ELFMAG1,
2592 .e_ident[EI_MAG2] = ELFMAG2,
2593 .e_ident[EI_MAG3] = ELFMAG3,
2594 .e_ident[EI_CLASS] = ELF_CLASS,
2595 .e_ident[EI_DATA] = ELF_DATA,
2596 .e_ident[EI_VERSION] = EV_CURRENT,
2598 .e_machine = ELF_HOST_MACHINE,
2599 .e_version = EV_CURRENT,
2600 .e_phoff = offsetof(struct ElfImage, phdr),
2601 .e_shoff = offsetof(struct ElfImage, shdr),
/* size of the ELF header itself, not of a section header */
2602 .e_ehsize = sizeof(ElfW(Ehdr)),
2603 .e_phentsize = sizeof(ElfW(Phdr)),
2605 .e_shentsize = sizeof(ElfW(Shdr)),
2606 .e_shnum = ARRAY_SIZE(img->shdr),
2607 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
2608 #ifdef ELF_HOST_FLAGS
2609 .e_flags = ELF_HOST_FLAGS,
2612 .e_ident[EI_OSABI] = ELF_OSABI,
2620 [0] = { .sh_type = SHT_NULL },
2621 /* Trick: The contents of code_gen_buffer are not present in
2622 this fake ELF file; that got allocated elsewhere. Therefore
2623 we mark .text as SHT_NOBITS (similar to .bss) so that readers
2624 will not look for contents. We can record any address. */
2626 .sh_type = SHT_NOBITS,
2627 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
2629 [2] = { /* .debug_info */
2630 .sh_type = SHT_PROGBITS,
2631 .sh_offset = offsetof(struct ElfImage, di),
2632 .sh_size = sizeof(struct DebugInfo),
2634 [3] = { /* .debug_abbrev */
2635 .sh_type = SHT_PROGBITS,
2636 .sh_offset = offsetof(struct ElfImage, da),
2637 .sh_size = sizeof(img->da),
2639 [4] = { /* .debug_frame */
2640 .sh_type = SHT_PROGBITS,
2641 .sh_offset = sizeof(struct ElfImage),
2643 [5] = { /* .symtab */
2644 .sh_type = SHT_SYMTAB,
2645 .sh_offset = offsetof(struct ElfImage, sym),
2646 .sh_size = sizeof(img->sym),
2648 .sh_link = ARRAY_SIZE(img->shdr) - 1,
2649 .sh_entsize = sizeof(ElfW(Sym)),
2651 [6] = { /* .strtab */
2652 .sh_type = SHT_STRTAB,
2653 .sh_offset = offsetof(struct ElfImage, str),
2654 .sh_size = sizeof(img->str),
2658 [1] = { /* code_gen_buffer */
2659 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
2664 .len = sizeof(struct DebugInfo) - 4,
2666 .ptr_size = sizeof(void *),
2668 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
2670 .fn_name = "code_gen_buffer"
2673 1, /* abbrev number (the cu) */
2674 0x11, 1, /* DW_TAG_compile_unit, has children */
2675 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
2676 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2677 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2678 0, 0, /* end of abbrev */
2679 2, /* abbrev number (the fn) */
2680 0x2e, 0, /* DW_TAG_subprogram, no children */
2681 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
2682 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2683 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2684 0, 0, /* end of abbrev */
2685 0 /* no more abbrev */
2687 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2688 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2691 /* We only need a single jit entry; statically allocate it. */
2692 static struct jit_code_entry one_entry;
2694 uintptr_t buf = (uintptr_t)buf_ptr;
2695 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
2696 DebugFrameHeader *dfh;
/* start from the constant template, then patch in the run-time values */
2698 img = g_malloc(img_size);
2699 *img = img_template;
2701 img->phdr.p_vaddr = buf;
2702 img->phdr.p_paddr = buf;
2703 img->phdr.p_memsz = buf_size;
2705 img->shdr[1].sh_name = find_string(img->str, ".text");
2706 img->shdr[1].sh_addr = buf;
2707 img->shdr[1].sh_size = buf_size;
2709 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
2710 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
2712 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
2713 img->shdr[4].sh_size = debug_frame_size;
2715 img->shdr[5].sh_name = find_string(img->str, ".symtab");
2716 img->shdr[6].sh_name = find_string(img->str, ".strtab");
2718 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
2719 img->sym[1].st_value = buf;
2720 img->sym[1].st_size = buf_size;
2722 img->di.cu_low_pc = buf;
2723 img->di.cu_high_pc = buf + buf_size;
2724 img->di.fn_low_pc = buf;
2725 img->di.fn_high_pc = buf + buf_size;
/* the backend-supplied .debug_frame lives directly behind the ElfImage */
2727 dfh = (DebugFrameHeader *)(img + 1);
2728 memcpy(dfh, debug_frame, debug_frame_size);
2729 dfh->fde.func_start = buf;
2730 dfh->fde.func_len = buf_size;
2733 /* Enable this block to be able to debug the ELF image file creation.
2734 One can use readelf, objdump, or other inspection utilities. */
2736 FILE *f = fopen("/tmp/qemu.jit", "w+b");
/* one item of img_size bytes is written, so success is a return of 1 */
2738 if (fwrite(img, img_size, 1, f) != 1) {
2739 /* Avoid stupid unused return value warning for fwrite. */
2746 one_entry.symfile_addr = img;
2747 one_entry.symfile_size = img_size;
2749 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
2750 __jit_debug_descriptor.relevant_entry = &one_entry;
2751 __jit_debug_descriptor.first_entry = &one_entry;
2752 __jit_debug_register_code();
/* Variant used when ELF_HOST_MACHINE is not defined (this is the tail of
   that conditional, see the closing #endif): both functions keep the same
   signatures as the real implementation.
   NOTE(review): the function bodies are not visible in this excerpt;
   presumably they are empty. */
2755 /* No support for the feature. Provide the entry point expected by exec.c,
2756 and implement the internal function we declared earlier. */
2758 static void tcg_register_jit_int(void *buf, size_t size,
2759 const void *debug_frame,
2760 size_t debug_frame_size)
2764 void tcg_register_jit(void *buf, size_t buf_size)
2767 #endif /* ELF_HOST_MACHINE */