2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_LIVENESS_ANALYSIS
27 #define USE_TCG_OPTIMIZATIONS
29 #include "qemu/osdep.h"
31 /* Define to dump the ELF file used to communicate with GDB. */
34 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
35 /* define it to suppress various consistency checks (faster) */
39 #include "qemu-common.h"
40 #include "qemu/host-utils.h"
41 #include "qemu/timer.h"
43 /* Note: the long term plan is to reduce the dependencies on the QEMU
44 CPU definitions. Currently they are used for qemu_ld/st
46 #define NO_CPU_IO_DEFS
51 #if UINTPTR_MAX == UINT32_MAX
52 # define ELF_CLASS ELFCLASS32
54 # define ELF_CLASS ELFCLASS64
56 #ifdef HOST_WORDS_BIGENDIAN
57 # define ELF_DATA ELFDATA2MSB
59 # define ELF_DATA ELFDATA2LSB
65 /* Forward declarations for functions declared in tcg-target.c and used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69 intptr_t value, intptr_t addend);
71 /* The CIE and FDE header definitions will be common to all hosts. */
73 uint32_t len __attribute__((aligned((sizeof(void *)))));
79 uint8_t return_column;
82 typedef struct QEMU_PACKED {
83 uint32_t len __attribute__((aligned((sizeof(void *)))));
87 } DebugFrameFDEHeader;
89 typedef struct QEMU_PACKED {
91 DebugFrameFDEHeader fde;
94 static void tcg_register_jit_int(void *buf, size_t size,
95 const void *debug_frame,
96 size_t debug_frame_size)
97 __attribute__((unused));
99 /* Forward declarations for functions declared and used in tcg-target.c. */
100 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105 TCGReg ret, tcg_target_long arg);
106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
107 const int *const_args);
108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
111 static int tcg_target_const_match(tcg_target_long val, TCGType type,
112 const TCGArgConstraint *arg_ct);
113 static void tcg_out_tb_init(TCGContext *s);
114 static bool tcg_out_tb_finalize(TCGContext *s);
118 static TCGRegSet tcg_target_available_regs[2];
119 static TCGRegSet tcg_target_call_clobber_regs;
121 #if TCG_TARGET_INSN_UNIT_SIZE == 1
122 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
127 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
134 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
135 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
137 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
140 tcg_insn_unit *p = s->code_ptr;
141 memcpy(p, &v, sizeof(v));
142 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
146 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
149 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
152 memcpy(p, &v, sizeof(v));
157 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
158 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
160 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
163 tcg_insn_unit *p = s->code_ptr;
164 memcpy(p, &v, sizeof(v));
165 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
169 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
172 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
175 memcpy(p, &v, sizeof(v));
180 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
181 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
183 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
186 tcg_insn_unit *p = s->code_ptr;
187 memcpy(p, &v, sizeof(v));
188 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
192 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
195 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
198 memcpy(p, &v, sizeof(v));
203 /* label relocation processing */
/* Apply or queue a relocation of kind `type` at code_ptr against label l.
   Two paths are visible: patching immediately through patch_reloc() with
   l->u.value, or allocating a TCGRelocation with tcg_malloc() and pushing
   it on the head of the l->u.first_reloc list.
   NOTE(review): the test selecting between the two paths and the filling
   of the new entry's fields are not shown in this listing -- presumably
   the choice depends on l->has_value; confirm against the full source. */
205 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
206 TCGLabel *l, intptr_t addend)
211 /* FIXME: This may break relocations on RISC targets that
212 modify instruction fields in place. The caller may not have
213 written the initial value. */
214 patch_reloc(code_ptr, type, l->u.value, addend);
216 /* add a new relocation entry */
217 r = tcg_malloc(sizeof(TCGRelocation));
221 r->next = l->u.first_reloc;
222 l->u.first_reloc = r;
/* Bind label l to the code address ptr.  The label must not already have
   a value (asserted).  Every relocation queued on l->u.first_reloc is
   patched with the address, which is then stored in l->u.value_ptr. */
226 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
228 intptr_t value = (intptr_t)ptr;
231 assert(!l->has_value);
233 for (r = l->u.first_reloc; r != NULL; r = r->next) {
234 patch_reloc(r->ptr, r->type, value, r->addend);
238 l->u.value_ptr = ptr;
/* Create a new label for the global context tcg_ctx; storage comes from
   tcg_malloc().
   NOTE(review): the label's initialisation and the return statement are
   not visible in this listing. */
241 TCGLabel *gen_new_label(void)
243 TCGContext *s = &tcg_ctx;
244 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
253 #include "tcg-target.c"
255 /* pool based memory allocation */
/* Obtain `size` bytes from the pool allocator of context s.
   Requests larger than TCG_POOL_CHUNK_SIZE get a dedicated g_malloc'ed
   TCGPool that is pushed on the s->pool_first_large list.  Otherwise a
   pool of TCG_POOL_CHUNK_SIZE bytes may be g_malloc'ed and linked after
   s->pool_current.  On the visible tail, s->pool_cur is left pointing
   just past the first `size` bytes of the pool's data and s->pool_end at
   the end of that data.
   NOTE(review): the pool-reuse logic and the return statements are
   missing from this listing. */
256 void *tcg_malloc_internal(TCGContext *s, int size)
261 if (size > TCG_POOL_CHUNK_SIZE) {
262 /* big malloc: insert a new pool (XXX: could optimize) */
263 p = g_malloc(sizeof(TCGPool) + size);
265 p->next = s->pool_first_large;
266 s->pool_first_large = p;
277 pool_size = TCG_POOL_CHUNK_SIZE;
278 p = g_malloc(sizeof(TCGPool) + pool_size);
282 s->pool_current->next = p;
291 s->pool_cur = p->data + size;
292 s->pool_end = p->data + p->size;
/* Reset the pool allocator of s.  The list of oversized pools is walked
   (the loop body is not shown here -- presumably each pool is freed),
   then s->pool_first_large, s->pool_cur, s->pool_end and s->pool_current
   are all cleared to NULL. */
296 void tcg_pool_reset(TCGContext *s)
299 for (p = s->pool_first_large; p; p = t) {
303 s->pool_first_large = NULL;
304 s->pool_cur = s->pool_end = NULL;
305 s->pool_current = NULL;
308 typedef struct TCGHelperInfo {
315 #include "exec/helper-proto.h"
317 static const TCGHelperInfo all_helpers[] = {
318 #include "exec/helper-tcg.h"
/* One-time initialisation of a TCGContext.
   - Zeroes the whole context.
   - Sums nb_iargs + nb_oargs over all NB_OPS opcode definitions and
     g_malloc's one TCGArgConstraint array and one int array of that
     total size; each tcg_op_defs[] entry is then pointed at its slice
     via def->args_ct / def->sorted_args.
   - Builds s->helpers, a GLib hash table keyed by helper function
     pointer (NULL hash/equal functions select direct pointer hashing),
     with one entry per element of all_helpers[].
   NOTE(review): the accumulation of total_args, the advancing of the
   slice pointers and any target-specific initialisation are on lines
   not present in this listing. */
321 void tcg_context_init(TCGContext *s)
323 int op, total_args, n, i;
325 TCGArgConstraint *args_ct;
327 GHashTable *helper_table;
329 memset(s, 0, sizeof(*s));
332 /* Count total number of arguments and allocate the corresponding
335 for(op = 0; op < NB_OPS; op++) {
336 def = &tcg_op_defs[op];
337 n = def->nb_iargs + def->nb_oargs;
341 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
342 sorted_args = g_malloc(sizeof(int) * total_args);
344 for(op = 0; op < NB_OPS; op++) {
345 def = &tcg_op_defs[op];
346 def->args_ct = args_ct;
347 def->sorted_args = sorted_args;
348 n = def->nb_iargs + def->nb_oargs;
353 /* Register helpers. */
354 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
355 s->helpers = helper_table = g_hash_table_new(NULL, NULL);
357 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
358 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
359 (gpointer)&all_helpers[i]);
/* Generate the host prologue at the start of s->code_gen_buffer and
   carve it out of the buffer available for translated code.
   After tcg_target_qemu_prologue() has emitted the code, the instruction
   cache is flushed for [buf0, buf1), code_gen_ptr and code_gen_buffer
   are moved to buf1, code_gen_buffer_size is reduced by the prologue
   size, the high-water mark is placed 1024 bytes before the end of the
   remaining buffer, and the remaining buffer is registered through
   tcg_register_jit().  When CPU_LOG_TB_OUT_ASM logging is enabled the
   prologue size and disassembly are logged.
   NOTE(review): the assignment of buf1 is not visible in this listing;
   presumably it is the code pointer after prologue generation. */
365 void tcg_prologue_init(TCGContext *s)
367 size_t prologue_size, total_size;
370 /* Put the prologue at the beginning of code_gen_buffer. */
371 buf0 = s->code_gen_buffer;
374 s->code_gen_prologue = buf0;
376 /* Generate the prologue. */
377 tcg_target_qemu_prologue(s);
379 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
381 /* Deduct the prologue from the buffer. */
382 prologue_size = tcg_current_code_size(s);
383 s->code_gen_ptr = buf1;
384 s->code_gen_buffer = buf1;
386 total_size = s->code_gen_buffer_size - prologue_size;
387 s->code_gen_buffer_size = total_size;
389 /* Compute a high-water mark, at which we voluntarily flush the buffer
390 and start over. The size here is arbitrary, significantly larger
391 than we expect the code generation for any one opcode to require. */
392 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
394 tcg_register_jit(s->code_gen_buffer, total_size);
397 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
398 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
399 log_disas(buf0, prologue_size);
/* Reset per-translation state in s before generating a new block of ops:
   temporaries are truncated back to the globals, the free-temp bitmaps
   are cleared, the frame offset restarts at s->frame_start, the op list
   indices are reset to an empty list (first = 0, last = -1, next = 0,
   next parameter = 0) and fresh backend data is allocated into s->be
   with tcg_malloc().  With CONFIG_DEBUG_TCG the goto_tb issue mask is
   cleared as well.  Some statements of the function are not present in
   this listing. */
406 void tcg_func_start(TCGContext *s)
409 s->nb_temps = s->nb_globals;
411 /* No temps have been previously allocated for size or locality. */
412 memset(s->free_temps, 0, sizeof(s->free_temps));
415 s->current_frame_offset = s->frame_start;
417 #ifdef CONFIG_DEBUG_TCG
418 s->goto_tb_issue_mask = 0;
421 s->gen_first_op_idx = 0;
422 s->gen_last_op_idx = -1;
423 s->gen_next_op_idx = 0;
424 s->gen_next_parm_idx = 0;
426 s->be = tcg_malloc(sizeof(TCGBackendData));
/* Convert a TCGTemp pointer into its index within s->temps; the index is
   debug-asserted to lie in [0, s->nb_temps). */
429 static inline int temp_idx(TCGContext *s, TCGTemp *ts)
431 ptrdiff_t n = ts - s->temps;
432 tcg_debug_assert(n >= 0 && n < s->nb_temps);
/* Take the next slot of s->temps (post-incrementing s->nb_temps), zero it
   and return it.  The slot index is debug-asserted to be below
   TCG_MAX_TEMPS. */
436 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
438 int n = s->nb_temps++;
439 tcg_debug_assert(n < TCG_MAX_TEMPS);
440 return memset(&s->temps[n], 0, sizeof(TCGTemp));
/* Allocate a temp slot for a new global.  This is only valid while no
   non-global temps exist, i.e. s->nb_globals == s->nb_temps
   (debug-asserted).
   NOTE(review): the update of s->nb_globals is not visible in this
   listing. */
443 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
445 tcg_debug_assert(s->nb_globals == s->nb_temps);
447 return tcg_temp_alloc(s);
/* Create a global temp of the given type tied to host register reg, mark
   reg in s->reserved_regs, and return the index of the new temp.
   On a 32-bit host any type other than TCG_TYPE_I32 is caught by the
   visible check (the action taken is on a line not shown here).  The
   remaining field assignments, including use of `name`, are likewise
   missing from this listing. */
450 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
451 TCGReg reg, const char *name)
455 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
459 ts = tcg_global_alloc(s);
460 ts->base_type = type;
465 tcg_regset_set_reg(s->reserved_regs, reg);
467 return temp_idx(s, ts);
/* Describe the frame used by s: it spans [start, start + size) and is
   addressed through host register reg.  A pointer-typed global named
   "_frame" is created for that register and recorded in s->frame_temp. */
470 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
473 s->frame_start = start;
474 s->frame_end = start + size;
475 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
476 s->frame_temp = &s->temps[idx];
/* Create a 32-bit global in host register reg on the global context and
   return it as a TCGv_i32.  The register is first tested against
   s->reserved_regs; the action taken when it is already reserved is on a
   line not shown in this listing. */
479 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
481 TCGContext *s = &tcg_ctx;
484 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
487 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
488 return MAKE_TCGV_I32(idx);
/* 64-bit counterpart of tcg_global_reg_new_i32: create a TCG_TYPE_I64
   global in host register reg and return it as a TCGv_i64.  The register
   is first tested against s->reserved_regs; the action taken when it is
   already reserved is on a line not shown in this listing. */
491 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
493 TCGContext *s = &tcg_ctx;
496 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
499 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
500 return MAKE_TCGV_I64(idx);
/* Create a global temp that lives in memory at `offset` from the temp
   `base`, and return the index of the (first) temp created.
   On a 32-bit host a TCG_TYPE_I64 global is split into two consecutive
   32-bit temps, ts and ts2 == ts + 1, whose memory offsets are
   offset + bigendian * 4 and offset + (1 - bigendian) * 4 and whose
   names are strdup'ed copies of name with "_0" and "_1" appended.
   In every other case a single temp of the requested type is set up at
   `offset`.
   NOTE(review): this listing omits several lines of the function (the
   declarations of buf and bigendian, the else branch header and the
   name/type assignments of the single-temp case). */
503 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
504 intptr_t offset, const char *name)
506 TCGContext *s = &tcg_ctx;
507 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
508 TCGTemp *ts = tcg_global_alloc(s);
510 #ifdef HOST_WORDS_BIGENDIAN
514 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
515 TCGTemp *ts2 = tcg_global_alloc(s);
518 ts->base_type = TCG_TYPE_I64;
519 ts->type = TCG_TYPE_I32;
520 ts->mem_allocated = 1;
521 ts->mem_base = base_ts;
522 ts->mem_offset = offset + bigendian * 4;
523 pstrcpy(buf, sizeof(buf), name);
524 pstrcat(buf, sizeof(buf), "_0");
525 ts->name = strdup(buf);
527 tcg_debug_assert(ts2 == ts + 1);
528 ts2->base_type = TCG_TYPE_I64;
529 ts2->type = TCG_TYPE_I32;
530 ts2->mem_allocated = 1;
531 ts2->mem_base = base_ts;
532 ts2->mem_offset = offset + (1 - bigendian) * 4;
533 pstrcpy(buf, sizeof(buf), name);
534 pstrcat(buf, sizeof(buf), "_1");
/* The "_1" name belongs to the second half.  Storing it in ts->name
   would overwrite (and leak) the "_0" string and leave ts2 unnamed. */
535 ts2->name = strdup(buf);
537 ts->base_type = type;
539 ts->mem_allocated = 1;
540 ts->mem_base = base_ts;
541 ts->mem_offset = offset;
544 return temp_idx(s, ts);
/* Allocate a temporary of the given type on the global context.
   Free temps are tracked in one bitmap per (type, locality) pair,
   indexed by k = type + (temp_local ? TCG_TYPE_COUNT : 0).  If that
   bitmap has a set bit, the corresponding temp is reused and marked
   allocated.  Otherwise a new temp is created; on a 32-bit host a
   TCG_TYPE_I64 temp is created as two adjacent 32-bit temps (ts and
   ts2 == ts + 1) that both record base_type I64.
   NOTE(review): the else-branch headers, the `type` assignment of the
   single-temp case, the CONFIG_DEBUG_TCG bookkeeping and the return
   statement are on lines not present in this listing. */
547 static int tcg_temp_new_internal(TCGType type, int temp_local)
549 TCGContext *s = &tcg_ctx;
553 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
554 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
555 if (idx < TCG_MAX_TEMPS) {
556 /* There is already an available temp with the right type. */
557 clear_bit(idx, s->free_temps[k].l);
560 ts->temp_allocated = 1;
561 tcg_debug_assert(ts->base_type == type);
562 tcg_debug_assert(ts->temp_local == temp_local);
564 ts = tcg_temp_alloc(s);
565 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
566 TCGTemp *ts2 = tcg_temp_alloc(s);
568 ts->base_type = type;
569 ts->type = TCG_TYPE_I32;
570 ts->temp_allocated = 1;
571 ts->temp_local = temp_local;
573 tcg_debug_assert(ts2 == ts + 1);
574 ts2->base_type = TCG_TYPE_I64;
575 ts2->type = TCG_TYPE_I32;
576 ts2->temp_allocated = 1;
577 ts2->temp_local = temp_local;
579 ts->base_type = type;
581 ts->temp_allocated = 1;
582 ts->temp_local = temp_local;
584 idx = temp_idx(s, ts);
587 #if defined(CONFIG_DEBUG_TCG)
/* Allocate a 32-bit temporary (local if temp_local is non-zero) and wrap
   its index as a TCGv_i32. */
593 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
597 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
598 return MAKE_TCGV_I32(idx);
/* Allocate a 64-bit temporary (local if temp_local is non-zero) and wrap
   its index as a TCGv_i64. */
601 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
605 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
606 return MAKE_TCGV_I64(idx);
/* Release the temporary with index idx on the global context.
   idx must denote a non-global temp that is currently allocated (both
   asserted).  The temp is marked unallocated and its bit is set in the
   free bitmap for its (base_type, locality) pair so that
   tcg_temp_new_internal() can reuse it.  With CONFIG_DEBUG_TCG a
   message is printed to stderr when s->temps_in_use has gone negative;
   the statement updating that counter is not visible in this listing. */
609 static void tcg_temp_free_internal(int idx)
611 TCGContext *s = &tcg_ctx;
615 #if defined(CONFIG_DEBUG_TCG)
617 if (s->temps_in_use < 0) {
618 fprintf(stderr, "More temporaries freed than allocated!\n");
622 assert(idx >= s->nb_globals && idx < s->nb_temps);
624 assert(ts->temp_allocated != 0);
625 ts->temp_allocated = 0;
627 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
628 set_bit(idx, s->free_temps[k].l);
/* Free the 32-bit temporary arg. */
631 void tcg_temp_free_i32(TCGv_i32 arg)
633 tcg_temp_free_internal(GET_TCGV_I32(arg));
/* Free the 64-bit temporary arg. */
636 void tcg_temp_free_i64(TCGv_i64 arg)
638 tcg_temp_free_internal(GET_TCGV_I64(arg));
/* Allocate a new 32-bit temporary and emit a movi loading val into it.
   NOTE(review): the declaration and return of t0 are not visible in
   this listing. */
641 TCGv_i32 tcg_const_i32(int32_t val)
644 t0 = tcg_temp_new_i32();
645 tcg_gen_movi_i32(t0, val);
/* Allocate a new 64-bit temporary and emit a movi loading val into it.
   NOTE(review): the declaration and return of t0 are not visible in
   this listing. */
649 TCGv_i64 tcg_const_i64(int64_t val)
652 t0 = tcg_temp_new_i64();
653 tcg_gen_movi_i64(t0, val);
/* Allocate a new 32-bit local temporary and emit a movi loading val into
   it.
   NOTE(review): the declaration and return of t0 are not visible in
   this listing. */
657 TCGv_i32 tcg_const_local_i32(int32_t val)
660 t0 = tcg_temp_local_new_i32();
661 tcg_gen_movi_i32(t0, val);
/* Allocate a new 64-bit local temporary and emit a movi loading val into
   it.
   NOTE(review): the declaration and return of t0 are not visible in
   this listing. */
665 TCGv_i64 tcg_const_local_i64(int64_t val)
668 t0 = tcg_temp_local_new_i64();
669 tcg_gen_movi_i64(t0, val);
673 #if defined(CONFIG_DEBUG_TCG)
674 void tcg_clear_temp_count(void)
676 TCGContext *s = &tcg_ctx;
680 int tcg_check_temp_count(void)
682 TCGContext *s = &tcg_ctx;
683 if (s->temps_in_use) {
684 /* Clear the count so that we don't give another
685 * warning immediately next time around.
694 /* Note: we convert the 64 bit args to 32 bit and do some alignment
695 and endian swap. Maybe it would be better to do the alignment
696 and endian swap in tcg_reg_alloc_call(). */
697 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
698 int nargs, TCGArg *args)
700 int i, real_args, nb_rets, pi, pi_first;
701 unsigned sizemask, flags;
704 info = g_hash_table_lookup(s->helpers, (gpointer)func);
706 sizemask = info->sizemask;
708 #if defined(__sparc__) && !defined(__arch64__) \
709 && !defined(CONFIG_TCG_INTERPRETER)
710 /* We have 64-bit values in one register, but need to pass as two
711 separate parameters. Split them. */
712 int orig_sizemask = sizemask;
713 int orig_nargs = nargs;
716 TCGV_UNUSED_I64(retl);
717 TCGV_UNUSED_I64(reth);
719 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
720 for (i = real_args = 0; i < nargs; ++i) {
721 int is_64bit = sizemask & (1 << (i+1)*2);
723 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
724 TCGv_i32 h = tcg_temp_new_i32();
725 TCGv_i32 l = tcg_temp_new_i32();
726 tcg_gen_extr_i64_i32(l, h, orig);
727 split_args[real_args++] = GET_TCGV_I32(h);
728 split_args[real_args++] = GET_TCGV_I32(l);
730 split_args[real_args++] = args[i];
737 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
738 for (i = 0; i < nargs; ++i) {
739 int is_64bit = sizemask & (1 << (i+1)*2);
740 int is_signed = sizemask & (2 << (i+1)*2);
742 TCGv_i64 temp = tcg_temp_new_i64();
743 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
745 tcg_gen_ext32s_i64(temp, orig);
747 tcg_gen_ext32u_i64(temp, orig);
749 args[i] = GET_TCGV_I64(temp);
752 #endif /* TCG_TARGET_EXTEND_ARGS */
754 pi_first = pi = s->gen_next_parm_idx;
755 if (ret != TCG_CALL_DUMMY_ARG) {
756 #if defined(__sparc__) && !defined(__arch64__) \
757 && !defined(CONFIG_TCG_INTERPRETER)
758 if (orig_sizemask & 1) {
759 /* The 32-bit ABI is going to return the 64-bit value in
760 the %o0/%o1 register pair. Prepare for this by using
761 two return temporaries, and reassemble below. */
762 retl = tcg_temp_new_i64();
763 reth = tcg_temp_new_i64();
764 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
765 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
768 s->gen_opparam_buf[pi++] = ret;
772 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
773 #ifdef HOST_WORDS_BIGENDIAN
774 s->gen_opparam_buf[pi++] = ret + 1;
775 s->gen_opparam_buf[pi++] = ret;
777 s->gen_opparam_buf[pi++] = ret;
778 s->gen_opparam_buf[pi++] = ret + 1;
782 s->gen_opparam_buf[pi++] = ret;
790 for (i = 0; i < nargs; i++) {
791 int is_64bit = sizemask & (1 << (i+1)*2);
792 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
793 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
794 /* some targets want aligned 64 bit args */
796 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
800 /* If stack grows up, then we will be placing successive
801 arguments at lower addresses, which means we need to
802 reverse the order compared to how we would normally
803 treat either big or little-endian. For those arguments
804 that will wind up in registers, this still works for
805 HPPA (the only current STACK_GROWSUP target) since the
806 argument registers are *also* allocated in decreasing
807 order. If another such target is added, this logic may
808 have to get more complicated to differentiate between
809 stack arguments and register arguments. */
810 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
811 s->gen_opparam_buf[pi++] = args[i] + 1;
812 s->gen_opparam_buf[pi++] = args[i];
814 s->gen_opparam_buf[pi++] = args[i];
815 s->gen_opparam_buf[pi++] = args[i] + 1;
821 s->gen_opparam_buf[pi++] = args[i];
824 s->gen_opparam_buf[pi++] = (uintptr_t)func;
825 s->gen_opparam_buf[pi++] = flags;
827 i = s->gen_next_op_idx;
828 tcg_debug_assert(i < OPC_BUF_SIZE);
829 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
831 /* Set links for sequential allocation during translation. */
832 s->gen_op_buf[i] = (TCGOp){
833 .opc = INDEX_op_call,
841 /* Make sure the calli field didn't overflow. */
842 tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
844 s->gen_last_op_idx = i;
845 s->gen_next_op_idx = i + 1;
846 s->gen_next_parm_idx = pi;
848 #if defined(__sparc__) && !defined(__arch64__) \
849 && !defined(CONFIG_TCG_INTERPRETER)
850 /* Free all of the parts we allocated above. */
851 for (i = real_args = 0; i < orig_nargs; ++i) {
852 int is_64bit = orig_sizemask & (1 << (i+1)*2);
854 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
855 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
856 tcg_temp_free_i32(h);
857 tcg_temp_free_i32(l);
862 if (orig_sizemask & 1) {
863 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
864 Note that describing these as TCGv_i64 eliminates an unnecessary
865 zero-extension that tcg_gen_concat_i32_i64 would create. */
866 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
867 tcg_temp_free_i64(retl);
868 tcg_temp_free_i64(reth);
870 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
871 for (i = 0; i < nargs; ++i) {
872 int is_64bit = sizemask & (1 << (i+1)*2);
874 TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
875 tcg_temp_free_i64(temp);
878 #endif /* TCG_TARGET_EXTEND_ARGS */
/* Set the initial value location of every temp before register
   allocation.  Globals start as TEMP_VAL_REG or TEMP_VAL_MEM (the test
   choosing between them is not shown in this listing -- presumably
   whether the global is bound to a fixed register).  Non-global temps
   start as TEMP_VAL_MEM when they are local temps and TEMP_VAL_DEAD
   otherwise, with mem_allocated cleared.  Finally the reg_to_temp map
   is zeroed. */
881 static void tcg_reg_alloc_start(TCGContext *s)
885 for(i = 0; i < s->nb_globals; i++) {
888 ts->val_type = TEMP_VAL_REG;
890 ts->val_type = TEMP_VAL_MEM;
893 for(i = s->nb_globals; i < s->nb_temps; i++) {
895 if (ts->temp_local) {
896 ts->val_type = TEMP_VAL_MEM;
898 ts->val_type = TEMP_VAL_DEAD;
900 ts->mem_allocated = 0;
904 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
/* Format a printable name for temp ts into buf (at most buf_size bytes).
   Globals use their stored name; other temps are rendered as "loc<N>"
   (local temps) or "tmp<N>", where N is the index relative to the first
   non-global temp.
   NOTE(review): the last parameter and the return statement are on
   lines not present in this listing. */
907 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
910 int idx = temp_idx(s, ts);
912 if (idx < s->nb_globals) {
913 pstrcpy(buf, buf_size, ts->name);
914 } else if (ts->temp_local) {
915 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
917 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
/* Same as tcg_get_arg_str_ptr(), but the temp is identified by its index
   idx, which must lie in [0, s->nb_temps) (asserted). */
922 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
923 int buf_size, int idx)
925 assert(idx >= 0 && idx < s->nb_temps);
926 return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
929 /* Find helper name. */
/* Look val up as a key in the s->helpers hash table; the result starts
   out as NULL.
   NOTE(review): how the name is taken from the TCGHelperInfo entry and
   returned is on lines not present in this listing. */
930 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
932 const char *ret = NULL;
934 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
/* Printable mnemonic for each TCG_COND_* condition code, indexed by the
   code itself; entries not listed here are left NULL by the designated
   initialisers. */
942 static const char * const cond_name[] =
944 [TCG_COND_NEVER] = "never",
945 [TCG_COND_ALWAYS] = "always",
946 [TCG_COND_EQ] = "eq",
947 [TCG_COND_NE] = "ne",
948 [TCG_COND_LT] = "lt",
949 [TCG_COND_GE] = "ge",
950 [TCG_COND_LE] = "le",
951 [TCG_COND_GT] = "gt",
952 [TCG_COND_LTU] = "ltu",
953 [TCG_COND_GEU] = "geu",
954 [TCG_COND_LEU] = "leu",
955 [TCG_COND_GTU] = "gtu"
958 static const char * const ldst_name[] =
974 void tcg_dump_ops(TCGContext *s)
980 for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
981 int i, k, nb_oargs, nb_iargs, nb_cargs;
986 op = &s->gen_op_buf[oi];
988 def = &tcg_op_defs[c];
989 args = &s->gen_opparam_buf[op->args];
991 if (c == INDEX_op_insn_start) {
992 qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
994 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
996 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
997 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
1001 qemu_log(" " TARGET_FMT_lx, a);
1003 } else if (c == INDEX_op_call) {
1004 /* variable number of arguments */
1005 nb_oargs = op->callo;
1006 nb_iargs = op->calli;
1007 nb_cargs = def->nb_cargs;
1009 /* function name, flags, out args */
1010 qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1011 tcg_find_helper(s, args[nb_oargs + nb_iargs]),
1012 args[nb_oargs + nb_iargs + 1], nb_oargs);
1013 for (i = 0; i < nb_oargs; i++) {
1014 qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1017 for (i = 0; i < nb_iargs; i++) {
1018 TCGArg arg = args[nb_oargs + i];
1019 const char *t = "<dummy>";
1020 if (arg != TCG_CALL_DUMMY_ARG) {
1021 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
1026 qemu_log(" %s ", def->name);
1028 nb_oargs = def->nb_oargs;
1029 nb_iargs = def->nb_iargs;
1030 nb_cargs = def->nb_cargs;
1033 for (i = 0; i < nb_oargs; i++) {
1037 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1040 for (i = 0; i < nb_iargs; i++) {
1044 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1048 case INDEX_op_brcond_i32:
1049 case INDEX_op_setcond_i32:
1050 case INDEX_op_movcond_i32:
1051 case INDEX_op_brcond2_i32:
1052 case INDEX_op_setcond2_i32:
1053 case INDEX_op_brcond_i64:
1054 case INDEX_op_setcond_i64:
1055 case INDEX_op_movcond_i64:
1056 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
1057 qemu_log(",%s", cond_name[args[k++]]);
1059 qemu_log(",$0x%" TCG_PRIlx, args[k++]);
1063 case INDEX_op_qemu_ld_i32:
1064 case INDEX_op_qemu_st_i32:
1065 case INDEX_op_qemu_ld_i64:
1066 case INDEX_op_qemu_st_i64:
1068 TCGMemOpIdx oi = args[k++];
1069 TCGMemOp op = get_memop(oi);
1070 unsigned ix = get_mmuidx(oi);
1072 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1073 qemu_log(",$0x%x,%u", op, ix);
1075 const char *s_al = "", *s_op;
1076 if (op & MO_AMASK) {
1077 if ((op & MO_AMASK) == MO_ALIGN) {
1083 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1084 qemu_log(",%s%s,%u", s_al, s_op, ix);
1094 case INDEX_op_set_label:
1096 case INDEX_op_brcond_i32:
1097 case INDEX_op_brcond_i64:
1098 case INDEX_op_brcond2_i32:
1099 qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
1105 for (; i < nb_cargs; i++, k++) {
1106 qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
1113 /* we give more priority to constraints with less registers */
/* Compute a sort priority for argument k of def.  On the final visible
   path the result is TCG_TARGET_NB_REGS - n + 1, where n is a count
   built while scanning the constraint's register set, so a smaller n
   yields a higher priority.  Alias constraints and constraints without
   TCG_CT_REG are handled separately.
   NOTE(review): the values used for those two cases and the statement
   incrementing n are on lines not present in this listing. */
1114 static int get_constraint_priority(const TCGOpDef *def, int k)
1116 const TCGArgConstraint *arg_ct;
1119 arg_ct = &def->args_ct[k];
1120 if (arg_ct->ct & TCG_CT_ALIAS) {
1121 /* an alias is equivalent to a single register */
1124 if (!(arg_ct->ct & TCG_CT_REG))
1127 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1128 if (tcg_regset_test_reg(arg_ct->u.regs, i))
1132 return TCG_TARGET_NB_REGS - n + 1;
1135 /* sort from highest priority to lowest */
/* Order the n entries def->sorted_args[start .. start + n - 1].
   The entries are first initialised to the identity mapping
   (start + i), then every pair (i, j) with i < j is visited, its
   priorities are computed with get_constraint_priority(), and the two
   entries may be swapped.
   NOTE(review): the comparison deciding whether to swap is on a line
   not present in this listing. */
1136 static void sort_constraints(TCGOpDef *def, int start, int n)
1138 int i, j, p1, p2, tmp;
1140 for(i = 0; i < n; i++)
1141 def->sorted_args[start + i] = start + i;
1144 for(i = 0; i < n - 1; i++) {
1145 for(j = i + 1; j < n; j++) {
1146 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1147 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
1149 tmp = def->sorted_args[start + i];
1150 def->sorted_args[start + i] = def->sorted_args[start + j];
1151 def->sorted_args[start + j] = tmp;
1157 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
1165 if (tdefs->op == (TCGOpcode)-1)
1168 assert((unsigned)op < NB_OPS);
1169 def = &tcg_op_defs[op];
1170 #if defined(CONFIG_DEBUG_TCG)
1171 /* Duplicate entry in op definitions? */
1175 nb_args = def->nb_iargs + def->nb_oargs;
1176 for(i = 0; i < nb_args; i++) {
1177 ct_str = tdefs->args_ct_str[i];
1178 /* Incomplete TCGTargetOpDef entry? */
1179 assert(ct_str != NULL);
1180 tcg_regset_clear(def->args_ct[i].u.regs);
1181 def->args_ct[i].ct = 0;
1182 if (ct_str[0] >= '0' && ct_str[0] <= '9') {
1184 oarg = ct_str[0] - '0';
1185 assert(oarg < def->nb_oargs);
1186 assert(def->args_ct[oarg].ct & TCG_CT_REG);
1187 /* TCG_CT_ALIAS is for the output arguments. The input
1188 argument is tagged with TCG_CT_IALIAS. */
1189 def->args_ct[i] = def->args_ct[oarg];
1190 def->args_ct[oarg].ct = TCG_CT_ALIAS;
1191 def->args_ct[oarg].alias_index = i;
1192 def->args_ct[i].ct |= TCG_CT_IALIAS;
1193 def->args_ct[i].alias_index = oarg;
1196 if (*ct_str == '\0')
1200 def->args_ct[i].ct |= TCG_CT_CONST;
1204 if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
1205 fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
1206 ct_str, i, def->name);
1214 /* TCGTargetOpDef entry with too much information? */
1215 assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1217 /* sort the constraints (XXX: this is just an heuristic) */
1218 sort_constraints(def, 0, def->nb_oargs);
1219 sort_constraints(def, def->nb_oargs, def->nb_iargs);
1225 printf("%s: sorted=", def->name);
1226 for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
1227 printf(" %d", def->sorted_args[i]);
1234 #if defined(CONFIG_DEBUG_TCG)
1236 for (op = 0; op < tcg_op_defs_max; op++) {
1237 const TCGOpDef *def = &tcg_op_defs[op];
1238 if (def->flags & TCG_OPF_NOT_PRESENT) {
1239 /* Wrong entry in op definitions? */
1241 fprintf(stderr, "Invalid op definition for %s\n", def->name);
1245 /* Missing entry in op definitions? */
1247 fprintf(stderr, "Missing op definition for %s\n", def->name);
/* Unlink op from the doubly linked op list kept in s->gen_op_buf.
   The neighbours' prev/next indices are joined around op; when op was
   the last or first element, s->gen_last_op_idx or s->gen_first_op_idx
   is updated instead.  The removed op is then filled with 0xff bytes.
   NOTE(review): the conditions selecting between neighbour and
   list-end updates, and the CONFIG_PROFILER statement, are on lines
   not present in this listing. */
1258 void tcg_op_remove(TCGContext *s, TCGOp *op)
1260 int next = op->next;
1261 int prev = op->prev;
1264 s->gen_op_buf[next].prev = prev;
1266 s->gen_last_op_idx = prev;
1269 s->gen_op_buf[prev].next = next;
1271 s->gen_first_op_idx = next;
1274 memset(op, -1, sizeof(*op));
1276 #ifdef CONFIG_PROFILER
1281 #ifdef USE_LIVENESS_ANALYSIS
1282 /* liveness analysis: end of function: all temps are dead, and globals
1283 should be in memory. */
/* Liveness state at the end of the function: dead_temps[] is set to 1
   for all s->nb_temps temps; mem_temps[] is set to 1 for the globals and
   0 for every other temp. */
1284 static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
1287 memset(dead_temps, 1, s->nb_temps);
1288 memset(mem_temps, 1, s->nb_globals);
1289 memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
1292 /* liveness analysis: end of basic block: all temps are dead, globals
1293 and local temps should be in memory. */
/* Liveness state at the end of a basic block: dead_temps[] is set to 1
   for all temps; mem_temps[] is set to 1 for the globals and, for each
   non-global temp, to that temp's temp_local flag. */
1294 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
1299 memset(dead_temps, 1, s->nb_temps);
1300 memset(mem_temps, 1, s->nb_globals);
1301 for(i = s->nb_globals; i < s->nb_temps; i++) {
1302 mem_temps[i] = s->temps[i].temp_local;
1306 /* Liveness analysis: update the op_dead_args array to tell whether a
1307 given input argument is dead. Instructions updating dead
1308 temporaries are removed. */
1309 static void tcg_liveness_analysis(TCGContext *s)
1311 uint8_t *dead_temps, *mem_temps;
1312 int oi, oi_prev, nb_ops;
1314 nb_ops = s->gen_next_op_idx;
1315 s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1316 s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1318 dead_temps = tcg_malloc(s->nb_temps);
1319 mem_temps = tcg_malloc(s->nb_temps);
1320 tcg_la_func_end(s, dead_temps, mem_temps);
1322 for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
1323 int i, nb_iargs, nb_oargs;
1324 TCGOpcode opc_new, opc_new2;
1330 TCGOp * const op = &s->gen_op_buf[oi];
1331 TCGArg * const args = &s->gen_opparam_buf[op->args];
1332 TCGOpcode opc = op->opc;
1333 const TCGOpDef *def = &tcg_op_defs[opc];
1342 nb_oargs = op->callo;
1343 nb_iargs = op->calli;
1344 call_flags = args[nb_oargs + nb_iargs + 1];
1346 /* pure functions can be removed if their result is unused */
1347 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
1348 for (i = 0; i < nb_oargs; i++) {
1350 if (!dead_temps[arg] || mem_temps[arg]) {
1351 goto do_not_remove_call;
1358 /* output args are dead */
1361 for (i = 0; i < nb_oargs; i++) {
1363 if (dead_temps[arg]) {
1364 dead_args |= (1 << i);
1366 if (mem_temps[arg]) {
1367 sync_args |= (1 << i);
1369 dead_temps[arg] = 1;
1373 if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
1374 /* globals should be synced to memory */
1375 memset(mem_temps, 1, s->nb_globals);
1377 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
1378 TCG_CALL_NO_READ_GLOBALS))) {
1379 /* globals should go back to memory */
1380 memset(dead_temps, 1, s->nb_globals);
1383 /* record arguments that die in this helper */
1384 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1386 if (arg != TCG_CALL_DUMMY_ARG) {
1387 if (dead_temps[arg]) {
1388 dead_args |= (1 << i);
1392 /* input arguments are live for preceding opcodes */
1393 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1395 dead_temps[arg] = 0;
1397 s->op_dead_args[oi] = dead_args;
1398 s->op_sync_args[oi] = sync_args;
1402 case INDEX_op_insn_start:
1404 case INDEX_op_discard:
1405 /* mark the temporary as dead */
1406 dead_temps[args[0]] = 1;
1407 mem_temps[args[0]] = 0;
1410 case INDEX_op_add2_i32:
1411 opc_new = INDEX_op_add_i32;
1413 case INDEX_op_sub2_i32:
1414 opc_new = INDEX_op_sub_i32;
1416 case INDEX_op_add2_i64:
1417 opc_new = INDEX_op_add_i64;
1419 case INDEX_op_sub2_i64:
1420 opc_new = INDEX_op_sub_i64;
1424 /* Test if the high part of the operation is dead, but not
1425 the low part. The result can be optimized to a simple
1426 add or sub. This happens often for x86_64 guest when the
1427 cpu mode is set to 32 bit. */
1428 if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1429 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1432 /* Replace the opcode and adjust the args in place,
1433 leaving 3 unused args at the end. */
1434 op->opc = opc = opc_new;
1437 /* Fall through and mark the single-word operation live. */
1443 case INDEX_op_mulu2_i32:
1444 opc_new = INDEX_op_mul_i32;
1445 opc_new2 = INDEX_op_muluh_i32;
1446 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
1448 case INDEX_op_muls2_i32:
1449 opc_new = INDEX_op_mul_i32;
1450 opc_new2 = INDEX_op_mulsh_i32;
1451 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
1453 case INDEX_op_mulu2_i64:
1454 opc_new = INDEX_op_mul_i64;
1455 opc_new2 = INDEX_op_muluh_i64;
1456 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
1458 case INDEX_op_muls2_i64:
1459 opc_new = INDEX_op_mul_i64;
1460 opc_new2 = INDEX_op_mulsh_i64;
1461 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
1466 if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1467 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1468 /* Both parts of the operation are dead. */
1471 /* The high part of the operation is dead; generate the low. */
1472 op->opc = opc = opc_new;
1475 } else if (have_opc_new2 && dead_temps[args[0]]
1476 && !mem_temps[args[0]]) {
1477 /* The low part of the operation is dead; generate the high. */
1478 op->opc = opc = opc_new2;
1485 /* Mark the single-word operation live. */
1490 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1491 nb_iargs = def->nb_iargs;
1492 nb_oargs = def->nb_oargs;
1494 /* Test if the operation can be removed because all
1495 its outputs are dead. We assume that nb_oargs == 0
1496 implies side effects */
1497 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
1498 for (i = 0; i < nb_oargs; i++) {
1500 if (!dead_temps[arg] || mem_temps[arg]) {
1505 tcg_op_remove(s, op);
1508 /* output args are dead */
1511 for (i = 0; i < nb_oargs; i++) {
1513 if (dead_temps[arg]) {
1514 dead_args |= (1 << i);
1516 if (mem_temps[arg]) {
1517 sync_args |= (1 << i);
1519 dead_temps[arg] = 1;
1523 /* if end of basic block, update */
1524 if (def->flags & TCG_OPF_BB_END) {
1525 tcg_la_bb_end(s, dead_temps, mem_temps);
1526 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1527 /* globals should be synced to memory */
1528 memset(mem_temps, 1, s->nb_globals);
1531 /* record arguments that die in this opcode */
1532 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1534 if (dead_temps[arg]) {
1535 dead_args |= (1 << i);
1538 /* input arguments are live for preceding opcodes */
1539 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1541 dead_temps[arg] = 0;
1543 s->op_dead_args[oi] = dead_args;
1544 s->op_sync_args[oi] = sync_args;
/* Variant of tcg_liveness_analysis() that performs no analysis: it only
   allocates the per-op dead-argument and sync-argument bitmaps and
   clears them, so IS_DEAD_ARG() and NEED_SYNC_ARG() read as 0 for
   every argument of every op.
   NOTE(review): presumably this is the variant built when
   USE_LIVENESS_ANALYSIS is not defined; the guarding preprocessor
   lines are not visible here -- confirm. */
1551 /* dummy liveness analysis */
1552 static void tcg_liveness_analysis(TCGContext *s)
1554 int nb_ops = s->gen_next_op_idx;
/* one uint16_t of dead-argument bits per op, zero-filled */
1556 s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1557 memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
/* one uint8_t of sync-argument bits per op, zero-filled */
1558 s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1559 memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
/* Debug helper: print the current location of every temporary, then
   print which temporary each occupied host register holds. */
1564 static void dump_regs(TCGContext *s)
/* first pass: one line per temporary, formatted according to val_type */
1570 for(i = 0; i < s->nb_temps; i++) {
1572 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
1573 switch(ts->val_type) {
/* value lives in a host register: print the register name */
1575 printf("%s", tcg_target_reg_names[ts->reg]);
/* value lives in memory: print it as offset(base register) */
1578 printf("%d(%s)", (int)ts->mem_offset,
1579 tcg_target_reg_names[ts->mem_base->reg]);
1581 case TEMP_VAL_CONST:
1582 printf("$0x%" TCG_PRIlx, ts->val);
/* second pass: list only the host registers that are mapped to a temp */
1594 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1595 if (s->reg_to_temp[i] != NULL) {
1597 tcg_target_reg_names[i],
1598 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
/* Debug helper: cross-check the register-to-temp table against the
   per-temporary state in both directions and print a diagnostic when
   they disagree. */
1603 static void check_regs(TCGContext *s)
/* direction 1: a temp recorded for a register must itself claim to be
   in that very register */
1610 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
1611 ts = s->reg_to_temp[reg];
1613 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
1614 printf("Inconsistency for register %s:\n",
1615 tcg_target_reg_names[reg]);
/* direction 2: a non-fixed temp that claims a register must be the one
   recorded for that register */
1620 for (k = 0; k < s->nb_temps; k++) {
1622 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
1623 && s->reg_to_temp[ts->reg] != ts) {
1624 printf("Inconsistency for temp %s:\n",
1625 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
1627 printf("reg state:\n");
/* Give temporary number temp a spill slot of sizeof(tcg_target_long)
   bytes in the frame, addressed relative to s->frame_temp, and advance
   s->current_frame_offset past it. */
1635 static void temp_allocate_frame(TCGContext *s, int temp)
1638 ts = &s->temps[temp];
/* everywhere except 64-bit sparc hosts, round the offset up to a
   multiple of sizeof(tcg_target_long) first */
1639 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1640 /* Sparc64 stack is accessed with offset of 2047 */
1641 s->current_frame_offset = (s->current_frame_offset +
1642 (tcg_target_long)sizeof(tcg_target_long) - 1) &
1643 ~(sizeof(tcg_target_long) - 1);
/* bounds check on the end of the new slot; the limit it is compared
   against and the failure action are not visible in this excerpt */
1645 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
/* record the slot in the temporary and bump the allocation cursor */
1649 ts->mem_offset = s->current_frame_offset;
1650 ts->mem_base = s->frame_temp;
1651 ts->mem_allocated = 1;
1652 s->current_frame_offset += sizeof(tcg_target_long);
1655 /* sync register 'reg' by saving it to the corresponding temporary */
/* The temporary must currently be held in a register (asserted).  A
   store is emitted only when the memory copy is stale and the
   temporary is not bound to a fixed register; a frame slot is
   allocated on demand.  The temporary is then marked coherent. */
1656 static inline void tcg_reg_sync(TCGContext *s, TCGReg reg)
1658 TCGTemp *ts = s->reg_to_temp[reg];
1660 assert(ts->val_type == TEMP_VAL_REG);
1661 if (!ts->mem_coherent && !ts->fixed_reg) {
/* no backing slot yet: allocate one before storing */
1662 if (!ts->mem_allocated) {
1663 temp_allocate_frame(s, temp_idx(s, ts));
1665 tcg_out_st(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
1667 ts->mem_coherent = 1;
1670 /* free register 'reg' by spilling the corresponding temporary if necessary */
/* After the sync the temporary is re-tagged as living in memory and the
   register is removed from the register-to-temp table.
   NOTE(review): the guard for an unoccupied register is not visible in
   this excerpt -- confirm. */
1671 static void tcg_reg_free(TCGContext *s, TCGReg reg)
1673 TCGTemp *ts = s->reg_to_temp[reg];
1676 tcg_reg_sync(s, reg);
1677 ts->val_type = TEMP_VAL_MEM;
1678 s->reg_to_temp[reg] = NULL;
1682 /* Allocate a register belonging to reg1 & ~reg2 */
/* Candidates are scanned in tcg_target_reg_alloc_order.  An unoccupied
   candidate is preferred; failing that, the first candidate in that
   order is freed with tcg_reg_free() (spilling its temporary). */
1683 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
/* reg_ct = reg1 with every register of reg2 removed */
1689 tcg_regset_andnot(reg_ct, reg1, reg2);
1691 /* first try free registers */
1692 for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1693 reg = tcg_target_reg_alloc_order[i];
1694 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
1698 /* XXX: do better spill choice */
1699 for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1700 reg = tcg_target_reg_alloc_order[i];
1701 if (tcg_regset_test_reg(reg_ct, reg)) {
1702 tcg_reg_free(s, reg);
1710 /* mark a temporary as dead. */
/* Any register held by the temporary is released.  Globals (index below
   nb_globals) and local temporaries fall back to their memory copy
   (TEMP_VAL_MEM); every other temporary becomes TEMP_VAL_DEAD.
   Fixed-register temporaries are tested for first; what that branch
   does is not visible in this excerpt. */
1711 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
1713 if (ts->fixed_reg) {
1716 if (ts->val_type == TEMP_VAL_REG) {
1717 s->reg_to_temp[ts->reg] = NULL;
1719 ts->val_type = (temp_idx(s, ts) < s->nb_globals || ts->temp_local
1720 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1723 /* sync a temporary to memory. 'allocated_regs' is used in case a
1724 temporary register needs to be allocated to store a constant. */
/* A constant is first materialised: a register is allocated for it, the
   temporary is switched to TEMP_VAL_REG and marked not coherent, and a
   movi is emitted.  tcg_reg_sync() then performs the store for a value
   held in a register. */
1725 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
1727 if (ts->fixed_reg) {
1730 switch (ts->val_type) {
1731 case TEMP_VAL_CONST:
1732 ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
1734 ts->val_type = TEMP_VAL_REG;
1735 s->reg_to_temp[ts->reg] = ts;
1736 ts->mem_coherent = 0;
1737 tcg_out_movi(s, ts->type, ts->reg, ts->val);
1740 tcg_reg_sync(s, ts->reg);
1750 /* save a temporary to memory. 'allocated_regs' is used in case a
1751 temporary register needs to be allocated to store a constant. */
/* With USE_LIVENESS_ANALYSIS the function only asserts that the
   temporary is already in memory or bound to a fixed register.
   NOTE(review): the temp_sync() call below presumably belongs to the
   branch without liveness analysis; the separating preprocessor lines
   are not visible here -- confirm. */
1752 static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
1754 TCGTemp *ts = &s->temps[temp];
1756 #ifdef USE_LIVENESS_ANALYSIS
1757 /* The liveness analysis already ensures that globals are back
1758 in memory. Keep an assert for safety. */
1759 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
1761 temp_sync(s, ts, allocated_regs);
1766 /* save globals to their canonical location and assume they can be
1767 modified by the following code. 'allocated_regs' is used in case a
1768 temporary register needs to be allocated to store a constant. */
1769 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
/* globals occupy temp indexes 0 .. nb_globals-1 */
1773 for(i = 0; i < s->nb_globals; i++) {
1774 temp_save(s, i, allocated_regs);
1778 /* sync globals to their canonical location and assume they can be
1779 read by the following code. 'allocated_regs' is used in case a
1780 temporary register needs to be allocated to store a constant. */
/* With USE_LIVENESS_ANALYSIS the loop body is a debug assertion on each
   global (one operand of that assertion is not visible here).
   NOTE(review): the temp_sync() call presumably belongs to the branch
   without liveness analysis -- confirm. */
1781 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
1785 for (i = 0; i < s->nb_globals; i++) {
1786 TCGTemp *ts = &s->temps[i];
1787 #ifdef USE_LIVENESS_ANALYSIS
1788 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
1790 || ts->mem_coherent);
1792 temp_sync(s, ts, allocated_regs);
1797 /* at the end of a basic block, we assume all temporaries are dead and
1798 all globals are stored at their canonical location. */
/* Non-global temporaries are visited first: local temporaries are saved
   with temp_save(); for the others, the liveness-analysis build asserts
   that they are already TEMP_VAL_DEAD.  Globals are saved last. */
1799 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
1804 for(i = s->nb_globals; i < s->nb_temps; i++) {
1806 if (ts->temp_local) {
1807 temp_save(s, i, allocated_regs);
1809 #ifdef USE_LIVENESS_ANALYSIS
1810 /* The liveness analysis already ensures that temps are dead.
1811 Keep an assert for safety. */
1812 assert(ts->val_type == TEMP_VAL_DEAD);
1819 save_globals(s, allocated_regs);
/* Bit tests on the per-op liveness masks.  Both macros read a variable
   named dead_args / sync_args from the scope they are expanded in and
   yield bit n of it (0 or 1). */
1822 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
1823 #define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
/* Register allocation for a movi op; args[0] is the destination
   temporary.  For a fixed-register destination the immediate load is
   emitted directly.  Otherwise no code is emitted: the destination is
   detached from any register it held and tagged TEMP_VAL_CONST.
   Afterwards the destination is synced to memory if sync bit 0 is set
   and handled as dead if dead bit 0 is set. */
1825 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
1826 uint16_t dead_args, uint8_t sync_args)
1829 tcg_target_ulong val;
1831 ots = &s->temps[args[0]];
1834 if (ots->fixed_reg) {
1835 /* for fixed registers, we do not do any constant
1837 tcg_out_movi(s, ots->type, ots->reg, val);
1839 /* The movi is not explicitly generated here */
1840 if (ots->val_type == TEMP_VAL_REG) {
1841 s->reg_to_temp[ots->reg] = NULL;
1843 ots->val_type = TEMP_VAL_CONST;
/* liveness results for the single output argument */
1846 if (NEED_SYNC_ARG(0)) {
1847 temp_sync(s, ots, s->reserved_regs);
1849 if (IS_DEAD_ARG(0)) {
/* Register allocation for a mov op: args[0] is the destination
   temporary (ots), args[1] the source temporary (ts).  Three outcomes
   are visible below:
     - destination dead and not fixed: the source register is stored
       straight into the destination memory slot;
     - source is a constant: the constant is propagated by tagging the
       destination TEMP_VAL_CONST;
     - otherwise: the destination either takes over the dying source
       register (no mov emitted) or gets a register of its own and a
       mov is emitted, followed by a sync if requested. */
1854 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
1855 const TCGArg *args, uint16_t dead_args,
1858 TCGRegSet allocated_regs;
1860 TCGType otype, itype;
1862 tcg_regset_set(allocated_regs, s->reserved_regs);
1863 ots = &s->temps[args[0]];
1864 ts = &s->temps[args[1]];
1866 /* Note that otype != itype for no-op truncation. */
1870 /* If the source value is not in a register, and we're going to be
1871 forced to have it in a register in order to perform the copy,
1872 then copy the SOURCE value into its own register first. That way
1873 we don't have to reload SOURCE the next time it is used. */
1874 if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
1875 || ts->val_type == TEMP_VAL_MEM) {
1876 ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[itype],
/* a value loaded from memory matches its slot; a materialised constant
   has no up-to-date memory copy */
1878 if (ts->val_type == TEMP_VAL_MEM) {
1879 tcg_out_ld(s, itype, ts->reg, ts->mem_base->reg, ts->mem_offset);
1880 ts->mem_coherent = 1;
1881 } else if (ts->val_type == TEMP_VAL_CONST) {
1882 tcg_out_movi(s, itype, ts->reg, ts->val);
1883 ts->mem_coherent = 0;
1885 s->reg_to_temp[ts->reg] = ts;
1886 ts->val_type = TEMP_VAL_REG;
1889 if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
1890 /* mov to a non-saved dead register makes no sense (even with
1891 liveness analysis disabled). */
1892 assert(NEED_SYNC_ARG(0));
1893 /* The code above should have moved the temp to a register. */
1894 assert(ts->val_type == TEMP_VAL_REG);
1895 if (!ots->mem_allocated) {
1896 temp_allocate_frame(s, args[0]);
/* store the source register directly into the destination slot */
1898 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
1899 if (IS_DEAD_ARG(1)) {
1903 } else if (ts->val_type == TEMP_VAL_CONST) {
1904 /* propagate constant */
1905 if (ots->val_type == TEMP_VAL_REG) {
1906 s->reg_to_temp[ots->reg] = NULL;
1908 ots->val_type = TEMP_VAL_CONST;
1910 if (IS_DEAD_ARG(1)) {
1914 /* The code in the first if block should have moved the
1915 temp to a register. */
1916 assert(ts->val_type == TEMP_VAL_REG);
1917 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
1918 /* the mov can be suppressed */
1919 if (ots->val_type == TEMP_VAL_REG) {
1920 s->reg_to_temp[ots->reg] = NULL;
1925 if (ots->val_type != TEMP_VAL_REG) {
1926 /* When allocating a new register, make sure to not spill the
1928 tcg_regset_set_reg(allocated_regs, ts->reg);
1929 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
1932 tcg_out_mov(s, otype, ots->reg, ts->reg);
1934 ots->val_type = TEMP_VAL_REG;
1935 ots->mem_coherent = 0;
1936 s->reg_to_temp[ots->reg] = ots;
1937 if (NEED_SYNC_ARG(0)) {
1938 tcg_reg_sync(s, ots->reg);
// Generic register allocation for one op described by def.  The visible
// phases are, in order:
//   1. copy the constant arguments into new_args unchanged;
//   2. satisfy each input constraint, visiting inputs in
//      def->sorted_args order (load from memory, accept or materialise
//      a constant, move into a register allowed by the constraint);
//   3. mark dying inputs dead so their registers are released;
//   4. for an op that ends a basic block, run tcg_reg_alloc_bb_end();
//      otherwise handle call-clobbered registers and sync globals for
//      ops with side effects;
//   5. choose output registers, then emit the op with tcg_out_op();
//   6. move results into fixed registers, sync and kill outputs as the
//      liveness bits request.
1943 static void tcg_reg_alloc_op(TCGContext *s,
1944 const TCGOpDef *def, TCGOpcode opc,
1945 const TCGArg *args, uint16_t dead_args,
1948 TCGRegSet allocated_regs;
1949 int i, k, nb_iargs, nb_oargs;
1952 const TCGArgConstraint *arg_ct;
1954 TCGArg new_args[TCG_MAX_OP_ARGS];
1955 int const_args[TCG_MAX_OP_ARGS];
1957 nb_oargs = def->nb_oargs;
1958 nb_iargs = def->nb_iargs;
1960 /* copy constants */
1961 memcpy(new_args + nb_oargs + nb_iargs,
1962 args + nb_oargs + nb_iargs,
1963 sizeof(TCGArg) * def->nb_cargs);
1965 /* satisfy input constraints */
1966 tcg_regset_set(allocated_regs, s->reserved_regs);
1967 for(k = 0; k < nb_iargs; k++) {
1968 i = def->sorted_args[nb_oargs + k];
1970 arg_ct = &def->args_ct[i];
1971 ts = &s->temps[arg];
/* input currently in memory: load it into a register allowed by the
   constraint; the register then matches the memory copy */
1972 if (ts->val_type == TEMP_VAL_MEM) {
1973 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
1974 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
1975 ts->val_type = TEMP_VAL_REG;
1977 ts->mem_coherent = 1;
1978 s->reg_to_temp[reg] = ts;
1979 } else if (ts->val_type == TEMP_VAL_CONST) {
1980 if (tcg_target_const_match(ts->val, ts->type, arg_ct)) {
1981 /* constant is OK for instruction */
1983 new_args[i] = ts->val;
1986 /* need to move to a register */
1987 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
1988 tcg_out_movi(s, ts->type, reg, ts->val);
1989 ts->val_type = TEMP_VAL_REG;
1991 ts->mem_coherent = 0;
1992 s->reg_to_temp[reg] = ts;
1995 assert(ts->val_type == TEMP_VAL_REG);
/* inputs aliased to an output need extra care, because the op will
   overwrite the register they share with that output */
1996 if (arg_ct->ct & TCG_CT_IALIAS) {
1997 if (ts->fixed_reg) {
1998 /* if fixed register, we must allocate a new register
1999 if the alias is not the same register */
2000 if (arg != args[arg_ct->alias_index])
2001 goto allocate_in_reg;
2003 /* if the input is aliased to an output and if it is
2004 not dead after the instruction, we must allocate
2005 a new register and move it */
2006 if (!IS_DEAD_ARG(i)) {
2007 goto allocate_in_reg;
2009 /* check if the current register has already been allocated
2010 for another input aliased to an output */
2012 for (k2 = 0 ; k2 < k ; k2++) {
2013 i2 = def->sorted_args[nb_oargs + k2];
2014 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2015 (new_args[i2] == ts->reg)) {
2016 goto allocate_in_reg;
2022 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2023 /* nothing to do : the constraint is satisfied */
2026 /* allocate a new register matching the constraint
2027 and move the temporary register into it */
2028 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2029 tcg_out_mov(s, ts->type, reg, ts->reg);
/* keep this register out of reach of later allocations for this op */
2033 tcg_regset_set_reg(allocated_regs, reg);
2037 /* mark dead temporaries and free the associated registers */
2038 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2039 if (IS_DEAD_ARG(i)) {
2040 temp_dead(s, &s->temps[args[i]]);
2044 if (def->flags & TCG_OPF_BB_END) {
2045 tcg_reg_alloc_bb_end(s, allocated_regs);
2047 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2048 /* XXX: permit generic clobber register list ? */
2049 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2050 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2055 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2056 /* sync globals if the op has side effects and might trigger
2058 sync_globals(s, allocated_regs);
2061 /* satisfy the output constraints */
2062 tcg_regset_set(allocated_regs, s->reserved_regs);
2063 for(k = 0; k < nb_oargs; k++) {
2064 i = def->sorted_args[k];
2066 arg_ct = &def->args_ct[i];
2067 ts = &s->temps[arg];
/* an aliased output reuses the register chosen for its input */
2068 if (arg_ct->ct & TCG_CT_ALIAS) {
2069 reg = new_args[arg_ct->alias_index];
2071 /* if fixed register, we try to use it */
2073 if (ts->fixed_reg &&
2074 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2077 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2079 tcg_regset_set_reg(allocated_regs, reg);
2080 /* if a fixed register is used, then a move will be done afterwards */
2081 if (!ts->fixed_reg) {
2082 if (ts->val_type == TEMP_VAL_REG) {
2083 s->reg_to_temp[ts->reg] = NULL;
2085 ts->val_type = TEMP_VAL_REG;
2087 /* temp value is modified, so the value kept in memory is
2088 potentially not the same */
2089 ts->mem_coherent = 0;
2090 s->reg_to_temp[reg] = ts;
2097 /* emit instruction */
2098 tcg_out_op(s, opc, new_args, const_args);
2100 /* move the outputs in the correct register if needed */
2101 for(i = 0; i < nb_oargs; i++) {
2102 ts = &s->temps[args[i]];
2104 if (ts->fixed_reg && ts->reg != reg) {
2105 tcg_out_mov(s, ts->type, ts->reg, reg);
2107 if (NEED_SYNC_ARG(i)) {
2108 tcg_reg_sync(s, reg);
2110 if (IS_DEAD_ARG(i)) {
/* STACK_DIR(x) negates x when the host defines
   TCG_TARGET_STACK_GROWSUP and leaves it unchanged otherwise. */
2116 #ifdef TCG_TARGET_STACK_GROWSUP
2117 #define STACK_DIR(x) (-(x))
2119 #define STACK_DIR(x) (x)
/* Register allocation and code emission for a helper call.
   args holds nb_oargs outputs, then nb_iargs inputs, then the function
   address and the call flags.  Visible steps, in order:
     1. inputs beyond the argument registers are stored to the call
        stack area;
     2. the remaining inputs are placed in tcg_target_call_iarg_regs;
     3. dying inputs are marked dead;
     4. call-clobbered registers are handled;
     5. globals are saved or synced depending on the call flags;
     6. the call is emitted;
     7. outputs are bound to tcg_target_call_oarg_regs, then synced or
        killed as the liveness bits request. */
2122 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
2123 const TCGArg * const args, uint16_t dead_args,
2126 int flags, nb_regs, i;
2130 intptr_t stack_offset;
2131 size_t call_stack_size;
2132 tcg_insn_unit *func_addr;
2134 TCGRegSet allocated_regs;
/* the two trailing args are the callee address and the call flags */
2136 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
2137 flags = args[nb_oargs + nb_iargs + 1];
2139 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2140 if (nb_regs > nb_iargs) {
2144 /* assign stack slots first */
/* one tcg_target_long per stack-passed input, rounded up to
   TCG_TARGET_STACK_ALIGN */
2145 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
2146 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2147 ~(TCG_TARGET_STACK_ALIGN - 1);
2148 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2149 if (allocate_args) {
2150 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2151 preallocate call stack */
2155 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2156 for(i = nb_regs; i < nb_iargs; i++) {
2157 arg = args[nb_oargs + i];
2158 #ifdef TCG_TARGET_STACK_GROWSUP
2159 stack_offset -= sizeof(tcg_target_long);
/* dummy arguments still consume a slot but nothing is stored for them */
2161 if (arg != TCG_CALL_DUMMY_ARG) {
2162 ts = &s->temps[arg];
2163 if (ts->val_type == TEMP_VAL_REG) {
2164 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2165 } else if (ts->val_type == TEMP_VAL_MEM) {
2166 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
2168 /* XXX: not correct if reading values from the stack */
2169 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2170 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
2171 } else if (ts->val_type == TEMP_VAL_CONST) {
2172 reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
2174 /* XXX: sign extend may be needed on some targets */
2175 tcg_out_movi(s, ts->type, reg, ts->val);
2176 tcg_out_st(s, ts->type, reg, TCG_REG_CALL_STACK, stack_offset);
2181 #ifndef TCG_TARGET_STACK_GROWSUP
2182 stack_offset += sizeof(tcg_target_long);
2186 /* assign input registers */
2187 tcg_regset_set(allocated_regs, s->reserved_regs);
2188 for(i = 0; i < nb_regs; i++) {
2189 arg = args[nb_oargs + i];
2190 if (arg != TCG_CALL_DUMMY_ARG) {
2191 ts = &s->temps[arg];
2192 reg = tcg_target_call_iarg_regs[i];
/* evict whatever currently occupies the argument register */
2193 tcg_reg_free(s, reg);
2194 if (ts->val_type == TEMP_VAL_REG) {
2195 if (ts->reg != reg) {
2196 tcg_out_mov(s, ts->type, reg, ts->reg);
2198 } else if (ts->val_type == TEMP_VAL_MEM) {
2199 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
2200 } else if (ts->val_type == TEMP_VAL_CONST) {
2201 /* XXX: sign extend ? */
2202 tcg_out_movi(s, ts->type, reg, ts->val);
2206 tcg_regset_set_reg(allocated_regs, reg);
2210 /* mark dead temporaries and free the associated registers */
2211 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2212 if (IS_DEAD_ARG(i)) {
2213 temp_dead(s, &s->temps[args[i]]);
2217 /* clobber call registers */
2218 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2219 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2224 /* Save globals if they might be written by the helper, sync them if
2225 they might be read. */
2226 if (flags & TCG_CALL_NO_READ_GLOBALS) {
2228 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
2229 sync_globals(s, allocated_regs);
2231 save_globals(s, allocated_regs);
2234 tcg_out_call(s, func_addr);
2236 /* assign output registers and emit moves if needed */
2237 for(i = 0; i < nb_oargs; i++) {
2239 ts = &s->temps[arg];
2240 reg = tcg_target_call_oarg_regs[i];
/* the return register must be unoccupied at this point */
2241 assert(s->reg_to_temp[reg] == NULL);
2243 if (ts->fixed_reg) {
2244 if (ts->reg != reg) {
2245 tcg_out_mov(s, ts->type, ts->reg, reg);
2248 if (ts->val_type == TEMP_VAL_REG) {
2249 s->reg_to_temp[ts->reg] = NULL;
2251 ts->val_type = TEMP_VAL_REG;
2253 ts->mem_coherent = 0;
2254 s->reg_to_temp[reg] = ts;
2255 if (NEED_SYNC_ARG(i)) {
2256 tcg_reg_sync(s, reg);
2258 if (IS_DEAD_ARG(i)) {
/* Per-opcode usage statistics.  With CONFIG_PROFILER the table holds
   one counter per opcode and tcg_dump_op_count() prints each opcode
   name with its counter through cpu_fprintf.  The second definition
   only prints a notice that the profiler is not compiled in. */
2265 #ifdef CONFIG_PROFILER
2267 static int64_t tcg_table_op_count[NB_OPS];
2269 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2273 for (i = 0; i < NB_OPS; i++) {
2274 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
2275 tcg_table_op_count[i]);
2279 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2281 cpu_fprintf(f, "[TCG profiler not compiled]\n");
/* Translate the ops recorded in s into host code written at
   gen_code_buf.  The op list is run through liveness analysis, then
   walked once; each op is dispatched to the matching tcg_reg_alloc_*()
   routine.  On the path visible here the function ends by flushing the
   instruction cache over the generated range and returning
   tcg_current_code_size(s).  The values returned on the overflow and
   finalization-failure paths are not visible in this excerpt. */
2286 int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
2288 int i, oi, oi_next, num_insns;
/* profiler only: track the largest op count and temp count seen */
2290 #ifdef CONFIG_PROFILER
2294 n = s->gen_last_op_idx + 1;
2296 if (n > s->op_count_max) {
2297 s->op_count_max = n;
2302 if (n > s->temp_count_max) {
2303 s->temp_count_max = n;
2309 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
/* the -= here and += below bracket the timed section, so the field
   accumulates the elapsed clock */
2316 #ifdef CONFIG_PROFILER
2317 s->opt_time -= profile_getclock();
2320 #ifdef USE_TCG_OPTIMIZATIONS
2324 #ifdef CONFIG_PROFILER
2325 s->opt_time += profile_getclock();
2326 s->la_time -= profile_getclock();
2329 tcg_liveness_analysis(s);
2331 #ifdef CONFIG_PROFILER
2332 s->la_time += profile_getclock();
2336 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
2337 qemu_log("OP after optimization and liveness analysis:\n");
2343 tcg_reg_alloc_start(s);
2345 s->code_buf = gen_code_buf;
2346 s->code_ptr = gen_code_buf;
/* walk the op list by index; the loop ends when the next index is
   negative */
2351 for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
2352 TCGOp * const op = &s->gen_op_buf[oi];
2353 TCGArg * const args = &s->gen_opparam_buf[op->args];
2354 TCGOpcode opc = op->opc;
2355 const TCGOpDef *def = &tcg_op_defs[opc];
/* liveness results for this op, consumed by IS_DEAD_ARG/NEED_SYNC_ARG */
2356 uint16_t dead_args = s->op_dead_args[oi];
2357 uint8_t sync_args = s->op_sync_args[oi];
2360 #ifdef CONFIG_PROFILER
2361 tcg_table_op_count[opc]++;
2365 case INDEX_op_mov_i32:
2366 case INDEX_op_mov_i64:
2367 tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
2369 case INDEX_op_movi_i32:
2370 case INDEX_op_movi_i64:
2371 tcg_reg_alloc_movi(s, args, dead_args, sync_args);
2373 case INDEX_op_insn_start:
/* close the previous guest instruction by recording where its host
   code ends */
2374 if (num_insns >= 0) {
2375 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2378 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
/* when the guest word is wider than a host register, each value is
   rebuilt from two consecutive args (low half first) */
2380 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2381 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
2385 s->gen_insn_data[num_insns][i] = a;
2388 case INDEX_op_discard:
2389 temp_dead(s, &s->temps[args[0]]);
2391 case INDEX_op_set_label:
/* a label starts a new basic block: flush state, then bind the label to
   the current output position */
2392 tcg_reg_alloc_bb_end(s, s->reserved_regs);
2393 tcg_out_label(s, arg_label(args[0]), s->code_ptr);
2396 tcg_reg_alloc_call(s, op->callo, op->calli, args,
2397 dead_args, sync_args);
2400 /* Sanity check that we've not introduced any unhandled opcodes. */
2401 if (def->flags & TCG_OPF_NOT_PRESENT) {
2404 /* Note: in order to speed up the code, it would be much
2405 faster to have specialized register allocator functions for
2406 some common argument patterns */
2407 tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
2413 /* Test for (pending) buffer overflow. The assumption is that any
2414 one operation beginning below the high water mark cannot overrun
2415 the buffer completely. Thus we can test for overflow after
2416 generating code without having to check during generation. */
2417 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
/* record the end offset of the last guest instruction */
2421 tcg_debug_assert(num_insns >= 0);
2422 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2424 /* Generate TB finalization at the end of block */
2425 if (!tcg_out_tb_finalize(s)) {
2429 /* flush instruction cache */
2430 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
2432 return tcg_current_code_size(s);
/* Print the accumulated profiler statistics of the global tcg_ctx
   through cpu_fprintf.  Most per-TB averages divide by tb_div_count,
   which is tb_count with 0 replaced by 1, and the per-op and per-byte
   ratios print 0 when their denominator is 0.  The second definition
   only prints a notice that the profiler is not compiled in. */
2435 #ifdef CONFIG_PROFILER
2436 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2438 TCGContext *s = &tcg_ctx;
2439 int64_t tb_count = s->tb_count;
2440 int64_t tb_div_count = tb_count ? tb_count : 1;
/* total cycles spent in both translation phases */
2441 int64_t tot = s->interm_time + s->code_time;
2443 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
2445 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
2446 tb_count, s->tb_count1 - tb_count,
2447 (double)(s->tb_count1 - s->tb_count)
2448 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
2449 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
2450 (double)s->op_count / tb_div_count, s->op_count_max);
2451 cpu_fprintf(f, "deleted ops/TB %0.2f\n",
2452 (double)s->del_op_count / tb_div_count);
2453 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
2454 (double)s->temp_count / tb_div_count, s->temp_count_max);
2455 cpu_fprintf(f, "avg host code/TB %0.1f\n",
2456 (double)s->code_out_len / tb_div_count);
2457 cpu_fprintf(f, "avg search data/TB %0.1f\n",
2458 (double)s->search_out_len / tb_div_count);
2460 cpu_fprintf(f, "cycles/op %0.1f\n",
2461 s->op_count ? (double)tot / s->op_count : 0);
2462 cpu_fprintf(f, "cycles/in byte %0.1f\n",
2463 s->code_in_len ? (double)tot / s->code_in_len : 0);
2464 cpu_fprintf(f, "cycles/out byte %0.1f\n",
2465 s->code_out_len ? (double)tot / s->code_out_len : 0);
2466 cpu_fprintf(f, "cycles/search byte %0.1f\n",
2467 s->search_out_len ? (double)tot / s->search_out_len : 0);
/* NOTE(review): the two lines below divide by tot; any guard against
   tot == 0 is not visible in this excerpt -- confirm. */
2471 cpu_fprintf(f, " gen_interm time %0.1f%%\n",
2472 (double)s->interm_time / tot * 100.0);
2473 cpu_fprintf(f, " gen_code time %0.1f%%\n",
2474 (double)s->code_time / tot * 100.0);
2475 cpu_fprintf(f, "optim./code time %0.1f%%\n",
2476 (double)s->opt_time / (s->code_time ? s->code_time : 1)
2478 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
2479 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2480 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
2482 cpu_fprintf(f, " avg cycles %0.1f\n",
2483 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
2486 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2488 cpu_fprintf(f, "[TCG profiler not compiled]\n");
2492 #ifdef ELF_HOST_MACHINE
2493 /* In order to use this feature, the backend needs to do three things:
2495 (1) Define ELF_HOST_MACHINE to indicate both what value to
2496 put into the ELF image and to indicate support for the feature.
2498 (2) Define tcg_register_jit. This should create a buffer containing
2499 the contents of a .debug_frame section that describes the post-
2500 prologue unwind info for the tcg machine.
2502 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
2505 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* One registered symbol file, as a node with next/prev links.  The
   layout is part of the GDB interface and must not be changed.
   symfile_addr and symfile_size give the location and byte size of the
   in-memory image handed to the debugger. */
2512 struct jit_code_entry {
2513 struct jit_code_entry *next_entry;
2514 struct jit_code_entry *prev_entry;
2515 const void *symfile_addr;
2516 uint64_t symfile_size;
/* Descriptor object of the GDB interface; the layout must not be
   changed.  action_flag says what was done to relevant_entry, and
   first_entry points at the first registered entry. */
2519 struct jit_descriptor {
2521 uint32_t action_flag;
2522 struct jit_code_entry *relevant_entry;
2523 struct jit_code_entry *first_entry;
/* Function called after __jit_debug_descriptor has been updated.  It is
   declared noinline so that it remains a real, separately addressable
   function.
   NOTE(review): presumably the debugger places a breakpoint here, as
   described in the GDB JIT interface documentation -- confirm. */
2526 void __jit_debug_register_code(void) __attribute__((noinline));
2527 void __jit_debug_register_code(void)
2532 /* Must statically initialize the version, because GDB may check
2533 the version before we can set it. */
2534 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
2536 /* End GDB interface. */
/* Search the string table strtab for the string str, starting at the
   second byte of the table and comparing with strcmp().
   NOTE(review): presumably returns the byte offset of the match within
   strtab (callers store the result in sh_name / st_name); the loop and
   the return statements are not visible here -- confirm. */
2538 static int find_string(const char *strtab, const char *str)
2540 const char *p = strtab + 1;
2543 if (strcmp(p, str) == 0) {
/* Build a minimal in-memory ELF image describing the code buffer
   [buf_ptr, buf_ptr + buf_size) and register it with the debugger
   through __jit_debug_descriptor.
   The image is a copy of a static template followed by a copy of the
   caller-supplied debug_frame (debug_frame_size bytes); the address
   and size fields of the copy are then patched with the real buffer
   address and size.  The image is allocated with g_malloc() and stays
   referenced by the static jit entry. */
2550 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
2551 const void *debug_frame,
2552 size_t debug_frame_size)
/* packed so that the struct bytes are exactly the .debug_info contents */
2554 struct __attribute__((packed)) DebugInfo {
2561 uintptr_t cu_low_pc;
2562 uintptr_t cu_high_pc;
2565 uintptr_t fn_low_pc;
2566 uintptr_t fn_high_pc;
2575 struct DebugInfo di;
2580 struct ElfImage *img;
2582 static const struct ElfImage img_template = {
2584 .e_ident[EI_MAG0] = ELFMAG0,
2585 .e_ident[EI_MAG1] = ELFMAG1,
2586 .e_ident[EI_MAG2] = ELFMAG2,
2587 .e_ident[EI_MAG3] = ELFMAG3,
2588 .e_ident[EI_CLASS] = ELF_CLASS,
2589 .e_ident[EI_DATA] = ELF_DATA,
2590 .e_ident[EI_VERSION] = EV_CURRENT,
2592 .e_machine = ELF_HOST_MACHINE,
2593 .e_version = EV_CURRENT,
2594 .e_phoff = offsetof(struct ElfImage, phdr),
2595 .e_shoff = offsetof(struct ElfImage, shdr),
/* NOTE(review): e_ehsize is the size of the ELF file header, yet it is
   initialised from the section header type here; sizeof(ElfW(Ehdr))
   looks like the intended value -- confirm against the ELF
   specification before changing. */
2596 .e_ehsize = sizeof(ElfW(Shdr)),
2597 .e_phentsize = sizeof(ElfW(Phdr)),
2599 .e_shentsize = sizeof(ElfW(Shdr)),
2600 .e_shnum = ARRAY_SIZE(img->shdr),
/* the section-name string table is the last section header */
2601 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
2602 #ifdef ELF_HOST_FLAGS
2603 .e_flags = ELF_HOST_FLAGS,
2606 .e_ident[EI_OSABI] = ELF_OSABI,
2614 [0] = { .sh_type = SHT_NULL },
2615 /* Trick: The contents of code_gen_buffer are not present in
2616 this fake ELF file; that got allocated elsewhere. Therefore
2617 we mark .text as SHT_NOBITS (similar to .bss) so that readers
2618 will not look for contents. We can record any address. */
2620 .sh_type = SHT_NOBITS,
2621 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
2623 [2] = { /* .debug_info */
2624 .sh_type = SHT_PROGBITS,
2625 .sh_offset = offsetof(struct ElfImage, di),
2626 .sh_size = sizeof(struct DebugInfo),
2628 [3] = { /* .debug_abbrev */
2629 .sh_type = SHT_PROGBITS,
2630 .sh_offset = offsetof(struct ElfImage, da),
2631 .sh_size = sizeof(img->da),
/* .debug_frame starts right after the fixed-size image; its size is
   filled in at run time */
2633 [4] = { /* .debug_frame */
2634 .sh_type = SHT_PROGBITS,
2635 .sh_offset = sizeof(struct ElfImage),
2637 [5] = { /* .symtab */
2638 .sh_type = SHT_SYMTAB,
2639 .sh_offset = offsetof(struct ElfImage, sym),
2640 .sh_size = sizeof(img->sym),
2642 .sh_link = ARRAY_SIZE(img->shdr) - 1,
2643 .sh_entsize = sizeof(ElfW(Sym)),
2645 [6] = { /* .strtab */
2646 .sh_type = SHT_STRTAB,
2647 .sh_offset = offsetof(struct ElfImage, str),
2648 .sh_size = sizeof(img->str),
2652 [1] = { /* code_gen_buffer */
2653 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
/* length field excludes its own 4 bytes */
2658 .len = sizeof(struct DebugInfo) - 4,
2660 .ptr_size = sizeof(void *),
2662 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
2664 .fn_name = "code_gen_buffer"
2667 1, /* abbrev number (the cu) */
2668 0x11, 1, /* DW_TAG_compile_unit, has children */
2669 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
2670 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2671 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2672 0, 0, /* end of abbrev */
2673 2, /* abbrev number (the fn) */
2674 0x2e, 0, /* DW_TAG_subprogram, no children */
2675 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
2676 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2677 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2678 0, 0, /* end of abbrev */
2679 0 /* no more abbrev */
2681 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2682 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2685 /* We only need a single jit entry; statically allocate it. */
2686 static struct jit_code_entry one_entry;
2688 uintptr_t buf = (uintptr_t)buf_ptr;
/* fixed image plus the variable-size debug frame appended after it */
2689 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
2690 DebugFrameHeader *dfh;
2692 img = g_malloc(img_size);
2693 *img = img_template;
/* patch the run-time address and size of the code buffer into the copy */
2695 img->phdr.p_vaddr = buf;
2696 img->phdr.p_paddr = buf;
2697 img->phdr.p_memsz = buf_size;
2699 img->shdr[1].sh_name = find_string(img->str, ".text");
2700 img->shdr[1].sh_addr = buf;
2701 img->shdr[1].sh_size = buf_size;
2703 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
2704 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
2706 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
2707 img->shdr[4].sh_size = debug_frame_size;
2709 img->shdr[5].sh_name = find_string(img->str, ".symtab");
2710 img->shdr[6].sh_name = find_string(img->str, ".strtab");
2712 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
2713 img->sym[1].st_value = buf;
2714 img->sym[1].st_size = buf_size;
2716 img->di.cu_low_pc = buf;
2717 img->di.cu_high_pc = buf + buf_size;
2718 img->di.fn_low_pc = buf;
2719 img->di.fn_high_pc = buf + buf_size;
/* the debug frame lives immediately after the fixed image; copy it and
   point its FDE at the code buffer */
2721 dfh = (DebugFrameHeader *)(img + 1);
2722 memcpy(dfh, debug_frame, debug_frame_size);
2723 dfh->fde.func_start = buf;
2724 dfh->fde.func_len = buf_size;
2727 /* Enable this block to be able to debug the ELF image file creation.
2728 One can use readelf, objdump, or other inspection utilities. */
2730 FILE *f = fopen("/tmp/qemu.jit", "w+b");
/* NOTE(review): fwrite() returns the number of items written (1 at
   most with these arguments), so the comparison with img_size does not
   test for success; the result is only consumed to silence a warning. */
2732 if (fwrite(img, img_size, 1, f) != img_size) {
2733 /* Avoid stupid unused return value warning for fwrite. */
/* publish the image and notify the debugger */
2740 one_entry.symfile_addr = img;
2741 one_entry.symfile_size = img_size;
2743 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
2744 __jit_debug_descriptor.relevant_entry = &one_entry;
2745 __jit_debug_descriptor.first_entry = &one_entry;
2746 __jit_debug_register_code();
2749 /* No support for the feature. Provide the entry point expected by exec.c,
2750 and implement the internal function we declared earlier. */
/* Both definitions below belong to the build without ELF_HOST_MACHINE;
   their bodies are not visible in this excerpt. */
2752 static void tcg_register_jit_int(void *buf, size_t size,
2753 const void *debug_frame,
2754 size_t debug_frame_size)
2758 void tcg_register_jit(void *buf, size_t buf_size)
2761 #endif /* ELF_HOST_MACHINE */