2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 /* define it to use liveness analysis (better code) */
26 #define USE_LIVENESS_ANALYSIS
27 #define USE_TCG_OPTIMIZATIONS
29 #include "qemu/osdep.h"
31 /* Define to dump the ELF file used to communicate with GDB. */
34 #if !defined(CONFIG_DEBUG_TCG) && !defined(NDEBUG)
35 /* define it to suppress various consistency checks (faster) */
39 #include "qemu-common.h"
40 #include "qemu/host-utils.h"
41 #include "qemu/timer.h"
43 /* Note: the long term plan is to reduce the dependencies on the QEMU
44 CPU definitions. Currently they are used for qemu_ld/st
46 #define NO_CPU_IO_DEFS
51 #if UINTPTR_MAX == UINT32_MAX
52 # define ELF_CLASS ELFCLASS32
54 # define ELF_CLASS ELFCLASS64
56 #ifdef HOST_WORDS_BIGENDIAN
57 # define ELF_DATA ELFDATA2MSB
59 # define ELF_DATA ELFDATA2LSB
65 /* Forward declarations for functions declared in tcg-target.c and used here. */
66 static void tcg_target_init(TCGContext *s);
67 static void tcg_target_qemu_prologue(TCGContext *s);
68 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
69 intptr_t value, intptr_t addend);
71 /* The CIE and FDE header definitions will be common to all hosts. */
73 uint32_t len __attribute__((aligned((sizeof(void *)))));
79 uint8_t return_column;
82 typedef struct QEMU_PACKED {
83 uint32_t len __attribute__((aligned((sizeof(void *)))));
87 } DebugFrameFDEHeader;
89 typedef struct QEMU_PACKED {
91 DebugFrameFDEHeader fde;
94 static void tcg_register_jit_int(void *buf, size_t size,
95 const void *debug_frame,
96 size_t debug_frame_size)
97 __attribute__((unused));
99 /* Forward declarations for functions declared and used in tcg-target.c. */
100 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
103 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105 TCGReg ret, tcg_target_long arg);
106 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
107 const int *const_args);
108 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
110 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
111 static int tcg_target_const_match(tcg_target_long val, TCGType type,
112 const TCGArgConstraint *arg_ct);
113 static void tcg_out_tb_init(TCGContext *s);
114 static bool tcg_out_tb_finalize(TCGContext *s);
118 static TCGRegSet tcg_target_available_regs[2];
119 static TCGRegSet tcg_target_call_clobber_regs;
121 #if TCG_TARGET_INSN_UNIT_SIZE == 1
122 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
127 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
134 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
135 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
137 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
140 tcg_insn_unit *p = s->code_ptr;
141 memcpy(p, &v, sizeof(v));
142 s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
146 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
149 if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
152 memcpy(p, &v, sizeof(v));
157 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
158 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
160 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
163 tcg_insn_unit *p = s->code_ptr;
164 memcpy(p, &v, sizeof(v));
165 s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
169 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
172 if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
175 memcpy(p, &v, sizeof(v));
180 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
181 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
183 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
186 tcg_insn_unit *p = s->code_ptr;
187 memcpy(p, &v, sizeof(v));
188 s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
192 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
195 if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
198 memcpy(p, &v, sizeof(v));
203 /* label relocation processing */
205 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
206 TCGLabel *l, intptr_t addend)
211 /* FIXME: This may break relocations on RISC targets that
212 modify instruction fields in place. The caller may not have
213 written the initial value. */
214 patch_reloc(code_ptr, type, l->u.value, addend);
216 /* add a new relocation entry */
217 r = tcg_malloc(sizeof(TCGRelocation));
221 r->next = l->u.first_reloc;
222 l->u.first_reloc = r;
/*
 * Bind label L to the code address PTR.
 *
 * Each relocation entry queued on the label's first_reloc list (entries are
 * added by tcg_out_reloc) is patched with the address via patch_reloc(),
 * and PTR is then stored in the label.  The label must not have a value yet.
 */
226 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
228 intptr_t value = (intptr_t)ptr;
/* Binding a label that already has a value is a programming error. */
231 assert(!l->has_value);
/* Patch every pending reference to this label with its final address. */
233 for (r = l->u.first_reloc; r != NULL; r = r->next) {
234 patch_reloc(r->ptr, r->type, value, r->addend);
238 l->u.value_ptr = ptr;
241 TCGLabel *gen_new_label(void)
243 TCGContext *s = &tcg_ctx;
244 TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
253 #include "tcg-target.c"
255 /* pool based memory allocation */
256 void *tcg_malloc_internal(TCGContext *s, int size)
261 if (size > TCG_POOL_CHUNK_SIZE) {
262 /* big malloc: insert a new pool (XXX: could optimize) */
263 p = g_malloc(sizeof(TCGPool) + size);
265 p->next = s->pool_first_large;
266 s->pool_first_large = p;
277 pool_size = TCG_POOL_CHUNK_SIZE;
278 p = g_malloc(sizeof(TCGPool) + pool_size);
282 s->pool_current->next = p;
291 s->pool_cur = p->data + size;
292 s->pool_end = p->data + p->size;
/*
 * Reset the pool allocator state held in S.
 *
 * The list of oversized allocations headed by pool_first_large is walked
 * and then emptied, and the current-chunk pointers (pool_cur, pool_end,
 * pool_current) are set to NULL.
 */
296 void tcg_pool_reset(TCGContext *s)
/* NOTE(review): the loop body is not visible in this excerpt; presumably it
   releases each large pool and loads T with the successor -- confirm. */
299 for (p = s->pool_first_large; p; p = t) {
303 s->pool_first_large = NULL;
304 s->pool_cur = s->pool_end = NULL;
305 s->pool_current = NULL;
308 typedef struct TCGHelperInfo {
315 #include "exec/helper-proto.h"
317 static const TCGHelperInfo all_helpers[] = {
318 #include "exec/helper-tcg.h"
321 void tcg_context_init(TCGContext *s)
323 int op, total_args, n, i;
325 TCGArgConstraint *args_ct;
327 GHashTable *helper_table;
329 memset(s, 0, sizeof(*s));
332 /* Count total number of arguments and allocate the corresponding
335 for(op = 0; op < NB_OPS; op++) {
336 def = &tcg_op_defs[op];
337 n = def->nb_iargs + def->nb_oargs;
341 args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
342 sorted_args = g_malloc(sizeof(int) * total_args);
344 for(op = 0; op < NB_OPS; op++) {
345 def = &tcg_op_defs[op];
346 def->args_ct = args_ct;
347 def->sorted_args = sorted_args;
348 n = def->nb_iargs + def->nb_oargs;
353 /* Register helpers. */
354 /* Use g_direct_hash/equal for direct pointer comparisons on func. */
355 s->helpers = helper_table = g_hash_table_new(NULL, NULL);
357 for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
358 g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
359 (gpointer)&all_helpers[i]);
365 void tcg_prologue_init(TCGContext *s)
367 size_t prologue_size, total_size;
370 /* Put the prologue at the beginning of code_gen_buffer. */
371 buf0 = s->code_gen_buffer;
374 s->code_gen_prologue = buf0;
376 /* Generate the prologue. */
377 tcg_target_qemu_prologue(s);
379 flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
381 /* Deduct the prologue from the buffer. */
382 prologue_size = tcg_current_code_size(s);
383 s->code_gen_ptr = buf1;
384 s->code_gen_buffer = buf1;
386 total_size = s->code_gen_buffer_size - prologue_size;
387 s->code_gen_buffer_size = total_size;
389 /* Compute a high-water mark, at which we voluntarily flush the buffer
390 and start over. The size here is arbitrary, significantly larger
391 than we expect the code generation for any one opcode to require. */
392 s->code_gen_highwater = s->code_gen_buffer + (total_size - 1024);
394 tcg_register_jit(s->code_gen_buffer, total_size);
397 if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
398 qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
399 log_disas(buf0, prologue_size);
/*
 * Reset the per-function state of S.
 *
 * All non-global temps are dropped (nb_temps is rewound to nb_globals), the
 * free-temp bitmaps are cleared, the frame offset is rewound to frame_start,
 * the op and op-parameter buffers are marked empty, and a fresh
 * TCGBackendData is allocated with tcg_malloc().
 */
406 void tcg_func_start(TCGContext *s)
/* Only the globals survive. */
409 s->nb_temps = s->nb_globals;
411 /* No temps have been previously allocated for size or locality. */
412 memset(s->free_temps, 0, sizeof(s->free_temps));
415 s->current_frame_offset = s->frame_start;
417 #ifdef CONFIG_DEBUG_TCG
418 s->goto_tb_issue_mask = 0;
/* Empty op list: first/next index 0, last index -1. */
421 s->gen_first_op_idx = 0;
422 s->gen_last_op_idx = -1;
423 s->gen_next_op_idx = 0;
424 s->gen_next_parm_idx = 0;
426 s->be = tcg_malloc(sizeof(TCGBackendData));
/*
 * Compute the index of TS within the s->temps array as a pointer
 * difference; tcg_debug_assert() checks that it lies in [0, s->nb_temps).
 */
429 static inline int temp_idx(TCGContext *s, TCGTemp *ts)
431 ptrdiff_t n = ts - s->temps;
432 tcg_debug_assert(n >= 0 && n < s->nb_temps);
/*
 * Take the next unused slot of s->temps and return it zero-filled.
 * s->nb_temps is incremented before the bound check against TCG_MAX_TEMPS.
 */
436 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
438 int n = s->nb_temps++;
439 tcg_debug_assert(n < TCG_MAX_TEMPS);
/* memset() returns its destination, i.e. the address of the new temp. */
440 return memset(&s->temps[n], 0, sizeof(TCGTemp));
/*
 * Allocate a temp slot for a new global.
 * The assertion requires that every temp allocated so far is a global
 * (nb_globals == nb_temps), so globals must be created before any other temp.
 */
443 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
445 tcg_debug_assert(s->nb_globals == s->nb_temps);
447 return tcg_temp_alloc(s);
450 static int tcg_global_reg_new_internal(TCGContext *s, TCGType type,
451 TCGReg reg, const char *name)
455 if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
459 ts = tcg_global_alloc(s);
460 ts->base_type = type;
465 tcg_regset_set_reg(s->reserved_regs, reg);
467 return temp_idx(s, ts);
/*
 * Record the frame area [START, START + SIZE) in S and create a register
 * global named "_frame" of type TCG_TYPE_PTR for REG; the resulting temp is
 * stored in s->frame_temp.
 */
470 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
473 s->frame_start = start;
474 s->frame_end = start + size;
475 idx = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
476 s->frame_temp = &s->temps[idx];
479 TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name)
481 TCGContext *s = &tcg_ctx;
484 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
487 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I32, reg, name);
488 return MAKE_TCGV_I32(idx);
491 TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name)
493 TCGContext *s = &tcg_ctx;
496 if (tcg_regset_test_reg(s->reserved_regs, reg)) {
499 idx = tcg_global_reg_new_internal(s, TCG_TYPE_I64, reg, name);
500 return MAKE_TCGV_I64(idx);
/*
 * Create a memory-backed global of TYPE located at OFFSET from the temp
 * BASE, and return the index of the new temp.
 *
 * When TCG_TARGET_REG_BITS == 32 and TYPE is TCG_TYPE_I64, the global is
 * represented by two consecutive TCG_TYPE_I32 temps (ts and ts2 == ts + 1),
 * both with base_type TCG_TYPE_I64.  Their memory offsets are
 * OFFSET + bigendian * 4 and OFFSET + (1 - bigendian) * 4, and they are
 * named NAME with the suffixes "_0" and "_1" respectively.  The returned
 * index is that of the first temp.
 */
503 int tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
504 intptr_t offset, const char *name)
506 TCGContext *s = &tcg_ctx;
507 TCGTemp *base_ts = &s->temps[GET_TCGV_PTR(base)];
508 TCGTemp *ts = tcg_global_alloc(s);
510 #ifdef HOST_WORDS_BIGENDIAN
514 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
515 TCGTemp *ts2 = tcg_global_alloc(s);
/* First half of the split 64-bit global. */
518 ts->base_type = TCG_TYPE_I64;
519 ts->type = TCG_TYPE_I32;
520 ts->mem_allocated = 1;
521 ts->mem_base = base_ts;
522 ts->mem_offset = offset + bigendian * 4;
523 pstrcpy(buf, sizeof(buf), name);
524 pstrcat(buf, sizeof(buf), "_0");
525 ts->name = strdup(buf);
/* Second half; it must be the temp immediately following the first. */
527 tcg_debug_assert(ts2 == ts + 1);
528 ts2->base_type = TCG_TYPE_I64;
529 ts2->type = TCG_TYPE_I32;
530 ts2->mem_allocated = 1;
531 ts2->mem_base = base_ts;
532 ts2->mem_offset = offset + (1 - bigendian) * 4;
533 pstrcpy(buf, sizeof(buf), name);
534 pstrcat(buf, sizeof(buf), "_1");
/* The "_1" name belongs to the second half.  Storing it in ts->name
   overwrote (and leaked) the "_0" string and left ts2, which is
   zero-filled on allocation, without any name. */
535 ts2->name = strdup(buf);
537 ts->base_type = type;
539 ts->mem_allocated = 1;
540 ts->mem_base = base_ts;
541 ts->mem_offset = offset;
544 return temp_idx(s, ts);
547 static int tcg_temp_new_internal(TCGType type, int temp_local)
549 TCGContext *s = &tcg_ctx;
553 k = type + (temp_local ? TCG_TYPE_COUNT : 0);
554 idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
555 if (idx < TCG_MAX_TEMPS) {
556 /* There is already an available temp with the right type. */
557 clear_bit(idx, s->free_temps[k].l);
560 ts->temp_allocated = 1;
561 tcg_debug_assert(ts->base_type == type);
562 tcg_debug_assert(ts->temp_local == temp_local);
564 ts = tcg_temp_alloc(s);
565 if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
566 TCGTemp *ts2 = tcg_temp_alloc(s);
568 ts->base_type = type;
569 ts->type = TCG_TYPE_I32;
570 ts->temp_allocated = 1;
571 ts->temp_local = temp_local;
573 tcg_debug_assert(ts2 == ts + 1);
574 ts2->base_type = TCG_TYPE_I64;
575 ts2->type = TCG_TYPE_I32;
576 ts2->temp_allocated = 1;
577 ts2->temp_local = temp_local;
579 ts->base_type = type;
581 ts->temp_allocated = 1;
582 ts->temp_local = temp_local;
584 idx = temp_idx(s, ts);
587 #if defined(CONFIG_DEBUG_TCG)
593 TCGv_i32 tcg_temp_new_internal_i32(int temp_local)
597 idx = tcg_temp_new_internal(TCG_TYPE_I32, temp_local);
598 return MAKE_TCGV_I32(idx);
601 TCGv_i64 tcg_temp_new_internal_i64(int temp_local)
605 idx = tcg_temp_new_internal(TCG_TYPE_I64, temp_local);
606 return MAKE_TCGV_I64(idx);
609 static void tcg_temp_free_internal(int idx)
611 TCGContext *s = &tcg_ctx;
615 #if defined(CONFIG_DEBUG_TCG)
617 if (s->temps_in_use < 0) {
618 fprintf(stderr, "More temporaries freed than allocated!\n");
622 assert(idx >= s->nb_globals && idx < s->nb_temps);
624 assert(ts->temp_allocated != 0);
625 ts->temp_allocated = 0;
627 k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
628 set_bit(idx, s->free_temps[k].l);
/* Free the 32-bit temp ARG: extracts its index with GET_TCGV_I32() and
   hands it to tcg_temp_free_internal(). */
631 void tcg_temp_free_i32(TCGv_i32 arg)
633 tcg_temp_free_internal(GET_TCGV_I32(arg));
/* Free the 64-bit temp ARG: extracts its index with GET_TCGV_I64() and
   hands it to tcg_temp_free_internal(). */
636 void tcg_temp_free_i64(TCGv_i64 arg)
638 tcg_temp_free_internal(GET_TCGV_I64(arg));
641 TCGv_i32 tcg_const_i32(int32_t val)
644 t0 = tcg_temp_new_i32();
645 tcg_gen_movi_i32(t0, val);
649 TCGv_i64 tcg_const_i64(int64_t val)
652 t0 = tcg_temp_new_i64();
653 tcg_gen_movi_i64(t0, val);
657 TCGv_i32 tcg_const_local_i32(int32_t val)
660 t0 = tcg_temp_local_new_i32();
661 tcg_gen_movi_i32(t0, val);
665 TCGv_i64 tcg_const_local_i64(int64_t val)
668 t0 = tcg_temp_local_new_i64();
669 tcg_gen_movi_i64(t0, val);
673 #if defined(CONFIG_DEBUG_TCG)
674 void tcg_clear_temp_count(void)
676 TCGContext *s = &tcg_ctx;
680 int tcg_check_temp_count(void)
682 TCGContext *s = &tcg_ctx;
683 if (s->temps_in_use) {
684 /* Clear the count so that we don't give another
685 * warning immediately next time around.
694 /* Note: we convert the 64 bit args to 32 bit and do some alignment
695 and endian swap. Maybe it would be better to do the alignment
696 and endian swap in tcg_reg_alloc_call(). */
697 void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
698 int nargs, TCGArg *args)
700 int i, real_args, nb_rets, pi, pi_first;
701 unsigned sizemask, flags;
704 info = g_hash_table_lookup(s->helpers, (gpointer)func);
706 sizemask = info->sizemask;
708 #if defined(__sparc__) && !defined(__arch64__) \
709 && !defined(CONFIG_TCG_INTERPRETER)
710 /* We have 64-bit values in one register, but need to pass as two
711 separate parameters. Split them. */
712 int orig_sizemask = sizemask;
713 int orig_nargs = nargs;
716 TCGV_UNUSED_I64(retl);
717 TCGV_UNUSED_I64(reth);
719 TCGArg *split_args = __builtin_alloca(sizeof(TCGArg) * nargs * 2);
720 for (i = real_args = 0; i < nargs; ++i) {
721 int is_64bit = sizemask & (1 << (i+1)*2);
723 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
724 TCGv_i32 h = tcg_temp_new_i32();
725 TCGv_i32 l = tcg_temp_new_i32();
726 tcg_gen_extr_i64_i32(l, h, orig);
727 split_args[real_args++] = GET_TCGV_I32(h);
728 split_args[real_args++] = GET_TCGV_I32(l);
730 split_args[real_args++] = args[i];
737 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
738 for (i = 0; i < nargs; ++i) {
739 int is_64bit = sizemask & (1 << (i+1)*2);
740 int is_signed = sizemask & (2 << (i+1)*2);
742 TCGv_i64 temp = tcg_temp_new_i64();
743 TCGv_i64 orig = MAKE_TCGV_I64(args[i]);
745 tcg_gen_ext32s_i64(temp, orig);
747 tcg_gen_ext32u_i64(temp, orig);
749 args[i] = GET_TCGV_I64(temp);
752 #endif /* TCG_TARGET_EXTEND_ARGS */
754 pi_first = pi = s->gen_next_parm_idx;
755 if (ret != TCG_CALL_DUMMY_ARG) {
756 #if defined(__sparc__) && !defined(__arch64__) \
757 && !defined(CONFIG_TCG_INTERPRETER)
758 if (orig_sizemask & 1) {
759 /* The 32-bit ABI is going to return the 64-bit value in
760 the %o0/%o1 register pair. Prepare for this by using
761 two return temporaries, and reassemble below. */
762 retl = tcg_temp_new_i64();
763 reth = tcg_temp_new_i64();
764 s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
765 s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
768 s->gen_opparam_buf[pi++] = ret;
772 if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
773 #ifdef HOST_WORDS_BIGENDIAN
774 s->gen_opparam_buf[pi++] = ret + 1;
775 s->gen_opparam_buf[pi++] = ret;
777 s->gen_opparam_buf[pi++] = ret;
778 s->gen_opparam_buf[pi++] = ret + 1;
782 s->gen_opparam_buf[pi++] = ret;
790 for (i = 0; i < nargs; i++) {
791 int is_64bit = sizemask & (1 << (i+1)*2);
792 if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
793 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
794 /* some targets want aligned 64 bit args */
796 s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
800 /* If stack grows up, then we will be placing successive
801 arguments at lower addresses, which means we need to
802 reverse the order compared to how we would normally
803 treat either big or little-endian. For those arguments
804 that will wind up in registers, this still works for
805 HPPA (the only current STACK_GROWSUP target) since the
806 argument registers are *also* allocated in decreasing
807 order. If another such target is added, this logic may
808 have to get more complicated to differentiate between
809 stack arguments and register arguments. */
810 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
811 s->gen_opparam_buf[pi++] = args[i] + 1;
812 s->gen_opparam_buf[pi++] = args[i];
814 s->gen_opparam_buf[pi++] = args[i];
815 s->gen_opparam_buf[pi++] = args[i] + 1;
821 s->gen_opparam_buf[pi++] = args[i];
824 s->gen_opparam_buf[pi++] = (uintptr_t)func;
825 s->gen_opparam_buf[pi++] = flags;
827 i = s->gen_next_op_idx;
828 tcg_debug_assert(i < OPC_BUF_SIZE);
829 tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
831 /* Set links for sequential allocation during translation. */
832 s->gen_op_buf[i] = (TCGOp){
833 .opc = INDEX_op_call,
841 /* Make sure the calli field didn't overflow. */
842 tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
844 s->gen_last_op_idx = i;
845 s->gen_next_op_idx = i + 1;
846 s->gen_next_parm_idx = pi;
848 #if defined(__sparc__) && !defined(__arch64__) \
849 && !defined(CONFIG_TCG_INTERPRETER)
850 /* Free all of the parts we allocated above. */
851 for (i = real_args = 0; i < orig_nargs; ++i) {
852 int is_64bit = orig_sizemask & (1 << (i+1)*2);
854 TCGv_i32 h = MAKE_TCGV_I32(args[real_args++]);
855 TCGv_i32 l = MAKE_TCGV_I32(args[real_args++]);
856 tcg_temp_free_i32(h);
857 tcg_temp_free_i32(l);
862 if (orig_sizemask & 1) {
863 /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them.
864 Note that describing these as TCGv_i64 eliminates an unnecessary
865 zero-extension that tcg_gen_concat_i32_i64 would create. */
866 tcg_gen_concat32_i64(MAKE_TCGV_I64(ret), retl, reth);
867 tcg_temp_free_i64(retl);
868 tcg_temp_free_i64(reth);
870 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
871 for (i = 0; i < nargs; ++i) {
872 int is_64bit = sizemask & (1 << (i+1)*2);
874 TCGv_i64 temp = MAKE_TCGV_I64(args[i]);
875 tcg_temp_free_i64(temp);
878 #endif /* TCG_TARGET_EXTEND_ARGS */
881 static void tcg_reg_alloc_start(TCGContext *s)
885 for(i = 0; i < s->nb_globals; i++) {
888 ts->val_type = TEMP_VAL_REG;
890 ts->val_type = TEMP_VAL_MEM;
893 for(i = s->nb_globals; i < s->nb_temps; i++) {
895 if (ts->temp_local) {
896 ts->val_type = TEMP_VAL_MEM;
898 ts->val_type = TEMP_VAL_DEAD;
900 ts->mem_allocated = 0;
904 memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
907 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
910 int idx = temp_idx(s, ts);
912 if (idx < s->nb_globals) {
913 pstrcpy(buf, buf_size, ts->name);
914 } else if (ts->temp_local) {
915 snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
917 snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
/*
 * Index-based wrapper around tcg_get_arg_str_ptr(): formats the name of
 * temp number IDX into BUF (capacity BUF_SIZE) and returns that function's
 * result.  IDX must satisfy 0 <= IDX < s->nb_temps.
 */
922 static char *tcg_get_arg_str_idx(TCGContext *s, char *buf,
923 int buf_size, int idx)
925 assert(idx >= 0 && idx < s->nb_temps);
926 return tcg_get_arg_str_ptr(s, buf, buf_size, &s->temps[idx]);
929 /* Find helper name. */
930 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
932 const char *ret = NULL;
934 TCGHelperInfo *info = g_hash_table_lookup(s->helpers, (gpointer)val);
942 static const char * const cond_name[] =
944 [TCG_COND_NEVER] = "never",
945 [TCG_COND_ALWAYS] = "always",
946 [TCG_COND_EQ] = "eq",
947 [TCG_COND_NE] = "ne",
948 [TCG_COND_LT] = "lt",
949 [TCG_COND_GE] = "ge",
950 [TCG_COND_LE] = "le",
951 [TCG_COND_GT] = "gt",
952 [TCG_COND_LTU] = "ltu",
953 [TCG_COND_GEU] = "geu",
954 [TCG_COND_LEU] = "leu",
955 [TCG_COND_GTU] = "gtu"
958 static const char * const ldst_name[] =
974 void tcg_dump_ops(TCGContext *s)
980 for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
981 int i, k, nb_oargs, nb_iargs, nb_cargs;
986 op = &s->gen_op_buf[oi];
988 def = &tcg_op_defs[c];
989 args = &s->gen_opparam_buf[op->args];
991 if (c == INDEX_op_insn_start) {
992 qemu_log("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
994 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
996 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
997 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
1001 qemu_log(" " TARGET_FMT_lx, a);
1003 } else if (c == INDEX_op_call) {
1004 /* variable number of arguments */
1005 nb_oargs = op->callo;
1006 nb_iargs = op->calli;
1007 nb_cargs = def->nb_cargs;
1009 /* function name, flags, out args */
1010 qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1011 tcg_find_helper(s, args[nb_oargs + nb_iargs]),
1012 args[nb_oargs + nb_iargs + 1], nb_oargs);
1013 for (i = 0; i < nb_oargs; i++) {
1014 qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1017 for (i = 0; i < nb_iargs; i++) {
1018 TCGArg arg = args[nb_oargs + i];
1019 const char *t = "<dummy>";
1020 if (arg != TCG_CALL_DUMMY_ARG) {
1021 t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
1026 qemu_log(" %s ", def->name);
1028 nb_oargs = def->nb_oargs;
1029 nb_iargs = def->nb_iargs;
1030 nb_cargs = def->nb_cargs;
1033 for (i = 0; i < nb_oargs; i++) {
1037 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1040 for (i = 0; i < nb_iargs; i++) {
1044 qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
1048 case INDEX_op_brcond_i32:
1049 case INDEX_op_setcond_i32:
1050 case INDEX_op_movcond_i32:
1051 case INDEX_op_brcond2_i32:
1052 case INDEX_op_setcond2_i32:
1053 case INDEX_op_brcond_i64:
1054 case INDEX_op_setcond_i64:
1055 case INDEX_op_movcond_i64:
1056 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
1057 qemu_log(",%s", cond_name[args[k++]]);
1059 qemu_log(",$0x%" TCG_PRIlx, args[k++]);
1063 case INDEX_op_qemu_ld_i32:
1064 case INDEX_op_qemu_st_i32:
1065 case INDEX_op_qemu_ld_i64:
1066 case INDEX_op_qemu_st_i64:
1068 TCGMemOpIdx oi = args[k++];
1069 TCGMemOp op = get_memop(oi);
1070 unsigned ix = get_mmuidx(oi);
1072 if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1073 qemu_log(",$0x%x,%u", op, ix);
1075 const char *s_al = "", *s_op;
1076 if (op & MO_AMASK) {
1077 if ((op & MO_AMASK) == MO_ALIGN) {
1083 s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1084 qemu_log(",%s%s,%u", s_al, s_op, ix);
1094 case INDEX_op_set_label:
1096 case INDEX_op_brcond_i32:
1097 case INDEX_op_brcond_i64:
1098 case INDEX_op_brcond2_i32:
1099 qemu_log("%s$L%d", k ? "," : "", arg_label(args[k])->id);
1105 for (; i < nb_cargs; i++, k++) {
1106 qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
1113 /* we give more priority to constraints with fewer registers */
/*
 * Return the sorting priority of argument constraint K of DEF.
 *
 * The constraint is classified by its flags (TCG_CT_ALIAS, TCG_CT_REG) and
 * the registers in arg_ct->u.regs are scanned; the result is
 * TCG_TARGET_NB_REGS - n + 1, so a smaller n yields a higher priority.
 * NOTE(review): the statements that set n are not visible in this excerpt;
 * presumably n counts the registers the constraint allows -- confirm.
 */
1114 static int get_constraint_priority(const TCGOpDef *def, int k)
1116 const TCGArgConstraint *arg_ct;
1119 arg_ct = &def->args_ct[k];
1120 if (arg_ct->ct & TCG_CT_ALIAS) {
1121 /* an alias is equivalent to a single register */
/* Constraints that do not name a register class are handled separately. */
1124 if (!(arg_ct->ct & TCG_CT_REG))
/* Scan every host register against the constraint's register set. */
1127 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1128 if (tcg_regset_test_reg(arg_ct->u.regs, i))
1132 return TCG_TARGET_NB_REGS - n + 1;
1135 /* sort from highest priority to lowest */
/*
 * Fill def->sorted_args[START .. START + N - 1] with the argument indices
 * START .. START + N - 1 and reorder them by get_constraint_priority().
 *
 * The reordering is a simple pairwise exchange over all (i, j) with i < j.
 * NOTE(review): the comparison guarding the swap is not visible in this
 * excerpt; it presumably swaps when p1 < p2, which would give the descending
 * order named in the preceding comment -- confirm.
 */
1136 static void sort_constraints(TCGOpDef *def, int start, int n)
1138 int i, j, p1, p2, tmp;
/* Start from the identity permutation. */
1140 for(i = 0; i < n; i++)
1141 def->sorted_args[start + i] = start + i;
1144 for(i = 0; i < n - 1; i++) {
1145 for(j = i + 1; j < n; j++) {
1146 p1 = get_constraint_priority(def, def->sorted_args[start + i]);
1147 p2 = get_constraint_priority(def, def->sorted_args[start + j]);
/* Exchange the two entries. */
1149 tmp = def->sorted_args[start + i];
1150 def->sorted_args[start + i] = def->sorted_args[start + j];
1151 def->sorted_args[start + j] = tmp;
1157 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
1165 if (tdefs->op == (TCGOpcode)-1)
1168 assert((unsigned)op < NB_OPS);
1169 def = &tcg_op_defs[op];
1170 #if defined(CONFIG_DEBUG_TCG)
1171 /* Duplicate entry in op definitions? */
1175 nb_args = def->nb_iargs + def->nb_oargs;
1176 for(i = 0; i < nb_args; i++) {
1177 ct_str = tdefs->args_ct_str[i];
1178 /* Incomplete TCGTargetOpDef entry? */
1179 assert(ct_str != NULL);
1180 tcg_regset_clear(def->args_ct[i].u.regs);
1181 def->args_ct[i].ct = 0;
1182 if (ct_str[0] >= '0' && ct_str[0] <= '9') {
1184 oarg = ct_str[0] - '0';
1185 assert(oarg < def->nb_oargs);
1186 assert(def->args_ct[oarg].ct & TCG_CT_REG);
1187 /* TCG_CT_ALIAS is for the output arguments. The input
1188 argument is tagged with TCG_CT_IALIAS. */
1189 def->args_ct[i] = def->args_ct[oarg];
1190 def->args_ct[oarg].ct = TCG_CT_ALIAS;
1191 def->args_ct[oarg].alias_index = i;
1192 def->args_ct[i].ct |= TCG_CT_IALIAS;
1193 def->args_ct[i].alias_index = oarg;
1196 if (*ct_str == '\0')
1200 def->args_ct[i].ct |= TCG_CT_CONST;
1204 if (target_parse_constraint(&def->args_ct[i], &ct_str) < 0) {
1205 fprintf(stderr, "Invalid constraint '%s' for arg %d of operation '%s'\n",
1206 ct_str, i, def->name);
1214 /* TCGTargetOpDef entry with too much information? */
1215 assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
1217 /* sort the constraints (XXX: this is just a heuristic) */
1218 sort_constraints(def, 0, def->nb_oargs);
1219 sort_constraints(def, def->nb_oargs, def->nb_iargs);
1225 printf("%s: sorted=", def->name);
1226 for(i = 0; i < def->nb_oargs + def->nb_iargs; i++)
1227 printf(" %d", def->sorted_args[i]);
1234 #if defined(CONFIG_DEBUG_TCG)
1236 for (op = 0; op < tcg_op_defs_max; op++) {
1237 const TCGOpDef *def = &tcg_op_defs[op];
1238 if (def->flags & TCG_OPF_NOT_PRESENT) {
1239 /* Wrong entry in op definitions? */
1241 fprintf(stderr, "Invalid op definition for %s\n", def->name);
1245 /* Missing entry in op definitions? */
1247 fprintf(stderr, "Missing op definition for %s\n", def->name);
/*
 * Unlink OP from the op list of S, which is linked through the next/prev
 * indices into s->gen_op_buf.
 *
 * The neighbours' links, or gen_first_op_idx / gen_last_op_idx, are updated
 * so that they bypass OP, and OP itself is then overwritten with 0xff bytes.
 * NOTE(review): the conditions choosing between the neighbour update and the
 * head/tail update are not visible in this excerpt -- confirm.
 */
1258 void tcg_op_remove(TCGContext *s, TCGOp *op)
1260 int next = op->next;
1261 int prev = op->prev;
/* Successor side: either the next op's back link or the list tail. */
1264 s->gen_op_buf[next].prev = prev;
1266 s->gen_last_op_idx = prev;
/* Predecessor side: either the previous op's forward link or the list head. */
1269 s->gen_op_buf[prev].next = next;
1271 s->gen_first_op_idx = next;
/* Fill the removed op with all-ones bytes. */
1274 memset(op, -1, sizeof(*op));
1276 #ifdef CONFIG_PROFILER
1281 #ifdef USE_LIVENESS_ANALYSIS
1282 /* liveness analysis: end of function: all temps are dead, and globals
1283 should be in memory. */
1284 static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
/* Every temp, global or not, is dead. */
1287 memset(dead_temps, 1, s->nb_temps);
/* Globals (indices below nb_globals) must be in memory ... */
1288 memset(mem_temps, 1, s->nb_globals);
/* ... while the remaining temps need not be. */
1289 memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals);
1292 /* liveness analysis: end of basic block: all temps are dead, globals
1293 and local temps should be in memory. */
1294 static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
/* Every temp, global or not, is dead. */
1299 memset(dead_temps, 1, s->nb_temps);
/* Globals (indices below nb_globals) must be in memory. */
1300 memset(mem_temps, 1, s->nb_globals);
/* A non-global temp must be in memory exactly when it is a local temp. */
1301 for(i = s->nb_globals; i < s->nb_temps; i++) {
1302 mem_temps[i] = s->temps[i].temp_local;
1306 /* Liveness analysis: update the op_dead_args array to tell if a
1307 given input argument is dead. Instructions updating dead
1308 temporaries are removed. */
1309 static void tcg_liveness_analysis(TCGContext *s)
1311 uint8_t *dead_temps, *mem_temps;
1312 int oi, oi_prev, nb_ops;
1314 nb_ops = s->gen_next_op_idx;
1315 s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1316 s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1318 dead_temps = tcg_malloc(s->nb_temps);
1319 mem_temps = tcg_malloc(s->nb_temps);
1320 tcg_la_func_end(s, dead_temps, mem_temps);
1322 for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
1323 int i, nb_iargs, nb_oargs;
1324 TCGOpcode opc_new, opc_new2;
1330 TCGOp * const op = &s->gen_op_buf[oi];
1331 TCGArg * const args = &s->gen_opparam_buf[op->args];
1332 TCGOpcode opc = op->opc;
1333 const TCGOpDef *def = &tcg_op_defs[opc];
1342 nb_oargs = op->callo;
1343 nb_iargs = op->calli;
1344 call_flags = args[nb_oargs + nb_iargs + 1];
1346 /* pure functions can be removed if their result is unused */
1347 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
1348 for (i = 0; i < nb_oargs; i++) {
1350 if (!dead_temps[arg] || mem_temps[arg]) {
1351 goto do_not_remove_call;
1358 /* output args are dead */
1361 for (i = 0; i < nb_oargs; i++) {
1363 if (dead_temps[arg]) {
1364 dead_args |= (1 << i);
1366 if (mem_temps[arg]) {
1367 sync_args |= (1 << i);
1369 dead_temps[arg] = 1;
1373 if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
1374 /* globals should be synced to memory */
1375 memset(mem_temps, 1, s->nb_globals);
1377 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
1378 TCG_CALL_NO_READ_GLOBALS))) {
1379 /* globals should go back to memory */
1380 memset(dead_temps, 1, s->nb_globals);
1383 /* record arguments that die in this helper */
1384 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
1386 if (arg != TCG_CALL_DUMMY_ARG) {
1387 if (dead_temps[arg]) {
1388 dead_args |= (1 << i);
1392 /* input arguments are live for preceding opcodes */
1393 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1395 dead_temps[arg] = 0;
1397 s->op_dead_args[oi] = dead_args;
1398 s->op_sync_args[oi] = sync_args;
1402 case INDEX_op_insn_start:
1404 case INDEX_op_discard:
1405 /* mark the temporary as dead */
1406 dead_temps[args[0]] = 1;
1407 mem_temps[args[0]] = 0;
1410 case INDEX_op_add2_i32:
1411 opc_new = INDEX_op_add_i32;
1413 case INDEX_op_sub2_i32:
1414 opc_new = INDEX_op_sub_i32;
1416 case INDEX_op_add2_i64:
1417 opc_new = INDEX_op_add_i64;
1419 case INDEX_op_sub2_i64:
1420 opc_new = INDEX_op_sub_i64;
1424 /* Test if the high part of the operation is dead, but not
1425 the low part. The result can be optimized to a simple
1426 add or sub. This happens often for x86_64 guest when the
1427 cpu mode is set to 32 bit. */
1428 if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1429 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1432 /* Replace the opcode and adjust the args in place,
1433 leaving 3 unused args at the end. */
1434 op->opc = opc = opc_new;
1437 /* Fall through and mark the single-word operation live. */
1443 case INDEX_op_mulu2_i32:
1444 opc_new = INDEX_op_mul_i32;
1445 opc_new2 = INDEX_op_muluh_i32;
1446 have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
1448 case INDEX_op_muls2_i32:
1449 opc_new = INDEX_op_mul_i32;
1450 opc_new2 = INDEX_op_mulsh_i32;
1451 have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
1453 case INDEX_op_mulu2_i64:
1454 opc_new = INDEX_op_mul_i64;
1455 opc_new2 = INDEX_op_muluh_i64;
1456 have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
1458 case INDEX_op_muls2_i64:
1459 opc_new = INDEX_op_mul_i64;
1460 opc_new2 = INDEX_op_mulsh_i64;
1461 have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
1466 if (dead_temps[args[1]] && !mem_temps[args[1]]) {
1467 if (dead_temps[args[0]] && !mem_temps[args[0]]) {
1468 /* Both parts of the operation are dead. */
1471 /* The high part of the operation is dead; generate the low. */
1472 op->opc = opc = opc_new;
1475 } else if (have_opc_new2 && dead_temps[args[0]]
1476 && !mem_temps[args[0]]) {
1477 /* The low part of the operation is dead; generate the high. */
1478 op->opc = opc = opc_new2;
1485 /* Mark the single-word operation live. */
1490 /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
1491 nb_iargs = def->nb_iargs;
1492 nb_oargs = def->nb_oargs;
1494 /* Test if the operation can be removed because all
1495 its outputs are dead. We assume that nb_oargs == 0
1496 implies side effects */
1497 if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
1498 for (i = 0; i < nb_oargs; i++) {
1500 if (!dead_temps[arg] || mem_temps[arg]) {
1505 tcg_op_remove(s, op);
1508 /* output args are dead */
1511 for (i = 0; i < nb_oargs; i++) {
1513 if (dead_temps[arg]) {
1514 dead_args |= (1 << i);
1516 if (mem_temps[arg]) {
1517 sync_args |= (1 << i);
1519 dead_temps[arg] = 1;
1523 /* if end of basic block, update */
1524 if (def->flags & TCG_OPF_BB_END) {
1525 tcg_la_bb_end(s, dead_temps, mem_temps);
1526 } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
1527 /* globals should be synced to memory */
1528 memset(mem_temps, 1, s->nb_globals);
1531 /* record arguments that die in this opcode */
1532 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1534 if (dead_temps[arg]) {
1535 dead_args |= (1 << i);
1538 /* input arguments are live for preceding opcodes */
1539 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
1541 dead_temps[arg] = 0;
1543 s->op_dead_args[oi] = dead_args;
1544 s->op_sync_args[oi] = sync_args;
1551 /* dummy liveness analysis: allocate the per-op dead-argument and
   sync-argument arrays (one entry per op, count taken from
   s->gen_next_op_idx) with tcg_malloc() and zero-fill them, so that no
   argument bit is ever set in either mask. */
1552 static void tcg_liveness_analysis(TCGContext *s)
1554 int nb_ops = s->gen_next_op_idx;
1556 s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
1557 memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
1558 s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
1559 memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t));
/* Debug helper: print the register allocator state to stdout.
   For each of the s->nb_temps temporaries, print its name followed by a
   description selected by ts->val_type: a host register name, an
   "offset(base register)" memory location, or a "$0x..." constant.
   Then walk the TCG_TARGET_NB_REGS host registers and report those whose
   s->reg_to_temp[] entry is not NULL. */
1564 static void dump_regs(TCGContext *s)
1570 for(i = 0; i < s->nb_temps; i++) {
1572 printf(" %10s: ", tcg_get_arg_str_idx(s, buf, sizeof(buf), i));
1573 switch(ts->val_type) {
1575 printf("%s", tcg_target_reg_names[ts->reg]);
1578 printf("%d(%s)", (int)ts->mem_offset,
1579 tcg_target_reg_names[ts->mem_base->reg]);
1581 case TEMP_VAL_CONST:
1582 printf("$0x%" TCG_PRIlx, ts->val);
1594 for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
1595 if (s->reg_to_temp[i] != NULL) {
1597 tcg_target_reg_names[i],
1598 tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
/* Debug helper: cross-check s->reg_to_temp[] against the temporaries.
   First pass (per host register): the temporary recorded for a register
   must have val_type TEMP_VAL_REG and must name that same register,
   otherwise "Inconsistency for register" is printed.
   Second pass (per temporary): a temporary that is in a register and is
   not a fixed register must be the one recorded in
   s->reg_to_temp[ts->reg], otherwise "Inconsistency for temp" is printed,
   followed by the "reg state:" heading. */
1603 static void check_regs(TCGContext *s)
1610 for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
1611 ts = s->reg_to_temp[reg];
1613 if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
1614 printf("Inconsistency for register %s:\n",
1615 tcg_target_reg_names[reg]);
1620 for (k = 0; k < s->nb_temps; k++) {
1622 if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
1623 && s->reg_to_temp[ts->reg] != ts) {
1624 printf("Inconsistency for temp %s:\n",
1625 tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
1627 printf("reg state:\n");
/* Give temporary number 'temp' a memory slot in the frame.
   Except on 64-bit sparc hosts, s->current_frame_offset is first rounded
   up to a multiple of sizeof(tcg_target_long).  The temporary then gets
   mem_offset = the current frame offset, mem_base = s->frame_temp and
   mem_allocated = 1, and the frame offset advances by
   sizeof(tcg_target_long).  The offset is compared against a limit
   before the slot is assigned. */
1635 static void temp_allocate_frame(TCGContext *s, int temp)
1638 ts = &s->temps[temp];
1639 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
1640 /* Sparc64 stack is accessed with offset of 2047 */
1641 s->current_frame_offset = (s->current_frame_offset +
1642 (tcg_target_long)sizeof(tcg_target_long) - 1) &
1643 ~(sizeof(tcg_target_long) - 1);
1645 if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
1649 ts->mem_offset = s->current_frame_offset;
1650 ts->mem_base = s->frame_temp;
1651 ts->mem_allocated = 1;
1652 s->current_frame_offset += sizeof(tcg_target_long);
1655 /* sync register 'reg' by saving it to the corresponding temporary.
   The temporary mapped to 'reg' must be in state TEMP_VAL_REG (asserted).
   A store is emitted only when the temporary is neither mem_coherent nor
   a fixed register; a frame slot is allocated first if the temporary has
   none yet.  The temporary is flagged mem_coherent afterwards. */
1656 static inline void tcg_reg_sync(TCGContext *s, TCGReg reg)
1658 TCGTemp *ts = s->reg_to_temp[reg];
1660 assert(ts->val_type == TEMP_VAL_REG);
1661 if (!ts->mem_coherent && !ts->fixed_reg) {
1662 if (!ts->mem_allocated) {
1663 temp_allocate_frame(s, temp_idx(s, ts));
1665 tcg_out_st(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
1667 ts->mem_coherent = 1;
1670 /* free register 'reg' by spilling the corresponding temporary if necessary:
   the register is written back with tcg_reg_sync(), the temporary that
   occupied it switches to TEMP_VAL_MEM, and s->reg_to_temp[reg] is
   cleared. */
1671 static void tcg_reg_free(TCGContext *s, TCGReg reg)
1673 TCGTemp *ts = s->reg_to_temp[reg];
1676 tcg_reg_sync(s, reg);
1677 ts->val_type = TEMP_VAL_MEM;
1678 s->reg_to_temp[reg] = NULL;
1682 /* Allocate a register belonging to reg1 & ~reg2.
   Candidates are examined in tcg_target_reg_alloc_order.  The first loop
   looks for a candidate with no temporary assigned in s->reg_to_temp[].
   The second loop takes the first candidate in the set regardless of its
   occupant and releases it with tcg_reg_free(), i.e. spills it. */
1683 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2)
1689 tcg_regset_andnot(reg_ct, reg1, reg2);
1691 /* first try free registers */
1692 for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1693 reg = tcg_target_reg_alloc_order[i];
1694 if (tcg_regset_test_reg(reg_ct, reg) && s->reg_to_temp[reg] == NULL)
1698 /* XXX: do better spill choice */
1699 for(i = 0; i < ARRAY_SIZE(tcg_target_reg_alloc_order); i++) {
1700 reg = tcg_target_reg_alloc_order[i];
1701 if (tcg_regset_test_reg(reg_ct, reg)) {
1702 tcg_reg_free(s, reg);
1710 /* Make sure the temporary is in a register. If needed, allocate the register
1711 from DESIRED while avoiding ALLOCATED.
   A constant temporary is materialised with tcg_out_movi() and marked as
   not mem_coherent; a temporary living in memory is loaded with
   tcg_out_ld() from mem_base/mem_offset and marked mem_coherent.  The
   temporary ends up in state TEMP_VAL_REG and is recorded in
   s->reg_to_temp[]. */
1712 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
1713 TCGRegSet allocated_regs)
1717 switch (ts->val_type) {
1720 case TEMP_VAL_CONST:
1721 reg = tcg_reg_alloc(s, desired_regs, allocated_regs);
1722 tcg_out_movi(s, ts->type, reg, ts->val);
1723 ts->mem_coherent = 0;
1726 reg = tcg_reg_alloc(s, desired_regs, allocated_regs);
1727 tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
1728 ts->mem_coherent = 1;
1735 ts->val_type = TEMP_VAL_REG;
1736 s->reg_to_temp[reg] = ts;
1739 /* mark a temporary as dead.
   Fixed-register temporaries are handled separately up front.  If the
   temporary currently sits in a register, that register's
   s->reg_to_temp[] entry is cleared.  Globals (index below s->nb_globals)
   and local temporaries fall back to TEMP_VAL_MEM; every other temporary
   becomes TEMP_VAL_DEAD. */
1740 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
1742 if (ts->fixed_reg) {
1745 if (ts->val_type == TEMP_VAL_REG) {
1746 s->reg_to_temp[ts->reg] = NULL;
1748 ts->val_type = (temp_idx(s, ts) < s->nb_globals || ts->temp_local
1749 ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1752 /* sync a temporary to memory. 'allocated_regs' is used in case a
1753 temporary register needs to be allocated to store a constant.
   Fixed-register temporaries are handled separately up front.  A constant
   is first brought into a register with temp_load(); a value held in a
   register is written back with tcg_reg_sync(). */
1754 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
1756 if (ts->fixed_reg) {
1759 switch (ts->val_type) {
1760 case TEMP_VAL_CONST:
1761 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs);
1764 tcg_reg_sync(s, ts->reg);
1774 /* save a temporary to memory. 'allocated_regs' is used in case a
1775 temporary register needs to be allocated to store a constant.
   With USE_LIVENESS_ANALYSIS the function only asserts (debug builds)
   that the temporary is already in memory or is a fixed register;
   the temp_sync() call belongs to the other configuration. */
1776 static inline void temp_save(TCGContext *s, TCGTemp *ts,
1777 TCGRegSet allocated_regs)
1779 #ifdef USE_LIVENESS_ANALYSIS
1780 /* The liveness analysis already ensures that globals are back
1781 in memory. Keep an assert for safety. */
1782 tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
1784 temp_sync(s, ts, allocated_regs);
1789 /* save globals to their canonical location and assume they can be
1790 modified by the following code. 'allocated_regs' is used in case a
1791 temporary register needs to be allocated to store a constant.
   Implemented by calling temp_save() on each of the first s->nb_globals
   entries of s->temps[]. */
1792 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
1796 for (i = 0; i < s->nb_globals; i++) {
1797 temp_save(s, &s->temps[i], allocated_regs);
1801 /* sync globals to their canonical location and assume they can be
1802 read by the following code. 'allocated_regs' is used in case a
1803 temporary register needs to be allocated to store a constant.
   Iterates over the first s->nb_globals entries of s->temps[].  With
   USE_LIVENESS_ANALYSIS each global is only checked by a debug assertion
   (a global held in a register is expected to be mem_coherent); the
   temp_sync() call belongs to the other configuration. */
1804 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
1808 for (i = 0; i < s->nb_globals; i++) {
1809 TCGTemp *ts = &s->temps[i];
1810 #ifdef USE_LIVENESS_ANALYSIS
1811 tcg_debug_assert(ts->val_type != TEMP_VAL_REG
1813 || ts->mem_coherent);
1815 temp_sync(s, ts, allocated_regs);
1820 /* at the end of a basic block, we assume all temporaries are dead and
1821 all globals are stored at their canonical location.
   The loop covers the non-global temporaries (indices s->nb_globals up to
   s->nb_temps): local temporaries go through temp_save(), the others are
   expected to be TEMP_VAL_DEAD already when liveness analysis is enabled.
   Globals are then handled by save_globals(). */
1822 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
1826 for (i = s->nb_globals; i < s->nb_temps; i++) {
1827 TCGTemp *ts = &s->temps[i];
1828 if (ts->temp_local) {
1829 temp_save(s, ts, allocated_regs);
1831 #ifdef USE_LIVENESS_ANALYSIS
1832 /* The liveness analysis already ensures that temps are dead.
1833 Keep an assert for safety. */
1834 assert(ts->val_type == TEMP_VAL_DEAD);
1841 save_globals(s, allocated_regs);
/* Test bit n of the per-op liveness masks.  Both macros read a variable
   named 'dead_args' / 'sync_args' from the enclosing scope, so they can
   only be used where such a variable is visible.  The result is 0 or 1. */
1844 #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
1845 #define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
/* Register allocation for a move-immediate op.
   args[0] is the index of the destination temporary.  For a fixed
   register destination the constant is emitted right away with
   tcg_out_movi().  Otherwise no code is generated: any register held by
   the destination is released and the destination is simply recorded as
   TEMP_VAL_CONST.  Bit 0 of sync_args requests a temp_sync() of the
   destination; bit 0 of dead_args marks the destination as dying in this
   op. */
1847 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
1848 uint16_t dead_args, uint8_t sync_args)
1851 tcg_target_ulong val;
1853 ots = &s->temps[args[0]];
1856 if (ots->fixed_reg) {
1857 /* for fixed registers, we do not do any constant
1859 tcg_out_movi(s, ots->type, ots->reg, val);
1861 /* The movi is not explicitly generated here */
1862 if (ots->val_type == TEMP_VAL_REG) {
1863 s->reg_to_temp[ots->reg] = NULL;
1865 ots->val_type = TEMP_VAL_CONST;
1868 if (NEED_SYNC_ARG(0)) {
1869 temp_sync(s, ots, s->reserved_regs);
1871 if (IS_DEAD_ARG(0)) {
/* Register allocation for a mov op.
   args[0] selects the destination temporary (ots) and args[1] the source
   temporary (ts); bit 0 / bit 1 of dead_args and bit 0 of sync_args refer
   to them in that order.
   The source is first loaded into a register when that is unavoidable.
   Three cases follow:
     - destination dead and not a fixed register: the source register is
       stored straight into the destination's memory slot (a frame slot
       is allocated on demand);
     - source is a constant: the constant is propagated, the destination
       becomes TEMP_VAL_CONST and no move is emitted;
     - otherwise the source is in a register: when the source dies here
       and neither temporary is a fixed register the move can be
       suppressed; else a register is allocated for the destination if it
       has none (excluding the source register) and tcg_out_mov() is
       emitted.  The destination ends as TEMP_VAL_REG, not mem_coherent,
       and is synced when requested. */
1876 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
1877 const TCGArg *args, uint16_t dead_args,
1880 TCGRegSet allocated_regs;
1882 TCGType otype, itype;
1884 tcg_regset_set(allocated_regs, s->reserved_regs);
1885 ots = &s->temps[args[0]];
1886 ts = &s->temps[args[1]];
1888 /* Note that otype != itype for no-op truncation. */
1892 /* If the source value is not in a register, and we're going to be
1893 forced to have it in a register in order to perform the copy,
1894 then copy the SOURCE value into its own register first. That way
1895 we don't have to reload SOURCE the next time it is used. */
1896 if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG)
1897 || ts->val_type == TEMP_VAL_MEM) {
1898 temp_load(s, ts, tcg_target_available_regs[itype], allocated_regs);
1901 if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
1902 /* mov to a non-saved dead register makes no sense (even with
1903 liveness analysis disabled). */
1904 assert(NEED_SYNC_ARG(0));
1905 /* The code above should have moved the temp to a register. */
1906 assert(ts->val_type == TEMP_VAL_REG);
1907 if (!ots->mem_allocated) {
1908 temp_allocate_frame(s, args[0]);
1910 tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
1911 if (IS_DEAD_ARG(1)) {
1915 } else if (ts->val_type == TEMP_VAL_CONST) {
1916 /* propagate constant */
1917 if (ots->val_type == TEMP_VAL_REG) {
1918 s->reg_to_temp[ots->reg] = NULL;
1920 ots->val_type = TEMP_VAL_CONST;
1922 if (IS_DEAD_ARG(1)) {
1926 /* The code in the first if block should have moved the
1927 temp to a register. */
1928 assert(ts->val_type == TEMP_VAL_REG);
1929 if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
1930 /* the mov can be suppressed */
1931 if (ots->val_type == TEMP_VAL_REG) {
1932 s->reg_to_temp[ots->reg] = NULL;
1937 if (ots->val_type != TEMP_VAL_REG) {
1938 /* When allocating a new register, make sure to not spill the
1940 tcg_regset_set_reg(allocated_regs, ts->reg);
1941 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
1944 tcg_out_mov(s, otype, ots->reg, ts->reg);
1946 ots->val_type = TEMP_VAL_REG;
1947 ots->mem_coherent = 0;
1948 s->reg_to_temp[ots->reg] = ots;
1949 if (NEED_SYNC_ARG(0)) {
1950 tcg_reg_sync(s, ots->reg);
/* Generic register allocation and code emission for one op.
   'def' describes the op (argument counts, constraints, flags), 'args'
   holds output arguments first, then inputs, then constant arguments;
   dead_args / sync_args are the per-argument bit masks read through
   IS_DEAD_ARG() / NEED_SYNC_ARG().
   Steps, in order:
     1. constant arguments are copied unchanged into new_args[];
     2. inputs are processed in def->sorted_args order: a constant that
        the backend accepts (tcg_target_const_match) is passed as an
        immediate, anything else is loaded into a register; inputs
        aliased to an output, or sitting in a register outside the
        constraint set, are copied to a newly allocated register;
     3. inputs flagged dead are released with temp_dead();
     4. ops ending a basic block go through tcg_reg_alloc_bb_end();
        otherwise call-clobbered registers and side-effect syncing of
        globals are handled according to def->flags, and an output
        register is chosen for every output;
     5. the instruction is emitted with tcg_out_op();
     6. outputs are moved into their fixed register when they have one
        and it differs, then synced / killed as the masks request. */
1955 static void tcg_reg_alloc_op(TCGContext *s,
1956 const TCGOpDef *def, TCGOpcode opc,
1957 const TCGArg *args, uint16_t dead_args,
1960 TCGRegSet allocated_regs;
1961 int i, k, nb_iargs, nb_oargs;
1964 const TCGArgConstraint *arg_ct;
1966 TCGArg new_args[TCG_MAX_OP_ARGS];
1967 int const_args[TCG_MAX_OP_ARGS];
1969 nb_oargs = def->nb_oargs;
1970 nb_iargs = def->nb_iargs;
1972 /* copy constants */
1973 memcpy(new_args + nb_oargs + nb_iargs,
1974 args + nb_oargs + nb_iargs,
1975 sizeof(TCGArg) * def->nb_cargs);
1977 /* satisfy input constraints */
1978 tcg_regset_set(allocated_regs, s->reserved_regs);
1979 for(k = 0; k < nb_iargs; k++) {
1980 i = def->sorted_args[nb_oargs + k];
1982 arg_ct = &def->args_ct[i];
1983 ts = &s->temps[arg];
1985 if (ts->val_type == TEMP_VAL_CONST
1986 && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
1987 /* constant is OK for instruction */
1989 new_args[i] = ts->val;
1993 temp_load(s, ts, arg_ct->u.regs, allocated_regs);
1995 if (arg_ct->ct & TCG_CT_IALIAS) {
1996 if (ts->fixed_reg) {
1997 /* if fixed register, we must allocate a new register
1998 if the alias is not the same register */
1999 if (arg != args[arg_ct->alias_index])
2000 goto allocate_in_reg;
2002 /* if the input is aliased to an output and if it is
2003 not dead after the instruction, we must allocate
2004 a new register and move it */
2005 if (!IS_DEAD_ARG(i)) {
2006 goto allocate_in_reg;
2008 /* check if the current register has already been allocated
2009 for another input aliased to an output */
2011 for (k2 = 0 ; k2 < k ; k2++) {
2012 i2 = def->sorted_args[nb_oargs + k2];
2013 if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
2014 (new_args[i2] == ts->reg)) {
2015 goto allocate_in_reg;
2021 if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2022 /* nothing to do : the constraint is satisfied */
2025 /* allocate a new register matching the constraint
2026 and move the temporary register into it */
2027 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2028 tcg_out_mov(s, ts->type, reg, ts->reg);
2032 tcg_regset_set_reg(allocated_regs, reg);
2036 /* mark dead temporaries and free the associated registers */
2037 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2038 if (IS_DEAD_ARG(i)) {
2039 temp_dead(s, &s->temps[args[i]]);
2043 if (def->flags & TCG_OPF_BB_END) {
2044 tcg_reg_alloc_bb_end(s, allocated_regs);
2046 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2047 /* XXX: permit generic clobber register list ? */
2048 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2049 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2054 if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2055 /* sync globals if the op has side effects and might trigger
2057 sync_globals(s, allocated_regs);
2060 /* satisfy the output constraints */
2061 tcg_regset_set(allocated_regs, s->reserved_regs);
2062 for(k = 0; k < nb_oargs; k++) {
2063 i = def->sorted_args[k];
2065 arg_ct = &def->args_ct[i];
2066 ts = &s->temps[arg];
2067 if (arg_ct->ct & TCG_CT_ALIAS) {
2068 reg = new_args[arg_ct->alias_index];
2070 /* if fixed register, we try to use it */
2072 if (ts->fixed_reg &&
2073 tcg_regset_test_reg(arg_ct->u.regs, reg)) {
2076 reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs);
2078 tcg_regset_set_reg(allocated_regs, reg);
2079 /* if a fixed register is used, then a move will be done afterwards */
2080 if (!ts->fixed_reg) {
2081 if (ts->val_type == TEMP_VAL_REG) {
2082 s->reg_to_temp[ts->reg] = NULL;
2084 ts->val_type = TEMP_VAL_REG;
2086 /* temp value is modified, so the value kept in memory is
2087 potentially not the same */
2088 ts->mem_coherent = 0;
2089 s->reg_to_temp[reg] = ts;
2096 /* emit instruction */
2097 tcg_out_op(s, opc, new_args, const_args);
2099 /* move the outputs in the correct register if needed */
2100 for(i = 0; i < nb_oargs; i++) {
2101 ts = &s->temps[args[i]];
2103 if (ts->fixed_reg && ts->reg != reg) {
2104 tcg_out_mov(s, ts->type, ts->reg, reg);
2106 if (NEED_SYNC_ARG(i)) {
2107 tcg_reg_sync(s, reg);
2109 if (IS_DEAD_ARG(i)) {
/* STACK_DIR(x): sign adjustment for stack offsets.  The negating form is
   the one selected when TCG_TARGET_STACK_GROWSUP is defined; the identity
   form is the alternative definition. */
2115 #ifdef TCG_TARGET_STACK_GROWSUP
2116 #define STACK_DIR(x) (-(x))
2118 #define STACK_DIR(x) (x)
/* Register allocation and code emission for a helper call.
   'args' holds nb_oargs outputs, then nb_iargs inputs, then the function
   address and the call flags (args[nb_oargs + nb_iargs] and the slot
   after it).
   Steps, in order:
     1. nb_regs = number of inputs passed in registers, limited by the
        size of tcg_target_call_iarg_regs[]; the remaining inputs get
        stack slots of sizeof(tcg_target_long) each, the total rounded up
        to TCG_TARGET_STACK_ALIGN;
     2. stack inputs (skipping TCG_CALL_DUMMY_ARG) are loaded into a
        register and stored relative to TCG_REG_CALL_STACK;
     3. register inputs: the target argument register is freed, then
        filled by a register move or by temp_load() restricted to that
        single register;
     4. inputs flagged dead are released, call-clobbered registers are
        processed, and globals are saved or synced depending on the
        TCG_CALL_NO_READ_GLOBALS / TCG_CALL_NO_WRITE_GLOBALS flags;
     5. the call is emitted with tcg_out_call();
     6. each output is bound to tcg_target_call_oarg_regs[i] (which must
        be unoccupied), moved to its fixed register if it has one, and
        synced / killed as the masks request. */
2121 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
2122 const TCGArg * const args, uint16_t dead_args,
2125 int flags, nb_regs, i;
2129 intptr_t stack_offset;
2130 size_t call_stack_size;
2131 tcg_insn_unit *func_addr;
2133 TCGRegSet allocated_regs;
2135 func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
2136 flags = args[nb_oargs + nb_iargs + 1];
2138 nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2139 if (nb_regs > nb_iargs) {
2143 /* assign stack slots first */
2144 call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
2145 call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
2146 ~(TCG_TARGET_STACK_ALIGN - 1);
2147 allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
2148 if (allocate_args) {
2149 /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
2150 preallocate call stack */
2154 stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
2155 for(i = nb_regs; i < nb_iargs; i++) {
2156 arg = args[nb_oargs + i];
2157 #ifdef TCG_TARGET_STACK_GROWSUP
2158 stack_offset -= sizeof(tcg_target_long);
2160 if (arg != TCG_CALL_DUMMY_ARG) {
2161 ts = &s->temps[arg];
2162 temp_load(s, ts, tcg_target_available_regs[ts->type],
2164 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
2166 #ifndef TCG_TARGET_STACK_GROWSUP
2167 stack_offset += sizeof(tcg_target_long);
2171 /* assign input registers */
2172 tcg_regset_set(allocated_regs, s->reserved_regs);
2173 for(i = 0; i < nb_regs; i++) {
2174 arg = args[nb_oargs + i];
2175 if (arg != TCG_CALL_DUMMY_ARG) {
2176 ts = &s->temps[arg];
2177 reg = tcg_target_call_iarg_regs[i];
2178 tcg_reg_free(s, reg);
2180 if (ts->val_type == TEMP_VAL_REG) {
2181 if (ts->reg != reg) {
2182 tcg_out_mov(s, ts->type, reg, ts->reg);
2187 tcg_regset_clear(arg_set);
2188 tcg_regset_set_reg(arg_set, reg);
2189 temp_load(s, ts, arg_set, allocated_regs);
2192 tcg_regset_set_reg(allocated_regs, reg);
2196 /* mark dead temporaries and free the associated registers */
2197 for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2198 if (IS_DEAD_ARG(i)) {
2199 temp_dead(s, &s->temps[args[i]]);
2203 /* clobber call registers */
2204 for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
2205 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
2210 /* Save globals if they might be written by the helper, sync them if
2211 they might be read. */
2212 if (flags & TCG_CALL_NO_READ_GLOBALS) {
2214 } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
2215 sync_globals(s, allocated_regs);
2217 save_globals(s, allocated_regs);
2220 tcg_out_call(s, func_addr);
2222 /* assign output registers and emit moves if needed */
2223 for(i = 0; i < nb_oargs; i++) {
2225 ts = &s->temps[arg];
2226 reg = tcg_target_call_oarg_regs[i];
2227 assert(s->reg_to_temp[reg] == NULL);
2229 if (ts->fixed_reg) {
2230 if (ts->reg != reg) {
2231 tcg_out_mov(s, ts->type, ts->reg, reg);
2234 if (ts->val_type == TEMP_VAL_REG) {
2235 s->reg_to_temp[ts->reg] = NULL;
2237 ts->val_type = TEMP_VAL_REG;
2239 ts->mem_coherent = 0;
2240 s->reg_to_temp[reg] = ts;
2241 if (NEED_SYNC_ARG(i)) {
2242 tcg_reg_sync(s, reg);
2244 if (IS_DEAD_ARG(i)) {
/* tcg_dump_op_count(): report per-opcode usage through 'cpu_fprintf'.
   With CONFIG_PROFILER, one "<op name> <count>" line is written for each
   of the NB_OPS entries of tcg_table_op_count[].  The other definition
   only writes a "[TCG profiler not compiled]" notice. */
2251 #ifdef CONFIG_PROFILER
2253 static int64_t tcg_table_op_count[NB_OPS];
2255 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2259 for (i = 0; i < NB_OPS; i++) {
2260 cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
2261 tcg_table_op_count[i]);
2265 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
2267 cpu_fprintf(f, "[TCG profiler not compiled]\n");
/* Translate the ops recorded in 's' into host code written at
   'gen_code_buf'.
   Outline:
     - with CONFIG_PROFILER, update the op/temp maxima and the
       optimisation / liveness timers;
     - run tcg_liveness_analysis() and, when CPU_LOG_TB_OP_OPT logging is
       enabled, log the ops afterwards;
     - reset the allocator with tcg_reg_alloc_start() and point
       s->code_buf / s->code_ptr at the output buffer;
     - walk the op list from s->gen_first_op_idx, dispatching on the
       opcode: mov and movi have dedicated allocators, insn_start records
       the end offset of the previous guest instruction and the start
       words of the new one, discard kills a temporary, set_label ends
       the basic block and binds the label, calls use
       tcg_reg_alloc_call(), everything else goes through
       tcg_reg_alloc_op();
     - after each op, compare s->code_ptr with s->code_gen_highwater to
       detect a pending buffer overflow;
     - record the final instruction end offset, run
       tcg_out_tb_finalize(), flush the instruction cache for the
       generated range.
   On the path that reaches the end of the function, the return value is
   tcg_current_code_size(s). */
2272 int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
2274 int i, oi, oi_next, num_insns;
2276 #ifdef CONFIG_PROFILER
2280 n = s->gen_last_op_idx + 1;
2282 if (n > s->op_count_max) {
2283 s->op_count_max = n;
2288 if (n > s->temp_count_max) {
2289 s->temp_count_max = n;
2295 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
2302 #ifdef CONFIG_PROFILER
2303 s->opt_time -= profile_getclock();
2306 #ifdef USE_TCG_OPTIMIZATIONS
2310 #ifdef CONFIG_PROFILER
2311 s->opt_time += profile_getclock();
2312 s->la_time -= profile_getclock();
2315 tcg_liveness_analysis(s);
2317 #ifdef CONFIG_PROFILER
2318 s->la_time += profile_getclock();
2322 if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
2323 qemu_log("OP after optimization and liveness analysis:\n");
2329 tcg_reg_alloc_start(s);
2331 s->code_buf = gen_code_buf;
2332 s->code_ptr = gen_code_buf;
2337 for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
2338 TCGOp * const op = &s->gen_op_buf[oi];
2339 TCGArg * const args = &s->gen_opparam_buf[op->args];
2340 TCGOpcode opc = op->opc;
2341 const TCGOpDef *def = &tcg_op_defs[opc];
2342 uint16_t dead_args = s->op_dead_args[oi];
2343 uint8_t sync_args = s->op_sync_args[oi];
2346 #ifdef CONFIG_PROFILER
2347 tcg_table_op_count[opc]++;
2351 case INDEX_op_mov_i32:
2352 case INDEX_op_mov_i64:
2353 tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
2355 case INDEX_op_movi_i32:
2356 case INDEX_op_movi_i64:
2357 tcg_reg_alloc_movi(s, args, dead_args, sync_args);
2359 case INDEX_op_insn_start:
2360 if (num_insns >= 0) {
2361 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2364 for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2366 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2367 a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
2371 s->gen_insn_data[num_insns][i] = a;
2374 case INDEX_op_discard:
2375 temp_dead(s, &s->temps[args[0]]);
2377 case INDEX_op_set_label:
2378 tcg_reg_alloc_bb_end(s, s->reserved_regs);
2379 tcg_out_label(s, arg_label(args[0]), s->code_ptr);
2382 tcg_reg_alloc_call(s, op->callo, op->calli, args,
2383 dead_args, sync_args);
2386 /* Sanity check that we've not introduced any unhandled opcodes. */
2387 if (def->flags & TCG_OPF_NOT_PRESENT) {
2390 /* Note: in order to speed up the code, it would be much
2391 faster to have specialized register allocator functions for
2392 some common argument patterns */
2393 tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
2399 /* Test for (pending) buffer overflow. The assumption is that any
2400 one operation beginning below the high water mark cannot overrun
2401 the buffer completely. Thus we can test for overflow after
2402 generating code without having to check during generation. */
2403 if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
2407 tcg_debug_assert(num_insns >= 0);
2408 s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
2410 /* Generate TB finalization at the end of block */
2411 if (!tcg_out_tb_finalize(s)) {
2415 /* flush instruction cache */
2416 flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
2418 return tcg_current_code_size(s);
/* tcg_dump_info(): write translator statistics through 'cpu_fprintf'.
   With CONFIG_PROFILER the figures come from the global tcg_ctx:
   'tot' is interm_time + code_time, and per-TB averages divide by
   tb_count, with 1 substituted when tb_count is 0.  Ratios whose
   denominator is a counter (op_count, code_in_len, code_out_len,
   search_out_len, restore_count) print 0 when that counter is 0.  The
   "2.4 GHz" conversion in the first line is a fixed figure in the format
   string.  The other definition only writes a
   "[TCG profiler not compiled]" notice. */
2421 #ifdef CONFIG_PROFILER
2422 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2424 TCGContext *s = &tcg_ctx;
2425 int64_t tb_count = s->tb_count;
2426 int64_t tb_div_count = tb_count ? tb_count : 1;
2427 int64_t tot = s->interm_time + s->code_time;
2429 cpu_fprintf(f, "JIT cycles %" PRId64 " (%0.3f s at 2.4 GHz)\n",
2431 cpu_fprintf(f, "translated TBs %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
2432 tb_count, s->tb_count1 - tb_count,
2433 (double)(s->tb_count1 - s->tb_count)
2434 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
2435 cpu_fprintf(f, "avg ops/TB %0.1f max=%d\n",
2436 (double)s->op_count / tb_div_count, s->op_count_max);
2437 cpu_fprintf(f, "deleted ops/TB %0.2f\n",
2438 (double)s->del_op_count / tb_div_count);
2439 cpu_fprintf(f, "avg temps/TB %0.2f max=%d\n",
2440 (double)s->temp_count / tb_div_count, s->temp_count_max);
2441 cpu_fprintf(f, "avg host code/TB %0.1f\n",
2442 (double)s->code_out_len / tb_div_count);
2443 cpu_fprintf(f, "avg search data/TB %0.1f\n",
2444 (double)s->search_out_len / tb_div_count);
2446 cpu_fprintf(f, "cycles/op %0.1f\n",
2447 s->op_count ? (double)tot / s->op_count : 0);
2448 cpu_fprintf(f, "cycles/in byte %0.1f\n",
2449 s->code_in_len ? (double)tot / s->code_in_len : 0);
2450 cpu_fprintf(f, "cycles/out byte %0.1f\n",
2451 s->code_out_len ? (double)tot / s->code_out_len : 0);
2452 cpu_fprintf(f, "cycles/search byte %0.1f\n",
2453 s->search_out_len ? (double)tot / s->search_out_len : 0);
2457 cpu_fprintf(f, " gen_interm time %0.1f%%\n",
2458 (double)s->interm_time / tot * 100.0);
2459 cpu_fprintf(f, " gen_code time %0.1f%%\n",
2460 (double)s->code_time / tot * 100.0);
2461 cpu_fprintf(f, "optim./code time %0.1f%%\n",
2462 (double)s->opt_time / (s->code_time ? s->code_time : 1)
2464 cpu_fprintf(f, "liveness/code time %0.1f%%\n",
2465 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
2466 cpu_fprintf(f, "cpu_restore count %" PRId64 "\n",
2468 cpu_fprintf(f, " avg cycles %0.1f\n",
2469 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
2472 void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
2474 cpu_fprintf(f, "[TCG profiler not compiled]\n");
2478 #ifdef ELF_HOST_MACHINE
2479 /* In order to use this feature, the backend needs to do three things:
2481 (1) Define ELF_HOST_MACHINE to indicate both what value to
2482 put into the ELF image and to indicate support for the feature.
2484 (2) Define tcg_register_jit. This should create a buffer containing
2485 the contents of a .debug_frame section that describes the post-
2486 prologue unwind info for the tcg machine.
2488 (3) Call tcg_register_jit_int, with the constructed .debug_frame.
2491 /* Begin GDB interface. THE FOLLOWING MUST MATCH GDB DOCS. */
/* Declarations shared with the debugger.
   jit_code_entry is a doubly linked list node (next_entry / prev_entry)
   describing one in-memory symbol file by address and size.
   jit_descriptor carries an action flag plus pointers to the entry the
   action refers to and to the first entry of the list.
   __jit_debug_register_code() is declared noinline; it is called after
   the descriptor has been updated.
   __jit_debug_descriptor is the single descriptor instance, statically
   initialised as { 1, 0, 0, 0 }. */
2498 struct jit_code_entry {
2499 struct jit_code_entry *next_entry;
2500 struct jit_code_entry *prev_entry;
2501 const void *symfile_addr;
2502 uint64_t symfile_size;
2505 struct jit_descriptor {
2507 uint32_t action_flag;
2508 struct jit_code_entry *relevant_entry;
2509 struct jit_code_entry *first_entry;
2512 void __jit_debug_register_code(void) __attribute__((noinline));
2513 void __jit_debug_register_code(void)
2518 /* Must statically initialize the version, because GDB may check
2519 the version before we can set it. */
2520 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
2522 /* End GDB interface. */
/* Look up 'str' in the string table 'strtab'.  The scan starts at
   strtab + 1 (the first byte is skipped) and candidates are compared
   with strcmp().  Callers store the result in ELF sh_name / st_name
   fields, so it is presumably the byte offset of the match within
   'strtab' -- confirm. */
2524 static int find_string(const char *strtab, const char *str)
2526 const char *p = strtab + 1;
2529 if (strcmp(p, str) == 0) {
/* Build an in-memory ELF image describing the generated-code buffer and
   register it through __jit_debug_descriptor.
     buf_ptr, buf_size            the code buffer; its address and size
                                  are patched into the program header,
                                  the .text section header, the
                                  "code_gen_buffer" symbol, the DWARF
                                  low/high pc fields and the FDE.
     debug_frame,
     debug_frame_size             unwind data copied directly after the
                                  fixed-size part of the image.
   The image starts as a copy of the static template 'img_template';
   section names are resolved with find_string() against the embedded
   string table.  The image is allocated with g_malloc() and stays
   referenced by the statically allocated 'one_entry'; it is not freed
   here.  Finally the descriptor is set to JIT_REGISTER_FN with
   'one_entry' as both relevant and first entry, and
   __jit_debug_register_code() is called. */
2536 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
2537 const void *debug_frame,
2538 size_t debug_frame_size)
2540 struct __attribute__((packed)) DebugInfo {
2547 uintptr_t cu_low_pc;
2548 uintptr_t cu_high_pc;
2551 uintptr_t fn_low_pc;
2552 uintptr_t fn_high_pc;
2561 struct DebugInfo di;
2566 struct ElfImage *img;
2568 static const struct ElfImage img_template = {
2570 .e_ident[EI_MAG0] = ELFMAG0,
2571 .e_ident[EI_MAG1] = ELFMAG1,
2572 .e_ident[EI_MAG2] = ELFMAG2,
2573 .e_ident[EI_MAG3] = ELFMAG3,
2574 .e_ident[EI_CLASS] = ELF_CLASS,
2575 .e_ident[EI_DATA] = ELF_DATA,
2576 .e_ident[EI_VERSION] = EV_CURRENT,
2578 .e_machine = ELF_HOST_MACHINE,
2579 .e_version = EV_CURRENT,
2580 .e_phoff = offsetof(struct ElfImage, phdr),
2581 .e_shoff = offsetof(struct ElfImage, shdr),
/* NOTE(review): e_ehsize is the size of the ELF file header, yet it is
   initialised from the section header type.  sizeof(ElfW(Ehdr)) looks
   like the intended value; the two sizes differ for 32-bit ELF --
   confirm and fix. */
2582 .e_ehsize = sizeof(ElfW(Shdr)),
2583 .e_phentsize = sizeof(ElfW(Phdr)),
2585 .e_shentsize = sizeof(ElfW(Shdr)),
2586 .e_shnum = ARRAY_SIZE(img->shdr),
2587 .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
2588 #ifdef ELF_HOST_FLAGS
2589 .e_flags = ELF_HOST_FLAGS,
2592 .e_ident[EI_OSABI] = ELF_OSABI,
2600 [0] = { .sh_type = SHT_NULL },
2601 /* Trick: The contents of code_gen_buffer are not present in
2602 this fake ELF file; that got allocated elsewhere. Therefore
2603 we mark .text as SHT_NOBITS (similar to .bss) so that readers
2604 will not look for contents. We can record any address. */
2606 .sh_type = SHT_NOBITS,
2607 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
2609 [2] = { /* .debug_info */
2610 .sh_type = SHT_PROGBITS,
2611 .sh_offset = offsetof(struct ElfImage, di),
2612 .sh_size = sizeof(struct DebugInfo),
2614 [3] = { /* .debug_abbrev */
2615 .sh_type = SHT_PROGBITS,
2616 .sh_offset = offsetof(struct ElfImage, da),
2617 .sh_size = sizeof(img->da),
2619 [4] = { /* .debug_frame */
2620 .sh_type = SHT_PROGBITS,
2621 .sh_offset = sizeof(struct ElfImage),
2623 [5] = { /* .symtab */
2624 .sh_type = SHT_SYMTAB,
2625 .sh_offset = offsetof(struct ElfImage, sym),
2626 .sh_size = sizeof(img->sym),
2628 .sh_link = ARRAY_SIZE(img->shdr) - 1,
2629 .sh_entsize = sizeof(ElfW(Sym)),
2631 [6] = { /* .strtab */
2632 .sh_type = SHT_STRTAB,
2633 .sh_offset = offsetof(struct ElfImage, str),
2634 .sh_size = sizeof(img->str),
2638 [1] = { /* code_gen_buffer */
2639 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
2644 .len = sizeof(struct DebugInfo) - 4,
2646 .ptr_size = sizeof(void *),
2648 .cu_lang = 0x8001, /* DW_LANG_Mips_Assembler */
2650 .fn_name = "code_gen_buffer"
2653 1, /* abbrev number (the cu) */
2654 0x11, 1, /* DW_TAG_compile_unit, has children */
2655 0x13, 0x5, /* DW_AT_language, DW_FORM_data2 */
2656 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2657 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2658 0, 0, /* end of abbrev */
2659 2, /* abbrev number (the fn) */
2660 0x2e, 0, /* DW_TAG_subprogram, no children */
2661 0x3, 0x8, /* DW_AT_name, DW_FORM_string */
2662 0x11, 0x1, /* DW_AT_low_pc, DW_FORM_addr */
2663 0x12, 0x1, /* DW_AT_high_pc, DW_FORM_addr */
2664 0, 0, /* end of abbrev */
2665 0 /* no more abbrev */
2667 .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
2668 ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
2671 /* We only need a single jit entry; statically allocate it. */
2672 static struct jit_code_entry one_entry;
2674 uintptr_t buf = (uintptr_t)buf_ptr;
2675 size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
2676 DebugFrameHeader *dfh;
2678 img = g_malloc(img_size);
2679 *img = img_template;
2681 img->phdr.p_vaddr = buf;
2682 img->phdr.p_paddr = buf;
2683 img->phdr.p_memsz = buf_size;
2685 img->shdr[1].sh_name = find_string(img->str, ".text");
2686 img->shdr[1].sh_addr = buf;
2687 img->shdr[1].sh_size = buf_size;
2689 img->shdr[2].sh_name = find_string(img->str, ".debug_info");
2690 img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
2692 img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
2693 img->shdr[4].sh_size = debug_frame_size;
2695 img->shdr[5].sh_name = find_string(img->str, ".symtab");
2696 img->shdr[6].sh_name = find_string(img->str, ".strtab");
2698 img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
2699 img->sym[1].st_value = buf;
2700 img->sym[1].st_size = buf_size;
2702 img->di.cu_low_pc = buf;
2703 img->di.cu_high_pc = buf + buf_size;
2704 img->di.fn_low_pc = buf;
2705 img->di.fn_high_pc = buf + buf_size;
2707 dfh = (DebugFrameHeader *)(img + 1);
2708 memcpy(dfh, debug_frame, debug_frame_size);
2709 dfh->fde.func_start = buf;
2710 dfh->fde.func_len = buf_size;
2713 /* Enable this block to be able to debug the ELF image file creation.
2714 One can use readelf, objdump, or other inspection utilities. */
2716 FILE *f = fopen("/tmp/qemu.jit", "w+b");
/* NOTE(review): fwrite() returns the number of items written (at most 1
   for this call, which passes a count of 1), not a byte count, so the
   comparison with img_size does not detect a short write.  Comparing
   against 1 looks intended -- confirm. */
2718 if (fwrite(img, img_size, 1, f) != img_size) {
2719 /* Avoid stupid unused return value warning for fwrite. */
2726 one_entry.symfile_addr = img;
2727 one_entry.symfile_size = img_size;
2729 __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
2730 __jit_debug_descriptor.relevant_entry = &one_entry;
2731 __jit_debug_descriptor.first_entry = &one_entry;
2732 __jit_debug_register_code();
2735 /* No support for the feature. Provide the entry point expected by exec.c,
2736 and implement the internal function we declared earlier.
   These are the definitions used when ELF_HOST_MACHINE is not defined;
   they keep the same signatures as the real implementation so callers
   need no conditional code. */
2738 static void tcg_register_jit_int(void *buf, size_t size,
2739 const void *debug_frame,
2740 size_t debug_frame_size)
2744 void tcg_register_jit(void *buf, size_t buf_size)
2747 #endif /* ELF_HOST_MACHINE */