#endif
#include "qemu-common.h"
-#include "cache-utils.h"
-#include "host-utils.h"
-#include "qemu-timer.h"
+#include "qemu/cache-utils.h"
+#include "qemu/host-utils.h"
+#include "qemu/timer.h"
/* Note: the long term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;
-/* XXX: move that inside the context */
-uint16_t *gen_opc_ptr;
-TCGArg *gen_opparam_ptr;
-
static inline void tcg_out8(TCGContext *s, uint8_t v)
{
*s->code_ptr++ = v;
s->goto_tb_issue_mask = 0;
#endif
- gen_opc_ptr = gen_opc_buf;
- gen_opparam_ptr = gen_opparam_buf;
+ s->gen_opc_ptr = s->gen_opc_buf;
+ s->gen_opparam_ptr = s->gen_opparam_buf;
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+ /* Initialize qemu_ld/st labels to assist code generation at the end of TB
+    for TLB miss cases */
+ s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) *
+ TCG_MAX_QEMU_LDST);
+ s->nb_qemu_ldst_labels = 0;
+#endif
}
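
For reference, the context fields this hunk starts using look roughly like the
sketch below; the buffer sizes and their exact placement inside TCGContext are
assumptions for illustration, not part of this patch:

    /* Assumed TCGContext members referenced by the hunks above (sketch). */
    uint16_t *gen_opc_ptr;              /* write cursor into gen_opc_buf */
    TCGArg *gen_opparam_ptr;            /* write cursor into gen_opparam_buf */
    uint16_t gen_opc_buf[OPC_BUF_SIZE];
    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
    TCGLabelQemuLdst *qemu_ldst_labels; /* one slow-path record per qemu_ld/st */
    int nb_qemu_ldst_labels;
#endif

Moving the two cursors into the context resolves the removed "XXX: move that
inside the context" comment: the opcode and operand streams now travel with
the TCGContext rather than living in globals.
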
static inline void tcg_temp_alloc(TCGContext *s, int n)
}
#endif /* TCG_TARGET_EXTEND_ARGS */
- *gen_opc_ptr++ = INDEX_op_call;
- nparam = gen_opparam_ptr++;
+ *s->gen_opc_ptr++ = INDEX_op_call;
+ nparam = s->gen_opparam_ptr++;
if (ret != TCG_CALL_DUMMY_ARG) {
#if TCG_TARGET_REG_BITS < 64
if (sizemask & 1) {
#ifdef TCG_TARGET_WORDS_BIGENDIAN
- *gen_opparam_ptr++ = ret + 1;
- *gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret + 1;
+ *s->gen_opparam_ptr++ = ret;
#else
- *gen_opparam_ptr++ = ret;
- *gen_opparam_ptr++ = ret + 1;
+ *s->gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret + 1;
#endif
nb_rets = 2;
} else
#endif
{
- *gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret;
nb_rets = 1;
}
} else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
/* some targets want aligned 64 bit args */
if (real_args & 1) {
- *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+ *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
real_args++;
}
#endif
have to get more complicated to differentiate between
stack arguments and register arguments. */
#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
- *gen_opparam_ptr++ = args[i] + 1;
- *gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i] + 1;
+ *s->gen_opparam_ptr++ = args[i];
#else
- *gen_opparam_ptr++ = args[i];
- *gen_opparam_ptr++ = args[i] + 1;
+ *s->gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i] + 1;
#endif
real_args += 2;
continue;
}
#endif /* TCG_TARGET_REG_BITS < 64 */
- *gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i];
real_args++;
}
- *gen_opparam_ptr++ = GET_TCGV_PTR(func);
+ *s->gen_opparam_ptr++ = GET_TCGV_PTR(func);
- *gen_opparam_ptr++ = flags;
+ *s->gen_opparam_ptr++ = flags;
*nparam = (nb_rets << 16) | (real_args + 1);
/* total parameters, needed to go backward in the instruction stream */
- *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+ *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
for (i = 0; i < nargs; ++i) {
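
An aside on the encoding that this mechanical change keeps intact: the first
parameter word packs the call layout, and the trailing word gives the total
length so the liveness pass can walk the stream backwards. A hedged decoding
sketch, with illustrative variable names:

    /* Decoding a call op's descriptor words (illustrative). */
    int nb_rets   = *nparam >> 16;     /* return-value words               */
    int nb_params = *nparam & 0xffff;  /* real_args + 1 (the function ptr) */
    /* The final word is 1 + nb_rets + real_args + 3: nparam itself, the
       returns, the args, then func, flags, and the count word, i.e. the
       whole op's parameter footprint. */
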
char buf[128];
first_insn = 1;
- opc_ptr = gen_opc_buf;
- args = gen_opparam_buf;
- while (opc_ptr < gen_opc_ptr) {
+ opc_ptr = s->gen_opc_buf;
+ args = s->gen_opparam_buf;
+ while (opc_ptr < s->gen_opc_ptr) {
c = *opc_ptr++;
def = &tcg_op_defs[c];
if (c == INDEX_op_debug_insn_start) {
uint16_t dead_args;
uint8_t sync_args;
- gen_opc_ptr++; /* skip end */
+ s->gen_opc_ptr++; /* skip end */
- nb_ops = gen_opc_ptr - gen_opc_buf;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
- args = gen_opparam_ptr;
+ args = s->gen_opparam_ptr;
op_index = nb_ops - 1;
while (op_index >= 0) {
- op = gen_opc_buf[op_index];
+ op = s->gen_opc_buf[op_index];
def = &tcg_op_defs[op];
switch(op) {
case INDEX_op_call:
goto do_not_remove_call;
}
}
- tcg_set_nop(s, gen_opc_buf + op_index,
+ tcg_set_nop(s, s->gen_opc_buf + op_index,
args - 1, nb_args);
} else {
do_not_remove_call:
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
- if (dead_temps[args[1]]) {
- if (dead_temps[args[0]]) {
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
/* Create the single operation plus nop. */
} else {
op = INDEX_op_sub_i32;
}
- gen_opc_buf[op_index] = op;
+ s->gen_opc_buf[op_index] = op;
args[1] = args[2];
args[2] = args[4];
- assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
/* Fall through and mark the single-word operation live. */
nb_iargs = 2;
nb_oargs = 1;
nb_iargs = 2;
nb_oargs = 2;
/* Likewise, test for the high part of the operation dead. */
- if (dead_temps[args[1]]) {
- if (dead_temps[args[0]]) {
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
- gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+ s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
args[1] = args[2];
args[2] = args[3];
- assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
/* Fall through and mark the single-word operation live. */
nb_oargs = 1;
}
}
}
do_remove:
- tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
+ tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
op_index--;
}
- if (args != gen_opparam_buf)
+ if (args != s->gen_opparam_buf) {
tcg_abort();
+ }
}
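
The new mem_temps tests above are a behavioural fix amid the mechanical
renames: a temp can be dead (never read again) yet still need its value
stored back to its canonical memory slot, so the op defining it must not be
turned into a nop. A minimal sketch of the two per-temp state arrays the
backward walk consults (comments are illustrative):

    /* Per-temp liveness state used by the walk (sketch). */
    uint8_t *dead_temps; /* dead_temps[t] != 0: temp t is not read later     */
    uint8_t *mem_temps;  /* mem_temps[t]  != 0: temp t still needs a store
                            to memory, so its defining op must stay          */
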
#else
/* dummy liveness analysis */
static void tcg_liveness_analysis(TCGContext *s)
{
int nb_ops;
- nb_ops = gen_opc_ptr - gen_opc_buf;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = -1;
}
- if (temp < s->nb_globals || (ts->temp_local && ts->mem_allocated)) {
+ if (temp < s->nb_globals || ts->temp_local) {
ts->val_type = TEMP_VAL_MEM;
} else {
ts->val_type = TEMP_VAL_DEAD;
#endif
#ifdef USE_TCG_OPTIMIZATIONS
- gen_opparam_ptr =
- tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
+ s->gen_opparam_ptr =
+ tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
#endif
#ifdef CONFIG_PROFILER
s->code_buf = gen_code_buf;
s->code_ptr = gen_code_buf;
- args = gen_opparam_buf;
+ args = s->gen_opparam_buf;
op_index = 0;
for(;;) {
- opc = gen_opc_buf[op_index];
+ opc = s->gen_opc_buf[op_index];
#ifdef CONFIG_PROFILER
tcg_table_op_count[opc]++;
#endif
#endif
}
the_end:
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+ /* Generate TB finalization at the end of the block */
+ tcg_out_tb_finalize(s);
+#endif
return -1;
}
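
tcg_out_tb_finalize() is provided by each backend built with
CONFIG_QEMU_LDST_OPTIMIZATION. Its expected shape, assuming each
TCGLabelQemuLdst records whether it came from a load or a store and the
backend supplies per-direction slow-path emitters (field and helper names
here are assumptions for illustration):

    /* Sketch of the backend hook invoked at the_end above. */
    static void tcg_out_tb_finalize(TCGContext *s)
    {
        int i;
        /* Emit one TLB-miss slow path per recorded qemu_ld/st. */
        for (i = 0; i < s->nb_qemu_ldst_labels; i++) {
            TCGLabelQemuLdst *l = &s->qemu_ldst_labels[i];
            if (l->is_ld) {
                tcg_out_qemu_ld_slow_path(s, l);
            } else {
                tcg_out_qemu_st_slow_path(s, l);
            }
        }
    }

Running this once per TB, after the main code-generation loop, keeps the
fast paths straight-line and batches all TLB-miss stubs behind the block.
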
#ifdef CONFIG_PROFILER
{
int n;
- n = (gen_opc_ptr - gen_opc_buf);
+ n = (s->gen_opc_ptr - s->gen_opc_buf);
s->op_count += n;
if (n > s->op_count_max)
s->op_count_max = n;