#endif
#include "qemu-common.h"
-#include "cache-utils.h"
-#include "host-utils.h"
-#include "qemu-timer.h"
+#include "qemu/cache-utils.h"
+#include "qemu/host-utils.h"
+#include "qemu/timer.h"
/* Note: the long term plan is to reduce the dependancies on the QEMU
CPU definitions. Currently they are used for qemu_ld/st
static TCGRegSet tcg_target_available_regs[2];
static TCGRegSet tcg_target_call_clobber_regs;
-/* XXX: move that inside the context */
-uint16_t *gen_opc_ptr;
-TCGArg *gen_opparam_ptr;
-
static inline void tcg_out8(TCGContext *s, uint8_t v)
{
*s->code_ptr++ = v;
s->goto_tb_issue_mask = 0;
#endif
- gen_opc_ptr = gen_opc_buf;
- gen_opparam_ptr = gen_opparam_buf;
+ s->gen_opc_ptr = s->gen_opc_buf;
+ s->gen_opparam_ptr = s->gen_opparam_buf;
+
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+ /* Initialize qemu_ld/st labels to assist code generation at the end of TB
+ for TLB miss cases at the end of TB */
+ s->qemu_ldst_labels = tcg_malloc(sizeof(TCGLabelQemuLdst) *
+ TCG_MAX_QEMU_LDST);
+ s->nb_qemu_ldst_labels = 0;
+#endif
}
static inline void tcg_temp_alloc(TCGContext *s, int n)
}
#endif /* TCG_TARGET_EXTEND_ARGS */
- *gen_opc_ptr++ = INDEX_op_call;
- nparam = gen_opparam_ptr++;
+ *s->gen_opc_ptr++ = INDEX_op_call;
+ nparam = s->gen_opparam_ptr++;
if (ret != TCG_CALL_DUMMY_ARG) {
#if TCG_TARGET_REG_BITS < 64
if (sizemask & 1) {
#ifdef TCG_TARGET_WORDS_BIGENDIAN
- *gen_opparam_ptr++ = ret + 1;
- *gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret + 1;
+ *s->gen_opparam_ptr++ = ret;
#else
- *gen_opparam_ptr++ = ret;
- *gen_opparam_ptr++ = ret + 1;
+ *s->gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret + 1;
#endif
nb_rets = 2;
} else
#endif
{
- *gen_opparam_ptr++ = ret;
+ *s->gen_opparam_ptr++ = ret;
nb_rets = 1;
}
} else {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
/* some targets want aligned 64 bit args */
if (real_args & 1) {
- *gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
+ *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
real_args++;
}
#endif
have to get more complicated to differentiate between
stack arguments and register arguments. */
#if defined(TCG_TARGET_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
- *gen_opparam_ptr++ = args[i] + 1;
- *gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i] + 1;
+ *s->gen_opparam_ptr++ = args[i];
#else
- *gen_opparam_ptr++ = args[i];
- *gen_opparam_ptr++ = args[i] + 1;
+ *s->gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i] + 1;
#endif
real_args += 2;
continue;
}
#endif /* TCG_TARGET_REG_BITS < 64 */
- *gen_opparam_ptr++ = args[i];
+ *s->gen_opparam_ptr++ = args[i];
real_args++;
}
- *gen_opparam_ptr++ = GET_TCGV_PTR(func);
+ *s->gen_opparam_ptr++ = GET_TCGV_PTR(func);
- *gen_opparam_ptr++ = flags;
+ *s->gen_opparam_ptr++ = flags;
*nparam = (nb_rets << 16) | (real_args + 1);
/* total parameters, needed to go backward in the instruction stream */
- *gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
+ *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
for (i = 0; i < nargs; ++i) {
char buf[128];
first_insn = 1;
- opc_ptr = gen_opc_buf;
- args = gen_opparam_buf;
- while (opc_ptr < gen_opc_ptr) {
+ opc_ptr = s->gen_opc_buf;
+ args = s->gen_opparam_buf;
+ while (opc_ptr < s->gen_opc_ptr) {
c = *opc_ptr++;
def = &tcg_op_defs[c];
if (c == INDEX_op_debug_insn_start) {
uint16_t dead_args;
uint8_t sync_args;
- gen_opc_ptr++; /* skip end */
+ s->gen_opc_ptr++; /* skip end */
- nb_ops = gen_opc_ptr - gen_opc_buf;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
mem_temps = tcg_malloc(s->nb_temps);
tcg_la_func_end(s, dead_temps, mem_temps);
- args = gen_opparam_ptr;
+ args = s->gen_opparam_ptr;
op_index = nb_ops - 1;
while (op_index >= 0) {
- op = gen_opc_buf[op_index];
+ op = s->gen_opc_buf[op_index];
def = &tcg_op_defs[op];
switch(op) {
case INDEX_op_call:
/* pure functions can be removed if their result is not
used */
- if (call_flags & TCG_CALL_PURE) {
+ if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for(i = 0; i < nb_oargs; i++) {
arg = args[i];
if (!dead_temps[arg] || mem_temps[arg]) {
goto do_not_remove_call;
}
}
- tcg_set_nop(s, gen_opc_buf + op_index,
+ tcg_set_nop(s, s->gen_opc_buf + op_index,
args - 1, nb_args);
} else {
do_not_remove_call:
dead_temps[arg] = 1;
mem_temps[arg] = 0;
}
-
- if (!(call_flags & TCG_CALL_CONST)) {
+
+ if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
+ /* globals should be synced to memory */
+ memset(mem_temps, 1, s->nb_globals);
+ }
+ if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
+ TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */
memset(dead_temps, 1, s->nb_globals);
- memset(mem_temps, 1, s->nb_globals);
}
/* input args are live */
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
- if (dead_temps[args[1]]) {
- if (dead_temps[args[0]]) {
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
/* Create the single operation plus nop. */
} else {
op = INDEX_op_sub_i32;
}
- gen_opc_buf[op_index] = op;
+ s->gen_opc_buf[op_index] = op;
args[1] = args[2];
args[2] = args[4];
- assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
/* Fall through and mark the single-word operation live. */
nb_iargs = 2;
nb_oargs = 1;
nb_iargs = 2;
nb_oargs = 2;
/* Likewise, test for the high part of the operation dead. */
- if (dead_temps[args[1]]) {
- if (dead_temps[args[0]]) {
+ if (dead_temps[args[1]] && !mem_temps[args[1]]) {
+ if (dead_temps[args[0]] && !mem_temps[args[0]]) {
goto do_remove;
}
- gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+ s->gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
args[1] = args[2];
args[2] = args[3];
- assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
- tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+ assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
+ tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
/* Fall through and mark the single-word operation live. */
nb_oargs = 1;
}
}
}
do_remove:
- tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
+ tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
#ifdef CONFIG_PROFILER
s->del_op_count++;
#endif
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
tcg_la_bb_end(s, dead_temps, mem_temps);
- } else if (def->flags & TCG_OPF_CALL_CLOBBER) {
- /* globals should go back to memory */
- memset(dead_temps, 1, s->nb_globals);
+ } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* globals should be synced to memory */
memset(mem_temps, 1, s->nb_globals);
}
op_index--;
}
- if (args != gen_opparam_buf)
+ if (args != s->gen_opparam_buf) {
tcg_abort();
+ }
}
#else
/* dummy liveness analysis */
static void tcg_liveness_analysis(TCGContext *s)
{
int nb_ops;
- nb_ops = gen_opc_ptr - gen_opc_buf;
+ nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
if (ts->val_type == TEMP_VAL_REG) {
s->reg_to_temp[ts->reg] = -1;
}
- if (temp < s->nb_globals || (ts->temp_local && ts->mem_allocated)) {
+ if (temp < s->nb_globals || ts->temp_local) {
ts->val_type = TEMP_VAL_MEM;
} else {
ts->val_type = TEMP_VAL_DEAD;
temporary registers needs to be allocated to store a constant. */
static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs)
{
+#ifdef USE_LIVENESS_ANALYSIS
+ /* The liveness analysis already ensures that globals are back
+ in memory. Keep an assert for safety. */
+ assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg);
+#else
temp_sync(s, temp, allocated_regs);
temp_dead(s, temp);
+#endif
}
/* save globals to their canonical location and assume they can be
}
}
+/* sync globals to their canonical location and assume they can be
+ read by the following code. 'allocated_regs' is used in case a
+ temporary registers needs to be allocated to store a constant. */
+static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
+{
+ int i;
+
+ for (i = 0; i < s->nb_globals; i++) {
+#ifdef USE_LIVENESS_ANALYSIS
+ assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg ||
+ s->temps[i].mem_coherent);
+#else
+ temp_sync(s, i, allocated_regs);
+#endif
+ }
+}
+
/* at the end of a basic block, we assume all temporaries are dead and
all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
if (ts->temp_local) {
temp_save(s, i, allocated_regs);
} else {
+#ifdef USE_LIVENESS_ANALYSIS
+ /* The liveness analysis already ensures that temps are dead.
+ Keep an assert for safety. */
+ assert(ts->val_type == TEMP_VAL_DEAD);
+#else
temp_dead(s, i);
+#endif
}
}
tcg_reg_free(s, reg);
}
}
- /* XXX: for load/store we could do that only for the slow path
- (i.e. when a memory callback is called) */
-
- /* store globals and free associated registers (we assume the insn
- can modify any global. */
- save_globals(s, allocated_regs);
+ }
+ if (def->flags & TCG_OPF_SIDE_EFFECTS) {
+ /* sync globals if the op has side effects and might trigger
+ an exception. */
+ sync_globals(s, allocated_regs);
}
/* satisfy the output constraints */
tcg_reg_free(s, reg);
}
}
-
- /* store globals and free associated registers (we assume the call
- can modify any global. */
- if (!(flags & TCG_CALL_CONST)) {
+
+ /* Save globals if they might be written by the helper, sync them if
+ they might be read. */
+ if (flags & TCG_CALL_NO_READ_GLOBALS) {
+ /* Nothing to do */
+ } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
+ sync_globals(s, allocated_regs);
+ } else {
save_globals(s, allocated_regs);
}
#endif
#ifdef USE_TCG_OPTIMIZATIONS
- gen_opparam_ptr =
- tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
+ s->gen_opparam_ptr =
+ tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
#endif
#ifdef CONFIG_PROFILER
s->code_buf = gen_code_buf;
s->code_ptr = gen_code_buf;
- args = gen_opparam_buf;
+ args = s->gen_opparam_buf;
op_index = 0;
for(;;) {
- opc = gen_opc_buf[op_index];
+ opc = s->gen_opc_buf[op_index];
#ifdef CONFIG_PROFILER
tcg_table_op_count[opc]++;
#endif
#endif
}
the_end:
+#if defined(CONFIG_QEMU_LDST_OPTIMIZATION) && defined(CONFIG_SOFTMMU)
+ /* Generate TB finalization at the end of block */
+ tcg_out_tb_finalize(s);
+#endif
return -1;
}
#ifdef CONFIG_PROFILER
{
int n;
- n = (gen_opc_ptr - gen_opc_buf);
+ n = (s->gen_opc_ptr - s->gen_opc_buf);
s->op_count += n;
if (n > s->op_count_max)
s->op_count_max = n;