#define TCG_H
#include "qemu-common.h"
+#include "cpu.h"
+#include "exec/tb-context.h"
#include "qemu/bitops.h"
#include "tcg-target.h"
+/* XXX: make safe guess about sizes */
+#define MAX_OP_PER_INSTR 266
+
+#if HOST_LONG_BITS == 32
+#define MAX_OPC_PARAM_PER_ARG 2
+#else
+#define MAX_OPC_PARAM_PER_ARG 1
+#endif
+#define MAX_OPC_PARAM_IARGS 5
+#define MAX_OPC_PARAM_OARGS 1
+#define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
+
+/* A Call op needs up to 4 + 2N parameters on 32-bit archs,
+ * and up to 4 + N parameters on 64-bit archs
+ * (N = number of input arguments + output arguments). */
+#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
+#define OPC_BUF_SIZE 640
+#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
+
+#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
+
#define CPU_TEMP_BUF_NLONGS 128
/* Default target word size to pointer size. */
#endif
+#if defined CONFIG_DEBUG_TCG || defined QEMU_STATIC_ANALYSIS
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#elif QEMU_GNUC_PREREQ(4, 5)
+# define tcg_debug_assert(X) \
+ do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#else
+# define tcg_debug_assert(X) do { (void)(X); } while (0)
+#endif
+
typedef struct TCGRelocation {
struct TCGRelocation *next;
int type;
#endif
/* MO_UNALN accesses are never checked for alignment.
- MO_ALIGN accesses will result in a call to the CPU's
- do_unaligned_access hook if the guest address is not aligned.
- The default depends on whether the target CPU defines ALIGNED_ONLY. */
- MO_AMASK = 16,
+ * MO_ALIGN accesses will result in a call to the CPU's
+ * do_unaligned_access hook if the guest address is not aligned.
+ * The default depends on whether the target CPU defines ALIGNED_ONLY.
+ *
+ * Some architectures (e.g. ARMv8) need the address which is aligned
+ * to a size more than the size of the memory access.
+ * Some architectures (e.g. SPARCv9) need an address which is aligned,
+ * but less strictly than the natural alignment.
+ *
+ * MO_ALIGN supposes the alignment size is the size of a memory access.
+ *
+ * There are three options:
+ * - unaligned access permitted (MO_UNALN).
+ * - an alignment to the size of an access (MO_ALIGN);
+ * - an alignment to a specified size, which may be more or less than
+ * the access size (MO_ALIGN_x where 'x' is a size in bytes);
+ */
+ MO_ASHIFT = 4,
+ MO_AMASK = 7 << MO_ASHIFT,
#ifdef ALIGNED_ONLY
MO_ALIGN = 0,
MO_UNALN = MO_AMASK,
MO_ALIGN = MO_AMASK,
MO_UNALN = 0,
#endif
+ MO_ALIGN_2 = 1 << MO_ASHIFT,
+ MO_ALIGN_4 = 2 << MO_ASHIFT,
+ MO_ALIGN_8 = 3 << MO_ASHIFT,
+ MO_ALIGN_16 = 4 << MO_ASHIFT,
+ MO_ALIGN_32 = 5 << MO_ASHIFT,
+ MO_ALIGN_64 = 6 << MO_ASHIFT,
/* Combinations of the above, for ease of use. */
MO_UB = MO_8,
MO_SSIZE = MO_SIZE | MO_SIGN,
} TCGMemOp;
+/**
+ * get_alignment_bits
+ * @memop: TCGMemOp value
+ *
+ * Extract the alignment size from the memop.
+ */
+static inline unsigned get_alignment_bits(TCGMemOp memop)
+{
+ unsigned a = memop & MO_AMASK;
+
+ if (a == MO_UNALN) {
+ /* No alignment required. */
+ a = 0;
+ } else if (a == MO_ALIGN) {
+ /* A natural alignment requirement. */
+ a = memop & MO_SIZE;
+ } else {
+ /* A specific alignment requirement. */
+ a = a >> MO_ASHIFT;
+ }
+#if defined(CONFIG_SOFTMMU)
+ /* The requested alignment cannot overlap the TLB flags. */
+ tcg_debug_assert((TLB_FLAGS_MASK & ((1 << a) - 1)) == 0);
+#endif
+ return a;
+}
+
typedef tcg_target_ulong TCGArg;
/* Define a type and accessor macros for variables. Using pointer types
typedef struct TCGv_i32_d *TCGv_i32;
typedef struct TCGv_i64_d *TCGv_i64;
typedef struct TCGv_ptr_d *TCGv_ptr;
+typedef TCGv_ptr TCGv_env;
+#if TARGET_LONG_BITS == 32
+#define TCGv TCGv_i32
+#elif TARGET_LONG_BITS == 64
+#define TCGv TCGv_i64
+#else
+#error Unhandled TARGET_LONG_BITS value
+#endif
static inline TCGv_i32 QEMU_ARTIFICIAL MAKE_TCGV_I32(intptr_t i)
{
#define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1)
#define TCG_CALL_DUMMY_ARG ((TCGArg)(-1))
+typedef enum {
+ /* Used to indicate the type of accesses on which ordering
+ is to be ensured. Modeled after SPARC barriers. */
+ TCG_MO_LD_LD = 0x01,
+ TCG_MO_ST_LD = 0x02,
+ TCG_MO_LD_ST = 0x04,
+ TCG_MO_ST_ST = 0x08,
+ TCG_MO_ALL = 0x0F, /* OR of the above */
+
+ /* Used to indicate the kind of ordering which is to be ensured by the
+ instruction. These types are derived from x86/aarch64 instructions.
+ It should be noted that these are different from C11 semantics. */
+ TCG_BAR_LDAQ = 0x10, /* Following ops will not come forward */
+ TCG_BAR_STRL = 0x20, /* Previous ops will not be delayed */
+ TCG_BAR_SC = 0x30, /* No ops cross barrier; OR of the above */
+} TCGBar;
+
/* Conditions. Note that these are laid out for easy manipulation by
the functions below:
bit 0 is used for inverting;
} TCGTempVal;
typedef struct TCGTemp {
- unsigned int reg:8;
- unsigned int mem_reg:8;
+ TCGReg reg:8;
TCGTempVal val_type:8;
TCGType base_type:8;
TCGType type:8;
unsigned int fixed_reg:1;
+ unsigned int indirect_reg:1;
+ unsigned int indirect_base:1;
unsigned int mem_coherent:1;
unsigned int mem_allocated:1;
unsigned int temp_local:1; /* If true, the temp is saved across
unsigned int temp_allocated:1; /* never used for code gen */
tcg_target_long val;
+ struct TCGTemp *mem_base;
intptr_t mem_offset;
const char *name;
} TCGTemp;
unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
} TCGTempSet;
+/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
+ this imples a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
+ There are never more than 2 outputs, which means that we can store all
+ dead + sync data within 16 bits. */
+#define DEAD_ARG 4
+#define SYNC_ARG 1
+typedef uint16_t TCGLifeData;
+
+/* The layout here is designed to avoid crossing of a 32-bit boundary.
+ If we do so, gcc adds padding, expanding the size to 12. */
typedef struct TCGOp {
- TCGOpcode opc : 8;
+ TCGOpcode opc : 8; /* 8 */
+
+ /* Index of the prev/next op, or 0 for the end of the list. */
+ unsigned prev : 10; /* 18 */
+ unsigned next : 10; /* 28 */
/* The number of out and in parameter for a call. */
- unsigned callo : 2;
- unsigned calli : 6;
+ unsigned calli : 4; /* 32 */
+ unsigned callo : 2; /* 34 */
- /* Index of the arguments for this op, or -1 for zero-operand ops. */
- signed args : 16;
+ /* Index of the arguments for this op, or 0 for zero-operand ops. */
+ unsigned args : 14; /* 48 */
- /* Index of the prex/next op, or -1 for the end of the list. */
- signed prev : 16;
- signed next : 16;
+ /* Lifetime data of the operands. */
+ unsigned life : 16; /* 64 */
} TCGOp;
-QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
-QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
-QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
+/* Make sure operands fit in the bitfields above. */
+QEMU_BUILD_BUG_ON(NB_OPS > (1 << 8));
+QEMU_BUILD_BUG_ON(OPC_BUF_SIZE > (1 << 10));
+QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE > (1 << 14));
+
+/* Make sure that we don't overflow 64 bits without noticing. */
+QEMU_BUILD_BUG_ON(sizeof(TCGOp) > 8);
struct TCGContext {
uint8_t *pool_cur, *pool_end;
int nb_labels;
int nb_globals;
int nb_temps;
+ int nb_indirects;
/* goto_tb support */
tcg_insn_unit *code_buf;
- uintptr_t *tb_next;
- uint16_t *tb_next_offset;
- uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
-
- /* liveness analysis */
- uint16_t *op_dead_args; /* for each operation, each bit tells if the
- corresponding argument is dead */
- uint8_t *op_sync_args; /* for each operation, each bit tells if the
- corresponding output argument needs to be
- sync to memory. */
-
+ uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */
+ uint16_t *tb_jmp_insn_offset; /* tb->jmp_insn_offset if USE_DIRECT_JUMP */
+ uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
+
TCGRegSet reserved_regs;
intptr_t current_frame_offset;
intptr_t frame_start;
intptr_t frame_end;
- int frame_reg;
+ TCGTemp *frame_temp;
tcg_insn_unit *code_ptr;
int64_t del_op_count;
int64_t code_in_len;
int64_t code_out_len;
+ int64_t search_out_len;
int64_t interm_time;
int64_t code_time;
int64_t la_time;
int goto_tb_issue_mask;
#endif
- int gen_first_op_idx;
- int gen_last_op_idx;
int gen_next_op_idx;
int gen_next_parm_idx;
void *code_gen_prologue;
void *code_gen_buffer;
size_t code_gen_buffer_size;
- /* threshold to flush the translated code buffer */
- size_t code_gen_buffer_max_size;
void *code_gen_ptr;
+ /* Threshold to flush the translated code buffer. */
+ void *code_gen_highwater;
+
TBContext tb_ctx;
- /* The TCGBackendData structure is private to tcg-target.c. */
+ /* Track which vCPU triggers events */
+ CPUState *cpu; /* *_trans */
+ TCGv_env tcg_env; /* *_exec */
+
+ /* The TCGBackendData structure is private to tcg-target.inc.c. */
struct TCGBackendData *be;
TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
- /* tells in which temporary a given register is. It does not take
- into account fixed registers */
- int reg_to_temp[TCG_TARGET_NB_REGS];
+ /* Tells which temporary holds a given register.
+ It does not take into account fixed registers */
+ TCGTemp *reg_to_temp[TCG_TARGET_NB_REGS];
TCGOp gen_op_buf[OPC_BUF_SIZE];
TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
- target_ulong gen_opc_pc[OPC_BUF_SIZE];
- uint16_t gen_opc_icount[OPC_BUF_SIZE];
- uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
-
- target_ulong gen_opc_data[TARGET_INSN_START_WORDS];
+ uint16_t gen_insn_end_off[TCG_MAX_INSNS];
+ target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
};
extern TCGContext tcg_ctx;
+extern bool parallel_cpus;
+
+static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v)
+{
+ int op_argi = tcg_ctx.gen_op_buf[op_idx].args;
+ tcg_ctx.gen_opparam_buf[op_argi + arg] = v;
+}
/* The number of opcodes emitted so far. */
static inline int tcg_op_buf_count(void)
void *tcg_malloc_internal(TCGContext *s, int size);
void tcg_pool_reset(TCGContext *s);
-void tcg_pool_delete(TCGContext *s);
void tb_lock(void);
void tb_unlock(void);
void tcg_prologue_init(TCGContext *s);
void tcg_func_start(TCGContext *s);
-int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
-int tcg_gen_code_search_pc(TCGContext *s, tcg_insn_unit *gen_code_buf,
- long offset);
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb);
+
+void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size);
+
+int tcg_global_mem_new_internal(TCGType, TCGv_ptr, intptr_t, const char *);
-void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size);
+TCGv_i32 tcg_global_reg_new_i32(TCGReg reg, const char *name);
+TCGv_i64 tcg_global_reg_new_i64(TCGReg reg, const char *name);
-TCGv_i32 tcg_global_reg_new_i32(int reg, const char *name);
-TCGv_i32 tcg_global_mem_new_i32(int reg, intptr_t offset, const char *name);
TCGv_i32 tcg_temp_new_internal_i32(int temp_local);
+TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+
+void tcg_temp_free_i32(TCGv_i32 arg);
+void tcg_temp_free_i64(TCGv_i64 arg);
+
+static inline TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t offset,
+ const char *name)
+{
+ int idx = tcg_global_mem_new_internal(TCG_TYPE_I32, reg, offset, name);
+ return MAKE_TCGV_I32(idx);
+}
+
static inline TCGv_i32 tcg_temp_new_i32(void)
{
return tcg_temp_new_internal_i32(0);
}
+
static inline TCGv_i32 tcg_temp_local_new_i32(void)
{
return tcg_temp_new_internal_i32(1);
}
-void tcg_temp_free_i32(TCGv_i32 arg);
-char *tcg_get_arg_str_i32(TCGContext *s, char *buf, int buf_size, TCGv_i32 arg);
-TCGv_i64 tcg_global_reg_new_i64(int reg, const char *name);
-TCGv_i64 tcg_global_mem_new_i64(int reg, intptr_t offset, const char *name);
-TCGv_i64 tcg_temp_new_internal_i64(int temp_local);
+static inline TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t offset,
+ const char *name)
+{
+ int idx = tcg_global_mem_new_internal(TCG_TYPE_I64, reg, offset, name);
+ return MAKE_TCGV_I64(idx);
+}
+
static inline TCGv_i64 tcg_temp_new_i64(void)
{
return tcg_temp_new_internal_i64(0);
}
+
static inline TCGv_i64 tcg_temp_local_new_i64(void)
{
return tcg_temp_new_internal_i64(1);
}
-void tcg_temp_free_i64(TCGv_i64 arg);
-char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
#if defined(CONFIG_DEBUG_TCG)
/* If you call tcg_clear_temp_count() at the start of a section of
abort();\
} while (0)
-#ifdef CONFIG_DEBUG_TCG
-# define tcg_debug_assert(X) do { assert(X); } while (0)
-#elif QEMU_GNUC_PREREQ(4, 5)
-# define tcg_debug_assert(X) \
- do { if (!(X)) { __builtin_unreachable(); } } while (0)
-#else
-# define tcg_debug_assert(X) do { (void)(X); } while (0)
-#endif
-
void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
#if UINTPTR_MAX == UINT32_MAX
TCGArg ret, int nargs, TCGArg *args);
void tcg_op_remove(TCGContext *s, TCGOp *op);
+TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *op, TCGOpcode opc, int narg);
+
void tcg_optimize(TCGContext *s);
/* only used for debugging purposes */
void tcg_dump_ops(TCGContext *s);
-void dump_ops(const uint16_t *opc_buf, const TCGArg *opparam_buf);
TCGv_i32 tcg_const_i32(int32_t val);
TCGv_i64 tcg_const_i64(int64_t val);
TCGv_i32 tcg_const_local_i32(int32_t val);
/**
* tcg_qemu_tb_exec:
- * @env: CPUArchState * for the CPU
+ * @env: pointer to CPUArchState for the CPU
* @tb_ptr: address of generated code for the TB to execute
*
* Start executing code from a given translation block.
* which has not yet been directly linked, or an asynchronous
* event such as an interrupt needs handling.
*
- * The return value is a pointer to the next TB to execute
- * (if known; otherwise zero). This pointer is assumed to be
- * 4-aligned, and the bottom two bits are used to return further
- * information:
+ * Return: The return value is the value passed to the corresponding
+ * tcg_gen_exit_tb() at translation time of the last TB attempted to execute.
+ * The value is either zero or a 4-byte aligned pointer to that TB combined
+ * with additional information in its two least significant bits. The
+ * additional information is encoded as follows:
* 0, 1: the link between this TB and the next is via the specified
* TB index (0 or 1). That is, we left the TB via (the equivalent
* of) "goto_tb <index>". The main loop uses this to determine
* how to link the TB just executed to the next.
* 2: we are using instruction counting code generation, and we
* did not start executing this TB because the instruction counter
- * would hit zero midway through it. In this case the next-TB pointer
+ * would hit zero midway through it. In this case the pointer
* returned is the TB we were about to execute, and the caller must
* arrange to execute the remaining count of instructions.
* 3: we stopped because the CPU's exit_request flag was set
* (usually meaning that there is an interrupt that needs to be
- * handled). The next-TB pointer returned is the TB we were
- * about to execute when we noticed the pending exit request.
+ * handled). The pointer returned is the TB we were about to execute
+ * when we noticed the pending exit request.
*
* If the bottom two bits indicate an exit-via-index then the CPU
* state is correctly synchronised and ready for execution of the next
* TB (and in particular the guest PC is the address to execute next).
* Otherwise, we gave up on execution of this TB before it started, and
* the caller must fix up the CPU state by calling the CPU's
- * synchronize_from_tb() method with the next-TB pointer we return (falling
+ * synchronize_from_tb() method with the TB pointer we return (falling
* back to calling the CPU's set_pc method with tb->pb if no
* synchronize_from_tb() method exists).
*
# define helper_ret_ldq_cmmu helper_le_ldq_cmmu
#endif
+uint32_t helper_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
+ uint32_t cmpv, uint32_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint32_t helper_atomic_cmpxchgw_le_mmu(CPUArchState *env, target_ulong addr,
+ uint32_t cmpv, uint32_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint32_t helper_atomic_cmpxchgl_le_mmu(CPUArchState *env, target_ulong addr,
+ uint32_t cmpv, uint32_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_atomic_cmpxchgq_le_mmu(CPUArchState *env, target_ulong addr,
+ uint64_t cmpv, uint64_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint32_t helper_atomic_cmpxchgw_be_mmu(CPUArchState *env, target_ulong addr,
+ uint32_t cmpv, uint32_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint32_t helper_atomic_cmpxchgl_be_mmu(CPUArchState *env, target_ulong addr,
+ uint32_t cmpv, uint32_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+uint64_t helper_atomic_cmpxchgq_be_mmu(CPUArchState *env, target_ulong addr,
+ uint64_t cmpv, uint64_t newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+#define GEN_ATOMIC_HELPER(NAME, TYPE, SUFFIX) \
+TYPE helper_atomic_ ## NAME ## SUFFIX ## _mmu \
+ (CPUArchState *env, target_ulong addr, TYPE val, \
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+#ifdef CONFIG_ATOMIC64
+#define GEN_ATOMIC_HELPER_ALL(NAME) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, b) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, l_be) \
+ GEN_ATOMIC_HELPER(NAME, uint64_t, q_le) \
+ GEN_ATOMIC_HELPER(NAME, uint64_t, q_be)
+#else
+#define GEN_ATOMIC_HELPER_ALL(NAME) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, b) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, w_le) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, w_be) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, l_le) \
+ GEN_ATOMIC_HELPER(NAME, uint32_t, l_be)
+#endif
+
+GEN_ATOMIC_HELPER_ALL(fetch_add)
+GEN_ATOMIC_HELPER_ALL(fetch_sub)
+GEN_ATOMIC_HELPER_ALL(fetch_and)
+GEN_ATOMIC_HELPER_ALL(fetch_or)
+GEN_ATOMIC_HELPER_ALL(fetch_xor)
+
+GEN_ATOMIC_HELPER_ALL(add_fetch)
+GEN_ATOMIC_HELPER_ALL(sub_fetch)
+GEN_ATOMIC_HELPER_ALL(and_fetch)
+GEN_ATOMIC_HELPER_ALL(or_fetch)
+GEN_ATOMIC_HELPER_ALL(xor_fetch)
+
+GEN_ATOMIC_HELPER_ALL(xchg)
+
+#undef GEN_ATOMIC_HELPER_ALL
+#undef GEN_ATOMIC_HELPER
#endif /* CONFIG_SOFTMMU */
+#ifdef CONFIG_ATOMIC128
+#include "qemu/int128.h"
+
+/* These aren't really a "proper" helpers because TCG cannot manage Int128.
+ However, use the same format as the others, for use by the backends. */
+Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr,
+ Int128 cmpv, Int128 newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr,
+ Int128 cmpv, Int128 newv,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val,
+ TCGMemOpIdx oi, uintptr_t retaddr);
+
+#endif /* CONFIG_ATOMIC128 */
+
#endif /* TCG_H */