X-Git-Url: https://repo.jachan.dev/qemu.git/blobdiff_plain/702b33b1d5f8fee7e5799af450375671264a9ebd..9ea0f58fc723daeb9e1dba9a762269e8cbbd1b73:/tcg/arm/tcg-target.c diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 375c1e1c7f..eb0e84ce44 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -22,50 +22,43 @@ * THE SOFTWARE. */ -#if defined(__ARM_ARCH_7__) || \ - defined(__ARM_ARCH_7A__) || \ - defined(__ARM_ARCH_7EM__) || \ - defined(__ARM_ARCH_7M__) || \ - defined(__ARM_ARCH_7R__) -#define USE_ARMV7_INSTRUCTIONS +/* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */ +#ifndef __ARM_ARCH +# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ + || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ + || defined(__ARM_ARCH_7EM__) +# define __ARM_ARCH 7 +# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ + || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \ + || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) +# define __ARM_ARCH 6 +# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \ + || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \ + || defined(__ARM_ARCH_5TEJ__) +# define __ARM_ARCH 5 +# else +# define __ARM_ARCH 4 +# endif #endif -#if defined(USE_ARMV7_INSTRUCTIONS) || \ - defined(__ARM_ARCH_6J__) || \ - defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6T2__) || \ - defined(__ARM_ARCH_6Z__) || \ - defined(__ARM_ARCH_6ZK__) -#define USE_ARMV6_INSTRUCTIONS -#endif - -#if defined(USE_ARMV6_INSTRUCTIONS) || \ - defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5TE__) || \ - defined(__ARM_ARCH_5TEJ__) -#define USE_ARMV5_INSTRUCTIONS -#endif +static int arm_arch = __ARM_ARCH; -#ifdef USE_ARMV5_INSTRUCTIONS -static const int use_armv5_instructions = 1; +#if defined(__ARM_ARCH_5T__) \ + || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__) +# define use_armv5t_instructions 1 #else -static const int use_armv5_instructions = 0; +# define use_armv5t_instructions use_armv6_instructions #endif -#undef USE_ARMV5_INSTRUCTIONS -#ifdef USE_ARMV6_INSTRUCTIONS -static const int use_armv6_instructions = 1; -#else -static const int use_armv6_instructions = 0; -#endif -#undef USE_ARMV6_INSTRUCTIONS +#define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6) +#define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7) -#ifdef USE_ARMV7_INSTRUCTIONS -static const int use_armv7_instructions = 1; -#else -static const int use_armv7_instructions = 0; +#ifndef use_idiv_instructions +bool use_idiv_instructions; +#endif +#ifdef CONFIG_GETAUXVAL +# include #endif -#undef USE_ARMV7_INSTRUCTIONS #ifndef NDEBUG static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { @@ -115,21 +108,21 @@ static const int tcg_target_call_oarg_regs[2] = { #define TCG_REG_TMP TCG_REG_R12 -static inline void reloc_abs32(void *code_ptr, tcg_target_long target) +static inline void reloc_abs32(void *code_ptr, intptr_t target) { *(uint32_t *) code_ptr = target; } -static inline void reloc_pc24(void *code_ptr, tcg_target_long target) +static inline void reloc_pc24(void *code_ptr, intptr_t target) { - uint32_t offset = ((target - ((tcg_target_long) code_ptr + 8)) >> 2); + uint32_t offset = ((target - ((intptr_t)code_ptr + 8)) >> 2); *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff) | (offset & 0xffffff); } static void patch_reloc(uint8_t *code_ptr, int type, - tcg_target_long value, tcg_target_long addend) + intptr_t value, intptr_t addend) { switch (type) { case R_ARM_ABS32: @@ -201,8 +194,6 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) /* qemu_st address & data_reg */ case 's': - /* qemu_st64 data_reg2 */ - case 'S': ct->ct |= TCG_CT_REG; tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1); /* r0-r2 will be overwritten when reading the tlb entry (softmmu only) @@ -421,6 +412,20 @@ static inline void tcg_out_dat_reg(TCGContext *s, (rn << 16) | (rd << 12) | shift | rm); } +static inline void tcg_out_nop(TCGContext *s) +{ + if (use_armv7_instructions) { + /* Architected nop introduced in v6k. */ + /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this + also Just So Happened to do nothing on pre-v6k so that we + don't need to conditionalize it? */ + tcg_out32(s, 0xe320f000); + } else { + /* Prior to that the assembler uses mov r0, r0. */ + tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0)); + } +} + static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm) { /* Simple reg-reg move, optimising out the 'do nothing' case */ @@ -1011,13 +1016,16 @@ static inline void tcg_out_call(TCGContext *s, uint32_t addr) if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) { if (addr & 1) { /* Use BLX if the target is in Thumb mode */ - if (!use_armv5_instructions) { + if (!use_armv5t_instructions) { tcg_abort(); } tcg_out_blx_imm(s, val); } else { tcg_out_bl(s, COND_AL, val); } + } else if (use_armv7_instructions) { + tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr); + tcg_out_blx(s, COND_AL, TCG_REG_TMP); } else { tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4); tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); @@ -1027,7 +1035,7 @@ static inline void tcg_out_call(TCGContext *s, uint32_t addr) static inline void tcg_out_callr(TCGContext *s, int cond, int arg) { - if (use_armv5_instructions) { + if (use_armv5t_instructions) { tcg_out_blx(s, cond, arg); } else { tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0, @@ -1040,14 +1048,9 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) { TCGLabel *l = &s->labels[label_index]; - if (l->has_value) + if (l->has_value) { tcg_out_goto(s, cond, l->u.value); - else if (cond == COND_AL) { - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4); - tcg_out_reloc(s, s->code_ptr, R_ARM_ABS32, label_index, 31337); - s->code_ptr += 4; } else { - /* Probably this should be preferred even for COND_AL... */ tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337); tcg_out_b_noaddr(s, cond); } @@ -1055,8 +1058,6 @@ static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index) #ifdef CONFIG_SOFTMMU -#include "exec/softmmu_defs.h" - /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr, int mmu_idx) */ static const void * const qemu_ld_helpers[4] = { @@ -1180,7 +1181,7 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, TCG_REG_R2, tlb_offset, 1, 1); if (TARGET_LONG_BITS == 64) { - tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0, + tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1, TCG_REG_R2, 4, 1, 0); } } @@ -1199,6 +1200,134 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi, TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0)); } } + +/* Record the context of a call to the out of line helper code for the slow + path for a load or store, so that we can later generate the correct + helper code. */ +static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc, + int data_reg, int data_reg2, int addrlo_reg, + int addrhi_reg, int mem_index, + uint8_t *raddr, uint8_t *label_ptr) +{ + int idx; + TCGLabelQemuLdst *label; + + if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { + tcg_abort(); + } + + idx = s->nb_qemu_ldst_labels++; + label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx]; + label->is_ld = is_ld; + label->opc = opc; + label->datalo_reg = data_reg; + label->datahi_reg = data_reg2; + label->addrlo_reg = addrlo_reg; + label->addrhi_reg = addrhi_reg; + label->mem_index = mem_index; + label->raddr = raddr; + label->label_ptr[0] = label_ptr; +} + +static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, data_reg, data_reg2; + uint8_t *start; + + reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index); + tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[lb->opc & 3]); + + data_reg = lb->datalo_reg; + data_reg2 = lb->datahi_reg; + + start = s->code_ptr; + switch (lb->opc) { + case 0 | 4: + tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 1 | 4: + tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 0: + case 1: + case 2: + default: + tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); + break; + case 3: + tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); + tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1); + break; + } + + /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between + the call and the branch back to straight-line code. Note that the + moves above could be elided by register allocation, nor do we know + which code alternative we chose for extension. */ + switch (s->code_ptr - start) { + case 0: + tcg_out_nop(s); + /* FALLTHRU */ + case 4: + tcg_out_nop(s); + /* FALLTHRU */ + case 8: + break; + default: + abort(); + } + + tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr); +} + +static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb) +{ + TCGReg argreg, data_reg, data_reg2; + + reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr); + + argreg = TCG_REG_R0; + argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); + if (TARGET_LONG_BITS == 64) { + argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg); + } else { + argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg); + } + + data_reg = lb->datalo_reg; + data_reg2 = lb->datahi_reg; + switch (lb->opc) { + case 0: + argreg = tcg_out_arg_reg8(s, argreg, data_reg); + break; + case 1: + argreg = tcg_out_arg_reg16(s, argreg, data_reg); + break; + case 2: + argreg = tcg_out_arg_reg32(s, argreg, data_reg); + break; + case 3: + argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2); + break; + } + + argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index); + tcg_out_call(s, (tcg_target_long) qemu_st_helpers[lb->opc & 3]); + + /* For GETPC_LDST in exec-all.h, we architect exactly 2 insns between + the call and the branch back to straight-line code. */ + tcg_out_nop(s); + tcg_out_nop(s); + tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr); +} #endif /* SOFTMMU */ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) @@ -1207,8 +1336,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) bool bswap; #ifdef CONFIG_SOFTMMU int mem_index, s_bits; - TCGReg argreg, addr_reg2; - uint32_t *label_ptr; + TCGReg addr_reg2; + uint8_t *label_ptr; #endif #ifdef TARGET_WORDS_BIGENDIAN bswap = 1; @@ -1227,89 +1356,56 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc) tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)); - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R2, + label_ptr = s->code_ptr; + tcg_out_b_noaddr(s, COND_NE); + + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read)); switch (opc) { case 0: - tcg_out_ld8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); break; case 0 | 4: - tcg_out_ld8s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); break; case 1: - tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); if (bswap) { - tcg_out_bswap16(s, COND_EQ, data_reg, data_reg); + tcg_out_bswap16(s, COND_AL, data_reg, data_reg); } break; case 1 | 4: if (bswap) { - tcg_out_ld16u_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); - tcg_out_bswap16s(s, COND_EQ, data_reg, data_reg); + tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); + tcg_out_bswap16s(s, COND_AL, data_reg, data_reg); } else { - tcg_out_ld16s_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); } break; case 2: default: - tcg_out_ld32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); if (bswap) { - tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); + tcg_out_bswap32(s, COND_AL, data_reg, data_reg); } break; case 3: if (bswap) { - tcg_out_ld32_rwb(s, COND_EQ, data_reg2, TCG_REG_R1, addr_reg); - tcg_out_ld32_12(s, COND_EQ, data_reg, TCG_REG_R1, 4); - tcg_out_bswap32(s, COND_EQ, data_reg2, data_reg2); - tcg_out_bswap32(s, COND_EQ, data_reg, data_reg); + tcg_out_ld32_rwb(s, COND_AL, data_reg2, TCG_REG_R1, addr_reg); + tcg_out_ld32_12(s, COND_AL, data_reg, TCG_REG_R1, 4); + tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2); + tcg_out_bswap32(s, COND_AL, data_reg, data_reg); } else { - tcg_out_ld32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); - tcg_out_ld32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); + tcg_out_ld32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg); + tcg_out_ld32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4); } break; } - label_ptr = (void *) s->code_ptr; - tcg_out_b_noaddr(s, COND_EQ); - - /* TODO: move this code to where the constants pool will be */ - /* Note that this code relies on the constraints we set in arm_op_defs[] - * to ensure that later arguments are not passed to us in registers we - * trash by moving the earlier arguments into them. - */ - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2); - } else { - argreg = tcg_out_arg_reg32(s, argreg, addr_reg); - } - argreg = tcg_out_arg_imm32(s, argreg, mem_index); - tcg_out_call(s, (tcg_target_long) qemu_ld_helpers[s_bits]); - - switch (opc) { - case 0 | 4: - tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0); - break; - case 1 | 4: - tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0); - break; - case 0: - case 1: - case 2: - default: - tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); - break; - case 3: - tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0); - tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1); - break; - } - - reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2, + mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1378,8 +1474,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) bool bswap; #ifdef CONFIG_SOFTMMU int mem_index, s_bits; - TCGReg argreg, addr_reg2; - uint32_t *label_ptr; + TCGReg addr_reg2; + uint8_t *label_ptr; #endif #ifdef TARGET_WORDS_BIGENDIAN bswap = 1; @@ -1399,79 +1495,49 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc) offsetof(CPUArchState, tlb_table[mem_index][0].addr_write)); - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R2, + label_ptr = s->code_ptr; + tcg_out_b_noaddr(s, COND_NE); + + tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write)); switch (opc) { case 0: - tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_st8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); break; case 1: if (bswap) { - tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); + tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st16_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1); } else { - tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_st16_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); } break; case 2: default: if (bswap) { - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, TCG_REG_R1); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st32_r(s, COND_AL, TCG_REG_R0, addr_reg, TCG_REG_R1); } else { - tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, TCG_REG_R1); + tcg_out_st32_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1); } break; case 3: if (bswap) { - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2); - tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, addr_reg); - tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg); - tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, TCG_REG_R1, 4); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2); + tcg_out_st32_rwb(s, COND_AL, TCG_REG_R0, TCG_REG_R1, addr_reg); + tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg); + tcg_out_st32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R1, 4); } else { - tcg_out_st32_rwb(s, COND_EQ, data_reg, TCG_REG_R1, addr_reg); - tcg_out_st32_12(s, COND_EQ, data_reg2, TCG_REG_R1, 4); + tcg_out_st32_rwb(s, COND_AL, data_reg, TCG_REG_R1, addr_reg); + tcg_out_st32_12(s, COND_AL, data_reg2, TCG_REG_R1, 4); } break; } - label_ptr = (void *) s->code_ptr; - tcg_out_b_noaddr(s, COND_EQ); - - /* TODO: move this code to where the constants pool will be */ - /* Note that this code relies on the constraints we set in arm_op_defs[] - * to ensure that later arguments are not passed to us in registers we - * trash by moving the earlier arguments into them. - */ - argreg = TCG_REG_R0; - argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0); - if (TARGET_LONG_BITS == 64) { - argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2); - } else { - argreg = tcg_out_arg_reg32(s, argreg, addr_reg); - } - - switch (opc) { - case 0: - argreg = tcg_out_arg_reg8(s, argreg, data_reg); - break; - case 1: - argreg = tcg_out_arg_reg16(s, argreg, data_reg); - break; - case 2: - argreg = tcg_out_arg_reg32(s, argreg, data_reg); - break; - case 3: - argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2); - break; - } - - argreg = tcg_out_arg_imm32(s, argreg, mem_index); - tcg_out_call(s, (tcg_target_long) qemu_st_helpers[s_bits]); - - reloc_pc24(label_ptr, (tcg_target_long)s->code_ptr); + add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2, + mem_index, s->code_ptr, label_ptr); #else /* !CONFIG_SOFTMMU */ if (GUEST_BASE) { uint32_t offset = GUEST_BASE; @@ -1536,17 +1602,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_exit_tb: - { + if (use_armv7_instructions || check_fit_imm(args[0])) { + tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]); + tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); + } else { uint8_t *ld_ptr = s->code_ptr; - if (args[0] >> 8) - tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); - else - tcg_out_dat_imm(s, COND_AL, ARITH_MOV, TCG_REG_R0, 0, args[0]); + tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0); tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr); - if (args[0] >> 8) { - *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; - tcg_out32(s, args[0]); - } + *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8; + tcg_out32(s, args[0]); } break; case INDEX_op_goto_tb: @@ -1853,24 +1917,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_divu_i32: tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]); break; - case INDEX_op_rem_i32: - tcg_out_sdiv(s, COND_AL, TCG_REG_TMP, args[1], args[2]); - tcg_out_mul32(s, COND_AL, TCG_REG_TMP, TCG_REG_TMP, args[2]); - tcg_out_dat_reg(s, COND_AL, ARITH_SUB, args[0], args[1], TCG_REG_TMP, - SHIFT_IMM_LSL(0)); - break; - case INDEX_op_remu_i32: - tcg_out_udiv(s, COND_AL, TCG_REG_TMP, args[1], args[2]); - tcg_out_mul32(s, COND_AL, TCG_REG_TMP, TCG_REG_TMP, args[2]); - tcg_out_dat_reg(s, COND_AL, ARITH_SUB, args[0], args[1], TCG_REG_TMP, - SHIFT_IMM_LSL(0)); - break; default: tcg_abort(); } } +#ifdef CONFIG_SOFTMMU +/* Generate TB finalization at the end of block. */ +void tcg_out_tb_finalize(TCGContext *s) +{ + int i; + for (i = 0; i < s->nb_qemu_ldst_labels; i++) { + TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i]; + if (label->is_ld) { + tcg_out_qemu_ld_slow_path(s, label); + } else { + tcg_out_qemu_st_slow_path(s, label); + } + } +} +#endif /* SOFTMMU */ + static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, @@ -1928,7 +1996,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_qemu_st8, { "s", "s" } }, { INDEX_op_qemu_st16, { "s", "s" } }, { INDEX_op_qemu_st32, { "s", "s" } }, - { INDEX_op_qemu_st64, { "S", "S", "s" } }, + { INDEX_op_qemu_st64, { "s", "s", "s" } }, #else { INDEX_op_qemu_ld8u, { "r", "l", "l" } }, { INDEX_op_qemu_ld8s, { "r", "l", "l" } }, @@ -1940,7 +2008,7 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_qemu_st8, { "s", "s", "s" } }, { INDEX_op_qemu_st16, { "s", "s", "s" } }, { INDEX_op_qemu_st32, { "s", "s", "s" } }, - { INDEX_op_qemu_st64, { "S", "S", "s", "s" } }, + { INDEX_op_qemu_st64, { "s", "s", "s", "s" } }, #endif { INDEX_op_bswap16_i32, { "r", "r" } }, @@ -1952,23 +2020,30 @@ static const TCGTargetOpDef arm_op_defs[] = { { INDEX_op_deposit_i32, { "r", "0", "rZ" } }, -#if TCG_TARGET_HAS_div_i32 { INDEX_op_div_i32, { "r", "r", "r" } }, - { INDEX_op_rem_i32, { "r", "r", "r" } }, { INDEX_op_divu_i32, { "r", "r", "r" } }, - { INDEX_op_remu_i32, { "r", "r", "r" } }, -#endif { -1 }, }; static void tcg_target_init(TCGContext *s) { -#if !defined(CONFIG_USER_ONLY) - /* fail safe */ - if ((1 << CPU_TLB_ENTRY_BITS) != sizeof(CPUTLBEntry)) - tcg_abort(); -#endif +#if defined(CONFIG_GETAUXVAL) + /* Only probe for the platform and capabilities if we havn't already + determined maximum values at compile time. */ +# if !defined(use_idiv_instructions) + { + unsigned long hwcap = getauxval(AT_HWCAP); + use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0; + } +# endif + if (__ARM_ARCH < 7) { + const char *pl = (const char *)getauxval(AT_PLATFORM); + if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') { + arm_arch = pl[1] - '0'; + } + } +#endif /* GETAUXVAL */ tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff); tcg_regset_set32(tcg_target_call_clobber_regs, 0, @@ -1988,13 +2063,13 @@ static void tcg_target_init(TCGContext *s) } static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { tcg_out_ld32u(s, COND_AL, arg, arg1, arg2); } static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, - TCGReg arg1, tcg_target_long arg2) + TCGReg arg1, intptr_t arg2) { tcg_out_st32(s, COND_AL, arg, arg1, arg2); } @@ -2011,23 +2086,31 @@ static inline void tcg_out_movi(TCGContext *s, TCGType type, tcg_out_movi32(s, COND_AL, ret, arg); } +/* Compute frame size via macros, to share between tcg_target_qemu_prologue + and tcg_register_jit. */ + +#define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long)) + +#define FRAME_SIZE \ + ((PUSH_SIZE \ + + TCG_STATIC_CALL_ARGS_SIZE \ + + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + + TCG_TARGET_STACK_ALIGN - 1) \ + & -TCG_TARGET_STACK_ALIGN) + static void tcg_target_qemu_prologue(TCGContext *s) { - int frame_size; + int stack_addend; /* Calling convention requires us to save r4-r11 and lr. */ /* stmdb sp!, { r4 - r11, lr } */ tcg_out32(s, (COND_AL << 28) | 0x092d4ff0); - /* Allocate the local stack frame. */ - frame_size = TCG_STATIC_CALL_ARGS_SIZE; - frame_size += CPU_TEMP_BUF_NLONGS * sizeof(long); - /* We saved an odd number of registers above; keep an 8 aligned stack. */ - frame_size = ((frame_size + TCG_TARGET_STACK_ALIGN - 1) - & -TCG_TARGET_STACK_ALIGN) + 4; + /* Reserve callee argument and tcg temp space. */ + stack_addend = FRAME_SIZE - PUSH_SIZE; tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK, - TCG_REG_CALL_STACK, frame_size, 1); + TCG_REG_CALL_STACK, stack_addend, 1); tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); @@ -2038,8 +2121,58 @@ static void tcg_target_qemu_prologue(TCGContext *s) /* Epilogue. We branch here via tb_ret_addr. */ tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK, - TCG_REG_CALL_STACK, frame_size, 1); + TCG_REG_CALL_STACK, stack_addend, 1); /* ldmia sp!, { r4 - r11, pc } */ tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0); } + +typedef struct { + DebugFrameCIE cie; + DebugFrameFDEHeader fde; + uint8_t fde_def_cfa[4]; + uint8_t fde_reg_ofs[18]; +} DebugFrame; + +#define ELF_HOST_MACHINE EM_ARM + +/* We're expecting a 2 byte uleb128 encoded value. */ +QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14)); + +static DebugFrame debug_frame = { + .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */ + .cie.id = -1, + .cie.version = 1, + .cie.code_align = 1, + .cie.data_align = 0x7c, /* sleb128 -4 */ + .cie.return_column = 14, + + /* Total FDE size does not include the "len" member. */ + .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset), + + .fde_def_cfa = { + 12, 13, /* DW_CFA_def_cfa sp, ... */ + (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */ + (FRAME_SIZE >> 7) + }, + .fde_reg_ofs = { + /* The following must match the stmdb in the prologue. */ + 0x8e, 1, /* DW_CFA_offset, lr, -4 */ + 0x8b, 2, /* DW_CFA_offset, r11, -8 */ + 0x8a, 3, /* DW_CFA_offset, r10, -12 */ + 0x89, 4, /* DW_CFA_offset, r9, -16 */ + 0x88, 5, /* DW_CFA_offset, r8, -20 */ + 0x87, 6, /* DW_CFA_offset, r7, -24 */ + 0x86, 7, /* DW_CFA_offset, r6, -28 */ + 0x85, 8, /* DW_CFA_offset, r5, -32 */ + 0x84, 9, /* DW_CFA_offset, r4, -36 */ + } +}; + +void tcg_register_jit(void *buf, size_t buf_size) +{ + debug_frame.fde.func_start = (tcg_target_long) buf; + debug_frame.fde.func_len = buf_size; + + tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); +}