2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Andrzej Zaborowski
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 #include "tcg-be-ldst.h"
28 int arm_arch = __ARM_ARCH;
30 #ifndef use_idiv_instructions
31 bool use_idiv_instructions;
34 /* ??? Ought to think about changing CONFIG_SOFTMMU to always be defined. */
36 # define USING_SOFTMMU 1
38 # define USING_SOFTMMU 0
41 #ifdef CONFIG_DEBUG_TCG
42 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
62 static const int tcg_target_reg_alloc_order[] = {
80 static const int tcg_target_call_iarg_regs[4] = {
81 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
83 static const int tcg_target_call_oarg_regs[2] = {
84 TCG_REG_R0, TCG_REG_R1
87 #define TCG_REG_TMP TCG_REG_R12
89 static inline void reloc_pc24(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
91 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
92 *code_ptr = (*code_ptr & ~0xffffff) | (offset & 0xffffff);
95 static inline void reloc_pc24_atomic(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
97 ptrdiff_t offset = (tcg_ptr_byte_diff(target, code_ptr) - 8) >> 2;
98 tcg_insn_unit insn = atomic_read(code_ptr);
99 tcg_debug_assert(offset == sextract32(offset, 0, 24));
100 atomic_set(code_ptr, deposit32(insn, 0, 24, offset));
103 static void patch_reloc(tcg_insn_unit *code_ptr, int type,
104 intptr_t value, intptr_t addend)
106 tcg_debug_assert(type == R_ARM_PC24);
107 tcg_debug_assert(addend == 0);
108 reloc_pc24(code_ptr, (tcg_insn_unit *)value);
111 #define TCG_CT_CONST_ARM 0x100
112 #define TCG_CT_CONST_INV 0x200
113 #define TCG_CT_CONST_NEG 0x400
114 #define TCG_CT_CONST_ZERO 0x800
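/* A quick reference for the single-letter constraints parsed below, as
   used by the "rI", "rIK", "rIN" and "rZ" strings in arm_op_defs (summary
   added for orientation; target_parse_constraint is authoritative):
     'I'  constant encodable as a rotated ARM immediate
     'K'  constant whose bitwise inverse is encodable (the "rIK" forms)
     'N'  constant whose negation is encodable (the "rIN" forms)
     'Z'  the constant zero
     'r'  any register,  'l'  qemu_ld address,  's'  qemu_st address/data */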
116 /* parse target specific constraints */
117 static const char *target_parse_constraint(TCGArgConstraint *ct,
118 const char *ct_str, TCGType type)
122 ct->ct |= TCG_CT_CONST_ARM;
125 ct->ct |= TCG_CT_CONST_INV;
127 case 'N': /* The gcc constraint letter is L, already used here. */
128 ct->ct |= TCG_CT_CONST_NEG;
131 ct->ct |= TCG_CT_CONST_ZERO;
135 ct->ct |= TCG_CT_REG;
136 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
139 /* qemu_ld address */
141 ct->ct |= TCG_CT_REG;
142 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
143 #ifdef CONFIG_SOFTMMU
144 /* r0-r2,lr will be overwritten when reading the tlb entry,
145 so don't use these. */
146 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
147 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
148 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
149 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
153 /* qemu_st address & data */
155 ct->ct |= TCG_CT_REG;
156 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
157 /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
158 and r0-r1 when doing the byte swapping, so don't use these. */
159 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
160 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
161 #if defined(CONFIG_SOFTMMU)
162 /* Avoid clashes with registers being used for helper args */
163 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
164 #if TARGET_LONG_BITS == 64
165 /* Avoid clashes with registers being used for helper args */
166 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
168 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
178 static inline uint32_t rotl(uint32_t val, int n)
180 return (val << n) | (val >> (32 - n));
183 /* ARM immediates for ALU instructions are made of an unsigned 8-bit value
184 right-rotated by an even amount between 0 and 30. */
185 static inline int encode_imm(uint32_t imm)
189 /* simple case, only lower bits */
190 if ((imm & ~0xff) == 0)
192 /* then try a simple even shift */
193 shift = ctz32(imm) & ~1;
194 if (((imm >> shift) & ~0xff) == 0)
196 /* now try harder with rotations */
197 if ((rotl(imm, 2) & ~0xff) == 0)
199 if ((rotl(imm, 4) & ~0xff) == 0)
201 if ((rotl(imm, 6) & ~0xff) == 0)
203 /* imm can't be encoded */
207 static inline int check_fit_imm(uint32_t imm)
209 return encode_imm(imm) >= 0;
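/* Worked example of the encoding above (illustrative only): 0x0000ab00 is
   0xab shifted up by 8 bits, and rotating it left by 24 brings it back
   into the low byte, so it is a valid ALU operand2.  Callers such as
   tcg_out_movi32 below turn that left-rotation count into
       rotl(arg, rot) | (rot << 7)
   i.e. imm8 = 0xab with rot/2 = 12 in the 4-bit rotate field, which the
   CPU decodes as ROR(0xab, 24) = 0x0000ab00.  A value like 0x00012300
   spans more than 8 significant bits under any even rotation and cannot
   be encoded. */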
212 /* Test if a constant matches the constraint.
213 * TODO: define constraints for:
215 * ldr/str offset: between -0xfff and 0xfff
216 * ldrh/strh offset: between -0xff and 0xff
217 * mov operand2: values represented with x << (2 * y), x < 0x100
218 * add, sub, eor...: ditto
220 static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
221 const TCGArgConstraint *arg_ct)
225 if (ct & TCG_CT_CONST) {
227 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
229 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
231 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
233 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
240 #define TO_CPSR (1 << 20)
243 ARITH_AND = 0x0 << 21,
244 ARITH_EOR = 0x1 << 21,
245 ARITH_SUB = 0x2 << 21,
246 ARITH_RSB = 0x3 << 21,
247 ARITH_ADD = 0x4 << 21,
248 ARITH_ADC = 0x5 << 21,
249 ARITH_SBC = 0x6 << 21,
250 ARITH_RSC = 0x7 << 21,
251 ARITH_TST = 0x8 << 21 | TO_CPSR,
252 ARITH_CMP = 0xa << 21 | TO_CPSR,
253 ARITH_CMN = 0xb << 21 | TO_CPSR,
254 ARITH_ORR = 0xc << 21,
255 ARITH_MOV = 0xd << 21,
256 ARITH_BIC = 0xe << 21,
257 ARITH_MVN = 0xf << 21,
259 INSN_CLZ = 0x016f0f10,
260 INSN_RBIT = 0x06ff0f30,
262 INSN_LDR_IMM = 0x04100000,
263 INSN_LDR_REG = 0x06100000,
264 INSN_STR_IMM = 0x04000000,
265 INSN_STR_REG = 0x06000000,
267 INSN_LDRH_IMM = 0x005000b0,
268 INSN_LDRH_REG = 0x001000b0,
269 INSN_LDRSH_IMM = 0x005000f0,
270 INSN_LDRSH_REG = 0x001000f0,
271 INSN_STRH_IMM = 0x004000b0,
272 INSN_STRH_REG = 0x000000b0,
274 INSN_LDRB_IMM = 0x04500000,
275 INSN_LDRB_REG = 0x06500000,
276 INSN_LDRSB_IMM = 0x005000d0,
277 INSN_LDRSB_REG = 0x001000d0,
278 INSN_STRB_IMM = 0x04400000,
279 INSN_STRB_REG = 0x06400000,
281 INSN_LDRD_IMM = 0x004000d0,
282 INSN_LDRD_REG = 0x000000d0,
283 INSN_STRD_IMM = 0x004000f0,
284 INSN_STRD_REG = 0x000000f0,
286 INSN_DMB_ISH = 0xf57ff05b,
287 INSN_DMB_MCR = 0xee070fba,
291 #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
292 #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
293 #define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
294 #define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
295 #define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
296 #define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
297 #define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
298 #define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
300 enum arm_cond_code_e {
303 COND_CS = 0x2, /* Unsigned greater or equal */
304 COND_CC = 0x3, /* Unsigned less than */
305 COND_MI = 0x4, /* Negative */
306 COND_PL = 0x5, /* Zero or greater */
307 COND_VS = 0x6, /* Overflow */
308 COND_VC = 0x7, /* No overflow */
309 COND_HI = 0x8, /* Unsigned greater than */
310 COND_LS = 0x9, /* Unsigned less or equal */
318 static const uint8_t tcg_cond_to_arm_cond[] = {
319 [TCG_COND_EQ] = COND_EQ,
320 [TCG_COND_NE] = COND_NE,
321 [TCG_COND_LT] = COND_LT,
322 [TCG_COND_GE] = COND_GE,
323 [TCG_COND_LE] = COND_LE,
324 [TCG_COND_GT] = COND_GT,
326 [TCG_COND_LTU] = COND_CC,
327 [TCG_COND_GEU] = COND_CS,
328 [TCG_COND_LEU] = COND_LS,
329 [TCG_COND_GTU] = COND_HI,
332 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
334 tcg_out32(s, (cond << 28) | 0x0a000000 |
335 (((offset - 8) >> 2) & 0x00ffffff));
338 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
340 /* We pay attention here to not modify the branch target by masking
341 the corresponding bytes. This ensures that caches and memory are
342 kept coherent during retranslation. */
343 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0a));
346 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
348 /* We pay attention here to not modify the branch target by masking
349 the corresponding bytes. This ensures that caches and memory are
350 kept coherent during retranslation. */
351 tcg_out32(s, deposit32(*s->code_ptr, 24, 8, (cond << 4) | 0x0b));
354 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
356 tcg_out32(s, (cond << 28) | 0x0b000000 |
357 (((offset - 8) >> 2) & 0x00ffffff));
360 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
362 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
365 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
367 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
368 (((offset - 8) >> 2) & 0x00ffffff));
371 static inline void tcg_out_dat_reg(TCGContext *s,
372 int cond, int opc, int rd, int rn, int rm, int shift)
374 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
375 (rn << 16) | (rd << 12) | shift | rm);
378 static inline void tcg_out_nop(TCGContext *s)
380 if (use_armv7_instructions) {
381 /* Architected nop introduced in v6k. */
382 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
383 also Just So Happened to do nothing on pre-v6k so that we
384 don't need to conditionalize it? */
385 tcg_out32(s, 0xe320f000);
387 /* Prior to that the assembler uses mov r0, r0. */
388 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
392 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
394 /* Simple reg-reg move, optimising out the 'do nothing' case */
396 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
400 static inline void tcg_out_bx(TCGContext *s, int cond, TCGReg rn)
402 /* Unless the C portion of QEMU is compiled as thumb, we don't
403 actually need true BX semantics; merely a branch to an address
404 held in a register. */
405 if (use_armv5t_instructions) {
406 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
408 tcg_out_mov_reg(s, cond, TCG_REG_PC, rn);
412 static inline void tcg_out_dat_imm(TCGContext *s,
413 int cond, int opc, int rd, int rn, int im)
415 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
416 (rn << 16) | (rd << 12) | im);
419 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
423 /* For armv7, make sure not to use movw+movt when mov/mvn would do.
424 Speed things up by only checking when movt would be required.
425 Prior to armv7, have one go at fully rotated immediates before
426 doing the decomposition thing below. */
427 if (!use_armv7_instructions || (arg & 0xffff0000)) {
428 rot = encode_imm(arg);
430 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
431 rotl(arg, rot) | (rot << 7));
434 rot = encode_imm(~arg);
436 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
437 rotl(~arg, rot) | (rot << 7));
442 /* Use movw + movt. */
443 if (use_armv7_instructions) {
445 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
446 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
447 if (arg & 0xffff0000) {
449 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
450 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
455 /* TODO: This is very suboptimal; we could easily have a constant
456 pool somewhere after all the instructions. */
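/* As a rough illustration of the decomposition that follows (the loop
   below emits one data-processing insn per rotated 8-bit chunk, clearing
   each handled chunk with eor as the comment underneath describes):
   loading 0x00ff00ff without movw/movt becomes
       mov  rd, #0xff
       eor  rd, rd, #0x00ff0000
   while 0xff00ff00, having more leading ones, starts with mvn instead:
       mvn  rd, #0xff
       eor  rd, rd, #0x00ff0000  */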
459 /* If we have lots of leading 1's, we can shorten the sequence by
460 beginning with mvn and then clearing higher bits with eor. */
461 if (clz32(~arg) > clz32(arg)) {
462 opc = ARITH_MVN, arg = ~arg;
465 int i = ctz32(arg) & ~1;
466 rot = ((32 - i) << 7) & 0xf00;
467 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
475 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
476 TCGArg lhs, TCGArg rhs, int rhs_is_const)
478 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
479 * rhs must satisfy the "rI" constraint.
482 int rot = encode_imm(rhs);
483 tcg_debug_assert(rot >= 0);
484 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
486 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
490 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
491 TCGReg dst, TCGReg lhs, TCGArg rhs,
494 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
495 * rhs must satisfy the "rIK" constraint.
498 int rot = encode_imm(rhs);
501 rot = encode_imm(rhs);
502 tcg_debug_assert(rot >= 0);
505 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
507 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
511 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
512 TCGArg dst, TCGArg lhs, TCGArg rhs,
515 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
516 * rhs must satisfy the "rIN" constraint.
519 int rot = encode_imm(rhs);
522 rot = encode_imm(rhs);
523 tcg_debug_assert(rot >= 0);
526 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
528 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
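/* A rough illustration of the three immediate strategies above:
     "rI"  (tcg_out_dat_rI):  the constant itself must encode, e.g.
           add r0, r1, #0x100.
     "rIK" (tcg_out_dat_rIK): otherwise the bitwise inverse is tried with
           the companion opcode, e.g. and r0, r1, #0xffffff00 is emitted
           as bic r0, r1, #0xff.
     "rIN" (tcg_out_dat_rIN): likewise with the negated constant, e.g.
           add r0, r1, #-4 is emitted as sub r0, r1, #4.
   When the operand is not constant at all, the plain register form is
   emitted instead. */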
532 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
533 TCGReg rn, TCGReg rm)
535 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
536 if (!use_armv6_instructions && rd == rn) {
538 /* rd == rn == rm; copy an input to tmp first. */
539 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
540 rm = rn = TCG_REG_TMP;
547 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
550 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
551 TCGReg rd1, TCGReg rn, TCGReg rm)
553 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
554 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
555 if (rd0 == rm || rd1 == rm) {
556 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
565 tcg_out32(s, (cond << 28) | 0x00800090 |
566 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
569 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
570 TCGReg rd1, TCGReg rn, TCGReg rm)
572 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
573 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
574 if (rd0 == rm || rd1 == rm) {
575 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
584 tcg_out32(s, (cond << 28) | 0x00c00090 |
585 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
588 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
590 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
593 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
595 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
598 static inline void tcg_out_ext8s(TCGContext *s, int cond,
601 if (use_armv6_instructions) {
603 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
605 tcg_out_dat_reg(s, cond, ARITH_MOV,
606 rd, 0, rn, SHIFT_IMM_LSL(24));
607 tcg_out_dat_reg(s, cond, ARITH_MOV,
608 rd, 0, rd, SHIFT_IMM_ASR(24));
612 static inline void tcg_out_ext8u(TCGContext *s, int cond,
615 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
618 static inline void tcg_out_ext16s(TCGContext *s, int cond,
621 if (use_armv6_instructions) {
623 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
625 tcg_out_dat_reg(s, cond, ARITH_MOV,
626 rd, 0, rn, SHIFT_IMM_LSL(16));
627 tcg_out_dat_reg(s, cond, ARITH_MOV,
628 rd, 0, rd, SHIFT_IMM_ASR(16));
632 static inline void tcg_out_ext16u(TCGContext *s, int cond,
635 if (use_armv6_instructions) {
637 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
639 tcg_out_dat_reg(s, cond, ARITH_MOV,
640 rd, 0, rn, SHIFT_IMM_LSL(16));
641 tcg_out_dat_reg(s, cond, ARITH_MOV,
642 rd, 0, rd, SHIFT_IMM_LSR(16));
646 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
648 if (use_armv6_instructions) {
650 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
652 tcg_out_dat_reg(s, cond, ARITH_MOV,
653 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
654 tcg_out_dat_reg(s, cond, ARITH_MOV,
655 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
656 tcg_out_dat_reg(s, cond, ARITH_ORR,
657 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
661 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
663 if (use_armv6_instructions) {
665 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
667 tcg_out_dat_reg(s, cond, ARITH_MOV,
668 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
669 tcg_out_dat_reg(s, cond, ARITH_MOV,
670 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
671 tcg_out_dat_reg(s, cond, ARITH_ORR,
672 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
676 /* swap the two low bytes assuming that the two high input bytes and the
677 two high output bytes can hold any value. */
678 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
680 if (use_armv6_instructions) {
682 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
684 tcg_out_dat_reg(s, cond, ARITH_MOV,
685 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
686 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
687 tcg_out_dat_reg(s, cond, ARITH_ORR,
688 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
692 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
694 if (use_armv6_instructions) {
696 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
698 tcg_out_dat_reg(s, cond, ARITH_EOR,
699 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
700 tcg_out_dat_imm(s, cond, ARITH_BIC,
701 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
702 tcg_out_dat_reg(s, cond, ARITH_MOV,
703 rd, 0, rn, SHIFT_IMM_ROR(8));
704 tcg_out_dat_reg(s, cond, ARITH_EOR,
705 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
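/* The four-insn fallback above is the classic ARM byte swap.  Writing the
   input as bytes [b3 b2 b1 b0] (most significant first):
       eor tmp, rn, rn, ror #16   ; tmp = [b3^b1, b2^b0, b1^b3, b0^b2]
       bic tmp, tmp, #0x00ff0000  ; tmp = [b3^b1,     0, b1^b3, b0^b2]
       mov rd,  rn, ror #8        ; rd  = [b0,       b3,    b2,    b1]
       eor rd,  rd, tmp, lsr #8   ; rd  = [b0,       b1,    b2,    b3]
   using only the single scratch register TCG_REG_TMP. */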
709 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
710 TCGArg a1, int ofs, int len, bool const_a1)
713 /* bfi becomes bfc with rn == 15. */
717 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
718 | (ofs << 7) | ((ofs + len - 1) << 16));
721 static inline void tcg_out_extract(TCGContext *s, int cond, TCGReg rd,
722 TCGArg a1, int ofs, int len)
725 tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | a1
726 | (ofs << 7) | ((len - 1) << 16));
729 static inline void tcg_out_sextract(TCGContext *s, int cond, TCGReg rd,
730 TCGArg a1, int ofs, int len)
733 tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | a1
734 | (ofs << 7) | ((len - 1) << 16));
737 /* Note that this routine is used for both LDR and LDRH formats, so we do
738 not wish to include an immediate shift at this point. */
739 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
740 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
742 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
743 | (w << 21) | (rn << 16) | (rt << 12) | rm);
746 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
747 TCGReg rn, int imm8, bool p, bool w)
754 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
755 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
758 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
759 TCGReg rn, int imm12, bool p, bool w)
766 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
767 (rn << 16) | (rt << 12) | imm12);
770 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
771 TCGReg rn, int imm12)
773 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
776 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
777 TCGReg rn, int imm12)
779 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
782 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
783 TCGReg rn, TCGReg rm)
785 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
788 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
789 TCGReg rn, TCGReg rm)
791 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
794 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
797 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
800 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
801 TCGReg rn, TCGReg rm)
803 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
806 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
809 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
812 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
813 TCGReg rn, TCGReg rm)
815 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
818 /* Register pre-increment with base writeback. */
819 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
820 TCGReg rn, TCGReg rm)
822 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
825 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
826 TCGReg rn, TCGReg rm)
828 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
831 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
834 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
837 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
840 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
843 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
844 TCGReg rn, TCGReg rm)
846 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
849 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
850 TCGReg rn, TCGReg rm)
852 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
855 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
858 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
861 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
862 TCGReg rn, TCGReg rm)
864 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
867 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
868 TCGReg rn, int imm12)
870 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
873 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
874 TCGReg rn, int imm12)
876 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
879 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
880 TCGReg rn, TCGReg rm)
882 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
885 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
886 TCGReg rn, TCGReg rm)
888 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
891 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
894 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
897 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
898 TCGReg rn, TCGReg rm)
900 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
903 static inline void tcg_out_ld32u(TCGContext *s, int cond,
904 int rd, int rn, int32_t offset)
906 if (offset > 0xfff || offset < -0xfff) {
907 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
908 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
910 tcg_out_ld32_12(s, cond, rd, rn, offset);
913 static inline void tcg_out_st32(TCGContext *s, int cond,
914 int rd, int rn, int32_t offset)
916 if (offset > 0xfff || offset < -0xfff) {
917 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
918 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
920 tcg_out_st32_12(s, cond, rd, rn, offset);
923 static inline void tcg_out_ld16u(TCGContext *s, int cond,
924 int rd, int rn, int32_t offset)
926 if (offset > 0xff || offset < -0xff) {
927 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
928 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
930 tcg_out_ld16u_8(s, cond, rd, rn, offset);
933 static inline void tcg_out_ld16s(TCGContext *s, int cond,
934 int rd, int rn, int32_t offset)
936 if (offset > 0xff || offset < -0xff) {
937 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
938 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
940 tcg_out_ld16s_8(s, cond, rd, rn, offset);
943 static inline void tcg_out_st16(TCGContext *s, int cond,
944 int rd, int rn, int32_t offset)
946 if (offset > 0xff || offset < -0xff) {
947 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
948 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
950 tcg_out_st16_8(s, cond, rd, rn, offset);
953 static inline void tcg_out_ld8u(TCGContext *s, int cond,
954 int rd, int rn, int32_t offset)
956 if (offset > 0xfff || offset < -0xfff) {
957 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
958 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
960 tcg_out_ld8_12(s, cond, rd, rn, offset);
963 static inline void tcg_out_ld8s(TCGContext *s, int cond,
964 int rd, int rn, int32_t offset)
966 if (offset > 0xff || offset < -0xff) {
967 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
968 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
970 tcg_out_ld8s_8(s, cond, rd, rn, offset);
973 static inline void tcg_out_st8(TCGContext *s, int cond,
974 int rd, int rn, int32_t offset)
976 if (offset > 0xfff || offset < -0xfff) {
977 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
978 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
980 tcg_out_st8_12(s, cond, rd, rn, offset);
983 /* The _goto case is normally between TBs within the same code buffer, and
984 * with the code buffer limited to 16MB we wouldn't need the long case.
985 * But we also use it for the tail-call to the qemu_ld/st helpers, which does need it.
987 static void tcg_out_goto(TCGContext *s, int cond, tcg_insn_unit *addr)
989 intptr_t addri = (intptr_t)addr;
990 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
992 if ((addri & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
993 tcg_out_b(s, cond, disp);
997 assert(use_armv5t_instructions || (addri & 1) == 0);
998 tcg_out_movi32(s, cond, TCG_REG_TMP, addri);
999 tcg_out_bx(s, cond, TCG_REG_TMP);
1002 /* The call case is mostly used for helpers - so it's not unreasonable
1003 * for them to be beyond branch range */
1004 static void tcg_out_call(TCGContext *s, tcg_insn_unit *addr)
1006 intptr_t addri = (intptr_t)addr;
1007 ptrdiff_t disp = tcg_pcrel_diff(s, addr);
1009 if (disp - 8 < 0x02000000 && disp - 8 >= -0x02000000) {
1011 /* Use BLX if the target is in Thumb mode */
1012 if (!use_armv5t_instructions) {
1015 tcg_out_blx_imm(s, disp);
1017 tcg_out_bl(s, COND_AL, disp);
1019 } else if (use_armv7_instructions) {
1020 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addri);
1021 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1023 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1024 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1025 tcg_out32(s, addri);
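/* A note on the final fallback just emitted (out-of-range call without
   armv7): reading PC in ARM mode yields the address of the current insn
   plus 8, so the three words above work out as
       add  lr, pc, #4     ; lr = address of the insn after the literal
       ldr  pc, [pc, #-4]  ; pc = the literal word, i.e. addri
       .word addri
   giving a call of unlimited range that returns past the literal. */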
1029 void arm_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
1031 tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
1032 tcg_insn_unit *target = (tcg_insn_unit *)addr;
1034 /* we could use a ldr pc, [pc, #-4] kind of branch and avoid the flush */
1035 reloc_pc24_atomic(code_ptr, target);
1036 flush_icache_range(jmp_addr, jmp_addr + 4);
1039 static inline void tcg_out_goto_label(TCGContext *s, int cond, TCGLabel *l)
1042 tcg_out_goto(s, cond, l->u.value_ptr);
1044 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, l, 0);
1045 tcg_out_b_noaddr(s, cond);
1049 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1051 if (use_armv7_instructions) {
1052 tcg_out32(s, INSN_DMB_ISH);
1053 } else if (use_armv6_instructions) {
1054 tcg_out32(s, INSN_DMB_MCR);
1058 #ifdef CONFIG_SOFTMMU
1059 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1060 * int mmu_idx, uintptr_t ra)
1062 static void * const qemu_ld_helpers[16] = {
1063 [MO_UB] = helper_ret_ldub_mmu,
1064 [MO_SB] = helper_ret_ldsb_mmu,
1066 [MO_LEUW] = helper_le_lduw_mmu,
1067 [MO_LEUL] = helper_le_ldul_mmu,
1068 [MO_LEQ] = helper_le_ldq_mmu,
1069 [MO_LESW] = helper_le_ldsw_mmu,
1070 [MO_LESL] = helper_le_ldul_mmu,
1072 [MO_BEUW] = helper_be_lduw_mmu,
1073 [MO_BEUL] = helper_be_ldul_mmu,
1074 [MO_BEQ] = helper_be_ldq_mmu,
1075 [MO_BESW] = helper_be_ldsw_mmu,
1076 [MO_BESL] = helper_be_ldul_mmu,
1079 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1080 * uintxx_t val, int mmu_idx, uintptr_t ra)
1082 static void * const qemu_st_helpers[16] = {
1083 [MO_UB] = helper_ret_stb_mmu,
1084 [MO_LEUW] = helper_le_stw_mmu,
1085 [MO_LEUL] = helper_le_stl_mmu,
1086 [MO_LEQ] = helper_le_stq_mmu,
1087 [MO_BEUW] = helper_be_stw_mmu,
1088 [MO_BEUL] = helper_be_stl_mmu,
1089 [MO_BEQ] = helper_be_stq_mmu,
1092 /* Helper routines for marshalling helper function arguments into
1093 * the correct registers and stack.
1094 * argreg is where we want to put this argument, arg is the argument itself.
1095 * Return value is the updated argreg ready for the next call.
1096 * Note that argregs 0..3 are real registers, 4+ go on the stack.
1098 * We provide routines for arguments which are: immediate, 32 bit
1099 * value in register, 16 and 8 bit values in register (which must be zero
1100 * extended before use) and 64 bit value in a lo:hi register pair.
1102 #define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
1103 static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
1106 MOV_ARG(s, COND_AL, argreg, arg); \
1108 int ofs = (argreg - 4) * 4; \
1110 tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
1111 tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
1113 return argreg + 1; \
1116 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1117 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1118 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1119 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1120 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1121 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1122 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1124 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1125 TCGReg arglo, TCGReg arghi)
1127 /* 64 bit arguments must go in even/odd register pairs
1128 * and in 8-aligned stack slots.
1133 if (use_armv6_instructions && argreg >= 4
1134 && (arglo & 1) == 0 && arghi == arglo + 1) {
1135 tcg_out_strd_8(s, COND_AL, arglo,
1136 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1139 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1140 argreg = tcg_out_arg_reg32(s, argreg, arghi);
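/* Rough illustration of the AAPCS rule described above: 64-bit values
   occupy an even/odd core register pair or an 8-aligned stack slot.  For
   a softmmu helper call with a 64-bit guest address this works out as
       r0        <- env
       r2:r3     <- addrlo:addrhi   (r1 left unused for alignment)
       stack     <- remaining data / oi / return-address arguments
   with everything beyond r3 spilled to the reserved stack area. */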
1145 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1147 /* We're expecting to use an 8-bit immediate and to mask. */
1148 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1150 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1151 Using the offset of the second entry in the last tlb table ensures
1152 that we can index all of the elements of the first entry. */
1153 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1156 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1157 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1159 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1160 TCGMemOp opc, int mem_index, bool is_load)
1162 TCGReg base = TCG_AREG0;
1165 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1166 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1167 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1168 unsigned s_bits = opc & MO_SIZE;
1169 unsigned a_bits = get_alignment_bits(opc);
1171 /* Should generate something like the following:
1172 * shr tmp, addrlo, #TARGET_PAGE_BITS (1)
1173 * add r2, env, #high
1174 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1175 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1176 * ldr r0, [r2, #cmp] (4)
1177 * tst addrlo, #s_mask
1178 * ldr r2, [r2, #add] (5)
1179 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1181 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1182 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1184 /* We checked that the offset is contained within 16 bits above. */
1185 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1186 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1187 (24 << 7) | (cmp_off >> 8));
1189 add_off -= cmp_off & 0xff00;
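/* The ADD just emitted folds the high byte of cmp_off into the base:
   (24 << 7) | (cmp_off >> 8) encodes imm8 = cmp_off >> 8 with a rotate
   field of 12, i.e. the value added is ROR(imm8, 24) == cmp_off & 0xff00.
   Subtracting the same amount from add_off keeps the remaining offsets
   small enough for the short immediate forms used below. */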
1193 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1194 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1195 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1196 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1198 /* Load the tlb comparator. Use ldrd if needed and available,
1199 but due to how the pointer needs setting up, ldm isn't useful.
1200 Base arm5 doesn't have ldrd, but armv5te does. */
1201 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1202 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1204 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1205 if (TARGET_LONG_BITS == 64) {
1206 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1210 /* Check alignment. We don't support inline unaligned accesses,
1211 but we can easily support overalignment checks. */
1212 if (a_bits < s_bits) {
1216 tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
1219 /* Load the tlb addend. */
1220 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1222 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1223 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1225 if (TARGET_LONG_BITS == 64) {
1226 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1227 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
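/* Note how the checks above are chained through the flags: when an
   alignment test is emitted, the TST leaves Z set only for a suitably
   aligned address, the page-tag CMP then executes under COND_EQ (so a
   misaligned access stays "ne"), and for 64-bit guest addresses the
   high-word CMP is likewise conditional.  The callers branch to the slow
   path on the final NE. */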
1233 /* Record the context of a call to the out of line helper code for the slow
1234 path for a load or store, so that we can later generate the correct
1236 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1237 TCGReg datalo, TCGReg datahi, TCGReg addrlo,
1238 TCGReg addrhi, tcg_insn_unit *raddr,
1239 tcg_insn_unit *label_ptr)
1241 TCGLabelQemuLdst *label = new_ldst_label(s);
1243 label->is_ld = is_ld;
1245 label->datalo_reg = datalo;
1246 label->datahi_reg = datahi;
1247 label->addrlo_reg = addrlo;
1248 label->addrhi_reg = addrhi;
1249 label->raddr = raddr;
1250 label->label_ptr[0] = label_ptr;
1253 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1255 TCGReg argreg, datalo, datahi;
1256 TCGMemOpIdx oi = lb->oi;
1257 TCGMemOp opc = get_memop(oi);
1260 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1262 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1263 if (TARGET_LONG_BITS == 64) {
1264 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1266 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1268 argreg = tcg_out_arg_imm32(s, argreg, oi);
1269 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1271 /* For armv6 we can use the canonical unsigned helpers and minimize
1272 icache usage. For pre-armv6, use the signed helpers since we do
1273 not have a single insn sign-extend. */
1274 if (use_armv6_instructions) {
1275 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
1277 func = qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)];
1278 if (opc & MO_SIGN) {
1282 tcg_out_call(s, func);
1284 datalo = lb->datalo_reg;
1285 datahi = lb->datahi_reg;
1286 switch (opc & MO_SSIZE) {
1288 tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
1291 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
1294 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1297 if (datalo != TCG_REG_R1) {
1298 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1299 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1300 } else if (datahi != TCG_REG_R0) {
1301 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1302 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
1304 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1305 tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
1306 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
1311 tcg_out_goto(s, COND_AL, lb->raddr);
1314 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1316 TCGReg argreg, datalo, datahi;
1317 TCGMemOpIdx oi = lb->oi;
1318 TCGMemOp opc = get_memop(oi);
1320 reloc_pc24(lb->label_ptr[0], s->code_ptr);
1322 argreg = TCG_REG_R0;
1323 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1324 if (TARGET_LONG_BITS == 64) {
1325 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1327 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1330 datalo = lb->datalo_reg;
1331 datahi = lb->datahi_reg;
1332 switch (opc & MO_SIZE) {
1334 argreg = tcg_out_arg_reg8(s, argreg, datalo);
1337 argreg = tcg_out_arg_reg16(s, argreg, datalo);
1341 argreg = tcg_out_arg_reg32(s, argreg, datalo);
1344 argreg = tcg_out_arg_reg64(s, argreg, datalo, datahi);
1348 argreg = tcg_out_arg_imm32(s, argreg, oi);
1349 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1351 /* Tail-call to the helper, which will return to the fast path. */
1352 tcg_out_goto(s, COND_AL, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1354 #endif /* SOFTMMU */
1356 static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
1357 TCGReg datalo, TCGReg datahi,
1358 TCGReg addrlo, TCGReg addend)
1360 TCGMemOp bswap = opc & MO_BSWAP;
1362 switch (opc & MO_SSIZE) {
1364 tcg_out_ld8_r(s, COND_AL, datalo, addrlo, addend);
1367 tcg_out_ld8s_r(s, COND_AL, datalo, addrlo, addend);
1370 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1372 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1377 tcg_out_ld16u_r(s, COND_AL, datalo, addrlo, addend);
1378 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1380 tcg_out_ld16s_r(s, COND_AL, datalo, addrlo, addend);
1385 tcg_out_ld32_r(s, COND_AL, datalo, addrlo, addend);
1387 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1392 TCGReg dl = (bswap ? datahi : datalo);
1393 TCGReg dh = (bswap ? datalo : datahi);
1395 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1396 if (USING_SOFTMMU && use_armv6_instructions
1397 && (dl & 1) == 0 && dh == dl + 1) {
1398 tcg_out_ldrd_r(s, COND_AL, dl, addrlo, addend);
1399 } else if (dl != addend) {
1400 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addrlo);
1401 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1403 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1404 addend, addrlo, SHIFT_IMM_LSL(0));
1405 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1406 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1409 tcg_out_bswap32(s, COND_AL, dl, dl);
1410 tcg_out_bswap32(s, COND_AL, dh, dh);
1417 static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
1418 TCGReg datalo, TCGReg datahi,
1421 TCGMemOp bswap = opc & MO_BSWAP;
1423 switch (opc & MO_SSIZE) {
1425 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
1428 tcg_out_ld8s_8(s, COND_AL, datalo, addrlo, 0);
1431 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1433 tcg_out_bswap16(s, COND_AL, datalo, datalo);
1438 tcg_out_ld16u_8(s, COND_AL, datalo, addrlo, 0);
1439 tcg_out_bswap16s(s, COND_AL, datalo, datalo);
1441 tcg_out_ld16s_8(s, COND_AL, datalo, addrlo, 0);
1446 tcg_out_ld32_12(s, COND_AL, datalo, addrlo, 0);
1448 tcg_out_bswap32(s, COND_AL, datalo, datalo);
1453 TCGReg dl = (bswap ? datahi : datalo);
1454 TCGReg dh = (bswap ? datalo : datahi);
1456 /* Avoid ldrd for user-only emulation, to handle unaligned. */
1457 if (USING_SOFTMMU && use_armv6_instructions
1458 && (dl & 1) == 0 && dh == dl + 1) {
1459 tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
1460 } else if (dl == addrlo) {
1461 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1462 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1464 tcg_out_ld32_12(s, COND_AL, dl, addrlo, bswap ? 4 : 0);
1465 tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
1468 tcg_out_bswap32(s, COND_AL, dl, dl);
1469 tcg_out_bswap32(s, COND_AL, dh, dh);
1476 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
1478 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1481 #ifdef CONFIG_SOFTMMU
1484 tcg_insn_unit *label_ptr;
1488 datahi = (is64 ? *args++ : 0);
1490 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1492 opc = get_memop(oi);
1494 #ifdef CONFIG_SOFTMMU
1495 mem_index = get_mmuidx(oi);
1496 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
1498 /* This is a conditional BL only to load a pointer within this opcode into LR
1499 for the slow path. We will not be using the value for a tail call. */
1500 label_ptr = s->code_ptr;
1501 tcg_out_bl_noaddr(s, COND_NE);
1503 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend);
1505 add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
1506 s->code_ptr, label_ptr);
1507 #else /* !CONFIG_SOFTMMU */
1509 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1510 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, TCG_REG_TMP);
1512 tcg_out_qemu_ld_direct(s, opc, datalo, datahi, addrlo);
1517 static inline void tcg_out_qemu_st_index(TCGContext *s, int cond, TCGMemOp opc,
1518 TCGReg datalo, TCGReg datahi,
1519 TCGReg addrlo, TCGReg addend)
1521 TCGMemOp bswap = opc & MO_BSWAP;
1523 switch (opc & MO_SIZE) {
1525 tcg_out_st8_r(s, cond, datalo, addrlo, addend);
1529 tcg_out_bswap16st(s, cond, TCG_REG_R0, datalo);
1530 tcg_out_st16_r(s, cond, TCG_REG_R0, addrlo, addend);
1532 tcg_out_st16_r(s, cond, datalo, addrlo, addend);
1538 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1539 tcg_out_st32_r(s, cond, TCG_REG_R0, addrlo, addend);
1541 tcg_out_st32_r(s, cond, datalo, addrlo, addend);
1545 /* Avoid strd for user-only emulation, to handle unaligned. */
1547 tcg_out_bswap32(s, cond, TCG_REG_R0, datahi);
1548 tcg_out_st32_rwb(s, cond, TCG_REG_R0, addend, addrlo);
1549 tcg_out_bswap32(s, cond, TCG_REG_R0, datalo);
1550 tcg_out_st32_12(s, cond, TCG_REG_R0, addend, 4);
1551 } else if (USING_SOFTMMU && use_armv6_instructions
1552 && (datalo & 1) == 0 && datahi == datalo + 1) {
1553 tcg_out_strd_r(s, cond, datalo, addrlo, addend);
1555 tcg_out_st32_rwb(s, cond, datalo, addend, addrlo);
1556 tcg_out_st32_12(s, cond, datahi, addend, 4);
1562 static inline void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp opc,
1563 TCGReg datalo, TCGReg datahi,
1566 TCGMemOp bswap = opc & MO_BSWAP;
1568 switch (opc & MO_SIZE) {
1570 tcg_out_st8_12(s, COND_AL, datalo, addrlo, 0);
1574 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, datalo);
1575 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addrlo, 0);
1577 tcg_out_st16_8(s, COND_AL, datalo, addrlo, 0);
1583 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1584 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1586 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1590 /* Avoid strd for user-only emulation, to handle unaligned. */
1592 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datahi);
1593 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 0);
1594 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, datalo);
1595 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addrlo, 4);
1596 } else if (USING_SOFTMMU && use_armv6_instructions
1597 && (datalo & 1) == 0 && datahi == datalo + 1) {
1598 tcg_out_strd_8(s, COND_AL, datalo, addrlo, 0);
1600 tcg_out_st32_12(s, COND_AL, datalo, addrlo, 0);
1601 tcg_out_st32_12(s, COND_AL, datahi, addrlo, 4);
1607 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
1609 TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
1612 #ifdef CONFIG_SOFTMMU
1615 tcg_insn_unit *label_ptr;
1619 datahi = (is64 ? *args++ : 0);
1621 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1623 opc = get_memop(oi);
1625 #ifdef CONFIG_SOFTMMU
1626 mem_index = get_mmuidx(oi);
1627 addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
1629 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
1631 /* The conditional call must come last, as we're going to return here. */
1632 label_ptr = s->code_ptr;
1633 tcg_out_bl_noaddr(s, COND_NE);
1635 add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
1636 s->code_ptr, label_ptr);
1637 #else /* !CONFIG_SOFTMMU */
1639 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP, guest_base);
1640 tcg_out_qemu_st_index(s, COND_AL, opc, datalo,
1641 datahi, addrlo, TCG_REG_TMP);
1643 tcg_out_qemu_st_direct(s, opc, datalo, datahi, addrlo);
1648 static tcg_insn_unit *tb_ret_addr;
1650 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1651 const TCGArg *args, const int *const_args)
1653 TCGArg a0, a1, a2, a3, a4, a5;
1657 case INDEX_op_exit_tb:
1658 /* Reuse the zeroing that exists for goto_ptr. */
1661 tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
1663 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1664 tcg_out_goto(s, COND_AL, tb_ret_addr);
1667 case INDEX_op_goto_tb:
1668 if (s->tb_jmp_insn_offset) {
1669 /* Direct jump method */
1670 s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s);
1671 tcg_out_b_noaddr(s, COND_AL);
1673 /* Indirect jump method */
1674 intptr_t ptr = (intptr_t)(s->tb_jmp_target_addr + args[0]);
1675 tcg_out_movi32(s, COND_AL, TCG_REG_R0, ptr & ~0xfff);
1676 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, ptr & 0xfff);
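/* The indirect method splits the address of the jump-target table slot
   into a 4K-aligned base (materialised in r0) and a 12-bit offset that
   fits the ldr immediate; loading the slot straight into pc performs the
   jump, so retargeting the TB only needs the table entry to be rewritten,
   not the code itself. */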
1678 s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
1680 case INDEX_op_goto_ptr:
1681 tcg_out_bx(s, COND_AL, args[0]);
1684 tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
1687 case INDEX_op_ld8u_i32:
1688 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1690 case INDEX_op_ld8s_i32:
1691 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1693 case INDEX_op_ld16u_i32:
1694 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1696 case INDEX_op_ld16s_i32:
1697 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1699 case INDEX_op_ld_i32:
1700 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1702 case INDEX_op_st8_i32:
1703 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1705 case INDEX_op_st16_i32:
1706 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1708 case INDEX_op_st_i32:
1709 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1712 case INDEX_op_movcond_i32:
1713 /* Constraints mean that v2 is always in the same register as dest,
1714 * so we only need to do "if condition passed, move v1 to dest".
1716 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1717 args[1], args[2], const_args[2]);
1718 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1719 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1721 case INDEX_op_add_i32:
1722 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1723 args[0], args[1], args[2], const_args[2]);
1725 case INDEX_op_sub_i32:
1726 if (const_args[1]) {
1727 if (const_args[2]) {
1728 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1730 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1731 args[0], args[2], args[1], 1);
1734 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1735 args[0], args[1], args[2], const_args[2]);
1738 case INDEX_op_and_i32:
1739 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1740 args[0], args[1], args[2], const_args[2]);
1742 case INDEX_op_andc_i32:
1743 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1744 args[0], args[1], args[2], const_args[2]);
1746 case INDEX_op_or_i32:
1749 case INDEX_op_xor_i32:
1753 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1755 case INDEX_op_add2_i32:
1756 a0 = args[0], a1 = args[1], a2 = args[2];
1757 a3 = args[3], a4 = args[4], a5 = args[5];
1758 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1761 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1762 a0, a2, a4, const_args[4]);
1763 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1764 a1, a3, a5, const_args[5]);
1765 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1767 case INDEX_op_sub2_i32:
1768 a0 = args[0], a1 = args[1], a2 = args[2];
1769 a3 = args[3], a4 = args[4], a5 = args[5];
1770 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1773 if (const_args[2]) {
1774 if (const_args[4]) {
1775 tcg_out_movi32(s, COND_AL, a0, a4);
1778 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1780 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1781 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1783 if (const_args[3]) {
1784 if (const_args[5]) {
1785 tcg_out_movi32(s, COND_AL, a1, a5);
1788 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1790 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1791 a1, a3, a5, const_args[5]);
1793 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1795 case INDEX_op_neg_i32:
1796 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1798 case INDEX_op_not_i32:
1799 tcg_out_dat_reg(s, COND_AL,
1800 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1802 case INDEX_op_mul_i32:
1803 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1805 case INDEX_op_mulu2_i32:
1806 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1808 case INDEX_op_muls2_i32:
1809 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1811 /* XXX: Perhaps args[2] & 0x1f is wrong */
1812 case INDEX_op_shl_i32:
1814 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1816 case INDEX_op_shr_i32:
1817 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1818 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1820 case INDEX_op_sar_i32:
1821 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1822 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1824 case INDEX_op_rotr_i32:
1825 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1826 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1829 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1832 case INDEX_op_rotl_i32:
1833 if (const_args[2]) {
1834 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1835 ((0x20 - args[2]) & 0x1f) ?
1836 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1839 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
1840 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1841 SHIFT_REG_ROR(TCG_REG_TMP));
1845 case INDEX_op_ctz_i32:
1846 tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
1850 case INDEX_op_clz_i32:
1856 if (c && a2 == 32) {
1857 tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
1860 tcg_out_dat_imm(s, COND_AL, ARITH_CMP, 0, a1, 0);
1861 tcg_out_dat_reg(s, COND_NE, INSN_CLZ, a0, 0, a1, 0);
1862 if (c || a0 != a2) {
1863 tcg_out_dat_rIK(s, COND_EQ, ARITH_MOV, ARITH_MVN, a0, 0, a2, c);
1867 case INDEX_op_brcond_i32:
1868 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1869 args[0], args[1], const_args[1]);
1870 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
1871 arg_label(args[3]));
1873 case INDEX_op_brcond2_i32:
1874 /* The resulting conditions are:
1875 * TCG_COND_EQ --> a0 == a2 && a1 == a3,
1876 * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
1877 * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
1878 * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1879 * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1880 * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
1882 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1883 args[1], args[3], const_args[3]);
1884 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1885 args[0], args[2], const_args[2]);
1886 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]],
1887 arg_label(args[5]));
1889 case INDEX_op_setcond_i32:
1890 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1891 args[1], args[2], const_args[2]);
1892 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1893 ARITH_MOV, args[0], 0, 1);
1894 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1895 ARITH_MOV, args[0], 0, 0);
1897 case INDEX_op_setcond2_i32:
1898 /* See brcond2_i32 comment */
1899 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1900 args[2], args[4], const_args[4]);
1901 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1902 args[1], args[3], const_args[3]);
1903 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1904 ARITH_MOV, args[0], 0, 1);
1905 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1906 ARITH_MOV, args[0], 0, 0);
1909 case INDEX_op_qemu_ld_i32:
1910 tcg_out_qemu_ld(s, args, 0);
1912 case INDEX_op_qemu_ld_i64:
1913 tcg_out_qemu_ld(s, args, 1);
1915 case INDEX_op_qemu_st_i32:
1916 tcg_out_qemu_st(s, args, 0);
1918 case INDEX_op_qemu_st_i64:
1919 tcg_out_qemu_st(s, args, 1);
1922 case INDEX_op_bswap16_i32:
1923 tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1925 case INDEX_op_bswap32_i32:
1926 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1929 case INDEX_op_ext8s_i32:
1930 tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1932 case INDEX_op_ext16s_i32:
1933 tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1935 case INDEX_op_ext16u_i32:
1936 tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1939 case INDEX_op_deposit_i32:
1940 tcg_out_deposit(s, COND_AL, args[0], args[2],
1941 args[3], args[4], const_args[2]);
1943 case INDEX_op_extract_i32:
1944 tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
1946 case INDEX_op_sextract_i32:
1947 tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
1950 case INDEX_op_div_i32:
1951 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1953 case INDEX_op_divu_i32:
1954 tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1958 tcg_out_mb(s, args[0]);
1961 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1962 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1963 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1969 static const TCGTargetOpDef arm_op_defs[] = {
1970 { INDEX_op_exit_tb, { } },
1971 { INDEX_op_goto_tb, { } },
1972 { INDEX_op_br, { } },
1973 { INDEX_op_goto_ptr, { "r" } },
1975 { INDEX_op_ld8u_i32, { "r", "r" } },
1976 { INDEX_op_ld8s_i32, { "r", "r" } },
1977 { INDEX_op_ld16u_i32, { "r", "r" } },
1978 { INDEX_op_ld16s_i32, { "r", "r" } },
1979 { INDEX_op_ld_i32, { "r", "r" } },
1980 { INDEX_op_st8_i32, { "r", "r" } },
1981 { INDEX_op_st16_i32, { "r", "r" } },
1982 { INDEX_op_st_i32, { "r", "r" } },
1984 /* TODO: "r", "r", "ri" */
1985 { INDEX_op_add_i32, { "r", "r", "rIN" } },
1986 { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1987 { INDEX_op_mul_i32, { "r", "r", "r" } },
1988 { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1989 { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1990 { INDEX_op_and_i32, { "r", "r", "rIK" } },
1991 { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1992 { INDEX_op_or_i32, { "r", "r", "rI" } },
1993 { INDEX_op_xor_i32, { "r", "r", "rI" } },
1994 { INDEX_op_neg_i32, { "r", "r" } },
1995 { INDEX_op_not_i32, { "r", "r" } },
1997 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1998 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1999 { INDEX_op_sar_i32, { "r", "r", "ri" } },
2000 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
2001 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
2002 { INDEX_op_clz_i32, { "r", "r", "rIK" } },
2003 { INDEX_op_ctz_i32, { "r", "r", "rIK" } },
2005 { INDEX_op_brcond_i32, { "r", "rIN" } },
2006 { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
2007 { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
2009 { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
2010 { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
2011 { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
2012 { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
2014 #if TARGET_LONG_BITS == 32
2015 { INDEX_op_qemu_ld_i32, { "r", "l" } },
2016 { INDEX_op_qemu_ld_i64, { "r", "r", "l" } },
2017 { INDEX_op_qemu_st_i32, { "s", "s" } },
2018 { INDEX_op_qemu_st_i64, { "s", "s", "s" } },
2020 { INDEX_op_qemu_ld_i32, { "r", "l", "l" } },
2021 { INDEX_op_qemu_ld_i64, { "r", "r", "l", "l" } },
2022 { INDEX_op_qemu_st_i32, { "s", "s", "s" } },
2023 { INDEX_op_qemu_st_i64, { "s", "s", "s", "s" } },
2026 { INDEX_op_bswap16_i32, { "r", "r" } },
2027 { INDEX_op_bswap32_i32, { "r", "r" } },
2029 { INDEX_op_ext8s_i32, { "r", "r" } },
2030 { INDEX_op_ext16s_i32, { "r", "r" } },
2031 { INDEX_op_ext16u_i32, { "r", "r" } },
2033 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
2034 { INDEX_op_extract_i32, { "r", "r" } },
2035 { INDEX_op_sextract_i32, { "r", "r" } },
2037 { INDEX_op_div_i32, { "r", "r", "r" } },
2038 { INDEX_op_divu_i32, { "r", "r", "r" } },
2040 { INDEX_op_mb, { } },
2044 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
2046 int i, n = ARRAY_SIZE(arm_op_defs);
2048 for (i = 0; i < n; ++i) {
2049 if (arm_op_defs[i].op == op) {
2050 return &arm_op_defs[i];
2056 static void tcg_target_init(TCGContext *s)
2058 /* Only probe for the platform and capabilities if we haven't already
2059 determined maximum values at compile time. */
2060 #ifndef use_idiv_instructions
2062 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
2063 use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2066 if (__ARM_ARCH < 7) {
2067 const char *pl = (const char *)qemu_getauxval(AT_PLATFORM);
2068 if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2069 arm_arch = pl[1] - '0';
2073 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2074 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2079 (1 << TCG_REG_R12) |
2080 (1 << TCG_REG_R14));
2082 tcg_regset_clear(s->reserved_regs);
2083 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2084 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2085 tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2088 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2089 TCGReg arg1, intptr_t arg2)
2091 tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2094 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2095 TCGReg arg1, intptr_t arg2)
2097 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2100 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
2101 TCGReg base, intptr_t ofs)
2106 static inline void tcg_out_mov(TCGContext *s, TCGType type,
2107 TCGReg ret, TCGReg arg)
2109 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2112 static inline void tcg_out_movi(TCGContext *s, TCGType type,
2113 TCGReg ret, tcg_target_long arg)
2115 tcg_out_movi32(s, COND_AL, ret, arg);
2118 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2119 and tcg_register_jit. */
2121 #define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
2123 #define FRAME_SIZE \
2125 + TCG_STATIC_CALL_ARGS_SIZE \
2126 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2127 + TCG_TARGET_STACK_ALIGN - 1) \
2128 & -TCG_TARGET_STACK_ALIGN)
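/* For reference: PUSH_SIZE is (11 - 4 + 1 + 1) * 4 = 36 bytes, i.e. the
   nine words pushed by "stmdb sp!, {r4-r11, lr}" in the prologue.
   FRAME_SIZE adds the outgoing-argument and TCG temporary areas on top of
   that and rounds up to TCG_TARGET_STACK_ALIGN, so the sp adjustment made
   by the prologue is FRAME_SIZE - PUSH_SIZE. */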
2130 static void tcg_target_qemu_prologue(TCGContext *s)
2134 /* Calling convention requires us to save r4-r11 and lr. */
2135 /* stmdb sp!, { r4 - r11, lr } */
2136 tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2138 /* Reserve callee argument and tcg temp space. */
2139 stack_addend = FRAME_SIZE - PUSH_SIZE;
2141 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2142 TCG_REG_CALL_STACK, stack_addend, 1);
2143 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2144 CPU_TEMP_BUF_NLONGS * sizeof(long));
2146 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2148 tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
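/* The prologue is entered as an ordinary C call of two arguments: the
   first argument register (r0) carries the CPU env pointer, copied into
   TCG_AREG0 above, and the second (r1) carries the address of the
   translated block to execute, which the bx above jumps to.  Generated
   code eventually comes back through the epilogue below. */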
2151 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2152 * and fall through to the rest of the epilogue.
2154 s->code_gen_epilogue = s->code_ptr;
2155 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
2158 tb_ret_addr = s->code_ptr;
2159 tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2160 TCG_REG_CALL_STACK, stack_addend, 1);
2162 /* ldmia sp!, { r4 - r11, pc } */
2163 tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2168 uint8_t fde_def_cfa[4];
2169 uint8_t fde_reg_ofs[18];
2172 #define ELF_HOST_MACHINE EM_ARM
2174 /* We're expecting a 2 byte uleb128 encoded value. */
2175 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2177 static const DebugFrame debug_frame = {
2178 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2181 .h.cie.code_align = 1,
2182 .h.cie.data_align = 0x7c, /* sleb128 -4 */
2183 .h.cie.return_column = 14,
2185 /* Total FDE size does not include the "len" member. */
2186 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2189 12, 13, /* DW_CFA_def_cfa sp, ... */
2190 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2194 /* The following must match the stmdb in the prologue. */
2195 0x8e, 1, /* DW_CFA_offset, lr, -4 */
2196 0x8b, 2, /* DW_CFA_offset, r11, -8 */
2197 0x8a, 3, /* DW_CFA_offset, r10, -12 */
2198 0x89, 4, /* DW_CFA_offset, r9, -16 */
2199 0x88, 5, /* DW_CFA_offset, r8, -20 */
2200 0x87, 6, /* DW_CFA_offset, r7, -24 */
2201 0x86, 7, /* DW_CFA_offset, r6, -28 */
2202 0x85, 8, /* DW_CFA_offset, r5, -32 */
2203 0x84, 9, /* DW_CFA_offset, r4, -36 */
2207 void tcg_register_jit(void *buf, size_t buf_size)
2209 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));