2 * Tiny Code Generator for QEMU
4 * Copyright (c) 2008 Andrzej Zaborowski
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "tcg-be-ldst.h"
27 /* The __ARM_ARCH define is provided by gcc 4.8. Construct it otherwise. */
29 # if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
30 || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
31 || defined(__ARM_ARCH_7EM__)
33 # elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
34 || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
35 || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
37 # elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5E__) \
38 || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
39 || defined(__ARM_ARCH_5TEJ__)
46 static int arm_arch = __ARM_ARCH;
48 #if defined(__ARM_ARCH_5T__) \
49 || defined(__ARM_ARCH_5TE__) || defined(__ARM_ARCH_5TEJ__)
50 # define use_armv5t_instructions 1
52 # define use_armv5t_instructions use_armv6_instructions
55 #define use_armv6_instructions (__ARM_ARCH >= 6 || arm_arch >= 6)
56 #define use_armv7_instructions (__ARM_ARCH >= 7 || arm_arch >= 7)
58 #ifndef use_idiv_instructions
59 bool use_idiv_instructions;
61 #ifdef CONFIG_GETAUXVAL
62 # include <sys/auxv.h>
66 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
86 static const int tcg_target_reg_alloc_order[] = {
104 static const int tcg_target_call_iarg_regs[4] = {
105 TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R3
107 static const int tcg_target_call_oarg_regs[2] = {
108 TCG_REG_R0, TCG_REG_R1
111 #define TCG_REG_TMP TCG_REG_R12
113 static inline void reloc_abs32(void *code_ptr, intptr_t target)
115 *(uint32_t *) code_ptr = target;
118 static inline void reloc_pc24(void *code_ptr, intptr_t target)
120 uint32_t offset = ((target - ((intptr_t)code_ptr + 8)) >> 2);
122 *(uint32_t *) code_ptr = ((*(uint32_t *) code_ptr) & ~0xffffff)
123 | (offset & 0xffffff);
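/* Worked example (illustrative): with code_ptr at 0x1000 and target at
   0x2000, the word displacement is (0x2000 - (0x1000 + 8)) >> 2 = 0x3fe;
   only its low 24 bits are merged into the offset field of the existing
   B/BL instruction.  The +8 accounts for the ARM PC reading two
   instructions ahead. */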
126 static void patch_reloc(uint8_t *code_ptr, int type,
127 intptr_t value, intptr_t addend)
131 reloc_abs32(code_ptr, value);
140 reloc_pc24(code_ptr, value);
145 #define TCG_CT_CONST_ARM 0x100
146 #define TCG_CT_CONST_INV 0x200
147 #define TCG_CT_CONST_NEG 0x400
148 #define TCG_CT_CONST_ZERO 0x800
150 /* parse target specific constraints */
151 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
158 ct->ct |= TCG_CT_CONST_ARM;
161 ct->ct |= TCG_CT_CONST_INV;
163 case 'N': /* The gcc constraint letter is L, already used here. */
164 ct->ct |= TCG_CT_CONST_NEG;
167 ct->ct |= TCG_CT_CONST_ZERO;
171 ct->ct |= TCG_CT_REG;
172 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
175 /* qemu_ld address */
177 ct->ct |= TCG_CT_REG;
178 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
179 #ifdef CONFIG_SOFTMMU
180 /* r0-r2,lr will be overwritten when reading the tlb entry,
181 so don't use these. */
182 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
183 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
184 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
185 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
189 /* qemu_st address & data_reg */
191 ct->ct |= TCG_CT_REG;
192 tcg_regset_set32(ct->u.regs, 0, (1 << TCG_TARGET_NB_REGS) - 1);
193 /* r0-r2 will be overwritten when reading the tlb entry (softmmu only)
194 and r0-r1 when doing the byte swapping, so don't use these. */
195 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
196 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
197 #if defined(CONFIG_SOFTMMU)
198 /* Avoid clashes with registers being used for helper args */
199 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
200 #if TARGET_LONG_BITS == 64
201 /* Avoid clashes with registers being used for helper args */
202 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
204 tcg_regset_reset_reg(ct->u.regs, TCG_REG_R14);
217 static inline uint32_t rotl(uint32_t val, int n)
219 return (val << n) | (val >> (32 - n));
222 /* ARM immediates for ALU instructions are made of an unsigned 8-bit
223 value right-rotated by an even amount between 0 and 30. */
224 static inline int encode_imm(uint32_t imm)
228 /* simple case, only lower bits */
229 if ((imm & ~0xff) == 0)
231 /* then try a simple even shift */
232 shift = ctz32(imm) & ~1;
233 if (((imm >> shift) & ~0xff) == 0)
235 /* now try harder with rotations */
236 if ((rotl(imm, 2) & ~0xff) == 0)
238 if ((rotl(imm, 4) & ~0xff) == 0)
240 if ((rotl(imm, 6) & ~0xff) == 0)
242 /* imm can't be encoded */
246 static inline int check_fit_imm(uint32_t imm)
248 return encode_imm(imm) >= 0;
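/* Worked example (illustrative): 0x00ab0000 is the 8-bit value 0xab
   rotated right by 16, so encode_imm() yields 16 and the operand2 field
   built by tcg_out_movi32() below becomes
   rotl(0x00ab0000, 16) | (16 << 7) = 0x8ab, i.e. imm8 = 0xab with rotate
   field 8 in bits [11:8].  A hedged sketch of the inverse mapping, for
   reference only: */
static inline uint32_t decode_imm_example(uint32_t operand2)
{
    uint32_t imm8 = operand2 & 0xff;
    unsigned ror = 2 * ((operand2 >> 8) & 0xf);

    /* rotate the 8-bit immediate right by twice the 4-bit rotate field */
    return ror ? (imm8 >> ror) | (imm8 << (32 - ror)) : imm8;
}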
251 /* Test if a constant matches the constraint.
252 * TODO: define constraints for:
254 * ldr/str offset: between -0xfff and 0xfff
255 * ldrh/strh offset: between -0xff and 0xff
256 * mov operand2: values represented with x << (2 * y), x < 0x100
257 * add, sub, eor...: ditto
259 static inline int tcg_target_const_match(tcg_target_long val,
260 const TCGArgConstraint *arg_ct)
264 if (ct & TCG_CT_CONST) {
266 } else if ((ct & TCG_CT_CONST_ARM) && check_fit_imm(val)) {
268 } else if ((ct & TCG_CT_CONST_INV) && check_fit_imm(~val)) {
270 } else if ((ct & TCG_CT_CONST_NEG) && check_fit_imm(-val)) {
272 } else if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
279 #define TO_CPSR (1 << 20)
282 ARITH_AND = 0x0 << 21,
283 ARITH_EOR = 0x1 << 21,
284 ARITH_SUB = 0x2 << 21,
285 ARITH_RSB = 0x3 << 21,
286 ARITH_ADD = 0x4 << 21,
287 ARITH_ADC = 0x5 << 21,
288 ARITH_SBC = 0x6 << 21,
289 ARITH_RSC = 0x7 << 21,
290 ARITH_TST = 0x8 << 21 | TO_CPSR,
291 ARITH_CMP = 0xa << 21 | TO_CPSR,
292 ARITH_CMN = 0xb << 21 | TO_CPSR,
293 ARITH_ORR = 0xc << 21,
294 ARITH_MOV = 0xd << 21,
295 ARITH_BIC = 0xe << 21,
296 ARITH_MVN = 0xf << 21,
298 INSN_LDR_IMM = 0x04100000,
299 INSN_LDR_REG = 0x06100000,
300 INSN_STR_IMM = 0x04000000,
301 INSN_STR_REG = 0x06000000,
303 INSN_LDRH_IMM = 0x005000b0,
304 INSN_LDRH_REG = 0x001000b0,
305 INSN_LDRSH_IMM = 0x005000f0,
306 INSN_LDRSH_REG = 0x001000f0,
307 INSN_STRH_IMM = 0x004000b0,
308 INSN_STRH_REG = 0x000000b0,
310 INSN_LDRB_IMM = 0x04500000,
311 INSN_LDRB_REG = 0x06500000,
312 INSN_LDRSB_IMM = 0x005000d0,
313 INSN_LDRSB_REG = 0x001000d0,
314 INSN_STRB_IMM = 0x04400000,
315 INSN_STRB_REG = 0x06400000,
317 INSN_LDRD_IMM = 0x004000d0,
318 INSN_LDRD_REG = 0x000000d0,
319 INSN_STRD_IMM = 0x004000f0,
320 INSN_STRD_REG = 0x000000f0,
323 #define SHIFT_IMM_LSL(im) (((im) << 7) | 0x00)
324 #define SHIFT_IMM_LSR(im) (((im) << 7) | 0x20)
325 #define SHIFT_IMM_ASR(im) (((im) << 7) | 0x40)
326 #define SHIFT_IMM_ROR(im) (((im) << 7) | 0x60)
327 #define SHIFT_REG_LSL(rs) (((rs) << 8) | 0x10)
328 #define SHIFT_REG_LSR(rs) (((rs) << 8) | 0x30)
329 #define SHIFT_REG_ASR(rs) (((rs) << 8) | 0x50)
330 #define SHIFT_REG_ROR(rs) (((rs) << 8) | 0x70)
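/* E.g. SHIFT_IMM_ASR(24) encodes "asr #24" into the operand2 field: the
   immediate shift amount goes in bits [11:7] and the shift type in bits
   [6:5], while the SHIFT_REG_* forms put the shift-amount register in
   bits [11:8] and set bit 4. */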
332 enum arm_cond_code_e {
335 COND_CS = 0x2, /* Unsigned greater or equal */
336 COND_CC = 0x3, /* Unsigned less than */
337 COND_MI = 0x4, /* Negative */
338 COND_PL = 0x5, /* Zero or greater */
339 COND_VS = 0x6, /* Overflow */
340 COND_VC = 0x7, /* No overflow */
341 COND_HI = 0x8, /* Unsigned greater than */
342 COND_LS = 0x9, /* Unsigned less or equal */
350 static const uint8_t tcg_cond_to_arm_cond[] = {
351 [TCG_COND_EQ] = COND_EQ,
352 [TCG_COND_NE] = COND_NE,
353 [TCG_COND_LT] = COND_LT,
354 [TCG_COND_GE] = COND_GE,
355 [TCG_COND_LE] = COND_LE,
356 [TCG_COND_GT] = COND_GT,
358 [TCG_COND_LTU] = COND_CC,
359 [TCG_COND_GEU] = COND_CS,
360 [TCG_COND_LEU] = COND_LS,
361 [TCG_COND_GTU] = COND_HI,
364 static inline void tcg_out_bx(TCGContext *s, int cond, int rn)
366 tcg_out32(s, (cond << 28) | 0x012fff10 | rn);
369 static inline void tcg_out_b(TCGContext *s, int cond, int32_t offset)
371 tcg_out32(s, (cond << 28) | 0x0a000000 |
372 (((offset - 8) >> 2) & 0x00ffffff));
375 static inline void tcg_out_b_noaddr(TCGContext *s, int cond)
377 /* We take care here not to modify the branch target by skipping
378 the corresponding bytes.  This ensures that caches and memory are
379 kept coherent during retranslation. */
381 tcg_out8(s, (cond << 4) | 0x0a);
384 static inline void tcg_out_bl_noaddr(TCGContext *s, int cond)
386 /* We take care here not to modify the branch target by skipping
387 the corresponding bytes.  This ensures that caches and memory are
388 kept coherent during retranslation. */
390 tcg_out8(s, (cond << 4) | 0x0b);
393 static inline void tcg_out_bl(TCGContext *s, int cond, int32_t offset)
395 tcg_out32(s, (cond << 28) | 0x0b000000 |
396 (((offset - 8) >> 2) & 0x00ffffff));
399 static inline void tcg_out_blx(TCGContext *s, int cond, int rn)
401 tcg_out32(s, (cond << 28) | 0x012fff30 | rn);
404 static inline void tcg_out_blx_imm(TCGContext *s, int32_t offset)
406 tcg_out32(s, 0xfa000000 | ((offset & 2) << 23) |
407 (((offset - 8) >> 2) & 0x00ffffff));
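/* The (offset & 2) << 23 term above sets the H bit of the BLX (immediate)
   encoding, so a halfword-aligned Thumb target remains reachable. */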
410 static inline void tcg_out_dat_reg(TCGContext *s,
411 int cond, int opc, int rd, int rn, int rm, int shift)
413 tcg_out32(s, (cond << 28) | (0 << 25) | opc |
414 (rn << 16) | (rd << 12) | shift | rm);
417 static inline void tcg_out_nop(TCGContext *s)
419 if (use_armv7_instructions) {
420 /* Architected nop introduced in v6k. */
421 /* ??? This is an MSR (imm) 0,0,0 insn. Anyone know if this
422 also Just So Happened to do nothing on pre-v6k so that we
423 don't need to conditionalize it? */
424 tcg_out32(s, 0xe320f000);
426 /* Prior to that the assembler uses mov r0, r0. */
427 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, 0, 0, 0, SHIFT_IMM_LSL(0));
431 static inline void tcg_out_mov_reg(TCGContext *s, int cond, int rd, int rm)
433 /* Simple reg-reg move, optimising out the 'do nothing' case */
435 tcg_out_dat_reg(s, cond, ARITH_MOV, rd, 0, rm, SHIFT_IMM_LSL(0));
439 static inline void tcg_out_dat_imm(TCGContext *s,
440 int cond, int opc, int rd, int rn, int im)
442 tcg_out32(s, (cond << 28) | (1 << 25) | opc |
443 (rn << 16) | (rd << 12) | im);
446 static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg)
450 /* For armv7, make sure not to use movw+movt when mov/mvn would do.
451 Speed things up by only checking when movt would be required.
452 Prior to armv7, have one go at fully rotated immediates before
453 doing the decomposition thing below. */
454 if (!use_armv7_instructions || (arg & 0xffff0000)) {
455 rot = encode_imm(arg);
457 tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0,
458 rotl(arg, rot) | (rot << 7));
461 rot = encode_imm(~arg);
463 tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0,
464 rotl(~arg, rot) | (rot << 7));
469 /* Use movw + movt. */
470 if (use_armv7_instructions) {
472 tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12)
473 | ((arg << 4) & 0x000f0000) | (arg & 0xfff));
474 if (arg & 0xffff0000) {
476 tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12)
477 | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff));
482 /* TODO: This is very suboptimal; we could easily have a constant
483 pool somewhere after all the instructions. */
486 /* If we have lots of leading 1's, we can shorten the sequence by
487 beginning with mvn and then clearing higher bits with eor. */
488 if (clz32(~arg) > clz32(arg)) {
489 opc = ARITH_MVN, arg = ~arg;
492 int i = ctz32(arg) & ~1;
493 rot = ((32 - i) << 7) & 0xf00;
494 tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot);
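/* Illustrative note: each loop iteration above peels off one even-aligned
   8-bit chunk of the constant, so an arbitrary 32-bit value costs at most
   four data-processing instructions (an initial mov or mvn plus up to
   three eors merging the remaining chunks). */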
502 static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst,
503 TCGArg lhs, TCGArg rhs, int rhs_is_const)
505 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
506 * rhs must satisfy the "rI" constraint.
509 int rot = encode_imm(rhs);
511 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
513 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
517 static void tcg_out_dat_rIK(TCGContext *s, int cond, int opc, int opinv,
518 TCGReg dst, TCGReg lhs, TCGArg rhs,
521 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
522 * rhs must satisfy the "rIK" constraint.
525 int rot = encode_imm(rhs);
528 rot = encode_imm(rhs);
532 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
534 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
538 static void tcg_out_dat_rIN(TCGContext *s, int cond, int opc, int opneg,
539 TCGArg dst, TCGArg lhs, TCGArg rhs,
542 /* Emit either the reg,imm or reg,reg form of a data-processing insn.
543 * rhs must satisfy the "rIN" constraint.
546 int rot = encode_imm(rhs);
549 rot = encode_imm(rhs);
553 tcg_out_dat_imm(s, cond, opc, dst, lhs, rotl(rhs, rot) | (rot << 7));
555 tcg_out_dat_reg(s, cond, opc, dst, lhs, rhs, SHIFT_IMM_LSL(0));
559 static inline void tcg_out_mul32(TCGContext *s, int cond, TCGReg rd,
560 TCGReg rn, TCGReg rm)
562 /* if ArchVersion() < 6 && d == n then UNPREDICTABLE; */
563 if (!use_armv6_instructions && rd == rn) {
565 /* rd == rn == rm; copy an input to tmp first. */
566 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
567 rm = rn = TCG_REG_TMP;
574 tcg_out32(s, (cond << 28) | 0x90 | (rd << 16) | (rm << 8) | rn);
577 static inline void tcg_out_umull32(TCGContext *s, int cond, TCGReg rd0,
578 TCGReg rd1, TCGReg rn, TCGReg rm)
580 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
581 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
582 if (rd0 == rm || rd1 == rm) {
583 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
592 tcg_out32(s, (cond << 28) | 0x00800090 |
593 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
596 static inline void tcg_out_smull32(TCGContext *s, int cond, TCGReg rd0,
597 TCGReg rd1, TCGReg rn, TCGReg rm)
599 /* if ArchVersion() < 6 && (dHi == n || dLo == n) then UNPREDICTABLE; */
600 if (!use_armv6_instructions && (rd0 == rn || rd1 == rn)) {
601 if (rd0 == rm || rd1 == rm) {
602 tcg_out_mov_reg(s, cond, TCG_REG_TMP, rn);
611 tcg_out32(s, (cond << 28) | 0x00c00090 |
612 (rd1 << 16) | (rd0 << 12) | (rm << 8) | rn);
615 static inline void tcg_out_sdiv(TCGContext *s, int cond, int rd, int rn, int rm)
617 tcg_out32(s, 0x0710f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
620 static inline void tcg_out_udiv(TCGContext *s, int cond, int rd, int rn, int rm)
622 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
625 static inline void tcg_out_ext8s(TCGContext *s, int cond,
628 if (use_armv6_instructions) {
630 tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
632 tcg_out_dat_reg(s, cond, ARITH_MOV,
633 rd, 0, rn, SHIFT_IMM_LSL(24));
634 tcg_out_dat_reg(s, cond, ARITH_MOV,
635 rd, 0, rd, SHIFT_IMM_ASR(24));
639 static inline void tcg_out_ext8u(TCGContext *s, int cond,
642 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
645 static inline void tcg_out_ext16s(TCGContext *s, int cond,
648 if (use_armv6_instructions) {
650 tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
652 tcg_out_dat_reg(s, cond, ARITH_MOV,
653 rd, 0, rn, SHIFT_IMM_LSL(16));
654 tcg_out_dat_reg(s, cond, ARITH_MOV,
655 rd, 0, rd, SHIFT_IMM_ASR(16));
659 static inline void tcg_out_ext16u(TCGContext *s, int cond,
662 if (use_armv6_instructions) {
664 tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
666 tcg_out_dat_reg(s, cond, ARITH_MOV,
667 rd, 0, rn, SHIFT_IMM_LSL(16));
668 tcg_out_dat_reg(s, cond, ARITH_MOV,
669 rd, 0, rd, SHIFT_IMM_LSR(16));
673 static inline void tcg_out_bswap16s(TCGContext *s, int cond, int rd, int rn)
675 if (use_armv6_instructions) {
677 tcg_out32(s, 0x06ff0fb0 | (cond << 28) | (rd << 12) | rn);
679 tcg_out_dat_reg(s, cond, ARITH_MOV,
680 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
681 tcg_out_dat_reg(s, cond, ARITH_MOV,
682 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_ASR(16));
683 tcg_out_dat_reg(s, cond, ARITH_ORR,
684 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
688 static inline void tcg_out_bswap16(TCGContext *s, int cond, int rd, int rn)
690 if (use_armv6_instructions) {
692 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
694 tcg_out_dat_reg(s, cond, ARITH_MOV,
695 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSL(24));
696 tcg_out_dat_reg(s, cond, ARITH_MOV,
697 TCG_REG_TMP, 0, TCG_REG_TMP, SHIFT_IMM_LSR(16));
698 tcg_out_dat_reg(s, cond, ARITH_ORR,
699 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSR(8));
703 /* Swap the two low bytes, assuming that the two high input bytes and the
704 two high output bytes can hold any value. */
705 static inline void tcg_out_bswap16st(TCGContext *s, int cond, int rd, int rn)
707 if (use_armv6_instructions) {
709 tcg_out32(s, 0x06bf0fb0 | (cond << 28) | (rd << 12) | rn);
711 tcg_out_dat_reg(s, cond, ARITH_MOV,
712 TCG_REG_TMP, 0, rn, SHIFT_IMM_LSR(8));
713 tcg_out_dat_imm(s, cond, ARITH_AND, TCG_REG_TMP, TCG_REG_TMP, 0xff);
714 tcg_out_dat_reg(s, cond, ARITH_ORR,
715 rd, TCG_REG_TMP, rn, SHIFT_IMM_LSL(8));
719 static inline void tcg_out_bswap32(TCGContext *s, int cond, int rd, int rn)
721 if (use_armv6_instructions) {
723 tcg_out32(s, 0x06bf0f30 | (cond << 28) | (rd << 12) | rn);
725 tcg_out_dat_reg(s, cond, ARITH_EOR,
726 TCG_REG_TMP, rn, rn, SHIFT_IMM_ROR(16));
727 tcg_out_dat_imm(s, cond, ARITH_BIC,
728 TCG_REG_TMP, TCG_REG_TMP, 0xff | 0x800);
729 tcg_out_dat_reg(s, cond, ARITH_MOV,
730 rd, 0, rn, SHIFT_IMM_ROR(8));
731 tcg_out_dat_reg(s, cond, ARITH_EOR,
732 rd, rd, TCG_REG_TMP, SHIFT_IMM_LSR(8));
736 bool tcg_target_deposit_valid(int ofs, int len)
738 /* ??? Without bfi, we could improve over generic code by combining
739 the right-shift from a non-zero ofs with the orr. We do run into
740 problems when rd == rs, and the mask generated from ofs+len doesn't
741 fit into an immediate. We would have to be careful not to pessimize
742 wrt the optimizations performed on the expanded code. */
743 return use_armv7_instructions;
746 static inline void tcg_out_deposit(TCGContext *s, int cond, TCGReg rd,
747 TCGArg a1, int ofs, int len, bool const_a1)
750 /* bfi becomes bfc with rn == 15. */
754 tcg_out32(s, 0x07c00010 | (cond << 28) | (rd << 12) | a1
755 | (ofs << 7) | ((ofs + len - 1) << 16));
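/* Worked example (illustrative): depositing len = 16 bits at ofs = 8 sets
   the lsb field (bits [11:7]) to 8 and the msb field (bits [20:16]) to 23,
   i.e. it emits "bfi rd, a1, #8, #16". */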
758 /* Note that this routine is used for both LDR and LDRH formats, so we do
759 not wish to include an immediate shift at this point. */
760 static void tcg_out_memop_r(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
761 TCGReg rn, TCGReg rm, bool u, bool p, bool w)
763 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24)
764 | (w << 21) | (rn << 16) | (rt << 12) | rm);
767 static void tcg_out_memop_8(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
768 TCGReg rn, int imm8, bool p, bool w)
775 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
776 (rn << 16) | (rt << 12) | ((imm8 & 0xf0) << 4) | (imm8 & 0xf));
779 static void tcg_out_memop_12(TCGContext *s, int cond, ARMInsn opc, TCGReg rt,
780 TCGReg rn, int imm12, bool p, bool w)
787 tcg_out32(s, (cond << 28) | opc | (u << 23) | (p << 24) | (w << 21) |
788 (rn << 16) | (rt << 12) | imm12);
791 static inline void tcg_out_ld32_12(TCGContext *s, int cond, TCGReg rt,
792 TCGReg rn, int imm12)
794 tcg_out_memop_12(s, cond, INSN_LDR_IMM, rt, rn, imm12, 1, 0);
797 static inline void tcg_out_st32_12(TCGContext *s, int cond, TCGReg rt,
798 TCGReg rn, int imm12)
800 tcg_out_memop_12(s, cond, INSN_STR_IMM, rt, rn, imm12, 1, 0);
803 static inline void tcg_out_ld32_r(TCGContext *s, int cond, TCGReg rt,
804 TCGReg rn, TCGReg rm)
806 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 0);
809 static inline void tcg_out_st32_r(TCGContext *s, int cond, TCGReg rt,
810 TCGReg rn, TCGReg rm)
812 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 0);
815 static inline void tcg_out_ldrd_8(TCGContext *s, int cond, TCGReg rt,
818 tcg_out_memop_8(s, cond, INSN_LDRD_IMM, rt, rn, imm8, 1, 0);
821 static inline void tcg_out_ldrd_r(TCGContext *s, int cond, TCGReg rt,
822 TCGReg rn, TCGReg rm)
824 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 0);
827 static inline void tcg_out_strd_8(TCGContext *s, int cond, TCGReg rt,
830 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
833 static inline void tcg_out_strd_r(TCGContext *s, int cond, TCGReg rt,
834 TCGReg rn, TCGReg rm)
836 tcg_out_memop_r(s, cond, INSN_STRD_REG, rt, rn, rm, 1, 1, 0);
839 /* Register pre-increment with base writeback. */
840 static inline void tcg_out_ld32_rwb(TCGContext *s, int cond, TCGReg rt,
841 TCGReg rn, TCGReg rm)
843 tcg_out_memop_r(s, cond, INSN_LDR_REG, rt, rn, rm, 1, 1, 1);
846 static inline void tcg_out_st32_rwb(TCGContext *s, int cond, TCGReg rt,
847 TCGReg rn, TCGReg rm)
849 tcg_out_memop_r(s, cond, INSN_STR_REG, rt, rn, rm, 1, 1, 1);
852 static inline void tcg_out_ld16u_8(TCGContext *s, int cond, TCGReg rt,
855 tcg_out_memop_8(s, cond, INSN_LDRH_IMM, rt, rn, imm8, 1, 0);
858 static inline void tcg_out_st16_8(TCGContext *s, int cond, TCGReg rt,
861 tcg_out_memop_8(s, cond, INSN_STRH_IMM, rt, rn, imm8, 1, 0);
864 static inline void tcg_out_ld16u_r(TCGContext *s, int cond, TCGReg rt,
865 TCGReg rn, TCGReg rm)
867 tcg_out_memop_r(s, cond, INSN_LDRH_REG, rt, rn, rm, 1, 1, 0);
870 static inline void tcg_out_st16_r(TCGContext *s, int cond, TCGReg rt,
871 TCGReg rn, TCGReg rm)
873 tcg_out_memop_r(s, cond, INSN_STRH_REG, rt, rn, rm, 1, 1, 0);
876 static inline void tcg_out_ld16s_8(TCGContext *s, int cond, TCGReg rt,
879 tcg_out_memop_8(s, cond, INSN_LDRSH_IMM, rt, rn, imm8, 1, 0);
882 static inline void tcg_out_ld16s_r(TCGContext *s, int cond, TCGReg rt,
883 TCGReg rn, TCGReg rm)
885 tcg_out_memop_r(s, cond, INSN_LDRSH_REG, rt, rn, rm, 1, 1, 0);
888 static inline void tcg_out_ld8_12(TCGContext *s, int cond, TCGReg rt,
889 TCGReg rn, int imm12)
891 tcg_out_memop_12(s, cond, INSN_LDRB_IMM, rt, rn, imm12, 1, 0);
894 static inline void tcg_out_st8_12(TCGContext *s, int cond, TCGReg rt,
895 TCGReg rn, int imm12)
897 tcg_out_memop_12(s, cond, INSN_STRB_IMM, rt, rn, imm12, 1, 0);
900 static inline void tcg_out_ld8_r(TCGContext *s, int cond, TCGReg rt,
901 TCGReg rn, TCGReg rm)
903 tcg_out_memop_r(s, cond, INSN_LDRB_REG, rt, rn, rm, 1, 1, 0);
906 static inline void tcg_out_st8_r(TCGContext *s, int cond, TCGReg rt,
907 TCGReg rn, TCGReg rm)
909 tcg_out_memop_r(s, cond, INSN_STRB_REG, rt, rn, rm, 1, 1, 0);
912 static inline void tcg_out_ld8s_8(TCGContext *s, int cond, TCGReg rt,
915 tcg_out_memop_8(s, cond, INSN_LDRSB_IMM, rt, rn, imm8, 1, 0);
918 static inline void tcg_out_ld8s_r(TCGContext *s, int cond, TCGReg rt,
919 TCGReg rn, TCGReg rm)
921 tcg_out_memop_r(s, cond, INSN_LDRSB_REG, rt, rn, rm, 1, 1, 0);
924 static inline void tcg_out_ld32u(TCGContext *s, int cond,
925 int rd, int rn, int32_t offset)
927 if (offset > 0xfff || offset < -0xfff) {
928 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
929 tcg_out_ld32_r(s, cond, rd, rn, TCG_REG_TMP);
931 tcg_out_ld32_12(s, cond, rd, rn, offset);
934 static inline void tcg_out_st32(TCGContext *s, int cond,
935 int rd, int rn, int32_t offset)
937 if (offset > 0xfff || offset < -0xfff) {
938 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
939 tcg_out_st32_r(s, cond, rd, rn, TCG_REG_TMP);
941 tcg_out_st32_12(s, cond, rd, rn, offset);
944 static inline void tcg_out_ld16u(TCGContext *s, int cond,
945 int rd, int rn, int32_t offset)
947 if (offset > 0xff || offset < -0xff) {
948 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
949 tcg_out_ld16u_r(s, cond, rd, rn, TCG_REG_TMP);
951 tcg_out_ld16u_8(s, cond, rd, rn, offset);
954 static inline void tcg_out_ld16s(TCGContext *s, int cond,
955 int rd, int rn, int32_t offset)
957 if (offset > 0xff || offset < -0xff) {
958 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
959 tcg_out_ld16s_r(s, cond, rd, rn, TCG_REG_TMP);
961 tcg_out_ld16s_8(s, cond, rd, rn, offset);
964 static inline void tcg_out_st16(TCGContext *s, int cond,
965 int rd, int rn, int32_t offset)
967 if (offset > 0xff || offset < -0xff) {
968 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
969 tcg_out_st16_r(s, cond, rd, rn, TCG_REG_TMP);
971 tcg_out_st16_8(s, cond, rd, rn, offset);
974 static inline void tcg_out_ld8u(TCGContext *s, int cond,
975 int rd, int rn, int32_t offset)
977 if (offset > 0xfff || offset < -0xfff) {
978 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
979 tcg_out_ld8_r(s, cond, rd, rn, TCG_REG_TMP);
981 tcg_out_ld8_12(s, cond, rd, rn, offset);
984 static inline void tcg_out_ld8s(TCGContext *s, int cond,
985 int rd, int rn, int32_t offset)
987 if (offset > 0xff || offset < -0xff) {
988 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
989 tcg_out_ld8s_r(s, cond, rd, rn, TCG_REG_TMP);
991 tcg_out_ld8s_8(s, cond, rd, rn, offset);
994 static inline void tcg_out_st8(TCGContext *s, int cond,
995 int rd, int rn, int32_t offset)
997 if (offset > 0xfff || offset < -0xfff) {
998 tcg_out_movi32(s, cond, TCG_REG_TMP, offset);
999 tcg_out_st8_r(s, cond, rd, rn, TCG_REG_TMP);
1001 tcg_out_st8_12(s, cond, rd, rn, offset);
1004 /* The _goto case is normally between TBs within the same code buffer, and
1005 * with the code buffer limited to 16MB we wouldn't need the long case.
1006 * But we also use it for the tail-call to the qemu_ld/st helpers, which does need it.
1008 static inline void tcg_out_goto(TCGContext *s, int cond, uint32_t addr)
1010 int32_t disp = addr - (tcg_target_long) s->code_ptr;
1012 if ((addr & 1) == 0 && disp - 8 < 0x01fffffd && disp - 8 > -0x01fffffd) {
1013 tcg_out_b(s, cond, disp);
1017 tcg_out_movi32(s, cond, TCG_REG_TMP, addr);
1018 if (use_armv5t_instructions) {
1019 tcg_out_bx(s, cond, TCG_REG_TMP);
1024 tcg_out_mov_reg(s, cond, TCG_REG_PC, TCG_REG_TMP);
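/* The direct-branch test above corresponds to the +/- 32 MB reach of the
   24-bit word-offset B encoding; the (addr & 1) check rejects Thumb-mode
   targets, which a plain B cannot interwork to. */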
1028 /* The call case is mostly used for helpers - so it's not unreasonable
1029 * for them to be beyond branch range */
1030 static inline void tcg_out_call(TCGContext *s, uint32_t addr)
1034 val = addr - (tcg_target_long) s->code_ptr;
1035 if (val - 8 < 0x02000000 && val - 8 >= -0x02000000) {
1037 /* Use BLX if the target is in Thumb mode */
1038 if (!use_armv5t_instructions) {
1041 tcg_out_blx_imm(s, val);
1043 tcg_out_bl(s, COND_AL, val);
1045 } else if (use_armv7_instructions) {
1046 tcg_out_movi32(s, COND_AL, TCG_REG_TMP, addr);
1047 tcg_out_blx(s, COND_AL, TCG_REG_TMP);
1049 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R14, TCG_REG_PC, 4);
1050 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1055 static inline void tcg_out_callr(TCGContext *s, int cond, int arg)
1057 if (use_armv5t_instructions) {
1058 tcg_out_blx(s, cond, arg);
1060 tcg_out_dat_reg(s, cond, ARITH_MOV, TCG_REG_R14, 0,
1061 TCG_REG_PC, SHIFT_IMM_LSL(0));
1062 tcg_out_bx(s, cond, arg);
1066 static inline void tcg_out_goto_label(TCGContext *s, int cond, int label_index)
1068 TCGLabel *l = &s->labels[label_index];
1071 tcg_out_goto(s, cond, l->u.value);
1073 tcg_out_reloc(s, s->code_ptr, R_ARM_PC24, label_index, 31337);
1074 tcg_out_b_noaddr(s, cond);
1078 #ifdef CONFIG_SOFTMMU
1079 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1080 * int mmu_idx, uintptr_t ra)
1082 static const void * const qemu_ld_helpers[8] = {
1083 helper_ret_ldub_mmu,
1084 helper_ret_lduw_mmu,
1085 helper_ret_ldul_mmu,
1088 helper_ret_ldsb_mmu,
1089 helper_ret_ldsw_mmu,
1090 helper_ret_ldul_mmu,
1094 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1095 * uintxx_t val, int mmu_idx, uintptr_t ra)
1097 static const void * const qemu_st_helpers[4] = {
1104 /* Helper routines for marshalling helper function arguments into
1105 * the correct registers and stack.
1106 * argreg is where we want to put this argument, arg is the argument itself.
1107 * Return value is the updated argreg ready for the next call.
1108 * Note that argregs 0..3 are real registers; 4 and up go on the stack.
1110 * We provide routines for arguments which are: immediate, 32 bit
1111 * value in register, 16 and 8 bit values in register (which must be zero
1112 * extended before use) and 64 bit value in a lo:hi register pair.
1114 #define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG) \
1115 static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg) \
1118 MOV_ARG(s, COND_AL, argreg, arg); \
1120 int ofs = (argreg - 4) * 4; \
1122 assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE); \
1123 tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
1125 return argreg + 1; \
1128 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
1129 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1130 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
1131 (tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1132 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
1133 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
1134 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
1136 static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
1137 TCGReg arglo, TCGReg arghi)
1139 /* 64 bit arguments must go in even/odd register pairs
1140 * and in 8-aligned stack slots.
1145 if (use_armv6_instructions && argreg >= 4
1146 && (arglo & 1) == 0 && arghi == arglo + 1) {
1147 tcg_out_strd_8(s, COND_AL, arglo,
1148 TCG_REG_CALL_STACK, (argreg - 4) * 4);
1151 argreg = tcg_out_arg_reg32(s, argreg, arglo);
1152 argreg = tcg_out_arg_reg32(s, argreg, arghi);
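/* Usage sketch (illustrative, mirroring the slow paths further below):
 * marshalling (env, 64-bit guest address, mem_index, return address)
 * would look like
 *
 *     argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
 *     argreg = tcg_out_arg_reg64(s, argreg, addrlo, addrhi);
 *     argreg = tcg_out_arg_imm32(s, argreg, mem_index);
 *     argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 *
 * giving r0 = env and r2:r3 = address (r1 is skipped so the pair stays
 * even/odd), with mem_index and the return address in the first two
 * stack slots.
 */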
1157 #define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
1159 /* We're expecting to use an 8-bit immediate and to mask. */
1160 QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
1162 /* We're expecting to use an 8-bit immediate add + 8-bit ldrd offset.
1163 Using the offset of the second entry in the last tlb table ensures
1164 that we can index all of the elements of the first entry. */
1165 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
1168 /* Load and compare a TLB entry, leaving the flags set. Returns the register
1169 containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
1171 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
1172 int s_bits, int mem_index, bool is_load)
1174 TCGReg base = TCG_AREG0;
1177 ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1178 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
1179 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
1181 /* Should generate something like the following:
1182 * shr tmp, addr_reg, #TARGET_PAGE_BITS (1)
1183 * add r2, env, #high
1184 * and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
1185 * add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
1186 * ldr r0, [r2, #cmp] (4)
1187 * tst addr_reg, #s_mask
1188 * ldr r1, [r2, #add] (5)
1189 * cmpeq r0, tmp, lsl #TARGET_PAGE_BITS
1191 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
1192 0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
1194 /* We checked above that the offset is contained within 16 bits. */
1195 if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
1196 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1197 (24 << 7) | (cmp_off >> 8));
1199 add_off -= cmp_off & 0xff00;
1203 tcg_out_dat_imm(s, COND_AL, ARITH_AND,
1204 TCG_REG_R0, TCG_REG_TMP, CPU_TLB_SIZE - 1);
1205 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
1206 TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
1208 /* Load the tlb comparator. Use ldrd if needed and available,
1209 but due to how the pointer needs setting up, ldm isn't useful.
1210 Base arm5 doesn't have ldrd, but armv5te does. */
1211 if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
1212 tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1214 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
1215 if (TARGET_LONG_BITS == 64) {
1216 tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
1220 /* Check alignment. */
1222 tcg_out_dat_imm(s, COND_AL, ARITH_TST,
1223 0, addrlo, (1 << s_bits) - 1);
1226 /* Load the tlb addend. */
1227 tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R2, add_off);
1229 tcg_out_dat_reg(s, (s_bits ? COND_EQ : COND_AL), ARITH_CMP, 0,
1230 TCG_REG_R0, TCG_REG_TMP, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
1232 if (TARGET_LONG_BITS == 64) {
1233 tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
1234 TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
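/* Summary note (illustrative): on a TLB hit the comparisons above leave the
   flags EQ and r2 holds the addend; the callers below either branch-and-link
   to the slow path on NE (loads) or predicate the memory access itself on EQ
   (stores). */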
1240 /* Record the context of a call to the out of line helper code for the slow
1241 path for a load or store, so that we can later generate the correct
1242 helper code. */
1243 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
1244 int data_reg, int data_reg2, int addrlo_reg,
1245 int addrhi_reg, int mem_index,
1246 uint8_t *raddr, uint8_t *label_ptr)
1248 TCGLabelQemuLdst *label = new_ldst_label(s);
1250 label->is_ld = is_ld;
1252 label->datalo_reg = data_reg;
1253 label->datahi_reg = data_reg2;
1254 label->addrlo_reg = addrlo_reg;
1255 label->addrhi_reg = addrhi_reg;
1256 label->mem_index = mem_index;
1257 label->raddr = raddr;
1258 label->label_ptr[0] = label_ptr;
1261 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1263 TCGReg argreg, data_reg, data_reg2;
1267 reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
1269 argreg = tcg_out_arg_reg32(s, TCG_REG_R0, TCG_AREG0);
1270 if (TARGET_LONG_BITS == 64) {
1271 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1273 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1275 argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
1276 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1278 /* For armv6 we can use the canonical unsigned helpers and minimize
1279 icache usage. For pre-armv6, use the signed helpers since we do
1280 not have a single insn sign-extend. */
1281 if (use_armv6_instructions) {
1282 func = (uintptr_t)qemu_ld_helpers[opc & 3];
1284 func = (uintptr_t)qemu_ld_helpers[opc];
1289 tcg_out_call(s, func);
1291 data_reg = lb->datalo_reg;
1292 data_reg2 = lb->datahi_reg;
1295 tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
1298 tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
1301 tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
1304 if (data_reg != TCG_REG_R1) {
1305 tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
1306 tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
1307 } else if (data_reg2 != TCG_REG_R0) {
1308 tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
1309 tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
1311 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
1312 tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
1313 tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_TMP);
1318 tcg_out_goto(s, COND_AL, (tcg_target_long)lb->raddr);
1321 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1323 TCGReg argreg, data_reg, data_reg2;
1325 reloc_pc24(lb->label_ptr[0], (tcg_target_long)s->code_ptr);
1327 argreg = TCG_REG_R0;
1328 argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
1329 if (TARGET_LONG_BITS == 64) {
1330 argreg = tcg_out_arg_reg64(s, argreg, lb->addrlo_reg, lb->addrhi_reg);
1332 argreg = tcg_out_arg_reg32(s, argreg, lb->addrlo_reg);
1335 data_reg = lb->datalo_reg;
1336 data_reg2 = lb->datahi_reg;
1339 argreg = tcg_out_arg_reg8(s, argreg, data_reg);
1342 argreg = tcg_out_arg_reg16(s, argreg, data_reg);
1345 argreg = tcg_out_arg_reg32(s, argreg, data_reg);
1348 argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
1352 argreg = tcg_out_arg_imm32(s, argreg, lb->mem_index);
1353 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
1355 /* Tail-call to the helper, which will return to the fast path. */
1356 tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_st_helpers[lb->opc & 3]);
1358 #endif /* SOFTMMU */
1360 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1362 TCGReg addr_reg, data_reg, data_reg2;
1364 #ifdef CONFIG_SOFTMMU
1365 int mem_index, s_bits;
1366 TCGReg addr_reg2, addend;
1369 #ifdef TARGET_WORDS_BIGENDIAN
1376 data_reg2 = (opc == 3 ? *args++ : 0);
1378 #ifdef CONFIG_SOFTMMU
1379 addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1383 addend = tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 1);
1385 /* This is a conditional BL only to load a pointer within this opcode into LR
1386 for the slow path. We will not be using the value for a tail call. */
1387 label_ptr = s->code_ptr;
1388 tcg_out_bl_noaddr(s, COND_NE);
1392 tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, addend);
1395 tcg_out_ld8s_r(s, COND_AL, data_reg, addr_reg, addend);
1398 tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, addend);
1400 tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
1405 tcg_out_ld16u_r(s, COND_AL, data_reg, addr_reg, addend);
1406 tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
1408 tcg_out_ld16s_r(s, COND_AL, data_reg, addr_reg, addend);
1413 tcg_out_ld32_r(s, COND_AL, data_reg, addr_reg, addend);
1415 tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
1420 /* Be careful not to modify data_reg and data_reg2
1421 for the slow path below. */
1422 TCGReg dl = (bswap ? data_reg2 : data_reg);
1423 TCGReg dh = (bswap ? data_reg : data_reg2);
1425 if (use_armv6_instructions && (dl & 1) == 0 && dh == dl + 1) {
1426 tcg_out_ldrd_r(s, COND_AL, dl, addr_reg, addend);
1427 } else if (dl != addend) {
1428 tcg_out_ld32_rwb(s, COND_AL, dl, addend, addr_reg);
1429 tcg_out_ld32_12(s, COND_AL, dh, addend, 4);
1431 tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_TMP,
1432 addend, addr_reg, SHIFT_IMM_LSL(0));
1433 tcg_out_ld32_12(s, COND_AL, dl, TCG_REG_TMP, 0);
1434 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
1437 tcg_out_bswap32(s, COND_AL, dh, dh);
1438 tcg_out_bswap32(s, COND_AL, dl, dl);
1444 add_qemu_ldst_label(s, 1, opc, data_reg, data_reg2, addr_reg, addr_reg2,
1445 mem_index, s->code_ptr, label_ptr);
1446 #else /* !CONFIG_SOFTMMU */
1448 uint32_t offset = GUEST_BASE;
1452 i = ctz32(offset) & ~1;
1453 rot = ((32 - i) << 7) & 0xf00;
1455 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addr_reg,
1456 ((offset >> i) & 0xff) | rot);
1457 addr_reg = TCG_REG_TMP;
1458 offset &= ~(0xff << i);
1463 tcg_out_ld8_12(s, COND_AL, data_reg, addr_reg, 0);
1466 tcg_out_ld8s_8(s, COND_AL, data_reg, addr_reg, 0);
1469 tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
1471 tcg_out_bswap16(s, COND_AL, data_reg, data_reg);
1476 tcg_out_ld16u_8(s, COND_AL, data_reg, addr_reg, 0);
1477 tcg_out_bswap16s(s, COND_AL, data_reg, data_reg);
1479 tcg_out_ld16s_8(s, COND_AL, data_reg, addr_reg, 0);
1484 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, 0);
1486 tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
1490 if (use_armv6_instructions && !bswap
1491 && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
1492 tcg_out_ldrd_8(s, COND_AL, data_reg, addr_reg, 0);
1493 } else if (use_armv6_instructions && bswap
1494 && (data_reg2 & 1) == 0 && data_reg == data_reg2 + 1) {
1495 tcg_out_ldrd_8(s, COND_AL, data_reg2, addr_reg, 0);
1496 } else if (data_reg == addr_reg) {
1497 tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
1498 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
1500 tcg_out_ld32_12(s, COND_AL, data_reg, addr_reg, bswap ? 4 : 0);
1501 tcg_out_ld32_12(s, COND_AL, data_reg2, addr_reg, bswap ? 0 : 4);
1504 tcg_out_bswap32(s, COND_AL, data_reg, data_reg);
1505 tcg_out_bswap32(s, COND_AL, data_reg2, data_reg2);
1512 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1514 TCGReg addr_reg, data_reg, data_reg2;
1516 #ifdef CONFIG_SOFTMMU
1517 int mem_index, s_bits;
1518 TCGReg addr_reg2, addend;
1521 #ifdef TARGET_WORDS_BIGENDIAN
1528 data_reg2 = (opc == 3 ? *args++ : 0);
1530 #ifdef CONFIG_SOFTMMU
1531 addr_reg2 = (TARGET_LONG_BITS == 64 ? *args++ : 0);
1535 addend = tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 0);
1539 tcg_out_st8_r(s, COND_EQ, data_reg, addr_reg, addend);
1543 tcg_out_bswap16st(s, COND_EQ, TCG_REG_R0, data_reg);
1544 tcg_out_st16_r(s, COND_EQ, TCG_REG_R0, addr_reg, addend);
1546 tcg_out_st16_r(s, COND_EQ, data_reg, addr_reg, addend);
1552 tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
1553 tcg_out_st32_r(s, COND_EQ, TCG_REG_R0, addr_reg, addend);
1555 tcg_out_st32_r(s, COND_EQ, data_reg, addr_reg, addend);
1560 tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg2);
1561 tcg_out_st32_rwb(s, COND_EQ, TCG_REG_R0, addend, addr_reg);
1562 tcg_out_bswap32(s, COND_EQ, TCG_REG_R0, data_reg);
1563 tcg_out_st32_12(s, COND_EQ, TCG_REG_R0, addend, 4);
1564 } else if (use_armv6_instructions
1565 && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
1566 tcg_out_strd_r(s, COND_EQ, data_reg, addr_reg, addend);
1568 tcg_out_st32_rwb(s, COND_EQ, data_reg, addend, addr_reg);
1569 tcg_out_st32_12(s, COND_EQ, data_reg2, addend, 4);
1574 /* The conditional call must come last, as we're going to return here. */
1575 label_ptr = s->code_ptr;
1576 tcg_out_bl_noaddr(s, COND_NE);
1578 add_qemu_ldst_label(s, 0, opc, data_reg, data_reg2, addr_reg, addr_reg2,
1579 mem_index, s->code_ptr, label_ptr);
1580 #else /* !CONFIG_SOFTMMU */
1582 uint32_t offset = GUEST_BASE;
1587 i = ctz32(offset) & ~1;
1588 rot = ((32 - i) << 7) & 0xf00;
1590 tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R1, addr_reg,
1591 ((offset >> i) & 0xff) | rot);
1592 addr_reg = TCG_REG_R1;
1593 offset &= ~(0xff << i);
1598 tcg_out_st8_12(s, COND_AL, data_reg, addr_reg, 0);
1602 tcg_out_bswap16st(s, COND_AL, TCG_REG_R0, data_reg);
1603 tcg_out_st16_8(s, COND_AL, TCG_REG_R0, addr_reg, 0);
1605 tcg_out_st16_8(s, COND_AL, data_reg, addr_reg, 0);
1611 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
1612 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
1614 tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
1619 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg2);
1620 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 0);
1621 tcg_out_bswap32(s, COND_AL, TCG_REG_R0, data_reg);
1622 tcg_out_st32_12(s, COND_AL, TCG_REG_R0, addr_reg, 4);
1623 } else if (use_armv6_instructions
1624 && (data_reg & 1) == 0 && data_reg2 == data_reg + 1) {
1625 tcg_out_strd_8(s, COND_AL, data_reg, addr_reg, 0);
1627 tcg_out_st32_12(s, COND_AL, data_reg, addr_reg, 0);
1628 tcg_out_st32_12(s, COND_AL, data_reg2, addr_reg, 4);
1635 static uint8_t *tb_ret_addr;
1637 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1638 const TCGArg *args, const int *const_args)
1640 TCGArg a0, a1, a2, a3, a4, a5;
1644 case INDEX_op_exit_tb:
1645 if (use_armv7_instructions || check_fit_imm(args[0])) {
1646 tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
1647 tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
1649 uint8_t *ld_ptr = s->code_ptr;
1650 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
1651 tcg_out_goto(s, COND_AL, (tcg_target_ulong) tb_ret_addr);
1652 *ld_ptr = (uint8_t) (s->code_ptr - ld_ptr) - 8;
1653 tcg_out32(s, args[0]);
1656 case INDEX_op_goto_tb:
1657 if (s->tb_jmp_offset) {
1658 /* Direct jump method */
1659 #if defined(USE_DIRECT_JUMP)
1660 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1661 tcg_out_b_noaddr(s, COND_AL);
1663 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, -4);
1664 s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
1668 /* Indirect jump method */
1670 c = (int) (s->tb_next + args[0]) - ((int) s->code_ptr + 8);
1671 if (c > 0xfff || c < -0xfff) {
1672 tcg_out_movi32(s, COND_AL, TCG_REG_R0,
1673 (tcg_target_long) (s->tb_next + args[0]));
1674 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
1676 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, c);
1678 tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_PC, 0);
1679 tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, 0);
1680 tcg_out32(s, (tcg_target_long) (s->tb_next + args[0]));
1683 s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
1687 tcg_out_call(s, args[0]);
1689 tcg_out_callr(s, COND_AL, args[0]);
1692 tcg_out_goto_label(s, COND_AL, args[0]);
1695 case INDEX_op_ld8u_i32:
1696 tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
1698 case INDEX_op_ld8s_i32:
1699 tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
1701 case INDEX_op_ld16u_i32:
1702 tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
1704 case INDEX_op_ld16s_i32:
1705 tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
1707 case INDEX_op_ld_i32:
1708 tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
1710 case INDEX_op_st8_i32:
1711 tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
1713 case INDEX_op_st16_i32:
1714 tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
1716 case INDEX_op_st_i32:
1717 tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
1720 case INDEX_op_mov_i32:
1721 tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
1722 args[0], 0, args[1], SHIFT_IMM_LSL(0));
1724 case INDEX_op_movi_i32:
1725 tcg_out_movi32(s, COND_AL, args[0], args[1]);
1727 case INDEX_op_movcond_i32:
1728 /* Constraints mean that v2 is always in the same register as dest,
1729 * so we only need to do "if condition passed, move v1 to dest".
1731 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1732 args[1], args[2], const_args[2]);
1733 tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
1734 ARITH_MVN, args[0], 0, args[3], const_args[3]);
1736 case INDEX_op_add_i32:
1737 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
1738 args[0], args[1], args[2], const_args[2]);
1740 case INDEX_op_sub_i32:
1741 if (const_args[1]) {
1742 if (const_args[2]) {
1743 tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
1745 tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
1746 args[0], args[2], args[1], 1);
1749 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
1750 args[0], args[1], args[2], const_args[2]);
1753 case INDEX_op_and_i32:
1754 tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
1755 args[0], args[1], args[2], const_args[2]);
1757 case INDEX_op_andc_i32:
1758 tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
1759 args[0], args[1], args[2], const_args[2]);
1761 case INDEX_op_or_i32:
1764 case INDEX_op_xor_i32:
1768 tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
1770 case INDEX_op_add2_i32:
1771 a0 = args[0], a1 = args[1], a2 = args[2];
1772 a3 = args[3], a4 = args[4], a5 = args[5];
1773 if (a0 == a3 || (a0 == a5 && !const_args[5])) {
1776 tcg_out_dat_rIN(s, COND_AL, ARITH_ADD | TO_CPSR, ARITH_SUB | TO_CPSR,
1777 a0, a2, a4, const_args[4]);
1778 tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
1779 a1, a3, a5, const_args[5]);
1780 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1782 case INDEX_op_sub2_i32:
1783 a0 = args[0], a1 = args[1], a2 = args[2];
1784 a3 = args[3], a4 = args[4], a5 = args[5];
1785 if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
1788 if (const_args[2]) {
1789 if (const_args[4]) {
1790 tcg_out_movi32(s, COND_AL, a0, a4);
1793 tcg_out_dat_rI(s, COND_AL, ARITH_RSB | TO_CPSR, a0, a4, a2, 1);
1795 tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
1796 ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
1798 if (const_args[3]) {
1799 if (const_args[5]) {
1800 tcg_out_movi32(s, COND_AL, a1, a5);
1803 tcg_out_dat_rI(s, COND_AL, ARITH_RSC, a1, a5, a3, 1);
1805 tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
1806 a1, a3, a5, const_args[5]);
1808 tcg_out_mov_reg(s, COND_AL, args[0], a0);
1810 case INDEX_op_neg_i32:
1811 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
1813 case INDEX_op_not_i32:
1814 tcg_out_dat_reg(s, COND_AL,
1815 ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
1817 case INDEX_op_mul_i32:
1818 tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
1820 case INDEX_op_mulu2_i32:
1821 tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1823 case INDEX_op_muls2_i32:
1824 tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
1826 /* XXX: Perhaps args[2] & 0x1f is wrong */
1827 case INDEX_op_shl_i32:
1829 SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
1831 case INDEX_op_shr_i32:
1832 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
1833 SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
1835 case INDEX_op_sar_i32:
1836 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
1837 SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
1839 case INDEX_op_rotr_i32:
1840 c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
1841 SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
1844 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
1847 case INDEX_op_rotl_i32:
1848 if (const_args[2]) {
1849 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1850 ((0x20 - args[2]) & 0x1f) ?
1851 SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
1854 tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[1], 0x20);
1855 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
1856 SHIFT_REG_ROR(TCG_REG_TMP));
1860 case INDEX_op_brcond_i32:
1861 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1862 args[0], args[1], const_args[1]);
1863 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]], args[3]);
1865 case INDEX_op_brcond2_i32:
1866 /* The resulting conditions are:
1867 * TCG_COND_EQ --> a0 == a2 && a1 == a3,
1868 * TCG_COND_NE --> (a0 != a2 && a1 == a3) || a1 != a3,
1869 * TCG_COND_LT(U) --> (a0 < a2 && a1 == a3) || a1 < a3,
1870 * TCG_COND_GE(U) --> (a0 >= a2 && a1 == a3) || (a1 >= a3 && a1 != a3),
1871 * TCG_COND_LE(U) --> (a0 <= a2 && a1 == a3) || (a1 <= a3 && a1 != a3),
1872 * TCG_COND_GT(U) --> (a0 > a2 && a1 == a3) || a1 > a3,
1874 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1875 args[1], args[3], const_args[3]);
1876 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1877 args[0], args[2], const_args[2]);
1878 tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[4]], args[5]);
1880 case INDEX_op_setcond_i32:
1881 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1882 args[1], args[2], const_args[2]);
1883 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
1884 ARITH_MOV, args[0], 0, 1);
1885 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
1886 ARITH_MOV, args[0], 0, 0);
1888 case INDEX_op_setcond2_i32:
1889 /* See brcond2_i32 comment */
1890 tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
1891 args[2], args[4], const_args[4]);
1892 tcg_out_dat_rIN(s, COND_EQ, ARITH_CMP, ARITH_CMN, 0,
1893 args[1], args[3], const_args[3]);
1894 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[5]],
1895 ARITH_MOV, args[0], 0, 1);
1896 tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[5])],
1897 ARITH_MOV, args[0], 0, 0);
1900 case INDEX_op_qemu_ld8u:
1901 tcg_out_qemu_ld(s, args, 0);
1903 case INDEX_op_qemu_ld8s:
1904 tcg_out_qemu_ld(s, args, 0 | 4);
1906 case INDEX_op_qemu_ld16u:
1907 tcg_out_qemu_ld(s, args, 1);
1909 case INDEX_op_qemu_ld16s:
1910 tcg_out_qemu_ld(s, args, 1 | 4);
1912 case INDEX_op_qemu_ld32:
1913 tcg_out_qemu_ld(s, args, 2);
1915 case INDEX_op_qemu_ld64:
1916 tcg_out_qemu_ld(s, args, 3);
1919 case INDEX_op_qemu_st8:
1920 tcg_out_qemu_st(s, args, 0);
1922 case INDEX_op_qemu_st16:
1923 tcg_out_qemu_st(s, args, 1);
1925 case INDEX_op_qemu_st32:
1926 tcg_out_qemu_st(s, args, 2);
1928 case INDEX_op_qemu_st64:
1929 tcg_out_qemu_st(s, args, 3);
1932 case INDEX_op_bswap16_i32:
1933 tcg_out_bswap16(s, COND_AL, args[0], args[1]);
1935 case INDEX_op_bswap32_i32:
1936 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
1939 case INDEX_op_ext8s_i32:
1940 tcg_out_ext8s(s, COND_AL, args[0], args[1]);
1942 case INDEX_op_ext16s_i32:
1943 tcg_out_ext16s(s, COND_AL, args[0], args[1]);
1945 case INDEX_op_ext16u_i32:
1946 tcg_out_ext16u(s, COND_AL, args[0], args[1]);
1949 case INDEX_op_deposit_i32:
1950 tcg_out_deposit(s, COND_AL, args[0], args[2],
1951 args[3], args[4], const_args[2]);
1954 case INDEX_op_div_i32:
1955 tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
1957 case INDEX_op_divu_i32:
1958 tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
1966 static const TCGTargetOpDef arm_op_defs[] = {
1967 { INDEX_op_exit_tb, { } },
1968 { INDEX_op_goto_tb, { } },
1969 { INDEX_op_call, { "ri" } },
1970 { INDEX_op_br, { } },
1972 { INDEX_op_mov_i32, { "r", "r" } },
1973 { INDEX_op_movi_i32, { "r" } },
1975 { INDEX_op_ld8u_i32, { "r", "r" } },
1976 { INDEX_op_ld8s_i32, { "r", "r" } },
1977 { INDEX_op_ld16u_i32, { "r", "r" } },
1978 { INDEX_op_ld16s_i32, { "r", "r" } },
1979 { INDEX_op_ld_i32, { "r", "r" } },
1980 { INDEX_op_st8_i32, { "r", "r" } },
1981 { INDEX_op_st16_i32, { "r", "r" } },
1982 { INDEX_op_st_i32, { "r", "r" } },
1984 /* TODO: "r", "r", "ri" */
1985 { INDEX_op_add_i32, { "r", "r", "rIN" } },
1986 { INDEX_op_sub_i32, { "r", "rI", "rIN" } },
1987 { INDEX_op_mul_i32, { "r", "r", "r" } },
1988 { INDEX_op_mulu2_i32, { "r", "r", "r", "r" } },
1989 { INDEX_op_muls2_i32, { "r", "r", "r", "r" } },
1990 { INDEX_op_and_i32, { "r", "r", "rIK" } },
1991 { INDEX_op_andc_i32, { "r", "r", "rIK" } },
1992 { INDEX_op_or_i32, { "r", "r", "rI" } },
1993 { INDEX_op_xor_i32, { "r", "r", "rI" } },
1994 { INDEX_op_neg_i32, { "r", "r" } },
1995 { INDEX_op_not_i32, { "r", "r" } },
1997 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1998 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1999 { INDEX_op_sar_i32, { "r", "r", "ri" } },
2000 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
2001 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
2003 { INDEX_op_brcond_i32, { "r", "rIN" } },
2004 { INDEX_op_setcond_i32, { "r", "r", "rIN" } },
2005 { INDEX_op_movcond_i32, { "r", "r", "rIN", "rIK", "0" } },
2007 { INDEX_op_add2_i32, { "r", "r", "r", "r", "rIN", "rIK" } },
2008 { INDEX_op_sub2_i32, { "r", "r", "rI", "rI", "rIN", "rIK" } },
2009 { INDEX_op_brcond2_i32, { "r", "r", "rIN", "rIN" } },
2010 { INDEX_op_setcond2_i32, { "r", "r", "r", "rIN", "rIN" } },
2012 #if TARGET_LONG_BITS == 32
2013 { INDEX_op_qemu_ld8u, { "r", "l" } },
2014 { INDEX_op_qemu_ld8s, { "r", "l" } },
2015 { INDEX_op_qemu_ld16u, { "r", "l" } },
2016 { INDEX_op_qemu_ld16s, { "r", "l" } },
2017 { INDEX_op_qemu_ld32, { "r", "l" } },
2018 { INDEX_op_qemu_ld64, { "r", "r", "l" } },
2020 { INDEX_op_qemu_st8, { "s", "s" } },
2021 { INDEX_op_qemu_st16, { "s", "s" } },
2022 { INDEX_op_qemu_st32, { "s", "s" } },
2023 { INDEX_op_qemu_st64, { "s", "s", "s" } },
2025 { INDEX_op_qemu_ld8u, { "r", "l", "l" } },
2026 { INDEX_op_qemu_ld8s, { "r", "l", "l" } },
2027 { INDEX_op_qemu_ld16u, { "r", "l", "l" } },
2028 { INDEX_op_qemu_ld16s, { "r", "l", "l" } },
2029 { INDEX_op_qemu_ld32, { "r", "l", "l" } },
2030 { INDEX_op_qemu_ld64, { "r", "r", "l", "l" } },
2032 { INDEX_op_qemu_st8, { "s", "s", "s" } },
2033 { INDEX_op_qemu_st16, { "s", "s", "s" } },
2034 { INDEX_op_qemu_st32, { "s", "s", "s" } },
2035 { INDEX_op_qemu_st64, { "s", "s", "s", "s" } },
2038 { INDEX_op_bswap16_i32, { "r", "r" } },
2039 { INDEX_op_bswap32_i32, { "r", "r" } },
2041 { INDEX_op_ext8s_i32, { "r", "r" } },
2042 { INDEX_op_ext16s_i32, { "r", "r" } },
2043 { INDEX_op_ext16u_i32, { "r", "r" } },
2045 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
2047 { INDEX_op_div_i32, { "r", "r", "r" } },
2048 { INDEX_op_divu_i32, { "r", "r", "r" } },
2053 static void tcg_target_init(TCGContext *s)
2055 #if defined(CONFIG_GETAUXVAL)
2056 /* Only probe for the platform and capabilities if we haven't already
2057 determined maximum values at compile time. */
2058 # if !defined(use_idiv_instructions)
2060 unsigned long hwcap = getauxval(AT_HWCAP);
2061 use_idiv_instructions = (hwcap & HWCAP_ARM_IDIVA) != 0;
2064 if (__ARM_ARCH < 7) {
2065 const char *pl = (const char *)getauxval(AT_PLATFORM);
2066 if (pl != NULL && pl[0] == 'v' && pl[1] >= '4' && pl[1] <= '9') {
2067 arm_arch = pl[1] - '0';
2070 #endif /* GETAUXVAL */
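/* For example (assuming Linux's usual platform strings), an ARMv6 kernel
   reports AT_PLATFORM as "v6l", so the probe above raises arm_arch to 6
   even when the binary itself was compiled for an older architecture. */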
2072 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffff);
2073 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
2078 (1 << TCG_REG_R12) |
2079 (1 << TCG_REG_R14));
2081 tcg_regset_clear(s->reserved_regs);
2082 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
2083 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
2084 tcg_regset_set_reg(s->reserved_regs, TCG_REG_PC);
2086 tcg_add_target_add_op_defs(arm_op_defs);
2089 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
2090 TCGReg arg1, intptr_t arg2)
2092 tcg_out_ld32u(s, COND_AL, arg, arg1, arg2);
2095 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
2096 TCGReg arg1, intptr_t arg2)
2098 tcg_out_st32(s, COND_AL, arg, arg1, arg2);
2101 static inline void tcg_out_mov(TCGContext *s, TCGType type,
2102 TCGReg ret, TCGReg arg)
2104 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, ret, 0, arg, SHIFT_IMM_LSL(0));
2107 static inline void tcg_out_movi(TCGContext *s, TCGType type,
2108 TCGReg ret, tcg_target_long arg)
2110 tcg_out_movi32(s, COND_AL, ret, arg);
2113 /* Compute frame size via macros, to share between tcg_target_qemu_prologue
2114 and tcg_register_jit. */
2116 #define PUSH_SIZE ((11 - 4 + 1 + 1) * sizeof(tcg_target_long))
2118 #define FRAME_SIZE \
2120 + TCG_STATIC_CALL_ARGS_SIZE \
2121 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
2122 + TCG_TARGET_STACK_ALIGN - 1) \
2123 & -TCG_TARGET_STACK_ALIGN)
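/* Worked out: PUSH_SIZE is (11 - 4 + 1 + 1) * 4 = 36 bytes, i.e. r4-r11
   plus lr pushed by the stmdb in the prologue; FRAME_SIZE then rounds the
   push plus the helper-argument and temp-buffer areas up to
   TCG_TARGET_STACK_ALIGN, and the prologue only subtracts
   FRAME_SIZE - PUSH_SIZE because the stmdb has already adjusted sp. */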
2125 static void tcg_target_qemu_prologue(TCGContext *s)
2129 /* Calling convention requires us to save r4-r11 and lr. */
2130 /* stmdb sp!, { r4 - r11, lr } */
2131 tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
2133 /* Reserve callee argument and tcg temp space. */
2134 stack_addend = FRAME_SIZE - PUSH_SIZE;
2136 tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
2137 TCG_REG_CALL_STACK, stack_addend, 1);
2138 tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
2139 CPU_TEMP_BUF_NLONGS * sizeof(long));
2141 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2143 tcg_out_bx(s, COND_AL, tcg_target_call_iarg_regs[1]);
2144 tb_ret_addr = s->code_ptr;
2146 /* Epilogue. We branch here via tb_ret_addr. */
2147 tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
2148 TCG_REG_CALL_STACK, stack_addend, 1);
2150 /* ldmia sp!, { r4 - r11, pc } */
2151 tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);
2156 DebugFrameFDEHeader fde;
2157 uint8_t fde_def_cfa[4];
2158 uint8_t fde_reg_ofs[18];
2161 #define ELF_HOST_MACHINE EM_ARM
2163 /* We're expecting a 2 byte uleb128 encoded value. */
2164 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
2166 static DebugFrame debug_frame = {
2167 .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2170 .cie.code_align = 1,
2171 .cie.data_align = 0x7c, /* sleb128 -4 */
2172 .cie.return_column = 14,
2174 /* Total FDE size does not include the "len" member. */
2175 .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
2178 12, 13, /* DW_CFA_def_cfa sp, ... */
2179 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2183 /* The following must match the stmdb in the prologue. */
2184 0x8e, 1, /* DW_CFA_offset, lr, -4 */
2185 0x8b, 2, /* DW_CFA_offset, r11, -8 */
2186 0x8a, 3, /* DW_CFA_offset, r10, -12 */
2187 0x89, 4, /* DW_CFA_offset, r9, -16 */
2188 0x88, 5, /* DW_CFA_offset, r8, -20 */
2189 0x87, 6, /* DW_CFA_offset, r7, -24 */
2190 0x86, 7, /* DW_CFA_offset, r6, -28 */
2191 0x85, 8, /* DW_CFA_offset, r5, -32 */
2192 0x84, 9, /* DW_CFA_offset, r4, -36 */
2196 void tcg_register_jit(void *buf, size_t buf_size)
2198 debug_frame.fde.func_start = (tcg_target_long) buf;
2199 debug_frame.fde.func_len = buf_size;
2201 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));