2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType when setting the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28",
27 "%fp", /* frame pointer */
28 "%lr", /* link register */
29 "%sp", /* stack pointer */
33 #ifdef TARGET_WORDS_BIGENDIAN
34 #define TCG_LDST_BSWAP 1
36 #define TCG_LDST_BSWAP 0
39 static const int tcg_target_reg_alloc_order[] = {
40 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
41 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
42 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
44 TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
45 TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
46 TCG_REG_X16, TCG_REG_X17,
48 TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
50 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
51 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
53 TCG_REG_X8, /* will not use, see tcg_target_init */
56 static const int tcg_target_call_iarg_regs[8] = {
57 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
58 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
60 static const int tcg_target_call_oarg_regs[1] = {
64 #define TCG_REG_TMP TCG_REG_X8
66 #ifndef CONFIG_SOFTMMU
67 # if defined(CONFIG_USE_GUEST_BASE)
68 # define TCG_REG_GUEST_BASE TCG_REG_X28
70 # define TCG_REG_GUEST_BASE TCG_REG_XZR
74 static inline void reloc_pc26(void *code_ptr, intptr_t target)
76 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
77 /* read instruction, mask away previous PC_REL26 parameter contents,
78 set the proper offset, then write back the instruction. */
79 uint32_t insn = *(uint32_t *)code_ptr;
80 insn = deposit32(insn, 0, 26, offset);
81 *(uint32_t *)code_ptr = insn;
84 static inline void reloc_pc19(void *code_ptr, intptr_t target)
86 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
87 /* read instruction, mask away previous PC_REL19 parameter contents,
88 set the proper offset, then write back the instruction. */
89 uint32_t insn = *(uint32_t *)code_ptr;
90 insn = deposit32(insn, 5, 19, offset);
91 *(uint32_t *)code_ptr = insn;
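/* In both cases the offset is in units of 4-byte instructions:
   (target - code_ptr) / 4.  For example, a branch to the instruction
   8 bytes ahead is encoded as 2.  reloc_pc26 deposits the offset into
   bits [25:0] (B/BL), reloc_pc19 into bits [23:5] (conditional branch). */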
94 static inline void patch_reloc(uint8_t *code_ptr, int type,
95 intptr_t value, intptr_t addend)
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, value);
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, value);
113 #define TCG_CT_CONST_IS32 0x100
114 #define TCG_CT_CONST_AIMM 0x200
115 #define TCG_CT_CONST_LIMM 0x400
116 #define TCG_CT_CONST_ZERO 0x800
117 #define TCG_CT_CONST_MONE 0x1000
119 /* parse target specific constraints */
120 static int target_parse_constraint(TCGArgConstraint *ct,
121 const char **pct_str)
123 const char *ct_str = *pct_str;
127 ct->ct |= TCG_CT_REG;
128 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
130 case 'l': /* qemu_ld / qemu_st address, data_reg */
131 ct->ct |= TCG_CT_REG;
132 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
133 #ifdef CONFIG_SOFTMMU
134 /* x0 and x1 will be overwritten when reading the tlb entry,
135 and x2 and x3 are needed for helper arguments, so better to avoid using them. */
136 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
137 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
138 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
139 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
142 case 'w': /* The operand should be considered 32-bit. */
143 ct->ct |= TCG_CT_CONST_IS32;
145 case 'A': /* Valid for arithmetic immediate (positive or negative). */
146 ct->ct |= TCG_CT_CONST_AIMM;
148 case 'L': /* Valid for logical immediate. */
149 ct->ct |= TCG_CT_CONST_LIMM;
151 case 'M': /* minus one */
152 ct->ct |= TCG_CT_CONST_MONE;
155 ct->ct |= TCG_CT_CONST_ZERO;
166 static inline bool is_aimm(uint64_t val)
168 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
171 static inline bool is_limm(uint64_t val)
173 /* Taking a simplified view of the logical immediates for now, ignoring
174 the replication that can happen across the field. Match bit patterns
178 of the forms 0....01....1 and 0..01..10..0, and their inverses. */
180 /* Make things easier below, by testing the form with msb clear. */
181 if ((int64_t)val < 0) {
188 return (val & (val - 1)) == 0;
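/* Worked examples (a sketch, not exhaustive): 0x0000ffff matches the
   0....01....1 form and 0x0ff0 the 0..01..10..0 form; their inverses,
   such as 0xffffffffffff0000, also match.  A replicated pattern like
   0x00ff00ff is rejected by this simplified test even though the
   architecture could encode it. */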
191 static int tcg_target_const_match(tcg_target_long val,
192 const TCGArgConstraint *arg_ct)
196 if (ct & TCG_CT_CONST) {
199 if (ct & TCG_CT_CONST_IS32) {
202 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
205 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
208 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
211 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
218 enum aarch64_cond_code {
221 COND_CS = 0x2, /* Unsigned greater or equal */
222 COND_HS = COND_CS, /* ALIAS greater or equal */
223 COND_CC = 0x3, /* Unsigned less than */
224 COND_LO = COND_CC, /* ALIAS Lower */
225 COND_MI = 0x4, /* Negative */
226 COND_PL = 0x5, /* Zero or greater */
227 COND_VS = 0x6, /* Overflow */
228 COND_VC = 0x7, /* No overflow */
229 COND_HI = 0x8, /* Unsigned greater than */
230 COND_LS = 0x9, /* Unsigned less or equal */
236 COND_NV = 0xf, /* behaves like COND_AL here */
239 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
240 [TCG_COND_EQ] = COND_EQ,
241 [TCG_COND_NE] = COND_NE,
242 [TCG_COND_LT] = COND_LT,
243 [TCG_COND_GE] = COND_GE,
244 [TCG_COND_LE] = COND_LE,
245 [TCG_COND_GT] = COND_GT,
247 [TCG_COND_LTU] = COND_LO,
248 [TCG_COND_GTU] = COND_HI,
249 [TCG_COND_GEU] = COND_HS,
250 [TCG_COND_LEU] = COND_LS,
253 /* opcodes for LDR / STR instructions with base + simm9 addressing */
254 enum aarch64_ldst_op_data { /* size of the data moved */
260 enum aarch64_ldst_op_type { /* type of operation */
261 LDST_ST = 0x0, /* store */
262 LDST_LD = 0x4, /* load */
263 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
264 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
267 /* We encode the format of the insn into the beginning of the name, so that
268 we can have the preprocessor help "typecheck" the insn vs the output
269 function. Arm didn't provide us with nice names for the formats, so we
270 use the section number of the architecture reference manual in which the
271 instruction group is described. */
273 /* Conditional branch (immediate). */
274 I3202_B_C = 0x54000000,
276 /* Unconditional branch (immediate). */
277 I3206_B = 0x14000000,
278 I3206_BL = 0x94000000,
280 /* Unconditional branch (register). */
281 I3207_BR = 0xd61f0000,
282 I3207_BLR = 0xd63f0000,
283 I3207_RET = 0xd65f0000,
285 /* Add/subtract immediate instructions. */
286 I3401_ADDI = 0x11000000,
287 I3401_ADDSI = 0x31000000,
288 I3401_SUBI = 0x51000000,
289 I3401_SUBSI = 0x71000000,
291 /* Bitfield instructions. */
292 I3402_BFM = 0x33000000,
293 I3402_SBFM = 0x13000000,
294 I3402_UBFM = 0x53000000,
296 /* Extract instruction. */
297 I3403_EXTR = 0x13800000,
299 /* Logical immediate instructions. */
300 I3404_ANDI = 0x12000000,
301 I3404_ORRI = 0x32000000,
302 I3404_EORI = 0x52000000,
304 /* Move wide immediate instructions. */
305 I3405_MOVN = 0x12800000,
306 I3405_MOVZ = 0x52800000,
307 I3405_MOVK = 0x72800000,
309 /* PC relative addressing instructions. */
310 I3406_ADR = 0x10000000,
311 I3406_ADRP = 0x90000000,
313 /* Add/subtract shifted register instructions (without a shift). */
314 I3502_ADD = 0x0b000000,
315 I3502_ADDS = 0x2b000000,
316 I3502_SUB = 0x4b000000,
317 I3502_SUBS = 0x6b000000,
319 /* Add/subtract shifted register instructions (with a shift). */
320 I3502S_ADD_LSL = I3502_ADD,
322 /* Add/subtract with carry instructions. */
323 I3503_ADC = 0x1a000000,
324 I3503_SBC = 0x5a000000,
326 /* Conditional select instructions. */
327 I3506_CSEL = 0x1a800000,
328 I3506_CSINC = 0x1a800400,
330 /* Data-processing (2 source) instructions. */
331 I3508_LSLV = 0x1ac02000,
332 I3508_LSRV = 0x1ac02400,
333 I3508_ASRV = 0x1ac02800,
334 I3508_RORV = 0x1ac02c00,
335 I3508_SMULH = 0x9b407c00,
336 I3508_UMULH = 0x9bc07c00,
337 I3508_UDIV = 0x1ac00800,
338 I3508_SDIV = 0x1ac00c00,
340 /* Data-processing (3 source) instructions. */
341 I3509_MADD = 0x1b000000,
342 I3509_MSUB = 0x1b008000,
344 /* Logical shifted register instructions (without a shift). */
345 I3510_AND = 0x0a000000,
346 I3510_BIC = 0x0a200000,
347 I3510_ORR = 0x2a000000,
348 I3510_ORN = 0x2a200000,
349 I3510_EOR = 0x4a000000,
350 I3510_EON = 0x4a200000,
351 I3510_ANDS = 0x6a000000,
354 static inline enum aarch64_ldst_op_data
355 aarch64_ldst_get_data(TCGOpcode tcg_op)
358 case INDEX_op_ld8u_i32:
359 case INDEX_op_ld8s_i32:
360 case INDEX_op_ld8u_i64:
361 case INDEX_op_ld8s_i64:
362 case INDEX_op_st8_i32:
363 case INDEX_op_st8_i64:
366 case INDEX_op_ld16u_i32:
367 case INDEX_op_ld16s_i32:
368 case INDEX_op_ld16u_i64:
369 case INDEX_op_ld16s_i64:
370 case INDEX_op_st16_i32:
371 case INDEX_op_st16_i64:
374 case INDEX_op_ld_i32:
375 case INDEX_op_st_i32:
376 case INDEX_op_ld32u_i64:
377 case INDEX_op_ld32s_i64:
378 case INDEX_op_st32_i64:
381 case INDEX_op_ld_i64:
382 case INDEX_op_st_i64:
390 static inline enum aarch64_ldst_op_type
391 aarch64_ldst_get_type(TCGOpcode tcg_op)
394 case INDEX_op_st8_i32:
395 case INDEX_op_st16_i32:
396 case INDEX_op_st8_i64:
397 case INDEX_op_st16_i64:
398 case INDEX_op_st_i32:
399 case INDEX_op_st32_i64:
400 case INDEX_op_st_i64:
403 case INDEX_op_ld8u_i32:
404 case INDEX_op_ld16u_i32:
405 case INDEX_op_ld8u_i64:
406 case INDEX_op_ld16u_i64:
407 case INDEX_op_ld_i32:
408 case INDEX_op_ld32u_i64:
409 case INDEX_op_ld_i64:
412 case INDEX_op_ld8s_i32:
413 case INDEX_op_ld16s_i32:
416 case INDEX_op_ld8s_i64:
417 case INDEX_op_ld16s_i64:
418 case INDEX_op_ld32s_i64:
426 static inline uint32_t tcg_in32(TCGContext *s)
428 uint32_t v = *(uint32_t *)s->code_ptr;
432 /* Emit an opcode with "type-checking" of the format. */
433 #define tcg_out_insn(S, FMT, OP, ...) \
434 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
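/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, aimm).  Naming an opcode
   from another format group, e.g. tcg_out_insn(s, 3401, ADD, ...), fails
   to compile because I3401_ADD does not exist. */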
436 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
437 TCGCond c, int imm19)
439 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
442 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
444 tcg_out32(s, insn | (imm26 & 0x03ffffff));
447 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
449 tcg_out32(s, insn | rn << 5);
452 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
453 TCGReg rd, TCGReg rn, uint64_t aimm)
456 assert((aimm & 0xfff) == 0);
458 assert(aimm <= 0xfff);
459 aimm |= 1 << 12; /* apply LSL 12 */
461 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
464 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
465 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
466 that feed the DecodeBitMasks pseudo function. */
467 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
468 TCGReg rd, TCGReg rn, int n, int immr, int imms)
470 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
474 #define tcg_out_insn_3404 tcg_out_insn_3402
476 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
477 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
479 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
483 /* This function is used for the Move (wide immediate) instruction group.
484 Note that SHIFT is a full shift count, not the 2 bit HW field. */
485 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
486 TCGReg rd, uint16_t half, unsigned shift)
488 assert((shift & ~0x30) == 0);
489 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
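/* For example, tcg_out_insn(s, 3405, MOVZ, TCG_TYPE_I64, rd, 0x1234, 16)
   emits MOVZ Xd, #0x1234, LSL #16 (hw field = 1), loading 0x12340000. */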
492 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
493 TCGReg rd, int64_t disp)
495 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
498 /* This function is for 3.5.2 (Add/subtract shifted register), for
499 the rare occasion when we actually want to supply a shift amount. */
500 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
501 TCGType ext, TCGReg rd, TCGReg rn,
504 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
507 /* This function is for 3.5.2 (Add/subtract shifted register),
508 and 3.5.10 (Logical shifted register), for the vast majority of cases
509 when we don't want to apply a shift. Thus it can also be used for
510 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
511 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
512 TCGReg rd, TCGReg rn, TCGReg rm)
514 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
517 #define tcg_out_insn_3503 tcg_out_insn_3502
518 #define tcg_out_insn_3508 tcg_out_insn_3502
519 #define tcg_out_insn_3510 tcg_out_insn_3502
521 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
522 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
524 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
525 | tcg_cond_to_aarch64[c] << 12);
528 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
529 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
531 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
535 static inline void tcg_out_ldst_9(TCGContext *s,
536 enum aarch64_ldst_op_data op_data,
537 enum aarch64_ldst_op_type op_type,
538 TCGReg rd, TCGReg rn, intptr_t offset)
540 /* use the LDUR/STUR form: base register plus 9-bit signed unscaled offset */
541 tcg_out32(s, op_data << 24 | op_type << 20
542 | (offset & 0x1ff) << 12 | rn << 5 | rd);
545 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
546 static inline void tcg_out_ldst_12(TCGContext *s,
547 enum aarch64_ldst_op_data op_data,
548 enum aarch64_ldst_op_type op_type,
549 TCGReg rd, TCGReg rn,
550 tcg_target_ulong scaled_uimm)
552 tcg_out32(s, (op_data | 1) << 24
553 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
556 /* Register to register move using ORR (shifted register with no shift). */
557 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
559 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
562 /* Register to register move using ADDI (move to/from SP). */
563 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
565 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
568 /* This function is used for the Logical (immediate) instruction group.
569 The value of LIMM must satisfy IS_LIMM. See the comment above about
570 only supporting simplified logical immediates. */
571 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
572 TCGReg rd, TCGReg rn, uint64_t limm)
576 assert(is_limm(limm));
581 r = 0; /* form 0....01....1 */
582 c = ctz64(~limm) - 1;
584 r = clz64(~limm); /* form 1..10..01..1 */
588 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
591 if (ext == TCG_TYPE_I32) {
596 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
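/* For example (a sketch): an AND with 0x0000ffff is emitted as a single
   ANDI using the 0....01....1 form, whereas a replicated pattern such as
   0x00ff00ff fails is_limm(), so TCG must materialise it in a register
   and use the register form of the instruction instead. */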
599 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
600 tcg_target_long value)
603 int i, wantinv, shift;
604 tcg_target_long svalue = value;
605 tcg_target_long ivalue = ~value;
606 tcg_target_long imask;
608 /* For 32-bit values, discard potential garbage in value. For 64-bit
609 values within [2**31, 2**32-1], we can create smaller sequences by
610 interpreting this as a negative 32-bit number, while ensuring that
611 the high 32 bits are cleared by setting SF=0. */
612 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
613 svalue = (int32_t)value;
614 value = (uint32_t)value;
615 ivalue = (uint32_t)ivalue;
619 /* Speed things up by handling the common case of small positive
620 and negative values specially. */
621 if ((value & ~0xffffull) == 0) {
622 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
624 } else if ((ivalue & ~0xffffull) == 0) {
625 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
629 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
630 use the sign-extended value. That lets us match rotated values such
631 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
632 if (is_limm(svalue)) {
633 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
637 /* Look for host pointer values within 4G of the PC. This happens
638 often when loading pointers to QEMU's own data structures. */
639 if (type == TCG_TYPE_I64) {
640 tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
641 if (disp == sextract64(disp, 0, 21)) {
642 tcg_out_insn(s, 3406, ADRP, rd, disp);
644 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
650 /* Would it take fewer insns to begin with MOVN? For the value and its
651 inverse, count the number of 16-bit lanes that are 0. */
652 for (i = wantinv = imask = 0; i < 64; i += 16) {
653 tcg_target_long mask = 0xffffull << i;
654 if ((value & mask) == 0) {
657 if ((ivalue & mask) == 0) {
663 /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN. */
670 /* Find the lowest lane that is not 0x0000. */
671 shift = ctz64(value) & (63 & -16);
672 tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
675 /* Re-invert the value, so MOVK sees non-inverted bits. */
677 /* Clear out all the 0xffff lanes. */
680 /* Clear out the lane that we just set. */
681 value &= ~(0xffffUL << shift);
683 /* Iterate until all lanes have been set, and thus cleared from VALUE. */
685 shift = ctz64(value) & (63 & -16);
686 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
687 value &= ~(0xffffUL << shift);
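/* Worked examples (a sketch, assuming the ADRP shortcut above does not
   apply): 0xabcd0000 is loaded with a single MOVZ rd, #0xabcd, LSL #16;
   0x0001000200030004 takes MOVZ rd, #4 followed by MOVK with #3 (LSL #16),
   #2 (LSL #32) and #1 (LSL #48). */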
691 static inline void tcg_out_ldst_r(TCGContext *s,
692 enum aarch64_ldst_op_data op_data,
693 enum aarch64_ldst_op_type op_type,
694 TCGReg rd, TCGReg base, TCGReg regoff)
696 /* load/store between register and memory using base + 64-bit register offset */
697 /* e.g. STR Wt, [Xn, Xm]: 0xb8600800 | (regoff << 16) | (base << 5) | rd */
698 /* the 0x6000 is for the "no extend field" */
699 tcg_out32(s, 0x00206800
700 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
703 /* Handle all load/store cases: pick the best addressing form for the offset. */
704 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
705 enum aarch64_ldst_op_type type,
706 TCGReg rd, TCGReg rn, intptr_t offset)
708 if (offset >= -256 && offset < 256) {
709 tcg_out_ldst_9(s, data, type, rd, rn, offset);
714 /* if the offset is naturally aligned and in range,
715 then we can use the scaled uimm12 encoding */
716 unsigned int s_bits = data >> 6;
717 if (!(offset & ((1 << s_bits) - 1))) {
718 tcg_target_ulong scaled_uimm = offset >> s_bits;
719 if (scaled_uimm <= 0xfff) {
720 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
726 /* worst-case scenario, move offset to temp register, use reg offset */
727 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
728 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
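/* For example (a sketch): offsets such as -16 or 200 use the unscaled
   simm9 form; an aligned offset of 0x1000 for a 32-bit access uses the
   scaled uimm12 form with immediate 0x400; a large offset such as
   0x123458 is first loaded into TCG_REG_TMP and the register-offset
   form is used. */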
731 static inline void tcg_out_mov(TCGContext *s,
732 TCGType type, TCGReg ret, TCGReg arg)
735 tcg_out_movr(s, type, ret, arg);
739 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
740 TCGReg arg1, intptr_t arg2)
742 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
746 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
747 TCGReg arg1, intptr_t arg2)
749 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
753 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
754 TCGReg rn, unsigned int a, unsigned int b)
756 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
759 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
760 TCGReg rn, unsigned int a, unsigned int b)
762 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
765 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
766 TCGReg rn, unsigned int a, unsigned int b)
768 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
771 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
772 TCGReg rn, TCGReg rm, unsigned int a)
774 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
777 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
778 TCGReg rd, TCGReg rn, unsigned int m)
780 int bits = ext ? 64 : 32;
782 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
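/* E.g. a 32-bit shift left by 3 becomes UBFM Wd, Wn, #29, #28, which is
   the encoding of the LSL Wd, Wn, #3 alias. */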
785 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
786 TCGReg rd, TCGReg rn, unsigned int m)
788 int max = ext ? 63 : 31;
789 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
792 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
793 TCGReg rd, TCGReg rn, unsigned int m)
795 int max = ext ? 63 : 31;
796 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
799 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
800 TCGReg rd, TCGReg rn, unsigned int m)
802 int max = ext ? 63 : 31;
803 tcg_out_extr(s, ext, rd, rn, rn, m & max);
806 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
807 TCGReg rd, TCGReg rn, unsigned int m)
809 int bits = ext ? 64 : 32;
811 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
814 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
815 TCGReg rn, unsigned lsb, unsigned width)
817 unsigned size = ext ? 64 : 32;
818 unsigned a = (size - lsb) & (size - 1);
819 unsigned b = width - 1;
820 tcg_out_bfm(s, ext, rd, rn, a, b);
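/* E.g. deposit_i32 with lsb = 8, width = 8 becomes BFM Wd, Wn, #24, #7,
   i.e. the BFI Wd, Wn, #8, #8 alias. */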
823 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
824 tcg_target_long b, bool const_b)
827 /* Using CMP or CMN aliases. */
829 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
831 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
834 /* Using CMP alias SUBS wzr, Wn, Wm */
835 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
839 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
841 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
843 if (offset < -0x02000000 || offset >= 0x02000000) {
844 /* out of 26bit range */
848 tcg_out_insn(s, 3206, B, offset);
851 static inline void tcg_out_goto_noaddr(TCGContext *s)
853 /* We pay attention here to not modify the branch target by reading from
854 the buffer. This ensures that caches and memory are kept coherent during
855 retranslation. Mask away possible garbage in the high bits for the
856 first translation, while keeping the offset bits for retranslation. */
857 uint32_t old = tcg_in32(s);
858 tcg_out_insn(s, 3206, B, old);
861 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
863 /* See comments in tcg_out_goto_noaddr. */
864 uint32_t old = tcg_in32(s) >> 5;
865 tcg_out_insn(s, 3202, B_C, c, old);
868 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
870 tcg_out_insn(s, 3207, BLR, reg);
873 static inline void tcg_out_call(TCGContext *s, intptr_t target)
875 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
877 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit range */
878 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
879 tcg_out_callr(s, TCG_REG_TMP);
881 tcg_out_insn(s, 3206, BL, offset);
885 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
887 intptr_t target = addr;
888 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
890 if (offset < -0x02000000 || offset >= 0x02000000) {
891 /* out of 26bit range */
895 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
896 flush_icache_range(jmp_addr, jmp_addr + 4);
899 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
901 TCGLabel *l = &s->labels[label_index];
904 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
905 tcg_out_goto_noaddr(s);
907 tcg_out_goto(s, l->u.value);
911 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
912 TCGArg b, bool b_const, int label)
914 TCGLabel *l = &s->labels[label];
917 tcg_out_cmp(s, ext, a, b, b_const);
920 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
921 offset = tcg_in32(s) >> 5;
923 offset = l->u.value - (uintptr_t)s->code_ptr;
925 assert(offset >= -0x40000 && offset < 0x40000);
928 tcg_out_insn(s, 3202, B_C, c, offset);
931 static inline void tcg_out_rev(TCGContext *s, TCGType ext,
932 TCGReg rd, TCGReg rm)
934 /* using REV 0x5ac00800 */
935 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
936 tcg_out32(s, base | rm << 5 | rd);
939 static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
940 TCGReg rd, TCGReg rm)
942 /* using REV16 0x5ac00400 */
943 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
944 tcg_out32(s, base | rm << 5 | rd);
947 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
948 TCGReg rd, TCGReg rn)
950 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
951 int bits = (8 << s_bits) - 1;
952 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
955 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
956 TCGReg rd, TCGReg rn)
958 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
959 int bits = (8 << s_bits) - 1;
960 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
963 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
964 TCGReg rn, int64_t aimm)
967 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
969 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
973 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
974 TCGReg rh, TCGReg al, TCGReg ah,
975 tcg_target_long bl, tcg_target_long bh,
976 bool const_bl, bool const_bh, bool sub)
981 if (rl == ah || (!const_bh && rl == bh)) {
987 if ((bl < 0) ^ sub) {
991 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
993 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
998 /* Note that the only two constants we support are 0 and -1, and
999 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1000 if ((bh != 0) ^ sub) {
1007 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1009 if (rl != orig_rl) {
1010 tcg_out_movr(s, ext, orig_rl, rl);
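/* Sketch of the common (non-constant) case: add2 is emitted as
   ADDS rl, al, bl; ADC rh, ah, bh and sub2 as SUBS rl, al, bl;
   SBC rh, ah, bh, with the carry/borrow from the flag-setting low-half
   instruction feeding the high half. */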
1014 #ifdef CONFIG_SOFTMMU
1015 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1016 * int mmu_idx, uintptr_t ra)
1018 static const void * const qemu_ld_helpers[4] = {
1019 helper_ret_ldub_mmu,
1020 helper_ret_lduw_mmu,
1021 helper_ret_ldul_mmu,
1025 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1026 * uintxx_t val, int mmu_idx, uintptr_t ra)
1028 static const void * const qemu_st_helpers[4] = {
1035 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1037 TCGMemOp opc = lb->opc;
1038 TCGMemOp size = opc & MO_SIZE;
1040 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1042 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1043 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1044 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
1045 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (intptr_t)lb->raddr);
1046 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)qemu_ld_helpers[size]);
1047 tcg_out_callr(s, TCG_REG_TMP);
1048 if (opc & MO_SIGN) {
1049 tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1051 tcg_out_movr(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_X0);
1054 tcg_out_goto(s, (intptr_t)lb->raddr);
1057 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1059 TCGMemOp size = lb->opc;
1061 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1063 tcg_out_movr(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1064 tcg_out_movr(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1065 tcg_out_movr(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1066 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1067 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
1068 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)qemu_st_helpers[size]);
1069 tcg_out_callr(s, TCG_REG_TMP);
1070 tcg_out_goto(s, (intptr_t)lb->raddr);
1073 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
1074 TCGReg data_reg, TCGReg addr_reg,
1076 uint8_t *raddr, uint8_t *label_ptr)
1078 TCGLabelQemuLdst *label = new_ldst_label(s);
1080 label->is_ld = is_ld;
1082 label->datalo_reg = data_reg;
1083 label->addrlo_reg = addr_reg;
1084 label->mem_index = mem_index;
1085 label->raddr = raddr;
1086 label->label_ptr[0] = label_ptr;
1089 /* Load and compare a TLB entry, emitting the conditional jump to the
1090 slow path for the failure case, which will be patched later when finalizing
1091 the slow path. Generated code returns the host addend in X1,
1092 clobbers X0,X2,X3,TMP. */
1093 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
1094 int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
1096 TCGReg base = TCG_AREG0;
1097 int tlb_offset = is_read ?
1098 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1099 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1100 /* Extract the TLB index from the address into X0.
1101 X0<CPU_TLB_BITS:0> =
1102 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1103 tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
1104 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1105 /* Store the page mask part of the address and the low s_bits into X3.
1106 Later this allows checking for equality and alignment at the same time.
1107 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1108 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1109 addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1110 /* Add any "high bits" from the tlb offset to the env address into X2,
1111 to take advantage of the LSL12 form of the ADDI instruction.
1112 X2 = env + (tlb_offset & 0xfff000) */
1113 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1114 tlb_offset & 0xfff000);
1115 /* Merge the tlb index contribution into X2.
1116 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1117 tcg_out_insn(s, 3502S, ADD_LSL, 1, TCG_REG_X2, TCG_REG_X2,
1118 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1119 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1120 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1121 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
1122 LDST_LD, TCG_REG_X0, TCG_REG_X2,
1123 (tlb_offset & 0xfff));
1124 /* Load the tlb addend. Do that early to avoid stalling.
1125 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1126 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
1127 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1128 (is_read ? offsetof(CPUTLBEntry, addr_read)
1129 : offsetof(CPUTLBEntry, addr_write)));
1130 /* Perform the address comparison. */
1131 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1132 *label_ptr = s->code_ptr;
1133 /* If not equal, we jump to the slow path. */
1134 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
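/* As a rough sketch, the sequence generated above is:
     ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
     and  x3, addr, #(TARGET_PAGE_MASK | ((1 << s_bits) - 1))
     add  x2, env, #(tlb_offset & 0xfff000)
     add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
     ldr  x0, [x2, #(tlb_offset & 0xfff)]                    ; tlb comparator
     ldr  x1, [x2, #(... + offsetof(CPUTLBEntry, addend))]   ; host addend
     cmp  x0, x3
     b.ne slow_path */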
1137 #endif /* CONFIG_SOFTMMU */
1139 static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
1140 TCGReg addr_r, TCGReg off_r)
1144 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
1147 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
1150 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1151 if (TCG_LDST_BSWAP) {
1152 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1156 if (TCG_LDST_BSWAP) {
1157 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
1158 tcg_out_rev16(s, TCG_TYPE_I32, data_r, data_r);
1159 tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1161 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
1165 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1166 if (TCG_LDST_BSWAP) {
1167 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1171 if (TCG_LDST_BSWAP) {
1172 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
1173 tcg_out_rev(s, TCG_TYPE_I32, data_r, data_r);
1174 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1176 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
1180 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
1181 if (TCG_LDST_BSWAP) {
1182 tcg_out_rev(s, TCG_TYPE_I64, data_r, data_r);
1190 static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
1191 TCGReg addr_r, TCGReg off_r)
1195 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
1198 if (TCG_LDST_BSWAP) {
1199 tcg_out_rev16(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1200 tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1202 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
1206 if (TCG_LDST_BSWAP) {
1207 tcg_out_rev(s, TCG_TYPE_I32, TCG_REG_TMP, data_r);
1208 tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1210 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
1214 if (TCG_LDST_BSWAP) {
1215 tcg_out_rev(s, TCG_TYPE_I64, TCG_REG_TMP, data_r);
1216 tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
1218 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
1226 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
1228 TCGReg addr_reg, data_reg;
1229 #ifdef CONFIG_SOFTMMU
1230 int mem_index, s_bits;
1236 #ifdef CONFIG_SOFTMMU
1237 mem_index = args[2];
1239 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1240 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1241 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
1242 mem_index, s->code_ptr, label_ptr);
1243 #else /* !CONFIG_SOFTMMU */
1244 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1245 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1246 #endif /* CONFIG_SOFTMMU */
1249 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1251 TCGReg addr_reg, data_reg;
1252 #ifdef CONFIG_SOFTMMU
1253 int mem_index, s_bits;
1259 #ifdef CONFIG_SOFTMMU
1260 mem_index = args[2];
1263 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1264 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1265 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1266 mem_index, s->code_ptr, label_ptr);
1267 #else /* !CONFIG_SOFTMMU */
1268 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1269 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1270 #endif /* CONFIG_SOFTMMU */
1273 static uint8_t *tb_ret_addr;
1275 /* callee stack use example:
1276 stp x29, x30, [sp,#-32]!
1278 stp x1, x2, [sp,#16]
1280 ldp x1, x2, [sp,#16]
1281 ldp x29, x30, [sp],#32
1285 /* push r1 and r2, and alloc stack space for a total of
1286 alloc_n elements (1 element = 16 bytes; alloc_n must be between 1 and 31). */
1287 static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1288 TCGReg r1, TCGReg r2, int alloc_n)
1290 /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1291 | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1292 assert(alloc_n > 0 && alloc_n < 0x20);
1293 alloc_n = (-alloc_n) & 0x3f;
1294 tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1297 /* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
1298 static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1299 TCGReg r1, TCGReg r2, int alloc_n)
1301 /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1302 | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1303 assert(alloc_n > 0 && alloc_n < 0x20);
1304 tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1307 static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1308 TCGReg r1, TCGReg r2, int idx)
1310 /* using register pair offset simm7 STP 0x29000000 | (ext)
1311 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1312 assert(idx > 0 && idx < 0x20);
1313 tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1316 static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1317 TCGReg r1, TCGReg r2, int idx)
1319 /* using register pair offset simm7 LDP 0x29400000 | (ext)
1320 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1321 assert(idx > 0 && idx < 0x20);
1322 tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1325 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1326 const TCGArg args[TCG_MAX_OP_ARGS],
1327 const int const_args[TCG_MAX_OP_ARGS])
1329 /* 99% of the time, we can signal the use of extension registers
1330 by looking to see if the opcode handles 64-bit data. */
1331 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1333 /* Hoist the loads of the most common arguments. */
1334 TCGArg a0 = args[0];
1335 TCGArg a1 = args[1];
1336 TCGArg a2 = args[2];
1337 int c2 = const_args[2];
1339 /* Some operands are defined with "rZ" constraint, a register or
1340 the zero register. These need not actually test args[I] == 0. */
1341 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1344 case INDEX_op_exit_tb:
1345 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1346 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1349 case INDEX_op_goto_tb:
1350 #ifndef USE_DIRECT_JUMP
1351 #error "USE_DIRECT_JUMP required for aarch64"
1353 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1354 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1355 /* actual branch destination will be patched by
1356 aarch64_tb_set_jmp_target later, beware retranslation. */
1357 tcg_out_goto_noaddr(s);
1358 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1362 if (const_args[0]) {
1363 tcg_out_call(s, a0);
1365 tcg_out_callr(s, a0);
1370 tcg_out_goto_label(s, a0);
1373 case INDEX_op_ld_i32:
1374 case INDEX_op_ld_i64:
1375 case INDEX_op_st_i32:
1376 case INDEX_op_st_i64:
1377 case INDEX_op_ld8u_i32:
1378 case INDEX_op_ld8s_i32:
1379 case INDEX_op_ld16u_i32:
1380 case INDEX_op_ld16s_i32:
1381 case INDEX_op_ld8u_i64:
1382 case INDEX_op_ld8s_i64:
1383 case INDEX_op_ld16u_i64:
1384 case INDEX_op_ld16s_i64:
1385 case INDEX_op_ld32u_i64:
1386 case INDEX_op_ld32s_i64:
1387 case INDEX_op_st8_i32:
1388 case INDEX_op_st8_i64:
1389 case INDEX_op_st16_i32:
1390 case INDEX_op_st16_i64:
1391 case INDEX_op_st32_i64:
1392 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1396 case INDEX_op_add_i32:
1399 case INDEX_op_add_i64:
1401 tcg_out_addsubi(s, ext, a0, a1, a2);
1403 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1407 case INDEX_op_sub_i32:
1410 case INDEX_op_sub_i64:
1412 tcg_out_addsubi(s, ext, a0, a1, -a2);
1414 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1418 case INDEX_op_neg_i64:
1419 case INDEX_op_neg_i32:
1420 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1423 case INDEX_op_and_i32:
1426 case INDEX_op_and_i64:
1428 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1430 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1434 case INDEX_op_andc_i32:
1437 case INDEX_op_andc_i64:
1439 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1441 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1445 case INDEX_op_or_i32:
1448 case INDEX_op_or_i64:
1450 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1452 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1456 case INDEX_op_orc_i32:
1459 case INDEX_op_orc_i64:
1461 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1463 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1467 case INDEX_op_xor_i32:
1470 case INDEX_op_xor_i64:
1472 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1474 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1478 case INDEX_op_eqv_i32:
1481 case INDEX_op_eqv_i64:
1483 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1485 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1489 case INDEX_op_not_i64:
1490 case INDEX_op_not_i32:
1491 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1494 case INDEX_op_mul_i64:
1495 case INDEX_op_mul_i32:
1496 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1499 case INDEX_op_div_i64:
1500 case INDEX_op_div_i32:
1501 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1503 case INDEX_op_divu_i64:
1504 case INDEX_op_divu_i32:
1505 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1508 case INDEX_op_rem_i64:
1509 case INDEX_op_rem_i32:
1510 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1511 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1513 case INDEX_op_remu_i64:
1514 case INDEX_op_remu_i32:
1515 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1516 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1519 case INDEX_op_shl_i64:
1520 case INDEX_op_shl_i32:
1522 tcg_out_shl(s, ext, a0, a1, a2);
1524 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1528 case INDEX_op_shr_i64:
1529 case INDEX_op_shr_i32:
1531 tcg_out_shr(s, ext, a0, a1, a2);
1533 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1537 case INDEX_op_sar_i64:
1538 case INDEX_op_sar_i32:
1540 tcg_out_sar(s, ext, a0, a1, a2);
1542 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1546 case INDEX_op_rotr_i64:
1547 case INDEX_op_rotr_i32:
1549 tcg_out_rotr(s, ext, a0, a1, a2);
1551 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1555 case INDEX_op_rotl_i64:
1556 case INDEX_op_rotl_i32:
1558 tcg_out_rotl(s, ext, a0, a1, a2);
1560 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1561 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1565 case INDEX_op_brcond_i32:
1568 case INDEX_op_brcond_i64:
1569 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]);
1572 case INDEX_op_setcond_i32:
1575 case INDEX_op_setcond_i64:
1576 tcg_out_cmp(s, ext, a1, a2, c2);
1577 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1578 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1579 TCG_REG_XZR, tcg_invert_cond(args[3]));
1582 case INDEX_op_movcond_i32:
1585 case INDEX_op_movcond_i64:
1586 tcg_out_cmp(s, ext, a1, a2, c2);
1587 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1590 case INDEX_op_qemu_ld8u:
1591 tcg_out_qemu_ld(s, args, 0 | 0);
1593 case INDEX_op_qemu_ld8s:
1594 tcg_out_qemu_ld(s, args, 4 | 0);
1596 case INDEX_op_qemu_ld16u:
1597 tcg_out_qemu_ld(s, args, 0 | 1);
1599 case INDEX_op_qemu_ld16s:
1600 tcg_out_qemu_ld(s, args, 4 | 1);
1602 case INDEX_op_qemu_ld32u:
1603 tcg_out_qemu_ld(s, args, 0 | 2);
1605 case INDEX_op_qemu_ld32s:
1606 tcg_out_qemu_ld(s, args, 4 | 2);
1608 case INDEX_op_qemu_ld32:
1609 tcg_out_qemu_ld(s, args, 0 | 2);
1611 case INDEX_op_qemu_ld64:
1612 tcg_out_qemu_ld(s, args, 0 | 3);
1614 case INDEX_op_qemu_st8:
1615 tcg_out_qemu_st(s, args, 0);
1617 case INDEX_op_qemu_st16:
1618 tcg_out_qemu_st(s, args, 1);
1620 case INDEX_op_qemu_st32:
1621 tcg_out_qemu_st(s, args, 2);
1623 case INDEX_op_qemu_st64:
1624 tcg_out_qemu_st(s, args, 3);
1627 case INDEX_op_bswap32_i64:
1628 /* Despite the _i64, this is a 32-bit bswap. */
1631 case INDEX_op_bswap64_i64:
1632 case INDEX_op_bswap32_i32:
1633 tcg_out_rev(s, ext, a0, a1);
1635 case INDEX_op_bswap16_i64:
1636 case INDEX_op_bswap16_i32:
1637 tcg_out_rev16(s, TCG_TYPE_I32, a0, a1);
1640 case INDEX_op_ext8s_i64:
1641 case INDEX_op_ext8s_i32:
1642 tcg_out_sxt(s, ext, MO_8, a0, a1);
1644 case INDEX_op_ext16s_i64:
1645 case INDEX_op_ext16s_i32:
1646 tcg_out_sxt(s, ext, MO_16, a0, a1);
1648 case INDEX_op_ext32s_i64:
1649 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1651 case INDEX_op_ext8u_i64:
1652 case INDEX_op_ext8u_i32:
1653 tcg_out_uxt(s, MO_8, a0, a1);
1655 case INDEX_op_ext16u_i64:
1656 case INDEX_op_ext16u_i32:
1657 tcg_out_uxt(s, MO_16, a0, a1);
1659 case INDEX_op_ext32u_i64:
1660 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1663 case INDEX_op_deposit_i64:
1664 case INDEX_op_deposit_i32:
1665 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1668 case INDEX_op_add2_i32:
1669 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1670 (int32_t)args[4], args[5], const_args[4],
1671 const_args[5], false);
1673 case INDEX_op_add2_i64:
1674 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1675 args[5], const_args[4], const_args[5], false);
1677 case INDEX_op_sub2_i32:
1678 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1679 (int32_t)args[4], args[5], const_args[4],
1680 const_args[5], true);
1682 case INDEX_op_sub2_i64:
1683 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1684 args[5], const_args[4], const_args[5], true);
1687 case INDEX_op_muluh_i64:
1688 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1690 case INDEX_op_mulsh_i64:
1691 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1694 case INDEX_op_mov_i64:
1695 case INDEX_op_mov_i32:
1696 case INDEX_op_movi_i64:
1697 case INDEX_op_movi_i32:
1698 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1700 /* Opcode not implemented. */
1707 static const TCGTargetOpDef aarch64_op_defs[] = {
1708 { INDEX_op_exit_tb, { } },
1709 { INDEX_op_goto_tb, { } },
1710 { INDEX_op_call, { "ri" } },
1711 { INDEX_op_br, { } },
1713 { INDEX_op_mov_i32, { "r", "r" } },
1714 { INDEX_op_mov_i64, { "r", "r" } },
1716 { INDEX_op_movi_i32, { "r" } },
1717 { INDEX_op_movi_i64, { "r" } },
1719 { INDEX_op_ld8u_i32, { "r", "r" } },
1720 { INDEX_op_ld8s_i32, { "r", "r" } },
1721 { INDEX_op_ld16u_i32, { "r", "r" } },
1722 { INDEX_op_ld16s_i32, { "r", "r" } },
1723 { INDEX_op_ld_i32, { "r", "r" } },
1724 { INDEX_op_ld8u_i64, { "r", "r" } },
1725 { INDEX_op_ld8s_i64, { "r", "r" } },
1726 { INDEX_op_ld16u_i64, { "r", "r" } },
1727 { INDEX_op_ld16s_i64, { "r", "r" } },
1728 { INDEX_op_ld32u_i64, { "r", "r" } },
1729 { INDEX_op_ld32s_i64, { "r", "r" } },
1730 { INDEX_op_ld_i64, { "r", "r" } },
1732 { INDEX_op_st8_i32, { "r", "r" } },
1733 { INDEX_op_st16_i32, { "r", "r" } },
1734 { INDEX_op_st_i32, { "r", "r" } },
1735 { INDEX_op_st8_i64, { "r", "r" } },
1736 { INDEX_op_st16_i64, { "r", "r" } },
1737 { INDEX_op_st32_i64, { "r", "r" } },
1738 { INDEX_op_st_i64, { "r", "r" } },
1740 { INDEX_op_add_i32, { "r", "r", "rwA" } },
1741 { INDEX_op_add_i64, { "r", "r", "rA" } },
1742 { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1743 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1744 { INDEX_op_mul_i32, { "r", "r", "r" } },
1745 { INDEX_op_mul_i64, { "r", "r", "r" } },
1746 { INDEX_op_div_i32, { "r", "r", "r" } },
1747 { INDEX_op_div_i64, { "r", "r", "r" } },
1748 { INDEX_op_divu_i32, { "r", "r", "r" } },
1749 { INDEX_op_divu_i64, { "r", "r", "r" } },
1750 { INDEX_op_rem_i32, { "r", "r", "r" } },
1751 { INDEX_op_rem_i64, { "r", "r", "r" } },
1752 { INDEX_op_remu_i32, { "r", "r", "r" } },
1753 { INDEX_op_remu_i64, { "r", "r", "r" } },
1754 { INDEX_op_and_i32, { "r", "r", "rwL" } },
1755 { INDEX_op_and_i64, { "r", "r", "rL" } },
1756 { INDEX_op_or_i32, { "r", "r", "rwL" } },
1757 { INDEX_op_or_i64, { "r", "r", "rL" } },
1758 { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1759 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1760 { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1761 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1762 { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1763 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1764 { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1765 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1767 { INDEX_op_neg_i32, { "r", "r" } },
1768 { INDEX_op_neg_i64, { "r", "r" } },
1769 { INDEX_op_not_i32, { "r", "r" } },
1770 { INDEX_op_not_i64, { "r", "r" } },
1772 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1773 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1774 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1775 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1776 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1777 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1778 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1779 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1780 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1781 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1783 { INDEX_op_brcond_i32, { "r", "rwA" } },
1784 { INDEX_op_brcond_i64, { "r", "rA" } },
1785 { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1786 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1787 { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1788 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1790 { INDEX_op_qemu_ld8u, { "r", "l" } },
1791 { INDEX_op_qemu_ld8s, { "r", "l" } },
1792 { INDEX_op_qemu_ld16u, { "r", "l" } },
1793 { INDEX_op_qemu_ld16s, { "r", "l" } },
1794 { INDEX_op_qemu_ld32u, { "r", "l" } },
1795 { INDEX_op_qemu_ld32s, { "r", "l" } },
1797 { INDEX_op_qemu_ld32, { "r", "l" } },
1798 { INDEX_op_qemu_ld64, { "r", "l" } },
1800 { INDEX_op_qemu_st8, { "l", "l" } },
1801 { INDEX_op_qemu_st16, { "l", "l" } },
1802 { INDEX_op_qemu_st32, { "l", "l" } },
1803 { INDEX_op_qemu_st64, { "l", "l" } },
1805 { INDEX_op_bswap16_i32, { "r", "r" } },
1806 { INDEX_op_bswap32_i32, { "r", "r" } },
1807 { INDEX_op_bswap16_i64, { "r", "r" } },
1808 { INDEX_op_bswap32_i64, { "r", "r" } },
1809 { INDEX_op_bswap64_i64, { "r", "r" } },
1811 { INDEX_op_ext8s_i32, { "r", "r" } },
1812 { INDEX_op_ext16s_i32, { "r", "r" } },
1813 { INDEX_op_ext8u_i32, { "r", "r" } },
1814 { INDEX_op_ext16u_i32, { "r", "r" } },
1816 { INDEX_op_ext8s_i64, { "r", "r" } },
1817 { INDEX_op_ext16s_i64, { "r", "r" } },
1818 { INDEX_op_ext32s_i64, { "r", "r" } },
1819 { INDEX_op_ext8u_i64, { "r", "r" } },
1820 { INDEX_op_ext16u_i64, { "r", "r" } },
1821 { INDEX_op_ext32u_i64, { "r", "r" } },
1823 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1824 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1826 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1827 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1828 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1829 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1831 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1832 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1837 static void tcg_target_init(TCGContext *s)
1839 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1840 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1842 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1843 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1844 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1845 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1846 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1847 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1848 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1849 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1850 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1851 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1852 (1 << TCG_REG_X18));
1854 tcg_regset_clear(s->reserved_regs);
1855 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1856 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1857 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1858 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1860 tcg_add_target_add_op_defs(aarch64_op_defs);
1863 static void tcg_target_qemu_prologue(TCGContext *s)
1865 /* NB: frame sizes are in 16 byte stack units! */
1866 int frame_size_callee_saved, frame_size_tcg_locals;
1869 /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
1870 frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
1872 /* frame size requirement for TCG local variables */
1873 frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1874 + CPU_TEMP_BUF_NLONGS * sizeof(long)
1875 + (TCG_TARGET_STACK_ALIGN - 1);
1876 frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1877 frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
1879 /* push (FP, LR) and update sp */
1880 tcg_out_push_pair(s, TCG_REG_SP,
1881 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1883 /* FP -> callee_saved */
1884 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1886 /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1887 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1888 int idx = (r - TCG_REG_X19) / 2 + 1;
1889 tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1892 /* Make stack space for TCG locals. */
1893 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1894 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1896 /* inform TCG about how to find TCG locals with register, offset, size */
1897 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1898 CPU_TEMP_BUF_NLONGS * sizeof(long));
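/* Resulting frame layout (a sketch; offsets in bytes, fp == sp as saved above):
     fp + 0x50: x27, x28
     ...
     fp + 0x10: x19, x20
     fp + 0x00: fp, lr
     sp + TCG_STATIC_CALL_ARGS_SIZE: TCG temporaries (CPU_TEMP_BUF_NLONGS longs)
     sp + 0x00: outgoing static call args */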
1900 #if defined(CONFIG_USE_GUEST_BASE)
1902 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1903 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1907 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1908 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1910 tb_ret_addr = s->code_ptr;
1912 /* Remove TCG locals stack space. */
1913 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1914 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1916 /* restore registers x19..x28.
1917 FP must be preserved, so it still points to callee_saved area */
1918 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1919 int idx = (r - TCG_REG_X19) / 2 + 1;
1920 tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1923 /* pop (FP, LR), restore SP to previous frame, return */
1924 tcg_out_pop_pair(s, TCG_REG_SP,
1925 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1926 tcg_out_insn(s, 3207, RET, TCG_REG_LR);