2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "qemu/bitops.h"
15 /* We're going to re-use TCGType in setting the SF bit, which controls
16 the size of the operation performed. If we know the values match, it
17 makes things much cleaner. */
18 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
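/* A TCGType value can thus be used directly as the SF bit: "ext << 31" in
   the emitters below selects the 64-bit form of an instruction and leaves
   the 32-bit form when clear.  */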
20 #ifdef CONFIG_DEBUG_TCG
21 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
22 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
23 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
24 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
25 "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
27 #endif /* CONFIG_DEBUG_TCG */
29 static const int tcg_target_reg_alloc_order[] = {
30 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
31 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
32 TCG_REG_X28, /* we will reserve this for guest_base if configured */
34 TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
35 TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
36 TCG_REG_X16, TCG_REG_X17,
38 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
39 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
41 /* X18 reserved by system */
42 /* X19 reserved for AREG0 */
43 /* X29 reserved as fp */
44 /* X30 reserved as temporary */
47 static const int tcg_target_call_iarg_regs[8] = {
48 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
49 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
51 static const int tcg_target_call_oarg_regs[1] = {
55 #define TCG_REG_TMP TCG_REG_X30
57 #ifndef CONFIG_SOFTMMU
58 /* Note that XZR cannot be encoded in the address base register slot,
59 as that actually encodes SP. So if we need to zero-extend the guest
60 address, via the address index register slot, we need to load even
61 a zero guest base into a register. */
62 #define USE_GUEST_BASE (guest_base != 0 || TARGET_LONG_BITS == 32)
63 #define TCG_REG_GUEST_BASE TCG_REG_X28
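/* The branch relocations below are expressed in instruction units: the
   26-bit and 19-bit immediate fields hold (target - code_ptr) counted in
   32-bit insns, so no extra shift by 2 is needed when depositing them.  */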
66 static inline void reloc_pc26(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
68 ptrdiff_t offset = target - code_ptr;
69 tcg_debug_assert(offset == sextract64(offset, 0, 26));
70 /* read instruction, mask away previous PC_REL26 parameter contents,
71 set the proper offset, then write back the instruction. */
72 *code_ptr = deposit32(*code_ptr, 0, 26, offset);
75 static inline void reloc_pc26_atomic(tcg_insn_unit *code_ptr,
76 tcg_insn_unit *target)
78 ptrdiff_t offset = target - code_ptr;
80 tcg_debug_assert(offset == sextract64(offset, 0, 26));
81 /* read instruction, mask away previous PC_REL26 parameter contents,
82 set the proper offset, then write back the instruction. */
83 insn = atomic_read(code_ptr);
84 atomic_set(code_ptr, deposit32(insn, 0, 26, offset));
87 static inline void reloc_pc19(tcg_insn_unit *code_ptr, tcg_insn_unit *target)
89 ptrdiff_t offset = target - code_ptr;
90 tcg_debug_assert(offset == sextract64(offset, 0, 19));
91 *code_ptr = deposit32(*code_ptr, 5, 19, offset);
94 static inline void patch_reloc(tcg_insn_unit *code_ptr, int type,
95 intptr_t value, intptr_t addend)
97 tcg_debug_assert(addend == 0);
99 case R_AARCH64_JUMP26:
100 case R_AARCH64_CALL26:
101 reloc_pc26(code_ptr, (tcg_insn_unit *)value);
103 case R_AARCH64_CONDBR19:
104 reloc_pc19(code_ptr, (tcg_insn_unit *)value);
111 #define TCG_CT_CONST_AIMM 0x100
112 #define TCG_CT_CONST_LIMM 0x200
113 #define TCG_CT_CONST_ZERO 0x400
114 #define TCG_CT_CONST_MONE 0x800
116 /* parse target specific constraints */
117 static const char *target_parse_constraint(TCGArgConstraint *ct,
118 const char *ct_str, TCGType type)
122 ct->ct |= TCG_CT_REG;
123 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
125 case 'l': /* qemu_ld / qemu_st address, data_reg */
126 ct->ct |= TCG_CT_REG;
127 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
128 #ifdef CONFIG_SOFTMMU
129 /* x0 and x1 will be overwritten when reading the tlb entry,
130 and x2 and x3 for the helper arguments; better to avoid using them. */
131 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
134 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
137 case 'A': /* Valid for arithmetic immediate (positive or negative). */
138 ct->ct |= TCG_CT_CONST_AIMM;
140 case 'L': /* Valid for logical immediate. */
141 ct->ct |= TCG_CT_CONST_LIMM;
143 case 'M': /* minus one */
144 ct->ct |= TCG_CT_CONST_MONE;
147 ct->ct |= TCG_CT_CONST_ZERO;
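/* Match the arithmetic immediate form: a 12-bit unsigned value, optionally
   shifted left by 12.  For example 0x123 and 0x123000 are representable,
   while 0x123456 is not.  */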
155 static inline bool is_aimm(uint64_t val)
157 return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
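/* Only the simplified subset of logical immediates is accepted below.
   For example 0x0000fff0 and its complement 0xffffffffffff000f pass, while
   a replicated pattern such as 0x00ff00ff (architecturally a valid logical
   immediate) is deliberately rejected.  */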
160 static inline bool is_limm(uint64_t val)
162 /* Taking a simplified view of the logical immediates for now, ignoring
163 the replication that can happen across the field. Match bit patterns
167 of the forms 0....01....1 and 0..01..10..0, and their inverses. */
169 /* Make things easier below, by testing the form with msb clear. */
170 if ((int64_t)val < 0) {
177 return (val & (val - 1)) == 0;
180 static int tcg_target_const_match(tcg_target_long val, TCGType type,
181 const TCGArgConstraint *arg_ct)
185 if (ct & TCG_CT_CONST) {
188 if (type == TCG_TYPE_I32) {
191 if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
194 if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
197 if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
200 if ((ct & TCG_CT_CONST_MONE) && val == -1) {
207 enum aarch64_cond_code {
210 COND_CS = 0x2, /* Unsigned greater or equal */
211 COND_HS = COND_CS, /* ALIAS greater or equal */
212 COND_CC = 0x3, /* Unsigned less than */
213 COND_LO = COND_CC, /* ALIAS Lower */
214 COND_MI = 0x4, /* Negative */
215 COND_PL = 0x5, /* Zero or greater */
216 COND_VS = 0x6, /* Overflow */
217 COND_VC = 0x7, /* No overflow */
218 COND_HI = 0x8, /* Unsigned greater than */
219 COND_LS = 0x9, /* Unsigned less or equal */
225 COND_NV = 0xf, /* behaves like COND_AL here */
228 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
229 [TCG_COND_EQ] = COND_EQ,
230 [TCG_COND_NE] = COND_NE,
231 [TCG_COND_LT] = COND_LT,
232 [TCG_COND_GE] = COND_GE,
233 [TCG_COND_LE] = COND_LE,
234 [TCG_COND_GT] = COND_GT,
236 [TCG_COND_LTU] = COND_LO,
237 [TCG_COND_GTU] = COND_HI,
238 [TCG_COND_GEU] = COND_HS,
239 [TCG_COND_LEU] = COND_LS,
243 LDST_ST = 0, /* store */
244 LDST_LD = 1, /* load */
245 LDST_LD_S_X = 2, /* load and sign-extend into Xt */
246 LDST_LD_S_W = 3, /* load and sign-extend into Wt */
249 /* We encode the format of the insn into the beginning of the name, so that
250 we can have the preprocessor help "typecheck" the insn vs the output
251 function. Arm didn't provide us with nice names for the formats, so we
252 use the section number of the architecture reference manual in which the
253 instruction group is described. */
255 /* Compare and branch (immediate). */
256 I3201_CBZ = 0x34000000,
257 I3201_CBNZ = 0x35000000,
259 /* Conditional branch (immediate). */
260 I3202_B_C = 0x54000000,
262 /* Unconditional branch (immediate). */
263 I3206_B = 0x14000000,
264 I3206_BL = 0x94000000,
266 /* Unconditional branch (register). */
267 I3207_BR = 0xd61f0000,
268 I3207_BLR = 0xd63f0000,
269 I3207_RET = 0xd65f0000,
271 /* Load literal, for loading an address at a pc-relative offset */
272 I3305_LDR = 0x58000000,
273 /* Load/store register. Described here as 3.3.12, but the helper
274 that emits them can transform to 3.3.10 or 3.3.13. */
275 I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
276 I3312_STRH = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
277 I3312_STRW = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
278 I3312_STRX = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
280 I3312_LDRB = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
281 I3312_LDRH = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
282 I3312_LDRW = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
283 I3312_LDRX = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
285 I3312_LDRSBW = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
286 I3312_LDRSHW = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
288 I3312_LDRSBX = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
289 I3312_LDRSHX = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
290 I3312_LDRSWX = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
292 I3312_TO_I3310 = 0x00200800,
293 I3312_TO_I3313 = 0x01000000,
295 /* Load/store register pair instructions. */
296 I3314_LDP = 0x28400000,
297 I3314_STP = 0x28000000,
299 /* Add/subtract immediate instructions. */
300 I3401_ADDI = 0x11000000,
301 I3401_ADDSI = 0x31000000,
302 I3401_SUBI = 0x51000000,
303 I3401_SUBSI = 0x71000000,
305 /* Bitfield instructions. */
306 I3402_BFM = 0x33000000,
307 I3402_SBFM = 0x13000000,
308 I3402_UBFM = 0x53000000,
310 /* Extract instruction. */
311 I3403_EXTR = 0x13800000,
313 /* Logical immediate instructions. */
314 I3404_ANDI = 0x12000000,
315 I3404_ORRI = 0x32000000,
316 I3404_EORI = 0x52000000,
318 /* Move wide immediate instructions. */
319 I3405_MOVN = 0x12800000,
320 I3405_MOVZ = 0x52800000,
321 I3405_MOVK = 0x72800000,
323 /* PC relative addressing instructions. */
324 I3406_ADR = 0x10000000,
325 I3406_ADRP = 0x90000000,
327 /* Add/subtract shifted register instructions (without a shift). */
328 I3502_ADD = 0x0b000000,
329 I3502_ADDS = 0x2b000000,
330 I3502_SUB = 0x4b000000,
331 I3502_SUBS = 0x6b000000,
333 /* Add/subtract shifted register instructions (with a shift). */
334 I3502S_ADD_LSL = I3502_ADD,
336 /* Add/subtract with carry instructions. */
337 I3503_ADC = 0x1a000000,
338 I3503_SBC = 0x5a000000,
340 /* Conditional select instructions. */
341 I3506_CSEL = 0x1a800000,
342 I3506_CSINC = 0x1a800400,
343 I3506_CSINV = 0x5a800000,
344 I3506_CSNEG = 0x5a800400,
346 /* Data-processing (1 source) instructions. */
347 I3507_CLZ = 0x5ac01000,
348 I3507_RBIT = 0x5ac00000,
349 I3507_REV16 = 0x5ac00400,
350 I3507_REV32 = 0x5ac00800,
351 I3507_REV64 = 0x5ac00c00,
353 /* Data-processing (2 source) instructions. */
354 I3508_LSLV = 0x1ac02000,
355 I3508_LSRV = 0x1ac02400,
356 I3508_ASRV = 0x1ac02800,
357 I3508_RORV = 0x1ac02c00,
358 I3508_SMULH = 0x9b407c00,
359 I3508_UMULH = 0x9bc07c00,
360 I3508_UDIV = 0x1ac00800,
361 I3508_SDIV = 0x1ac00c00,
363 /* Data-processing (3 source) instructions. */
364 I3509_MADD = 0x1b000000,
365 I3509_MSUB = 0x1b008000,
367 /* Logical shifted register instructions (without a shift). */
368 I3510_AND = 0x0a000000,
369 I3510_BIC = 0x0a200000,
370 I3510_ORR = 0x2a000000,
371 I3510_ORN = 0x2a200000,
372 I3510_EOR = 0x4a000000,
373 I3510_EON = 0x4a200000,
374 I3510_ANDS = 0x6a000000,
377 /* System instructions. */
378 DMB_ISH = 0xd50338bf,
383 static inline uint32_t tcg_in32(TCGContext *s)
385 uint32_t v = *(uint32_t *)s->code_ptr;
389 /* Emit an opcode with "type-checking" of the format. */
390 #define tcg_out_insn(S, FMT, OP, ...) \
391 glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
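/* For example, tcg_out_insn(s, 3401, ADDI, ext, rd, rn, imm) expands to
   tcg_out_insn_3401(s, I3401_ADDI, ext, rd, rn, imm); naming an opcode
   that does not belong to the format fails to compile.  */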
393 static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
395 tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
398 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
399 TCGReg rt, int imm19)
401 tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
404 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
405 TCGCond c, int imm19)
407 tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
410 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
412 tcg_out32(s, insn | (imm26 & 0x03ffffff));
415 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
417 tcg_out32(s, insn | rn << 5);
420 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
421 TCGReg r1, TCGReg r2, TCGReg rn,
422 tcg_target_long ofs, bool pre, bool w)
424 insn |= 1u << 31; /* ext */
428 tcg_debug_assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
429 insn |= (ofs & (0x7f << 3)) << (15 - 3);
431 tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
434 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
435 TCGReg rd, TCGReg rn, uint64_t aimm)
438 tcg_debug_assert((aimm & 0xfff) == 0);
440 tcg_debug_assert(aimm <= 0xfff);
441 aimm |= 1 << 12; /* apply LSL 12 */
443 tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
446 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
447 (Logical immediate). Both insn groups have N, IMMR and IMMS fields
448 that feed the DecodeBitMasks pseudo function. */
449 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
450 TCGReg rd, TCGReg rn, int n, int immr, int imms)
452 tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
456 #define tcg_out_insn_3404 tcg_out_insn_3402
458 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
459 TCGReg rd, TCGReg rn, TCGReg rm, int imms)
461 tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
465 /* This function is used for the Move (wide immediate) instruction group.
466 Note that SHIFT is a full shift count, not the 2 bit HW field. */
467 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
468 TCGReg rd, uint16_t half, unsigned shift)
470 tcg_debug_assert((shift & ~0x30) == 0);
471 tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
474 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
475 TCGReg rd, int64_t disp)
477 tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
480 /* This function is for 3.5.2 (Add/subtract shifted register), for
481 the rare occasion when we actually want to supply a shift amount. */
482 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
483 TCGType ext, TCGReg rd, TCGReg rn,
486 tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
489 /* This function is for 3.5.2 (Add/subtract shifted register),
490 and 3.5.10 (Logical shifted register), for the vast majority of cases
491 when we don't want to apply a shift. Thus it can also be used for
492 3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source). */
493 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
494 TCGReg rd, TCGReg rn, TCGReg rm)
496 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
499 #define tcg_out_insn_3503 tcg_out_insn_3502
500 #define tcg_out_insn_3508 tcg_out_insn_3502
501 #define tcg_out_insn_3510 tcg_out_insn_3502
503 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
504 TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
506 tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
507 | tcg_cond_to_aarch64[c] << 12);
510 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
511 TCGReg rd, TCGReg rn)
513 tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
516 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
517 TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
519 tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
522 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
523 TCGReg rd, TCGReg base, TCGType ext,
526 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
527 tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 |
528 0x4000 | ext << 13 | base << 5 | rd);
531 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
532 TCGReg rd, TCGReg rn, intptr_t offset)
534 tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
537 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
538 TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
540 /* Note the AArch64Insn constants above are for C3.3.12. Adjust. */
541 tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
544 /* Register to register move using ORR (shifted register with no shift). */
545 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
547 tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
550 /* Register to register move using ADDI (move to/from SP). */
551 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
553 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
556 /* This function is used for the Logical (immediate) instruction group.
557 The value of LIMM must satisfy IS_LIMM. See the comment above about
558 only supporting simplified logical immediates. */
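/* For example, limm = 0xff (the 0....01....1 form) yields r = 0 and c = 7
   below: eight consecutive ones, rotated by zero.  */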
559 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
560 TCGReg rd, TCGReg rn, uint64_t limm)
564 tcg_debug_assert(is_limm(limm));
569 r = 0; /* form 0....01....1 */
570 c = ctz64(~limm) - 1;
572 r = clz64(~limm); /* form 1..10..01..1 */
576 r = 64 - l; /* form 1....10....0 or 0..01..10..0 */
579 if (ext == TCG_TYPE_I32) {
584 tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
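/* Materialize an arbitrary constant into RD.  As an illustrative sketch
   (ignoring the ADR/ADRP shortcuts for values near the PC), the 64-bit
   constant 0x123400005678 is built as
       movz rd, #0x5678
       movk rd, #0x1234, lsl #32
   while a small negative value such as -2 needs only a single movn.  */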
587 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
588 tcg_target_long value)
590 int i, wantinv, shift;
591 tcg_target_long svalue = value;
592 tcg_target_long ivalue = ~value;
594 /* For 32-bit values, discard potential garbage in value. For 64-bit
595 values within [2**31, 2**32-1], we can create smaller sequences by
596 interpreting this as a negative 32-bit number, while ensuring that
597 the high 32 bits are cleared by setting SF=0. */
598 if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
599 svalue = (int32_t)value;
600 value = (uint32_t)value;
601 ivalue = (uint32_t)ivalue;
605 /* Speed things up by handling the common case of small positive
606 and negative values specially. */
607 if ((value & ~0xffffull) == 0) {
608 tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
610 } else if ((ivalue & ~0xffffull) == 0) {
611 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
615 /* Check for bitfield immediates. For the benefit of 32-bit quantities,
616 use the sign-extended value. That lets us match rotated values such
617 as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
618 if (is_limm(svalue)) {
619 tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
623 /* Look for host pointer values within 4G of the PC. This happens
624 often when loading pointers to QEMU's own data structures. */
625 if (type == TCG_TYPE_I64) {
626 tcg_target_long disp = value - (intptr_t)s->code_ptr;
627 if (disp == sextract64(disp, 0, 21)) {
628 tcg_out_insn(s, 3406, ADR, rd, disp);
631 disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
632 if (disp == sextract64(disp, 0, 21)) {
633 tcg_out_insn(s, 3406, ADRP, rd, disp);
635 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
641 /* Would it take fewer insns to begin with MOVN? For the value and its
642 inverse, count the number of 16-bit lanes that are 0. */
643 for (i = wantinv = 0; i < 64; i += 16) {
644 tcg_target_long mask = 0xffffull << i;
645 wantinv -= ((value & mask) == 0);
646 wantinv += ((ivalue & mask) == 0);
650 /* Find the lowest lane that is not 0x0000. */
651 shift = ctz64(value) & (63 & -16);
652 tcg_out_insn(s, 3405, MOVZ, type, rd, value >> shift, shift);
653 /* Clear out the lane that we just set. */
654 value &= ~(0xffffUL << shift);
655 /* Iterate until all non-zero lanes have been processed. */
657 shift = ctz64(value) & (63 & -16);
658 tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
659 value &= ~(0xffffUL << shift);
662 /* Like above, but with the inverted value and MOVN to start. */
663 shift = ctz64(ivalue) & (63 & -16);
664 tcg_out_insn(s, 3405, MOVN, type, rd, ivalue >> shift, shift);
665 ivalue &= ~(0xffffUL << shift);
667 shift = ctz64(ivalue) & (63 & -16);
668 /* Provide MOVK with the non-inverted value. */
669 tcg_out_insn(s, 3405, MOVK, type, rd, ~(ivalue >> shift), shift);
670 ivalue &= ~(0xffffUL << shift);
675 /* Define something more legible for general use. */
676 #define tcg_out_ldst_r tcg_out_insn_3310
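/* For example, an 8-byte load at offset 0x18 uses the scaled form with
   uimm12 = 3, an offset of -16 uses the unscaled signed 9-bit form, and an
   out-of-range offset is first materialized into TCG_REG_TMP and accessed
   via the register-offset form.  */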
678 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
679 TCGReg rd, TCGReg rn, intptr_t offset)
681 TCGMemOp size = (uint32_t)insn >> 30;
683 /* If the offset is naturally aligned and in range, then we can
684 use the scaled uimm12 encoding */
685 if (offset >= 0 && !(offset & ((1 << size) - 1))) {
686 uintptr_t scaled_uimm = offset >> size;
687 if (scaled_uimm <= 0xfff) {
688 tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
693 /* Small signed offsets can use the unscaled encoding. */
694 if (offset >= -256 && offset < 256) {
695 tcg_out_insn_3312(s, insn, rd, rn, offset);
699 /* Worst-case scenario, move offset to temp register, use reg offset. */
700 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
701 tcg_out_ldst_r(s, insn, rd, rn, TCG_TYPE_I64, TCG_REG_TMP);
704 static inline void tcg_out_mov(TCGContext *s,
705 TCGType type, TCGReg ret, TCGReg arg)
708 tcg_out_movr(s, type, ret, arg);
712 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
713 TCGReg arg1, intptr_t arg2)
715 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
719 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
720 TCGReg arg1, intptr_t arg2)
722 tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
726 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
727 TCGReg base, intptr_t ofs)
730 tcg_out_st(s, type, TCG_REG_XZR, base, ofs);
736 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
737 TCGReg rn, unsigned int a, unsigned int b)
739 tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
742 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
743 TCGReg rn, unsigned int a, unsigned int b)
745 tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
748 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
749 TCGReg rn, unsigned int a, unsigned int b)
751 tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
754 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
755 TCGReg rn, TCGReg rm, unsigned int a)
757 tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
760 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
761 TCGReg rd, TCGReg rn, unsigned int m)
763 int bits = ext ? 64 : 32;
765 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
768 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
769 TCGReg rd, TCGReg rn, unsigned int m)
771 int max = ext ? 63 : 31;
772 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
775 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
776 TCGReg rd, TCGReg rn, unsigned int m)
778 int max = ext ? 63 : 31;
779 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
782 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
783 TCGReg rd, TCGReg rn, unsigned int m)
785 int max = ext ? 63 : 31;
786 tcg_out_extr(s, ext, rd, rn, rn, m & max);
789 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
790 TCGReg rd, TCGReg rn, unsigned int m)
792 int bits = ext ? 64 : 32;
794 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
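/* Deposit RN into RD at LSB for WIDTH bits, via the BFM (BFI alias)
   encoding.  E.g. a 32-bit deposit at lsb 8 of width 8 computes
   a = (32 - 8) & 31 = 24 and b = 7, i.e. "bfi wd, wn, #8, #8".  */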
797 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
798 TCGReg rn, unsigned lsb, unsigned width)
800 unsigned size = ext ? 64 : 32;
801 unsigned a = (size - lsb) & (size - 1);
802 unsigned b = width - 1;
803 tcg_out_bfm(s, ext, rd, rn, a, b);
806 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
807 tcg_target_long b, bool const_b)
810 /* Using CMP or CMN aliases. */
812 tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
814 tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
817 /* Using CMP alias SUBS wzr, Wn, Wm */
818 tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
822 static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
824 ptrdiff_t offset = target - s->code_ptr;
825 tcg_debug_assert(offset == sextract64(offset, 0, 26));
826 tcg_out_insn(s, 3206, B, offset);
829 static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
831 ptrdiff_t offset = target - s->code_ptr;
832 if (offset == sextract64(offset, 0, 26)) {
833 tcg_out_insn(s, 3206, BL, offset);
835 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
836 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
840 static inline void tcg_out_goto_noaddr(TCGContext *s)
842 /* We take care here not to modify the branch target by reading from
843 the buffer. This ensures that caches and memory are kept coherent during
844 retranslation. Mask away possible garbage in the high bits for the
845 first translation, while keeping the offset bits for retranslation. */
846 uint32_t old = tcg_in32(s);
847 tcg_out_insn(s, 3206, B, old);
850 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
852 /* See comments in tcg_out_goto_noaddr. */
853 uint32_t old = tcg_in32(s) >> 5;
854 tcg_out_insn(s, 3202, B_C, c, old);
857 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
859 tcg_out_insn(s, 3207, BLR, reg);
862 static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
864 ptrdiff_t offset = target - s->code_ptr;
865 if (offset == sextract64(offset, 0, 26)) {
866 tcg_out_insn(s, 3206, BL, offset);
868 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
869 tcg_out_callr(s, TCG_REG_TMP);
873 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
876 tcg_insn_unit i1, i2;
877 TCGType rt = TCG_TYPE_I64;
878 TCGReg rd = TCG_REG_TMP;
881 ptrdiff_t offset = addr - jmp_addr;
883 if (offset == sextract64(offset, 0, 26)) {
884 i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
887 offset = (addr >> 12) - (jmp_addr >> 12);
890 i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
892 i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
894 pair = (uint64_t)i2 << 32 | i1;
895 atomic_set((uint64_t *)jmp_addr, pair);
896 flush_icache_range(jmp_addr, jmp_addr + 8);
899 static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
902 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, l, 0);
903 tcg_out_goto_noaddr(s);
905 tcg_out_goto(s, l->u.value_ptr);
909 static void tcg_out_brcond(TCGContext *s, TCGType ext, TCGCond c, TCGArg a,
910 TCGArg b, bool b_const, TCGLabel *l)
915 if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
919 tcg_out_cmp(s, ext, a, b, b_const);
923 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, l, 0);
924 offset = tcg_in32(s) >> 5;
926 offset = l->u.value_ptr - s->code_ptr;
927 tcg_debug_assert(offset == sextract64(offset, 0, 19));
931 tcg_out_insn(s, 3202, B_C, c, offset);
932 } else if (c == TCG_COND_EQ) {
933 tcg_out_insn(s, 3201, CBZ, ext, a, offset);
935 tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
939 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
941 tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
944 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
946 tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
949 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
951 tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
954 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
955 TCGReg rd, TCGReg rn)
957 /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
958 int bits = (8 << s_bits) - 1;
959 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
962 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
963 TCGReg rd, TCGReg rn)
965 /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
966 int bits = (8 << s_bits) - 1;
967 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
970 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
971 TCGReg rn, int64_t aimm)
974 tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
976 tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
980 static inline void tcg_out_addsub2(TCGContext *s, TCGType ext, TCGReg rl,
981 TCGReg rh, TCGReg al, TCGReg ah,
982 tcg_target_long bl, tcg_target_long bh,
983 bool const_bl, bool const_bh, bool sub)
988 if (rl == ah || (!const_bh && rl == bh)) {
994 if ((bl < 0) ^ sub) {
998 if (unlikely(al == TCG_REG_XZR)) {
999 /* ??? We want to allow al to be zero for the benefit of
1000 negation via subtraction. However, that leaves open the
1001 possibility of adding 0+const in the low part, and the
1002 immediate add instructions encode XSP not XZR. Don't try
1003 anything more elaborate here than loading another zero. */
1005 tcg_out_movi(s, ext, al, 0);
1007 tcg_out_insn_3401(s, insn, ext, rl, al, bl);
1009 tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
1014 /* Note that the only two constants we support are 0 and -1, and
1015 that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa. */
1016 if ((bh != 0) ^ sub) {
1023 tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
1025 tcg_out_mov(s, ext, orig_rl, rl);
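/* Emit the weakest barrier that satisfies the requested ordering: the
   range initializer first fills every slot with a full barrier, then the
   entries below relax the store-store-only case to DMB_ST and the cases
   that only need to order prior loads to DMB_LD.  */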
1028 static inline void tcg_out_mb(TCGContext *s, TCGArg a0)
1030 static const uint32_t sync[] = {
1031 [0 ... TCG_MO_ALL] = DMB_ISH | DMB_LD | DMB_ST,
1032 [TCG_MO_ST_ST] = DMB_ISH | DMB_ST,
1033 [TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1034 [TCG_MO_LD_ST] = DMB_ISH | DMB_LD,
1035 [TCG_MO_LD_ST | TCG_MO_LD_LD] = DMB_ISH | DMB_LD,
1037 tcg_out32(s, sync[a0 & TCG_MO_ALL]);
1040 static void tcg_out_cltz(TCGContext *s, TCGType ext, TCGReg d,
1041 TCGReg a0, TCGArg b, bool const_b, bool is_ctz)
1046 tcg_out_insn(s, 3507, RBIT, ext, a1, a0);
1048 if (const_b && b == (ext ? 64 : 32)) {
1049 tcg_out_insn(s, 3507, CLZ, ext, d, a1);
1051 AArch64Insn sel = I3506_CSEL;
1053 tcg_out_cmp(s, ext, a0, 0, 1);
1054 tcg_out_insn(s, 3507, CLZ, ext, TCG_REG_TMP, a1);
1060 } else if (b == 0) {
1063 tcg_out_movi(s, ext, d, b);
1067 tcg_out_insn_3506(s, sel, ext, d, TCG_REG_TMP, b, TCG_COND_NE);
1071 #ifdef CONFIG_SOFTMMU
1072 #include "tcg-ldst.inc.c"
1074 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
1075 * TCGMemOpIdx oi, uintptr_t ra)
1077 static void * const qemu_ld_helpers[16] = {
1078 [MO_UB] = helper_ret_ldub_mmu,
1079 [MO_LEUW] = helper_le_lduw_mmu,
1080 [MO_LEUL] = helper_le_ldul_mmu,
1081 [MO_LEQ] = helper_le_ldq_mmu,
1082 [MO_BEUW] = helper_be_lduw_mmu,
1083 [MO_BEUL] = helper_be_ldul_mmu,
1084 [MO_BEQ] = helper_be_ldq_mmu,
1087 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1088 * uintxx_t val, TCGMemOpIdx oi,
1091 static void * const qemu_st_helpers[16] = {
1092 [MO_UB] = helper_ret_stb_mmu,
1093 [MO_LEUW] = helper_le_stw_mmu,
1094 [MO_LEUL] = helper_le_stl_mmu,
1095 [MO_LEQ] = helper_le_stq_mmu,
1096 [MO_BEUW] = helper_be_stw_mmu,
1097 [MO_BEUL] = helper_be_stl_mmu,
1098 [MO_BEQ] = helper_be_stq_mmu,
1101 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, void *target)
1103 ptrdiff_t offset = tcg_pcrel_diff(s, target);
1104 tcg_debug_assert(offset == sextract64(offset, 0, 21));
1105 tcg_out_insn(s, 3406, ADR, rd, offset);
1108 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1110 TCGMemOpIdx oi = lb->oi;
1111 TCGMemOp opc = get_memop(oi);
1112 TCGMemOp size = opc & MO_SIZE;
1114 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1116 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1117 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1118 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
1119 tcg_out_adr(s, TCG_REG_X3, lb->raddr);
1120 tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1121 if (opc & MO_SIGN) {
1122 tcg_out_sxt(s, lb->type, size, lb->datalo_reg, TCG_REG_X0);
1124 tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1127 tcg_out_goto(s, lb->raddr);
1130 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1132 TCGMemOpIdx oi = lb->oi;
1133 TCGMemOp opc = get_memop(oi);
1134 TCGMemOp size = opc & MO_SIZE;
1136 reloc_pc19(lb->label_ptr[0], s->code_ptr);
1138 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
1139 tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1140 tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1141 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
1142 tcg_out_adr(s, TCG_REG_X4, lb->raddr);
1143 tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1144 tcg_out_goto(s, lb->raddr);
1147 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
1148 TCGType ext, TCGReg data_reg, TCGReg addr_reg,
1149 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1151 TCGLabelQemuLdst *label = new_ldst_label(s);
1153 label->is_ld = is_ld;
1156 label->datalo_reg = data_reg;
1157 label->addrlo_reg = addr_reg;
1158 label->raddr = raddr;
1159 label->label_ptr[0] = label_ptr;
1162 /* Load and compare a TLB entry, emitting the conditional jump to the
1163 slow path for the failure case, which will be patched later when finalizing
1164 the slow path. Generated code returns the host addend in X1,
1165 clobbers X0, X2, X3 and TMP. */
1166 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
1167 tcg_insn_unit **label_ptr, int mem_index,
1170 int tlb_offset = is_read ?
1171 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1172 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1173 unsigned a_bits = get_alignment_bits(opc);
1174 unsigned s_bits = opc & MO_SIZE;
1175 unsigned a_mask = (1u << a_bits) - 1;
1176 unsigned s_mask = (1u << s_bits) - 1;
1177 TCGReg base = TCG_AREG0, x3;
1180 /* For aligned accesses, we check the first byte and include the alignment
1181 bits within the address. For unaligned accesses, we check that we don't
1182 cross pages using the address of the last byte of the access. */
1183 if (a_bits >= s_bits) {
1186 tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
1187 TCG_REG_X3, addr_reg, s_mask - a_mask);
1190 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1192 /* Extract the TLB index from the address into X0.
1193 X0<CPU_TLB_BITS:0> =
1194 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1195 tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1196 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1198 /* Store the page mask part of the address into X3. */
1199 tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64,
1200 TCG_REG_X3, x3, tlb_mask);
1202 /* Add any "high bits" from the tlb offset to the env address into X2,
1203 to take advantage of the LSL12 form of the ADDI instruction.
1204 X2 = env + (tlb_offset & 0xfff000) */
1205 if (tlb_offset & 0xfff000) {
1206 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1207 tlb_offset & 0xfff000);
1211 /* Merge the tlb index contribution into X2.
1212 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1213 tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1214 TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1216 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1217 X0 = load [X2 + (tlb_offset & 0x000fff)] */
1218 tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1219 TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1221 /* Load the tlb addend. Do that early to avoid stalling.
1222 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1223 tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1224 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1225 (is_read ? offsetof(CPUTLBEntry, addr_read)
1226 : offsetof(CPUTLBEntry, addr_write)));
1228 /* Perform the address comparison. */
1229 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1231 /* If not equal, we jump to the slow path. */
1232 *label_ptr = s->code_ptr;
1233 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1236 #endif /* CONFIG_SOFTMMU */
1238 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop, TCGType ext,
1239 TCGReg data_r, TCGReg addr_r,
1240 TCGType otype, TCGReg off_r)
1242 const TCGMemOp bswap = memop & MO_BSWAP;
1244 switch (memop & MO_SSIZE) {
1246 tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, otype, off_r);
1249 tcg_out_ldst_r(s, ext ? I3312_LDRSBX : I3312_LDRSBW,
1250 data_r, addr_r, otype, off_r);
1253 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1255 tcg_out_rev16(s, data_r, data_r);
1260 tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, otype, off_r);
1261 tcg_out_rev16(s, data_r, data_r);
1262 tcg_out_sxt(s, ext, MO_16, data_r, data_r);
1264 tcg_out_ldst_r(s, (ext ? I3312_LDRSHX : I3312_LDRSHW),
1265 data_r, addr_r, otype, off_r);
1269 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1271 tcg_out_rev32(s, data_r, data_r);
1276 tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, otype, off_r);
1277 tcg_out_rev32(s, data_r, data_r);
1278 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1280 tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, otype, off_r);
1284 tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, otype, off_r);
1286 tcg_out_rev64(s, data_r, data_r);
1294 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1295 TCGReg data_r, TCGReg addr_r,
1296 TCGType otype, TCGReg off_r)
1298 const TCGMemOp bswap = memop & MO_BSWAP;
1300 switch (memop & MO_SIZE) {
1302 tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, otype, off_r);
1305 if (bswap && data_r != TCG_REG_XZR) {
1306 tcg_out_rev16(s, TCG_REG_TMP, data_r);
1307 data_r = TCG_REG_TMP;
1309 tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, otype, off_r);
1312 if (bswap && data_r != TCG_REG_XZR) {
1313 tcg_out_rev32(s, TCG_REG_TMP, data_r);
1314 data_r = TCG_REG_TMP;
1316 tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, otype, off_r);
1319 if (bswap && data_r != TCG_REG_XZR) {
1320 tcg_out_rev64(s, TCG_REG_TMP, data_r);
1321 data_r = TCG_REG_TMP;
1323 tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, otype, off_r);
1330 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1331 TCGMemOpIdx oi, TCGType ext)
1333 TCGMemOp memop = get_memop(oi);
1334 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1335 #ifdef CONFIG_SOFTMMU
1336 unsigned mem_index = get_mmuidx(oi);
1337 tcg_insn_unit *label_ptr;
1339 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
1340 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1341 TCG_REG_X1, otype, addr_reg);
1342 add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
1343 s->code_ptr, label_ptr);
1344 #else /* !CONFIG_SOFTMMU */
1345 if (USE_GUEST_BASE) {
1346 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1347 TCG_REG_GUEST_BASE, otype, addr_reg);
1349 tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
1350 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1352 #endif /* CONFIG_SOFTMMU */
1355 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1358 TCGMemOp memop = get_memop(oi);
1359 const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
1360 #ifdef CONFIG_SOFTMMU
1361 unsigned mem_index = get_mmuidx(oi);
1362 tcg_insn_unit *label_ptr;
1364 tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
1365 tcg_out_qemu_st_direct(s, memop, data_reg,
1366 TCG_REG_X1, otype, addr_reg);
1367 add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
1368 data_reg, addr_reg, s->code_ptr, label_ptr);
1369 #else /* !CONFIG_SOFTMMU */
1370 if (USE_GUEST_BASE) {
1371 tcg_out_qemu_st_direct(s, memop, data_reg,
1372 TCG_REG_GUEST_BASE, otype, addr_reg);
1374 tcg_out_qemu_st_direct(s, memop, data_reg,
1375 addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
1377 #endif /* CONFIG_SOFTMMU */
1380 static tcg_insn_unit *tb_ret_addr;
1382 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1383 const TCGArg args[TCG_MAX_OP_ARGS],
1384 const int const_args[TCG_MAX_OP_ARGS])
1386 /* 99% of the time, we can signal the use of extension registers
1387 by looking to see if the opcode handles 64-bit data. */
1388 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1390 /* Hoist the loads of the most common arguments. */
1391 TCGArg a0 = args[0];
1392 TCGArg a1 = args[1];
1393 TCGArg a2 = args[2];
1394 int c2 = const_args[2];
1396 /* Some operands are defined with "rZ" constraint, a register or
1397 the zero register. These need not actually test args[I] == 0. */
1398 #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1401 case INDEX_op_exit_tb:
1402 /* Reuse the zeroing that exists for goto_ptr. */
1404 tcg_out_goto_long(s, s->code_gen_epilogue);
1406 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1407 tcg_out_goto_long(s, tb_ret_addr);
1411 case INDEX_op_goto_tb:
1412 if (s->tb_jmp_insn_offset != NULL) {
1413 /* TCG_TARGET_HAS_direct_jump */
1414 /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
1415 write can be used to patch the target address. */
1416 if ((uintptr_t)s->code_ptr & 7) {
1419 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1420 /* actual branch destination will be patched by
1421 tb_target_set_jmp_target later. */
1422 tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
1423 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
1425 /* !TCG_TARGET_HAS_direct_jump */
1426 tcg_debug_assert(s->tb_jmp_target_addr != NULL);
1427 intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
1428 tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
1430 tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
1431 s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
1434 case INDEX_op_goto_ptr:
1435 tcg_out_insn(s, 3207, BR, a0);
1439 tcg_out_goto_label(s, arg_label(a0));
1442 case INDEX_op_ld8u_i32:
1443 case INDEX_op_ld8u_i64:
1444 tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1446 case INDEX_op_ld8s_i32:
1447 tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1449 case INDEX_op_ld8s_i64:
1450 tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1452 case INDEX_op_ld16u_i32:
1453 case INDEX_op_ld16u_i64:
1454 tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1456 case INDEX_op_ld16s_i32:
1457 tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1459 case INDEX_op_ld16s_i64:
1460 tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1462 case INDEX_op_ld_i32:
1463 case INDEX_op_ld32u_i64:
1464 tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1466 case INDEX_op_ld32s_i64:
1467 tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1469 case INDEX_op_ld_i64:
1470 tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1473 case INDEX_op_st8_i32:
1474 case INDEX_op_st8_i64:
1475 tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1477 case INDEX_op_st16_i32:
1478 case INDEX_op_st16_i64:
1479 tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1481 case INDEX_op_st_i32:
1482 case INDEX_op_st32_i64:
1483 tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1485 case INDEX_op_st_i64:
1486 tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1489 case INDEX_op_add_i32:
1492 case INDEX_op_add_i64:
1494 tcg_out_addsubi(s, ext, a0, a1, a2);
1496 tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1500 case INDEX_op_sub_i32:
1503 case INDEX_op_sub_i64:
1505 tcg_out_addsubi(s, ext, a0, a1, -a2);
1507 tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1511 case INDEX_op_neg_i64:
1512 case INDEX_op_neg_i32:
1513 tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1516 case INDEX_op_and_i32:
1519 case INDEX_op_and_i64:
1521 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1523 tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1527 case INDEX_op_andc_i32:
1530 case INDEX_op_andc_i64:
1532 tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1534 tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1538 case INDEX_op_or_i32:
1541 case INDEX_op_or_i64:
1543 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1545 tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1549 case INDEX_op_orc_i32:
1552 case INDEX_op_orc_i64:
1554 tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1556 tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1560 case INDEX_op_xor_i32:
1563 case INDEX_op_xor_i64:
1565 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1567 tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1571 case INDEX_op_eqv_i32:
1574 case INDEX_op_eqv_i64:
1576 tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1578 tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1582 case INDEX_op_not_i64:
1583 case INDEX_op_not_i32:
1584 tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1587 case INDEX_op_mul_i64:
1588 case INDEX_op_mul_i32:
1589 tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1592 case INDEX_op_div_i64:
1593 case INDEX_op_div_i32:
1594 tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1596 case INDEX_op_divu_i64:
1597 case INDEX_op_divu_i32:
1598 tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1601 case INDEX_op_rem_i64:
1602 case INDEX_op_rem_i32:
1603 tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1604 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1606 case INDEX_op_remu_i64:
1607 case INDEX_op_remu_i32:
1608 tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1609 tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1612 case INDEX_op_shl_i64:
1613 case INDEX_op_shl_i32:
1615 tcg_out_shl(s, ext, a0, a1, a2);
1617 tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1621 case INDEX_op_shr_i64:
1622 case INDEX_op_shr_i32:
1624 tcg_out_shr(s, ext, a0, a1, a2);
1626 tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1630 case INDEX_op_sar_i64:
1631 case INDEX_op_sar_i32:
1633 tcg_out_sar(s, ext, a0, a1, a2);
1635 tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1639 case INDEX_op_rotr_i64:
1640 case INDEX_op_rotr_i32:
1642 tcg_out_rotr(s, ext, a0, a1, a2);
1644 tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1648 case INDEX_op_rotl_i64:
1649 case INDEX_op_rotl_i32:
1651 tcg_out_rotl(s, ext, a0, a1, a2);
1653 tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1654 tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1658 case INDEX_op_clz_i64:
1659 case INDEX_op_clz_i32:
1660 tcg_out_cltz(s, ext, a0, a1, a2, c2, false);
1662 case INDEX_op_ctz_i64:
1663 case INDEX_op_ctz_i32:
1664 tcg_out_cltz(s, ext, a0, a1, a2, c2, true);
1667 case INDEX_op_brcond_i32:
1670 case INDEX_op_brcond_i64:
1671 tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], arg_label(args[3]));
1674 case INDEX_op_setcond_i32:
1677 case INDEX_op_setcond_i64:
1678 tcg_out_cmp(s, ext, a1, a2, c2);
1679 /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond). */
1680 tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1681 TCG_REG_XZR, tcg_invert_cond(args[3]));
1684 case INDEX_op_movcond_i32:
1687 case INDEX_op_movcond_i64:
1688 tcg_out_cmp(s, ext, a1, a2, c2);
1689 tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1692 case INDEX_op_qemu_ld_i32:
1693 case INDEX_op_qemu_ld_i64:
1694 tcg_out_qemu_ld(s, a0, a1, a2, ext);
1696 case INDEX_op_qemu_st_i32:
1697 case INDEX_op_qemu_st_i64:
1698 tcg_out_qemu_st(s, REG0(0), a1, a2);
1701 case INDEX_op_bswap64_i64:
1702 tcg_out_rev64(s, a0, a1);
1704 case INDEX_op_bswap32_i64:
1705 case INDEX_op_bswap32_i32:
1706 tcg_out_rev32(s, a0, a1);
1708 case INDEX_op_bswap16_i64:
1709 case INDEX_op_bswap16_i32:
1710 tcg_out_rev16(s, a0, a1);
1713 case INDEX_op_ext8s_i64:
1714 case INDEX_op_ext8s_i32:
1715 tcg_out_sxt(s, ext, MO_8, a0, a1);
1717 case INDEX_op_ext16s_i64:
1718 case INDEX_op_ext16s_i32:
1719 tcg_out_sxt(s, ext, MO_16, a0, a1);
1721 case INDEX_op_ext_i32_i64:
1722 case INDEX_op_ext32s_i64:
1723 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1725 case INDEX_op_ext8u_i64:
1726 case INDEX_op_ext8u_i32:
1727 tcg_out_uxt(s, MO_8, a0, a1);
1729 case INDEX_op_ext16u_i64:
1730 case INDEX_op_ext16u_i32:
1731 tcg_out_uxt(s, MO_16, a0, a1);
1733 case INDEX_op_extu_i32_i64:
1734 case INDEX_op_ext32u_i64:
1735 tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1738 case INDEX_op_deposit_i64:
1739 case INDEX_op_deposit_i32:
1740 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1743 case INDEX_op_extract_i64:
1744 case INDEX_op_extract_i32:
1745 tcg_out_ubfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1748 case INDEX_op_sextract_i64:
1749 case INDEX_op_sextract_i32:
1750 tcg_out_sbfm(s, ext, a0, a1, a2, a2 + args[3] - 1);
1753 case INDEX_op_add2_i32:
1754 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1755 (int32_t)args[4], args[5], const_args[4],
1756 const_args[5], false);
1758 case INDEX_op_add2_i64:
1759 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1760 args[5], const_args[4], const_args[5], false);
1762 case INDEX_op_sub2_i32:
1763 tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1764 (int32_t)args[4], args[5], const_args[4],
1765 const_args[5], true);
1767 case INDEX_op_sub2_i64:
1768 tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1769 args[5], const_args[4], const_args[5], true);
1772 case INDEX_op_muluh_i64:
1773 tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1775 case INDEX_op_mulsh_i64:
1776 tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1783 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
1784 case INDEX_op_mov_i64:
1785 case INDEX_op_movi_i32: /* Always emitted via tcg_out_movi. */
1786 case INDEX_op_movi_i64:
1787 case INDEX_op_call: /* Always emitted via tcg_out_call. */
1795 static const TCGTargetOpDef aarch64_op_defs[] = {
1796 { INDEX_op_exit_tb, { } },
1797 { INDEX_op_goto_tb, { } },
1798 { INDEX_op_br, { } },
1799 { INDEX_op_goto_ptr, { "r" } },
1801 { INDEX_op_ld8u_i32, { "r", "r" } },
1802 { INDEX_op_ld8s_i32, { "r", "r" } },
1803 { INDEX_op_ld16u_i32, { "r", "r" } },
1804 { INDEX_op_ld16s_i32, { "r", "r" } },
1805 { INDEX_op_ld_i32, { "r", "r" } },
1806 { INDEX_op_ld8u_i64, { "r", "r" } },
1807 { INDEX_op_ld8s_i64, { "r", "r" } },
1808 { INDEX_op_ld16u_i64, { "r", "r" } },
1809 { INDEX_op_ld16s_i64, { "r", "r" } },
1810 { INDEX_op_ld32u_i64, { "r", "r" } },
1811 { INDEX_op_ld32s_i64, { "r", "r" } },
1812 { INDEX_op_ld_i64, { "r", "r" } },
1814 { INDEX_op_st8_i32, { "rZ", "r" } },
1815 { INDEX_op_st16_i32, { "rZ", "r" } },
1816 { INDEX_op_st_i32, { "rZ", "r" } },
1817 { INDEX_op_st8_i64, { "rZ", "r" } },
1818 { INDEX_op_st16_i64, { "rZ", "r" } },
1819 { INDEX_op_st32_i64, { "rZ", "r" } },
1820 { INDEX_op_st_i64, { "rZ", "r" } },
1822 { INDEX_op_add_i32, { "r", "r", "rA" } },
1823 { INDEX_op_add_i64, { "r", "r", "rA" } },
1824 { INDEX_op_sub_i32, { "r", "r", "rA" } },
1825 { INDEX_op_sub_i64, { "r", "r", "rA" } },
1826 { INDEX_op_mul_i32, { "r", "r", "r" } },
1827 { INDEX_op_mul_i64, { "r", "r", "r" } },
1828 { INDEX_op_div_i32, { "r", "r", "r" } },
1829 { INDEX_op_div_i64, { "r", "r", "r" } },
1830 { INDEX_op_divu_i32, { "r", "r", "r" } },
1831 { INDEX_op_divu_i64, { "r", "r", "r" } },
1832 { INDEX_op_rem_i32, { "r", "r", "r" } },
1833 { INDEX_op_rem_i64, { "r", "r", "r" } },
1834 { INDEX_op_remu_i32, { "r", "r", "r" } },
1835 { INDEX_op_remu_i64, { "r", "r", "r" } },
1836 { INDEX_op_and_i32, { "r", "r", "rL" } },
1837 { INDEX_op_and_i64, { "r", "r", "rL" } },
1838 { INDEX_op_or_i32, { "r", "r", "rL" } },
1839 { INDEX_op_or_i64, { "r", "r", "rL" } },
1840 { INDEX_op_xor_i32, { "r", "r", "rL" } },
1841 { INDEX_op_xor_i64, { "r", "r", "rL" } },
1842 { INDEX_op_andc_i32, { "r", "r", "rL" } },
1843 { INDEX_op_andc_i64, { "r", "r", "rL" } },
1844 { INDEX_op_orc_i32, { "r", "r", "rL" } },
1845 { INDEX_op_orc_i64, { "r", "r", "rL" } },
1846 { INDEX_op_eqv_i32, { "r", "r", "rL" } },
1847 { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1849 { INDEX_op_neg_i32, { "r", "r" } },
1850 { INDEX_op_neg_i64, { "r", "r" } },
1851 { INDEX_op_not_i32, { "r", "r" } },
1852 { INDEX_op_not_i64, { "r", "r" } },
1854 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1855 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1856 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1857 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1858 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1859 { INDEX_op_clz_i32, { "r", "r", "rAL" } },
1860 { INDEX_op_ctz_i32, { "r", "r", "rAL" } },
1861 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1862 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1863 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1864 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1865 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1866 { INDEX_op_clz_i64, { "r", "r", "rAL" } },
1867 { INDEX_op_ctz_i64, { "r", "r", "rAL" } },
1869 { INDEX_op_brcond_i32, { "r", "rA" } },
1870 { INDEX_op_brcond_i64, { "r", "rA" } },
1871 { INDEX_op_setcond_i32, { "r", "r", "rA" } },
1872 { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1873 { INDEX_op_movcond_i32, { "r", "r", "rA", "rZ", "rZ" } },
1874 { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1876 { INDEX_op_qemu_ld_i32, { "r", "l" } },
1877 { INDEX_op_qemu_ld_i64, { "r", "l" } },
1878 { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1879 { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1881 { INDEX_op_bswap16_i32, { "r", "r" } },
1882 { INDEX_op_bswap32_i32, { "r", "r" } },
1883 { INDEX_op_bswap16_i64, { "r", "r" } },
1884 { INDEX_op_bswap32_i64, { "r", "r" } },
1885 { INDEX_op_bswap64_i64, { "r", "r" } },
1887 { INDEX_op_ext8s_i32, { "r", "r" } },
1888 { INDEX_op_ext16s_i32, { "r", "r" } },
1889 { INDEX_op_ext8u_i32, { "r", "r" } },
1890 { INDEX_op_ext16u_i32, { "r", "r" } },
1892 { INDEX_op_ext8s_i64, { "r", "r" } },
1893 { INDEX_op_ext16s_i64, { "r", "r" } },
1894 { INDEX_op_ext32s_i64, { "r", "r" } },
1895 { INDEX_op_ext8u_i64, { "r", "r" } },
1896 { INDEX_op_ext16u_i64, { "r", "r" } },
1897 { INDEX_op_ext32u_i64, { "r", "r" } },
1898 { INDEX_op_ext_i32_i64, { "r", "r" } },
1899 { INDEX_op_extu_i32_i64, { "r", "r" } },
1901 { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1902 { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1903 { INDEX_op_extract_i32, { "r", "r" } },
1904 { INDEX_op_extract_i64, { "r", "r" } },
1905 { INDEX_op_sextract_i32, { "r", "r" } },
1906 { INDEX_op_sextract_i64, { "r", "r" } },
1908 { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1909 { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1910 { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1911 { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1913 { INDEX_op_muluh_i64, { "r", "r", "r" } },
1914 { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1916 { INDEX_op_mb, { } },
1920 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
1922 int i, n = ARRAY_SIZE(aarch64_op_defs);
1924 for (i = 0; i < n; ++i) {
1925 if (aarch64_op_defs[i].op == op) {
1926 return &aarch64_op_defs[i];
1932 static void tcg_target_init(TCGContext *s)
1934 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1935 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1937 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1938 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1939 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1940 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1941 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1942 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1943 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1944 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1945 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1946 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1947 (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1949 tcg_regset_clear(s->reserved_regs);
1950 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1951 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1952 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1953 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1956 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
1957 #define PUSH_SIZE ((30 - 19 + 1) * 8)
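/* That is 12 registers (x19..x30), 96 bytes.  */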
1959 #define FRAME_SIZE \
1961 + TCG_STATIC_CALL_ARGS_SIZE \
1962 + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1963 + TCG_TARGET_STACK_ALIGN - 1) \
1964 & ~(TCG_TARGET_STACK_ALIGN - 1))
1966 /* We're expecting a 2-byte uleb128 encoded value. */
1967 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1969 /* We're expecting to use a single ADDI insn. */
1970 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
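/* Stack frame established by the prologue below, from the outgoing SP up:
     [sp + 0]                            TCG_STATIC_CALL_ARGS_SIZE bytes
     [sp + TCG_STATIC_CALL_ARGS_SIZE]    CPU_TEMP_BUF_NLONGS locals
     (alignment padding)
     [sp + FRAME_SIZE - PUSH_SIZE]       saved fp, lr (the new fp points here)
     [sp + FRAME_SIZE - PUSH_SIZE + 16]  saved x19 .. x28  */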
1972 static void tcg_target_qemu_prologue(TCGContext *s)
1976 /* Push (FP, LR) and allocate space for all saved registers. */
1977 tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1978 TCG_REG_SP, -PUSH_SIZE, 1, 1);
1980 /* Set up frame pointer for canonical unwinding. */
1981 tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1983 /* Store callee-preserved regs x19..x28. */
1984 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1985 int ofs = (r - TCG_REG_X19 + 2) * 8;
1986 tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1989 /* Make stack space for TCG locals. */
1990 tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1991 FRAME_SIZE - PUSH_SIZE);
1993 /* Inform TCG about how to find TCG locals with register, offset, size. */
1994 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1995 CPU_TEMP_BUF_NLONGS * sizeof(long));
1997 #if !defined(CONFIG_SOFTMMU)
1998 if (USE_GUEST_BASE) {
1999 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, guest_base);
2000 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
2004 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
2005 tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
2008 * Return path for goto_ptr. Set return value to 0, a-la exit_tb,
2009 * and fall through to the rest of the epilogue.
2011 s->code_gen_epilogue = s->code_ptr;
2012 tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_X0, 0);
2015 tb_ret_addr = s->code_ptr;
2017 /* Remove TCG locals stack space. */
2018 tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
2019 FRAME_SIZE - PUSH_SIZE);
2021 /* Restore registers x19..x28. */
2022 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
2023 int ofs = (r - TCG_REG_X19 + 2) * 8;
2024 tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
2027 /* Pop (FP, LR), restore SP to previous frame. */
2028 tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
2029 TCG_REG_SP, PUSH_SIZE, 0, 1);
2030 tcg_out_insn(s, 3207, RET, TCG_REG_LR);
2035 uint8_t fde_def_cfa[4];
2036 uint8_t fde_reg_ofs[24];
2039 #define ELF_HOST_MACHINE EM_AARCH64
2041 static const DebugFrame debug_frame = {
2042 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
2045 .h.cie.code_align = 1,
2046 .h.cie.data_align = 0x78, /* sleb128 -8 */
2047 .h.cie.return_column = TCG_REG_LR,
2049 /* Total FDE size does not include the "len" member. */
2050 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
2053 12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ... */
2054 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
2058 0x80 + 28, 1, /* DW_CFA_offset, x28, -8 */
2059 0x80 + 27, 2, /* DW_CFA_offset, x27, -16 */
2060 0x80 + 26, 3, /* DW_CFA_offset, x26, -24 */
2061 0x80 + 25, 4, /* DW_CFA_offset, x25, -32 */
2062 0x80 + 24, 5, /* DW_CFA_offset, x24, -40 */
2063 0x80 + 23, 6, /* DW_CFA_offset, x23, -48 */
2064 0x80 + 22, 7, /* DW_CFA_offset, x22, -56 */
2065 0x80 + 21, 8, /* DW_CFA_offset, x21, -64 */
2066 0x80 + 20, 9, /* DW_CFA_offset, x20, -72 */
2067 0x80 + 19, 10, /* DW_CFA_offset, x19, -80 */
2068 0x80 + 30, 11, /* DW_CFA_offset, lr, -88 */
2069 0x80 + 29, 12, /* DW_CFA_offset, fp, -96 */
2073 void tcg_register_jit(void *buf, size_t buf_size)
2075 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));