2 * Initial TCG Implementation for aarch64
4 * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
5 * Written by Claudio Fontana
7 * This work is licensed under the terms of the GNU GPL, version 2 or
8 * (at your option) any later version.
10 * See the COPYING file in the top-level directory for details.
13 #include "tcg-be-ldst.h"
14 #include "qemu/bitops.h"
16 /* We're going to re-use TCGType to set the SF bit, which controls
17 the size of the operation performed. If we know the values match, it
18 makes things much cleaner. */
19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
23 "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
24 "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
25 "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
26 "%x24", "%x25", "%x26", "%x27", "%x28",
27 "%fp", /* frame pointer */
28 "%lr", /* link register */
29 "%sp", /* stack pointer */
33 #ifdef TARGET_WORDS_BIGENDIAN
34 #define TCG_LDST_BSWAP 1
36 #define TCG_LDST_BSWAP 0
39 static const int tcg_target_reg_alloc_order[] = {
40 TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
41 TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
42 TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
44 TCG_REG_X9, TCG_REG_X10, TCG_REG_X11, TCG_REG_X12,
45 TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
46 TCG_REG_X16, TCG_REG_X17,
48 TCG_REG_X18, TCG_REG_X19, /* will not use these, see tcg_target_init */
50 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
51 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
53 TCG_REG_X8, /* will not use, see tcg_target_init */
56 static const int tcg_target_call_iarg_regs[8] = {
57 TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
58 TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
60 static const int tcg_target_call_oarg_regs[1] = {
64 #define TCG_REG_TMP TCG_REG_X8
66 #ifndef CONFIG_SOFTMMU
67 # if defined(CONFIG_USE_GUEST_BASE)
68 # define TCG_REG_GUEST_BASE TCG_REG_X28
70 # define TCG_REG_GUEST_BASE TCG_REG_XZR
74 static inline void reloc_pc26(void *code_ptr, intptr_t target)
76 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
77 /* read instruction, mask away previous PC_REL26 parameter contents,
78 set the proper offset, then write back the instruction. */
79 uint32_t insn = *(uint32_t *)code_ptr;
80 insn = deposit32(insn, 0, 26, offset);
81 *(uint32_t *)code_ptr = insn;
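/* A worked example, not part of the original file: suppose the instruction
   at code_ptr is an unconditional B (0x14000000) and the target lies 8
   bytes ahead.  Then offset = 8 / 4 = 2 and deposit32(0x14000000, 0, 26, 2)
   produces 0x14000002, i.e. "b .+8".  reloc_pc19 below works the same way,
   with the 19-bit immediate deposited at bits [23:5]. */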
84 static inline void reloc_pc19(void *code_ptr, intptr_t target)
86 intptr_t offset = (target - (intptr_t)code_ptr) / 4;
87 /* read instruction, mask away previous PC_REL19 parameter contents,
88 set the proper offset, then write back the instruction. */
89 uint32_t insn = *(uint32_t *)code_ptr;
90 insn = deposit32(insn, 5, 19, offset);
91 *(uint32_t *)code_ptr = insn;
94 static inline void patch_reloc(uint8_t *code_ptr, int type,
95 intptr_t value, intptr_t addend)
100 case R_AARCH64_JUMP26:
101 case R_AARCH64_CALL26:
102 reloc_pc26(code_ptr, value);
104 case R_AARCH64_CONDBR19:
105 reloc_pc19(code_ptr, value);
113 /* parse target specific constraints */
114 static int target_parse_constraint(TCGArgConstraint *ct,
115 const char **pct_str)
117 const char *ct_str = *pct_str;
121 ct->ct |= TCG_CT_REG;
122 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
124 case 'l': /* qemu_ld / qemu_st address, data_reg */
125 ct->ct |= TCG_CT_REG;
126 tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
127 #ifdef CONFIG_SOFTMMU
128 /* x0 and x1 will be overwritten when reading the tlb entry,
129 and x2 and x3 are needed for the helper args, so better to avoid using them. */
130 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
131 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
132 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
133 tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
145 static inline int tcg_target_const_match(tcg_target_long val,
146 const TCGArgConstraint *arg_ct)
150 if (ct & TCG_CT_CONST) {
157 enum aarch64_cond_code {
160 COND_CS = 0x2, /* Unsigned greater or equal */
161 COND_HS = COND_CS, /* ALIAS greater or equal */
162 COND_CC = 0x3, /* Unsigned less than */
163 COND_LO = COND_CC, /* ALIAS Lower */
164 COND_MI = 0x4, /* Negative */
165 COND_PL = 0x5, /* Zero or greater */
166 COND_VS = 0x6, /* Overflow */
167 COND_VC = 0x7, /* No overflow */
168 COND_HI = 0x8, /* Unsigned greater than */
169 COND_LS = 0x9, /* Unsigned less or equal */
175 COND_NV = 0xf, /* behaves like COND_AL here */
178 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
179 [TCG_COND_EQ] = COND_EQ,
180 [TCG_COND_NE] = COND_NE,
181 [TCG_COND_LT] = COND_LT,
182 [TCG_COND_GE] = COND_GE,
183 [TCG_COND_LE] = COND_LE,
184 [TCG_COND_GT] = COND_GT,
186 [TCG_COND_LTU] = COND_LO,
187 [TCG_COND_GTU] = COND_HI,
188 [TCG_COND_GEU] = COND_HS,
189 [TCG_COND_LEU] = COND_LS,
192 /* opcodes for LDR / STR instructions with base + simm9 addressing */
193 enum aarch64_ldst_op_data { /* size of the data moved */
199 enum aarch64_ldst_op_type { /* type of operation */
200 LDST_ST = 0x0, /* store */
201 LDST_LD = 0x4, /* load */
202 LDST_LD_S_X = 0x8, /* load and sign-extend into Xt */
203 LDST_LD_S_W = 0xc, /* load and sign-extend into Wt */
206 enum aarch64_arith_opc {
217 enum aarch64_srr_opc {
224 static inline enum aarch64_ldst_op_data
225 aarch64_ldst_get_data(TCGOpcode tcg_op)
228 case INDEX_op_ld8u_i32:
229 case INDEX_op_ld8s_i32:
230 case INDEX_op_ld8u_i64:
231 case INDEX_op_ld8s_i64:
232 case INDEX_op_st8_i32:
233 case INDEX_op_st8_i64:
236 case INDEX_op_ld16u_i32:
237 case INDEX_op_ld16s_i32:
238 case INDEX_op_ld16u_i64:
239 case INDEX_op_ld16s_i64:
240 case INDEX_op_st16_i32:
241 case INDEX_op_st16_i64:
244 case INDEX_op_ld_i32:
245 case INDEX_op_st_i32:
246 case INDEX_op_ld32u_i64:
247 case INDEX_op_ld32s_i64:
248 case INDEX_op_st32_i64:
251 case INDEX_op_ld_i64:
252 case INDEX_op_st_i64:
260 static inline enum aarch64_ldst_op_type
261 aarch64_ldst_get_type(TCGOpcode tcg_op)
264 case INDEX_op_st8_i32:
265 case INDEX_op_st16_i32:
266 case INDEX_op_st8_i64:
267 case INDEX_op_st16_i64:
268 case INDEX_op_st_i32:
269 case INDEX_op_st32_i64:
270 case INDEX_op_st_i64:
273 case INDEX_op_ld8u_i32:
274 case INDEX_op_ld16u_i32:
275 case INDEX_op_ld8u_i64:
276 case INDEX_op_ld16u_i64:
277 case INDEX_op_ld_i32:
278 case INDEX_op_ld32u_i64:
279 case INDEX_op_ld_i64:
282 case INDEX_op_ld8s_i32:
283 case INDEX_op_ld16s_i32:
286 case INDEX_op_ld8s_i64:
287 case INDEX_op_ld16s_i64:
288 case INDEX_op_ld32s_i64:
296 static inline uint32_t tcg_in32(TCGContext *s)
298 uint32_t v = *(uint32_t *)s->code_ptr;
302 static inline void tcg_out_ldst_9(TCGContext *s,
303 enum aarch64_ldst_op_data op_data,
304 enum aarch64_ldst_op_type op_type,
305 TCGReg rd, TCGReg rn, tcg_target_long offset)
307 /* use LDUR with base register and 9-bit signed unscaled offset */
308 tcg_out32(s, op_data << 24 | op_type << 20
309 | (offset & 0x1ff) << 12 | rn << 5 | rd);
312 /* tcg_out_ldst_12 expects a scaled unsigned immediate offset */
313 static inline void tcg_out_ldst_12(TCGContext *s,
314 enum aarch64_ldst_op_data op_data,
315 enum aarch64_ldst_op_type op_type,
316 TCGReg rd, TCGReg rn,
317 tcg_target_ulong scaled_uimm)
319 tcg_out32(s, (op_data | 1) << 24
320 | op_type << 20 | scaled_uimm << 10 | rn << 5 | rd);
323 static inline void tcg_out_movr(TCGContext *s, TCGType ext,
324 TCGReg rd, TCGReg src)
326 /* register to register move using MOV (an alias of ORR with shifted
327 register and no shift): 0x2a0003e0 | (src << 16) | rd */
328 unsigned int base = ext ? 0xaa0003e0 : 0x2a0003e0;
329 tcg_out32(s, base | src << 16 | rd);
332 static inline void tcg_out_movi_aux(TCGContext *s,
333 TCGReg rd, uint64_t value)
335 uint32_t half, base, shift, movk = 0;
336 /* construct halfwords of the immediate with MOVZ/MOVK with LSL */
337 /* using MOVZ 0x52800000 | extended reg.. */
338 base = (value > 0xffffffff) ? 0xd2800000 : 0x52800000;
339 /* count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
340 first MOVZ with the half-word immediate skipping the zeros, with a shift
341 (LSL) equal to this number. Then morph all next instructions into MOVKs.
342 Zero the processed half-word in the value, continue until empty.
343 We build the final result 16bits at a time with up to 4 instructions,
344 but do not emit instructions for 16bit zero holes. */
346 shift = ctz64(value) & (63 & -16);
347 half = (value >> shift) & 0xffff;
348 tcg_out32(s, base | movk | shift << 17 | half << 5 | rd);
349 movk = 0x20000000; /* morph next MOVZs into MOVKs */
350 value &= ~(0xffffUL << shift);
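/* A worked example, purely illustrative: tcg_out_movi_aux(s, rd,
   0x1234ffff0000).  The value exceeds 32 bits, so the 64-bit MOVZ base is
   chosen.  First pass: ctz64 = 16, shift = 16, half = 0xffff, emitting
   MOVZ Xd, #0xffff, LSL #16.  Second pass: ctz64 = 34 rounds down to 32,
   half = 0x1234, emitting MOVK Xd, #0x1234, LSL #32.  Only two instructions
   are needed because the remaining half-words are zero. */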
354 static inline void tcg_out_movi(TCGContext *s, TCGType type,
355 TCGReg rd, tcg_target_long value)
357 if (type == TCG_TYPE_I64) {
358 tcg_out_movi_aux(s, rd, value);
360 tcg_out_movi_aux(s, rd, value & 0xffffffff);
364 static inline void tcg_out_ldst_r(TCGContext *s,
365 enum aarch64_ldst_op_data op_data,
366 enum aarch64_ldst_op_type op_type,
367 TCGReg rd, TCGReg base, TCGReg regoff)
369 /* load or store between register and memory using base + 64bit register offset */
370 /* e.g. STR Wt, [Xn, Xm]: 0xb8600800 | (regoff << 16) | (base << 5) | rd */
371 /* the 0x6000 selects the "no extend" option field */
372 tcg_out32(s, 0x00206800
373 | op_data << 24 | op_type << 20 | regoff << 16 | base << 5 | rd);
376 /* solve the whole ldst problem: pick the best encoding for the given offset */
377 static inline void tcg_out_ldst(TCGContext *s, enum aarch64_ldst_op_data data,
378 enum aarch64_ldst_op_type type,
379 TCGReg rd, TCGReg rn, tcg_target_long offset)
381 if (offset >= -256 && offset < 256) {
382 tcg_out_ldst_9(s, data, type, rd, rn, offset);
387 /* if the offset is naturally aligned and in range,
388 then we can use the scaled uimm12 encoding */
389 unsigned int s_bits = data >> 6;
390 if (!(offset & ((1 << s_bits) - 1))) {
391 tcg_target_ulong scaled_uimm = offset >> s_bits;
392 if (scaled_uimm <= 0xfff) {
393 tcg_out_ldst_12(s, data, type, rd, rn, scaled_uimm);
399 /* worst-case scenario, move offset to temp register, use reg offset */
400 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
401 tcg_out_ldst_r(s, data, type, rd, rn, TCG_REG_TMP);
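/* An illustrative sketch of the selection above (the offsets are
   hypothetical): for a 64-bit access (s_bits == 3), offset 8 fits the
   signed 9-bit form and goes through tcg_out_ldst_9; offset 4096 is
   naturally aligned and 4096 >> 3 = 512 <= 0xfff, so tcg_out_ldst_12 is
   used; offset 1 << 20 scales to 131072, which does not fit, so it is
   materialized in TCG_REG_TMP and tcg_out_ldst_r is used instead. */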
404 /* mov alias implemented with add immediate, useful to move to/from SP */
405 static inline void tcg_out_movr_sp(TCGContext *s, TCGType ext,
406 TCGReg rd, TCGReg rn)
408 /* using ADD 0x11000000 | (ext) | rn << 5 | rd */
409 unsigned int base = ext ? 0x91000000 : 0x11000000;
410 tcg_out32(s, base | rn << 5 | rd);
413 static inline void tcg_out_mov(TCGContext *s,
414 TCGType type, TCGReg ret, TCGReg arg)
417 tcg_out_movr(s, type == TCG_TYPE_I64, ret, arg);
421 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
422 TCGReg arg1, intptr_t arg2)
424 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_LD,
428 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
429 TCGReg arg1, intptr_t arg2)
431 tcg_out_ldst(s, (type == TCG_TYPE_I64) ? LDST_64 : LDST_32, LDST_ST,
435 static inline void tcg_out_arith(TCGContext *s, enum aarch64_arith_opc opc,
436 TCGType ext, TCGReg rd, TCGReg rn, TCGReg rm,
439 /* Using shifted register arithmetic operations */
440 /* for an extended (64-bit) operation, just OR in the sf bit: 0x80 << 24 */
441 unsigned int shift, base = ext ? (0x80 | opc) << 24 : opc << 24;
442 if (shift_imm == 0) {
444 } else if (shift_imm > 0) {
445 shift = shift_imm << 10 | 1 << 22;
446 } else /* (shift_imm < 0) */ {
447 shift = (-shift_imm) << 10;
449 tcg_out32(s, base | rm << 16 | shift | rn << 5 | rd);
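/* Note on the shift_imm convention above, added for illustration: a
   positive shift_imm selects LSR (the 1 << 22 shift-type bit) by shift_imm,
   while a negative one encodes LSL by -shift_imm.  For example, the TLB
   lookup code below passes -CPU_TLB_ENTRY_BITS to obtain
   "add x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS". */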
452 static inline void tcg_out_mul(TCGContext *s, TCGType ext,
453 TCGReg rd, TCGReg rn, TCGReg rm)
455 /* Using MADD 0x1b000000 with Ra = wzr alias MUL 0x1b007c00 */
456 unsigned int base = ext ? 0x9b007c00 : 0x1b007c00;
457 tcg_out32(s, base | rm << 16 | rn << 5 | rd);
460 static inline void tcg_out_shiftrot_reg(TCGContext *s,
461 enum aarch64_srr_opc opc, TCGType ext,
462 TCGReg rd, TCGReg rn, TCGReg rm)
464 /* using 2-source data processing instructions 0x1ac02000 */
465 unsigned int base = ext ? 0x9ac02000 : 0x1ac02000;
466 tcg_out32(s, base | rm << 16 | opc << 8 | rn << 5 | rd);
469 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
470 TCGReg rn, unsigned int a, unsigned int b)
472 /* Using UBFM 0x53000000 Wd, Wn, a, b */
473 unsigned int base = ext ? 0xd3400000 : 0x53000000;
474 tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
477 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
478 TCGReg rn, unsigned int a, unsigned int b)
480 /* Using SBFM 0x13000000 Wd, Wn, a, b */
481 unsigned int base = ext ? 0x93400000 : 0x13000000;
482 tcg_out32(s, base | a << 16 | b << 10 | rn << 5 | rd);
485 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
486 TCGReg rn, TCGReg rm, unsigned int a)
488 /* Using EXTR 0x13800000 Wd, Wn, Wm, a */
489 unsigned int base = ext ? 0x93c00000 : 0x13800000;
490 tcg_out32(s, base | rm << 16 | a << 10 | rn << 5 | rd);
493 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
494 TCGReg rd, TCGReg rn, unsigned int m)
497 bits = ext ? 64 : 32;
499 tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
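/* Worked example, illustrative only: a 32-bit shift left by 4 becomes
   tcg_out_ubfm(s, 0, rd, rn, 32 - 4, 31 - 4), i.e. UBFM Wd, Wn, #28, #27,
   which is the canonical encoding of the LSL Wd, Wn, #4 alias. */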
502 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
503 TCGReg rd, TCGReg rn, unsigned int m)
505 int max = ext ? 63 : 31;
506 tcg_out_ubfm(s, ext, rd, rn, m & max, max);
509 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
510 TCGReg rd, TCGReg rn, unsigned int m)
512 int max = ext ? 63 : 31;
513 tcg_out_sbfm(s, ext, rd, rn, m & max, max);
516 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
517 TCGReg rd, TCGReg rn, unsigned int m)
519 int max = ext ? 63 : 31;
520 tcg_out_extr(s, ext, rd, rn, rn, m & max);
523 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
524 TCGReg rd, TCGReg rn, unsigned int m)
527 bits = ext ? 64 : 32;
529 tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
532 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg rn, TCGReg rm)
534 /* Using CMP alias SUBS wzr, Wn, Wm */
535 tcg_out_arith(s, ARITH_SUBS, ext, TCG_REG_XZR, rn, rm, 0);
538 static inline void tcg_out_cset(TCGContext *s, TCGType ext,
539 TCGReg rd, TCGCond c)
541 /* Using CSET alias of CSINC 0x1a800400 Xd, XZR, XZR, invert(cond) */
542 unsigned int base = ext ? 0x9a9f07e0 : 0x1a9f07e0;
543 tcg_out32(s, base | tcg_cond_to_aarch64[tcg_invert_cond(c)] << 12 | rd);
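/* Worked example, illustrative only: for TCG_COND_EQ this emits
   CSINC Wd, WZR, WZR, NE.  If NE holds, Wd = WZR = 0; otherwise
   Wd = WZR + 1 = 1, so Wd ends up as (EQ ? 1 : 0), which is exactly
   what setcond expects. */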
546 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
548 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
550 if (offset < -0x02000000 || offset >= 0x02000000) {
551 /* out of 26bit range */
555 tcg_out32(s, 0x14000000 | (offset & 0x03ffffff));
558 static inline void tcg_out_goto_noaddr(TCGContext *s)
560 /* We take care here not to modify the branch target, by reading the
561 existing offset bits from the buffer. This ensures that caches and memory are
562 kept coherent during retranslation.
563 Mask away possible garbage in the high bits for the first translation,
564 while keeping the offset bits for retranslation. */
566 insn = (tcg_in32(s) & 0x03ffffff) | 0x14000000;
570 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
572 /* see comments in tcg_out_goto_noaddr */
574 insn = tcg_in32(s) & (0x07ffff << 5);
575 insn |= 0x54000000 | tcg_cond_to_aarch64[c];
579 static inline void tcg_out_goto_cond(TCGContext *s, TCGCond c, intptr_t target)
581 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
583 if (offset < -0x40000 || offset >= 0x40000) {
584 /* out of 19bit range */
589 tcg_out32(s, 0x54000000 | tcg_cond_to_aarch64[c] | offset << 5);
592 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
594 tcg_out32(s, 0xd63f0000 | reg << 5);
597 static inline void tcg_out_gotor(TCGContext *s, TCGReg reg)
599 tcg_out32(s, 0xd61f0000 | reg << 5);
602 static inline void tcg_out_call(TCGContext *s, intptr_t target)
604 intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
606 if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
607 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
608 tcg_out_callr(s, TCG_REG_TMP);
610 tcg_out32(s, 0x94000000 | (offset & 0x03ffffff));
614 /* encode a logical immediate: the caller passes M, the number of set
615 bits in the pattern, which is encoded in the S field as M-1 */
616 static inline unsigned int
617 aarch64_limm(unsigned int m, unsigned int r)
620 return r << 16 | (m - 1) << 10;
623 /* test a register against an immediate bit pattern made of
624 M set bits rotated right by R.
626 to test a 32/64 reg against 0x00000007, pass M = 3, R = 0.
627 to test a 32/64 reg against 0x000000ff, pass M = 8, R = 0.
628 to test a 32bit reg against 0xff000000, pass M = 8, R = 8.
629 to test a 32bit reg against 0xff0000ff, pass M = 16, R = 8.
631 static inline void tcg_out_tst(TCGContext *s, TCGType ext, TCGReg rn,
632 unsigned int m, unsigned int r)
634 /* using TST alias of ANDS XZR, Xn,#bimm64 0x7200001f */
635 unsigned int base = ext ? 0xf240001f : 0x7200001f;
636 tcg_out32(s, base | aarch64_limm(m, r) | rn << 5);
639 /* AND a register with a bit pattern, similar to TST, but without changing flags */
640 static inline void tcg_out_andi(TCGContext *s, TCGType ext, TCGReg rd,
641 TCGReg rn, unsigned int m, unsigned int r)
643 /* using AND 0x12000000 */
644 unsigned int base = ext ? 0x92400000 : 0x12000000;
645 tcg_out32(s, base | aarch64_limm(m, r) | rn << 5 | rd);
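/* Usage example, illustrative only: tcg_out_andi(s, 0, rd, rn, 8, 0)
   emits "and wd, wn, #0xff", following the same M/R convention as the
   TST examples above. */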
648 static inline void tcg_out_ret(TCGContext *s)
650 /* emit RET { LR } */
651 tcg_out32(s, 0xd65f03c0);
654 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
656 intptr_t target = addr;
657 intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
659 if (offset < -0x02000000 || offset >= 0x02000000) {
660 /* out of 26bit range */
664 patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
665 flush_icache_range(jmp_addr, jmp_addr + 4);
668 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
670 TCGLabel *l = &s->labels[label_index];
673 tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
674 tcg_out_goto_noaddr(s);
676 tcg_out_goto(s, l->u.value);
680 static inline void tcg_out_goto_label_cond(TCGContext *s,
681 TCGCond c, int label_index)
683 TCGLabel *l = &s->labels[label_index];
686 tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label_index, 0);
687 tcg_out_goto_cond_noaddr(s, c);
689 tcg_out_goto_cond(s, c, l->u.value);
693 static inline void tcg_out_rev(TCGContext *s, TCGType ext,
694 TCGReg rd, TCGReg rm)
696 /* using REV 0x5ac00800 */
697 unsigned int base = ext ? 0xdac00c00 : 0x5ac00800;
698 tcg_out32(s, base | rm << 5 | rd);
701 static inline void tcg_out_rev16(TCGContext *s, TCGType ext,
702 TCGReg rd, TCGReg rm)
704 /* using REV16 0x5ac00400 */
705 unsigned int base = ext ? 0xdac00400 : 0x5ac00400;
706 tcg_out32(s, base | rm << 5 | rd);
709 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, int s_bits,
710 TCGReg rd, TCGReg rn)
712 /* using ALIASes SXTB 0x13001c00, SXTH 0x13003c00, SXTW 0x93407c00
713 of SBFM Xd, Xn, #0, #7|15|31 */
714 int bits = 8 * (1 << s_bits) - 1;
715 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
718 static inline void tcg_out_uxt(TCGContext *s, int s_bits,
719 TCGReg rd, TCGReg rn)
721 /* using ALIASes UXTB 0x53001c00, UXTH 0x53003c00
722 of UBFM Wd, Wn, #0, #7|15 */
723 int bits = 8 * (1 << s_bits) - 1;
724 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
727 static inline void tcg_out_addi(TCGContext *s, TCGType ext,
728 TCGReg rd, TCGReg rn, unsigned int aimm)
730 /* add immediate aimm unsigned 12bit value (with LSL 0 or 12) */
731 /* using ADD 0x11000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
732 unsigned int base = ext ? 0x91000000 : 0x11000000;
737 /* we can only shift left by 12; the asserts catch values we cannot represent */
738 assert(!(aimm & 0xfff));
739 assert(aimm <= 0xfff000);
740 base |= 1 << 22; /* apply LSL 12 */
744 tcg_out32(s, base | aimm | (rn << 5) | rd);
747 static inline void tcg_out_subi(TCGContext *s, TCGType ext,
748 TCGReg rd, TCGReg rn, unsigned int aimm)
750 /* sub immediate aimm unsigned 12bit value (with LSL 0 or 12) */
751 /* using SUB 0x51000000 | (ext) | (aimm << 10) | (rn << 5) | rd */
752 unsigned int base = ext ? 0xd1000000 : 0x51000000;
757 /* we can only shift left by 12; the asserts catch values we cannot represent */
758 assert(!(aimm & 0xfff));
759 assert(aimm <= 0xfff000);
760 base |= 1 << 22; /* apply LSL 12 */
764 tcg_out32(s, base | aimm | (rn << 5) | rd);
767 static inline void tcg_out_nop(TCGContext *s)
769 tcg_out32(s, 0xd503201f);
772 #ifdef CONFIG_SOFTMMU
773 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
774 * int mmu_idx, uintptr_t ra)
776 static const void * const qemu_ld_helpers[4] = {
783 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
784 * uintxx_t val, int mmu_idx, uintptr_t ra)
786 static const void * const qemu_st_helpers[4] = {
793 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
795 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
797 tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
798 tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
799 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
800 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X3, (tcg_target_long)lb->raddr);
801 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
802 (tcg_target_long)qemu_ld_helpers[lb->opc & 3]);
803 tcg_out_callr(s, TCG_REG_TMP);
804 if (lb->opc & 0x04) {
805 tcg_out_sxt(s, 1, lb->opc & 3, lb->datalo_reg, TCG_REG_X0);
807 tcg_out_movr(s, 1, lb->datalo_reg, TCG_REG_X0);
810 tcg_out_goto(s, (intptr_t)lb->raddr);
813 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
815 reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
817 tcg_out_movr(s, 1, TCG_REG_X0, TCG_AREG0);
818 tcg_out_movr(s, (TARGET_LONG_BITS == 64), TCG_REG_X1, lb->addrlo_reg);
819 tcg_out_movr(s, 1, TCG_REG_X2, lb->datalo_reg);
820 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
821 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X4, (intptr_t)lb->raddr);
822 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP,
823 (intptr_t)qemu_st_helpers[lb->opc & 3]);
824 tcg_out_callr(s, TCG_REG_TMP);
827 tcg_out_goto(s, (tcg_target_long)lb->raddr);
830 static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
831 TCGReg data_reg, TCGReg addr_reg,
833 uint8_t *raddr, uint8_t *label_ptr)
835 TCGLabelQemuLdst *label = new_ldst_label(s);
837 label->is_ld = is_ld;
839 label->datalo_reg = data_reg;
840 label->addrlo_reg = addr_reg;
841 label->mem_index = mem_index;
842 label->raddr = raddr;
843 label->label_ptr[0] = label_ptr;
846 /* Load and compare a TLB entry, emitting the conditional jump to the
847 slow path for the failure case, which will be patched later when finalizing
848 the slow path. Generated code returns the host addend in X1,
849 clobbers X0,X2,X3,TMP. */
850 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg,
851 int s_bits, uint8_t **label_ptr, int mem_index, int is_read)
853 TCGReg base = TCG_AREG0;
854 int tlb_offset = is_read ?
855 offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
856 : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
857 /* Extract the TLB index from the address into X0.
859 addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
860 tcg_out_ubfm(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, addr_reg,
861 TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
862 /* Store the page mask part of the address and the low s_bits into X3.
863 Later this allows checking for equality and alignment at the same time.
864 X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
865 tcg_out_andi(s, (TARGET_LONG_BITS == 64), TCG_REG_X3, addr_reg,
866 (TARGET_LONG_BITS - TARGET_PAGE_BITS) + s_bits,
867 (TARGET_LONG_BITS - TARGET_PAGE_BITS));
868 /* Add any "high bits" from the tlb offset to the env address into X2,
869 to take advantage of the LSL12 form of the addi instruction.
870 X2 = env + (tlb_offset & 0xfff000) */
871 tcg_out_addi(s, 1, TCG_REG_X2, base, tlb_offset & 0xfff000);
872 /* Merge the tlb index contribution into X2.
873 X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
874 tcg_out_arith(s, ARITH_ADD, 1, TCG_REG_X2, TCG_REG_X2,
875 TCG_REG_X0, -CPU_TLB_ENTRY_BITS);
876 /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
877 X0 = load [X2 + (tlb_offset & 0x000fff)] */
878 tcg_out_ldst(s, TARGET_LONG_BITS == 64 ? LDST_64 : LDST_32,
879 LDST_LD, TCG_REG_X0, TCG_REG_X2,
880 (tlb_offset & 0xfff));
881 /* Load the tlb addend. Do that early to avoid stalling.
882 X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
883 tcg_out_ldst(s, LDST_64, LDST_LD, TCG_REG_X1, TCG_REG_X2,
884 (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
885 (is_read ? offsetof(CPUTLBEntry, addr_read)
886 : offsetof(CPUTLBEntry, addr_write)));
887 /* Perform the address comparison. */
888 tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3);
889 *label_ptr = s->code_ptr;
890 /* If not equal, we jump to the slow path. */
891 tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
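/* Putting the steps above together, the emitted sequence looks roughly as
   follows (an illustrative sketch; register roles as described above,
   actual offsets depend on the CPUArchState layout):
       ubfm x0, addr, #TARGET_PAGE_BITS, #(TARGET_PAGE_BITS + CPU_TLB_BITS)
       and  x3, addr, #(PAGE_MASK | ((1 << s_bits) - 1))
       add  x2, env, #(tlb_offset & 0xfff000)
       add  x2, x2, x0, lsl #CPU_TLB_ENTRY_BITS
       ldr  x0, [x2, #(tlb_offset & 0xfff)]        ; tlb comparator
       ldr  x1, [x2, #(addend slot offset)]        ; host addend
       cmp  x0, x3
       b.ne slow_path                              ; patched later */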
894 #endif /* CONFIG_SOFTMMU */
896 static void tcg_out_qemu_ld_direct(TCGContext *s, int opc, TCGReg data_r,
897 TCGReg addr_r, TCGReg off_r)
901 tcg_out_ldst_r(s, LDST_8, LDST_LD, data_r, addr_r, off_r);
904 tcg_out_ldst_r(s, LDST_8, LDST_LD_S_X, data_r, addr_r, off_r);
907 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
908 if (TCG_LDST_BSWAP) {
909 tcg_out_rev16(s, 0, data_r, data_r);
913 if (TCG_LDST_BSWAP) {
914 tcg_out_ldst_r(s, LDST_16, LDST_LD, data_r, addr_r, off_r);
915 tcg_out_rev16(s, 0, data_r, data_r);
916 tcg_out_sxt(s, 1, 1, data_r, data_r);
918 tcg_out_ldst_r(s, LDST_16, LDST_LD_S_X, data_r, addr_r, off_r);
922 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
923 if (TCG_LDST_BSWAP) {
924 tcg_out_rev(s, 0, data_r, data_r);
928 if (TCG_LDST_BSWAP) {
929 tcg_out_ldst_r(s, LDST_32, LDST_LD, data_r, addr_r, off_r);
930 tcg_out_rev(s, 0, data_r, data_r);
931 tcg_out_sxt(s, 1, 2, data_r, data_r);
933 tcg_out_ldst_r(s, LDST_32, LDST_LD_S_X, data_r, addr_r, off_r);
937 tcg_out_ldst_r(s, LDST_64, LDST_LD, data_r, addr_r, off_r);
938 if (TCG_LDST_BSWAP) {
939 tcg_out_rev(s, 1, data_r, data_r);
947 static void tcg_out_qemu_st_direct(TCGContext *s, int opc, TCGReg data_r,
948 TCGReg addr_r, TCGReg off_r)
952 tcg_out_ldst_r(s, LDST_8, LDST_ST, data_r, addr_r, off_r);
955 if (TCG_LDST_BSWAP) {
956 tcg_out_rev16(s, 0, TCG_REG_TMP, data_r);
957 tcg_out_ldst_r(s, LDST_16, LDST_ST, TCG_REG_TMP, addr_r, off_r);
959 tcg_out_ldst_r(s, LDST_16, LDST_ST, data_r, addr_r, off_r);
963 if (TCG_LDST_BSWAP) {
964 tcg_out_rev(s, 0, TCG_REG_TMP, data_r);
965 tcg_out_ldst_r(s, LDST_32, LDST_ST, TCG_REG_TMP, addr_r, off_r);
967 tcg_out_ldst_r(s, LDST_32, LDST_ST, data_r, addr_r, off_r);
971 if (TCG_LDST_BSWAP) {
972 tcg_out_rev(s, 1, TCG_REG_TMP, data_r);
973 tcg_out_ldst_r(s, LDST_64, LDST_ST, TCG_REG_TMP, addr_r, off_r);
975 tcg_out_ldst_r(s, LDST_64, LDST_ST, data_r, addr_r, off_r);
983 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
985 TCGReg addr_reg, data_reg;
986 #ifdef CONFIG_SOFTMMU
987 int mem_index, s_bits;
993 #ifdef CONFIG_SOFTMMU
996 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
997 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
998 add_qemu_ldst_label(s, 1, opc, data_reg, addr_reg,
999 mem_index, s->code_ptr, label_ptr);
1000 #else /* !CONFIG_SOFTMMU */
1001 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg,
1002 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1003 #endif /* CONFIG_SOFTMMU */
1006 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
1008 TCGReg addr_reg, data_reg;
1009 #ifdef CONFIG_SOFTMMU
1010 int mem_index, s_bits;
1016 #ifdef CONFIG_SOFTMMU
1017 mem_index = args[2];
1020 tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1021 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, TCG_REG_X1);
1022 add_qemu_ldst_label(s, 0, opc, data_reg, addr_reg,
1023 mem_index, s->code_ptr, label_ptr);
1024 #else /* !CONFIG_SOFTMMU */
1025 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg,
1026 GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1027 #endif /* CONFIG_SOFTMMU */
1030 static uint8_t *tb_ret_addr;
1032 /* callee stack use example:
1033 stp x29, x30, [sp,#-32]!
1035 stp x1, x2, [sp,#16]
1037 ldp x1, x2, [sp,#16]
1038 ldp x29, x30, [sp],#32
1042 /* push r1 and r2, and alloc stack space for a total of
1043 alloc_n elements (1 element = 16 bytes); alloc_n must be between 1 and 31. */
1044 static inline void tcg_out_push_pair(TCGContext *s, TCGReg addr,
1045 TCGReg r1, TCGReg r2, int alloc_n)
1047 /* using indexed scaled simm7 STP 0x28800000 | (ext) | 0x01000000 (pre-idx)
1048 | alloc_n * (-1) << 16 | r2 << 10 | addr << 5 | r1 */
1049 assert(alloc_n > 0 && alloc_n < 0x20);
1050 alloc_n = (-alloc_n) & 0x3f;
1051 tcg_out32(s, 0xa9800000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
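/* Usage example, illustrative only: tcg_out_push_pair(s, TCG_REG_SP,
   TCG_REG_FP, TCG_REG_LR, 2) emits "stp x29, x30, [sp, #-32]!", matching
   the first line of the callee stack example above. */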
1054 /* dealloc stack space for a total of alloc_n elements and pop r1, r2. */
1055 static inline void tcg_out_pop_pair(TCGContext *s, TCGReg addr,
1056 TCGReg r1, TCGReg r2, int alloc_n)
1058 /* using indexed scaled simm7 LDP 0x28c00000 | (ext) | nothing (post-idx)
1059 | alloc_n << 16 | r2 << 10 | addr << 5 | r1 */
1060 assert(alloc_n > 0 && alloc_n < 0x20);
1061 tcg_out32(s, 0xa8c00000 | alloc_n << 16 | r2 << 10 | addr << 5 | r1);
1064 static inline void tcg_out_store_pair(TCGContext *s, TCGReg addr,
1065 TCGReg r1, TCGReg r2, int idx)
1067 /* using register pair offset simm7 STP 0x29000000 | (ext)
1068 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1069 assert(idx > 0 && idx < 0x20);
1070 tcg_out32(s, 0xa9000000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1073 static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
1074 TCGReg r1, TCGReg r2, int idx)
1076 /* using register pair offset simm7 LDP 0x29400000 | (ext)
1077 | idx << 16 | r2 << 10 | addr << 5 | r1 */
1078 assert(idx > 0 && idx < 0x20);
1079 tcg_out32(s, 0xa9400000 | idx << 16 | r2 << 10 | addr << 5 | r1);
1082 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1083 const TCGArg args[TCG_MAX_OP_ARGS],
1084 const int const_args[TCG_MAX_OP_ARGS])
1086 /* 99% of the time, we can signal the use of extension registers
1087 by looking to see if the opcode handles 64-bit data. */
1088 TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1090 /* Hoist the loads of the most common arguments. */
1091 TCGArg a0 = args[0];
1092 TCGArg a1 = args[1];
1093 TCGArg a2 = args[2];
1094 int c2 = const_args[2];
1097 case INDEX_op_exit_tb:
1098 tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1099 tcg_out_goto(s, (intptr_t)tb_ret_addr);
1102 case INDEX_op_goto_tb:
1103 #ifndef USE_DIRECT_JUMP
1104 #error "USE_DIRECT_JUMP required for aarch64"
1106 assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1107 s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1108 /* actual branch destination will be patched by
1109 aarch64_tb_set_jmp_target later, beware retranslation. */
1110 tcg_out_goto_noaddr(s);
1111 s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1115 if (const_args[0]) {
1116 tcg_out_call(s, a0);
1118 tcg_out_callr(s, a0);
1123 tcg_out_goto_label(s, a0);
1126 case INDEX_op_ld_i32:
1127 case INDEX_op_ld_i64:
1128 case INDEX_op_st_i32:
1129 case INDEX_op_st_i64:
1130 case INDEX_op_ld8u_i32:
1131 case INDEX_op_ld8s_i32:
1132 case INDEX_op_ld16u_i32:
1133 case INDEX_op_ld16s_i32:
1134 case INDEX_op_ld8u_i64:
1135 case INDEX_op_ld8s_i64:
1136 case INDEX_op_ld16u_i64:
1137 case INDEX_op_ld16s_i64:
1138 case INDEX_op_ld32u_i64:
1139 case INDEX_op_ld32s_i64:
1140 case INDEX_op_st8_i32:
1141 case INDEX_op_st8_i64:
1142 case INDEX_op_st16_i32:
1143 case INDEX_op_st16_i64:
1144 case INDEX_op_st32_i64:
1145 tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
1149 case INDEX_op_add_i64:
1150 case INDEX_op_add_i32:
1151 tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
1154 case INDEX_op_sub_i64:
1155 case INDEX_op_sub_i32:
1156 tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
1159 case INDEX_op_and_i64:
1160 case INDEX_op_and_i32:
1161 tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
1164 case INDEX_op_or_i64:
1165 case INDEX_op_or_i32:
1166 tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
1169 case INDEX_op_xor_i64:
1170 case INDEX_op_xor_i32:
1171 tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
1174 case INDEX_op_mul_i64:
1175 case INDEX_op_mul_i32:
1176 tcg_out_mul(s, ext, a0, a1, a2);
1179 case INDEX_op_shl_i64:
1180 case INDEX_op_shl_i32:
1181 if (c2) { /* LSL / UBFM Wd, Wn, (32 - m) */
1182 tcg_out_shl(s, ext, a0, a1, a2);
1183 } else { /* LSL / LSLV */
1184 tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
1188 case INDEX_op_shr_i64:
1189 case INDEX_op_shr_i32:
1190 if (c2) { /* LSR / UBFM Wd, Wn, m, 31 */
1191 tcg_out_shr(s, ext, a0, a1, a2);
1192 } else { /* LSR / LSRV */
1193 tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
1197 case INDEX_op_sar_i64:
1198 case INDEX_op_sar_i32:
1199 if (c2) { /* ASR / SBFM Wd, Wn, m, 31 */
1200 tcg_out_sar(s, ext, a0, a1, a2);
1201 } else { /* ASR / ASRV */
1202 tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
1206 case INDEX_op_rotr_i64:
1207 case INDEX_op_rotr_i32:
1208 if (c2) { /* ROR / EXTR Wd, Wm, Wm, m */
1209 tcg_out_rotr(s, ext, a0, a1, a2);
1210 } else { /* ROR / RORV */
1211 tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
1215 case INDEX_op_rotl_i64:
1216 case INDEX_op_rotl_i32: /* same as rotate right by (32 - m) */
1217 if (c2) { /* ROR / EXTR Wd, Wm, Wm, 32 - m */
1218 tcg_out_rotl(s, ext, a0, a1, a2);
1220 tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
1221 tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
1225 case INDEX_op_brcond_i64:
1226 case INDEX_op_brcond_i32:
1227 tcg_out_cmp(s, ext, a0, a1);
1228 tcg_out_goto_label_cond(s, a2, args[3]);
1231 case INDEX_op_setcond_i64:
1232 case INDEX_op_setcond_i32:
1233 tcg_out_cmp(s, ext, a1, a2);
1234 tcg_out_cset(s, 0, a0, args[3]);
1237 case INDEX_op_qemu_ld8u:
1238 tcg_out_qemu_ld(s, args, 0 | 0);
1240 case INDEX_op_qemu_ld8s:
1241 tcg_out_qemu_ld(s, args, 4 | 0);
1243 case INDEX_op_qemu_ld16u:
1244 tcg_out_qemu_ld(s, args, 0 | 1);
1246 case INDEX_op_qemu_ld16s:
1247 tcg_out_qemu_ld(s, args, 4 | 1);
1249 case INDEX_op_qemu_ld32u:
1250 tcg_out_qemu_ld(s, args, 0 | 2);
1252 case INDEX_op_qemu_ld32s:
1253 tcg_out_qemu_ld(s, args, 4 | 2);
1255 case INDEX_op_qemu_ld32:
1256 tcg_out_qemu_ld(s, args, 0 | 2);
1258 case INDEX_op_qemu_ld64:
1259 tcg_out_qemu_ld(s, args, 0 | 3);
1261 case INDEX_op_qemu_st8:
1262 tcg_out_qemu_st(s, args, 0);
1264 case INDEX_op_qemu_st16:
1265 tcg_out_qemu_st(s, args, 1);
1267 case INDEX_op_qemu_st32:
1268 tcg_out_qemu_st(s, args, 2);
1270 case INDEX_op_qemu_st64:
1271 tcg_out_qemu_st(s, args, 3);
1274 case INDEX_op_bswap32_i64:
1275 /* Despite the _i64, this is a 32-bit bswap. */
1278 case INDEX_op_bswap64_i64:
1279 case INDEX_op_bswap32_i32:
1280 tcg_out_rev(s, ext, a0, a1);
1282 case INDEX_op_bswap16_i64:
1283 case INDEX_op_bswap16_i32:
1284 tcg_out_rev16(s, 0, a0, a1);
1287 case INDEX_op_ext8s_i64:
1288 case INDEX_op_ext8s_i32:
1289 tcg_out_sxt(s, ext, 0, a0, a1);
1291 case INDEX_op_ext16s_i64:
1292 case INDEX_op_ext16s_i32:
1293 tcg_out_sxt(s, ext, 1, a0, a1);
1295 case INDEX_op_ext32s_i64:
1296 tcg_out_sxt(s, 1, 2, a0, a1);
1298 case INDEX_op_ext8u_i64:
1299 case INDEX_op_ext8u_i32:
1300 tcg_out_uxt(s, 0, a0, a1);
1302 case INDEX_op_ext16u_i64:
1303 case INDEX_op_ext16u_i32:
1304 tcg_out_uxt(s, 1, a0, a1);
1306 case INDEX_op_ext32u_i64:
1307 tcg_out_movr(s, 0, a0, a1);
1310 case INDEX_op_mov_i64:
1311 case INDEX_op_mov_i32:
1312 case INDEX_op_movi_i64:
1313 case INDEX_op_movi_i32:
1314 /* Always implemented with tcg_out_mov/i, never with tcg_out_op. */
1316 /* Opcode not implemented. */
1321 static const TCGTargetOpDef aarch64_op_defs[] = {
1322 { INDEX_op_exit_tb, { } },
1323 { INDEX_op_goto_tb, { } },
1324 { INDEX_op_call, { "ri" } },
1325 { INDEX_op_br, { } },
1327 { INDEX_op_mov_i32, { "r", "r" } },
1328 { INDEX_op_mov_i64, { "r", "r" } },
1330 { INDEX_op_movi_i32, { "r" } },
1331 { INDEX_op_movi_i64, { "r" } },
1333 { INDEX_op_ld8u_i32, { "r", "r" } },
1334 { INDEX_op_ld8s_i32, { "r", "r" } },
1335 { INDEX_op_ld16u_i32, { "r", "r" } },
1336 { INDEX_op_ld16s_i32, { "r", "r" } },
1337 { INDEX_op_ld_i32, { "r", "r" } },
1338 { INDEX_op_ld8u_i64, { "r", "r" } },
1339 { INDEX_op_ld8s_i64, { "r", "r" } },
1340 { INDEX_op_ld16u_i64, { "r", "r" } },
1341 { INDEX_op_ld16s_i64, { "r", "r" } },
1342 { INDEX_op_ld32u_i64, { "r", "r" } },
1343 { INDEX_op_ld32s_i64, { "r", "r" } },
1344 { INDEX_op_ld_i64, { "r", "r" } },
1346 { INDEX_op_st8_i32, { "r", "r" } },
1347 { INDEX_op_st16_i32, { "r", "r" } },
1348 { INDEX_op_st_i32, { "r", "r" } },
1349 { INDEX_op_st8_i64, { "r", "r" } },
1350 { INDEX_op_st16_i64, { "r", "r" } },
1351 { INDEX_op_st32_i64, { "r", "r" } },
1352 { INDEX_op_st_i64, { "r", "r" } },
1354 { INDEX_op_add_i32, { "r", "r", "r" } },
1355 { INDEX_op_add_i64, { "r", "r", "r" } },
1356 { INDEX_op_sub_i32, { "r", "r", "r" } },
1357 { INDEX_op_sub_i64, { "r", "r", "r" } },
1358 { INDEX_op_mul_i32, { "r", "r", "r" } },
1359 { INDEX_op_mul_i64, { "r", "r", "r" } },
1360 { INDEX_op_and_i32, { "r", "r", "r" } },
1361 { INDEX_op_and_i64, { "r", "r", "r" } },
1362 { INDEX_op_or_i32, { "r", "r", "r" } },
1363 { INDEX_op_or_i64, { "r", "r", "r" } },
1364 { INDEX_op_xor_i32, { "r", "r", "r" } },
1365 { INDEX_op_xor_i64, { "r", "r", "r" } },
1367 { INDEX_op_shl_i32, { "r", "r", "ri" } },
1368 { INDEX_op_shr_i32, { "r", "r", "ri" } },
1369 { INDEX_op_sar_i32, { "r", "r", "ri" } },
1370 { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1371 { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1372 { INDEX_op_shl_i64, { "r", "r", "ri" } },
1373 { INDEX_op_shr_i64, { "r", "r", "ri" } },
1374 { INDEX_op_sar_i64, { "r", "r", "ri" } },
1375 { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1376 { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1378 { INDEX_op_brcond_i32, { "r", "r" } },
1379 { INDEX_op_setcond_i32, { "r", "r", "r" } },
1380 { INDEX_op_brcond_i64, { "r", "r" } },
1381 { INDEX_op_setcond_i64, { "r", "r", "r" } },
1383 { INDEX_op_qemu_ld8u, { "r", "l" } },
1384 { INDEX_op_qemu_ld8s, { "r", "l" } },
1385 { INDEX_op_qemu_ld16u, { "r", "l" } },
1386 { INDEX_op_qemu_ld16s, { "r", "l" } },
1387 { INDEX_op_qemu_ld32u, { "r", "l" } },
1388 { INDEX_op_qemu_ld32s, { "r", "l" } },
1390 { INDEX_op_qemu_ld32, { "r", "l" } },
1391 { INDEX_op_qemu_ld64, { "r", "l" } },
1393 { INDEX_op_qemu_st8, { "l", "l" } },
1394 { INDEX_op_qemu_st16, { "l", "l" } },
1395 { INDEX_op_qemu_st32, { "l", "l" } },
1396 { INDEX_op_qemu_st64, { "l", "l" } },
1398 { INDEX_op_bswap16_i32, { "r", "r" } },
1399 { INDEX_op_bswap32_i32, { "r", "r" } },
1400 { INDEX_op_bswap16_i64, { "r", "r" } },
1401 { INDEX_op_bswap32_i64, { "r", "r" } },
1402 { INDEX_op_bswap64_i64, { "r", "r" } },
1404 { INDEX_op_ext8s_i32, { "r", "r" } },
1405 { INDEX_op_ext16s_i32, { "r", "r" } },
1406 { INDEX_op_ext8u_i32, { "r", "r" } },
1407 { INDEX_op_ext16u_i32, { "r", "r" } },
1409 { INDEX_op_ext8s_i64, { "r", "r" } },
1410 { INDEX_op_ext16s_i64, { "r", "r" } },
1411 { INDEX_op_ext32s_i64, { "r", "r" } },
1412 { INDEX_op_ext8u_i64, { "r", "r" } },
1413 { INDEX_op_ext16u_i64, { "r", "r" } },
1414 { INDEX_op_ext32u_i64, { "r", "r" } },
1419 static void tcg_target_init(TCGContext *s)
1421 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1422 tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1424 tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1425 (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1426 (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1427 (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1428 (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1429 (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1430 (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1431 (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1432 (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1433 (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1434 (1 << TCG_REG_X18));
1436 tcg_regset_clear(s->reserved_regs);
1437 tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1438 tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1439 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1440 tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1442 tcg_add_target_add_op_defs(aarch64_op_defs);
1445 static void tcg_target_qemu_prologue(TCGContext *s)
1447 /* NB: frame sizes are in 16 byte stack units! */
1448 int frame_size_callee_saved, frame_size_tcg_locals;
1451 /* save pairs (FP, LR) and (X19, X20) .. (X27, X28) */
1452 frame_size_callee_saved = (1) + (TCG_REG_X28 - TCG_REG_X19) / 2 + 1;
1454 /* frame size requirement for TCG local variables */
1455 frame_size_tcg_locals = TCG_STATIC_CALL_ARGS_SIZE
1456 + CPU_TEMP_BUF_NLONGS * sizeof(long)
1457 + (TCG_TARGET_STACK_ALIGN - 1);
1458 frame_size_tcg_locals &= ~(TCG_TARGET_STACK_ALIGN - 1);
1459 frame_size_tcg_locals /= TCG_TARGET_STACK_ALIGN;
1461 /* push (FP, LR) and update sp */
1462 tcg_out_push_pair(s, TCG_REG_SP,
1463 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);
1465 /* FP -> callee_saved */
1466 tcg_out_movr_sp(s, 1, TCG_REG_FP, TCG_REG_SP);
1468 /* store callee-preserved regs x19..x28 using FP -> callee_saved */
1469 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1470 int idx = (r - TCG_REG_X19) / 2 + 1;
1471 tcg_out_store_pair(s, TCG_REG_FP, r, r + 1, idx);
1474 /* make stack space for TCG locals */
1475 tcg_out_subi(s, 1, TCG_REG_SP, TCG_REG_SP,
1476 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1477 /* inform TCG about how to find TCG locals with register, offset, size */
1478 tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1479 CPU_TEMP_BUF_NLONGS * sizeof(long));
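/* An illustrative sketch of the resulting frame, assuming the x19..x28
   callee-saved set above (frame_size_callee_saved = 6 units = 96 bytes):
      FP + 80 : x27, x28
      FP + 64 : x25, x26
      FP + 48 : x23, x24
      FP + 32 : x21, x22
      FP + 16 : x19, x20
      FP +  0 : old FP (x29), LR (x30)   <- FP points here
      SP      : frame_size_tcg_locals units below FP, holding the static
                call args area followed by the TCG temp buffer */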
1481 #if defined(CONFIG_USE_GUEST_BASE)
1483 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1484 tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1488 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1489 tcg_out_gotor(s, tcg_target_call_iarg_regs[1]);
1491 tb_ret_addr = s->code_ptr;
1493 /* remove TCG locals stack space */
1494 tcg_out_addi(s, 1, TCG_REG_SP, TCG_REG_SP,
1495 frame_size_tcg_locals * TCG_TARGET_STACK_ALIGN);
1497 /* restore registers x19..x28.
1498 FP must be preserved, so it still points to callee_saved area */
1499 for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1500 int idx = (r - TCG_REG_X19) / 2 + 1;
1501 tcg_out_load_pair(s, TCG_REG_FP, r, r + 1, idx);
1504 /* pop (FP, LR), restore SP to previous frame, return */
1505 tcg_out_pop_pair(s, TCG_REG_SP,
1506 TCG_REG_FP, TCG_REG_LR, frame_size_callee_saved);