tcg/aarch64/tcg-target.c

   1 /*
   2  * Initial TCG Implementation for aarch64
   3  *
   4  * Copyright (c) 2013 Huawei Technologies Duesseldorf GmbH
   5  * Written by Claudio Fontana
   6  *
   7  * This work is licensed under the terms of the GNU GPL, version 2 or
   8  * (at your option) any later version.
   9  *
  10  * See the COPYING file in the top-level directory for details.
  11  */
  12
  13 #include "tcg-be-ldst.h"
  14 #include "qemu/bitops.h"
  15
  16 /* We're going to re-use TCGType in setting of the SF bit, which controls
  17    the size of the operation performed.  If we know the values match, it
  18    makes things much cleaner.  */
  19 QEMU_BUILD_BUG_ON(TCG_TYPE_I32 != 0 || TCG_TYPE_I64 != 1);
  20
  21 #ifndef NDEBUG
  22 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
  23     "%x0", "%x1", "%x2", "%x3", "%x4", "%x5", "%x6", "%x7",
  24     "%x8", "%x9", "%x10", "%x11", "%x12", "%x13", "%x14", "%x15",
  25     "%x16", "%x17", "%x18", "%x19", "%x20", "%x21", "%x22", "%x23",
  26     "%x24", "%x25", "%x26", "%x27", "%x28", "%fp", "%x30", "%sp",
  27 };
  28 #endif /* NDEBUG */
  29
  30 static const int tcg_target_reg_alloc_order[] = {
  31     TCG_REG_X20, TCG_REG_X21, TCG_REG_X22, TCG_REG_X23,
  32     TCG_REG_X24, TCG_REG_X25, TCG_REG_X26, TCG_REG_X27,
  33     TCG_REG_X28, /* we will reserve this for GUEST_BASE if configured */
  34
  35     TCG_REG_X8, TCG_REG_X9, TCG_REG_X10, TCG_REG_X11,
  36     TCG_REG_X12, TCG_REG_X13, TCG_REG_X14, TCG_REG_X15,
  37     TCG_REG_X16, TCG_REG_X17,
  38
  39     TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
  40     TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7,
  41
  42     /* X18 reserved by system */
  43     /* X19 reserved for AREG0 */
  44     /* X29 reserved as fp */
  45     /* X30 reserved as temporary */
  46 };
  47
  48 static const int tcg_target_call_iarg_regs[8] = {
  49     TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3,
  50     TCG_REG_X4, TCG_REG_X5, TCG_REG_X6, TCG_REG_X7
  51 };
  52 static const int tcg_target_call_oarg_regs[1] = {
  53     TCG_REG_X0
  54 };
  55
  56 #define TCG_REG_TMP TCG_REG_X30
  57
  58 #ifndef CONFIG_SOFTMMU
  59 # ifdef CONFIG_USE_GUEST_BASE
  60 #  define TCG_REG_GUEST_BASE TCG_REG_X28
  61 # else
  62 #  define TCG_REG_GUEST_BASE TCG_REG_XZR
  63 # endif
  64 #endif
  65
  66 static inline void reloc_pc26(void *code_ptr, intptr_t target)
  67 {
  68     intptr_t offset = (target - (intptr_t)code_ptr) / 4;
  69     /* read instruction, mask away previous PC_REL26 parameter contents,
  70        set the proper offset, then write back the instruction. */
  71     uint32_t insn = *(uint32_t *)code_ptr;
  72     insn = deposit32(insn, 0, 26, offset);
  73     *(uint32_t *)code_ptr = insn;
  74 }
  75
  76 static inline void reloc_pc19(void *code_ptr, intptr_t target)
  77 {
  78     intptr_t offset = (target - (intptr_t)code_ptr) / 4;
  79     /* read instruction, mask away previous PC_REL19 parameter contents,
  80        set the proper offset, then write back the instruction. */
  81     uint32_t insn = *(uint32_t *)code_ptr;
  82     insn = deposit32(insn, 5, 19, offset);
  83     *(uint32_t *)code_ptr = insn;
  84 }
  85
  86 static inline void patch_reloc(uint8_t *code_ptr, int type,
  87                                intptr_t value, intptr_t addend)
  88 {
  89     value += addend;
  90
  91     switch (type) {
  92     case R_AARCH64_JUMP26:
  93     case R_AARCH64_CALL26:
  94         reloc_pc26(code_ptr, value);
  95         break;
  96     case R_AARCH64_CONDBR19:
  97         reloc_pc19(code_ptr, value);
  98         break;
  99
 100     default:
 101         tcg_abort();
 102     }
 103 }
 104
 105 #define TCG_CT_CONST_IS32 0x100
 106 #define TCG_CT_CONST_AIMM 0x200
 107 #define TCG_CT_CONST_LIMM 0x400
 108 #define TCG_CT_CONST_ZERO 0x800
 109 #define TCG_CT_CONST_MONE 0x1000
 110
 111 /* parse target specific constraints */
 112 static int target_parse_constraint(TCGArgConstraint *ct,
 113                                    const char **pct_str)
 114 {
 115     const char *ct_str = *pct_str;
 116
 117     switch (ct_str[0]) {
 118     case 'r':
 119         ct->ct |= TCG_CT_REG;
 120         tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
 121         break;
 122     case 'l': /* qemu_ld / qemu_st address, data_reg */
 123         ct->ct |= TCG_CT_REG;
 124         tcg_regset_set32(ct->u.regs, 0, (1ULL << TCG_TARGET_NB_REGS) - 1);
 125 #ifdef CONFIG_SOFTMMU
 126         /* x0 and x1 will be overwritten when reading the tlb entry,
 127            and x2, and x3 for helper args, better to avoid using them. */
 128         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X0);
 129         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X1);
 130         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X2);
 131         tcg_regset_reset_reg(ct->u.regs, TCG_REG_X3);
 132 #endif
 133         break;
 134     case 'w': /* The operand should be considered 32-bit.  */
 135         ct->ct |= TCG_CT_CONST_IS32;
 136         break;
 137     case 'A': /* Valid for arithmetic immediate (positive or negative).  */
 138         ct->ct |= TCG_CT_CONST_AIMM;
 139         break;
 140     case 'L': /* Valid for logical immediate.  */
 141         ct->ct |= TCG_CT_CONST_LIMM;
 142         break;
 143     case 'M': /* minus one */
 144         ct->ct |= TCG_CT_CONST_MONE;
 145         break;
 146     case 'Z': /* zero */
 147         ct->ct |= TCG_CT_CONST_ZERO;
 148         break;
 149     default:
 150         return -1;
 151     }
 152
 153     ct_str++;
 154     *pct_str = ct_str;
 155     return 0;
 156 }
 157
 158 static inline bool is_aimm(uint64_t val)
 159 {
 160     return (val & ~0xfff) == 0 || (val & ~0xfff000) == 0;
 161 }
 162
 163 static inline bool is_limm(uint64_t val)
 164 {
 165     /* Taking a simplified view of the logical immediates for now, ignoring
 166        the replication that can happen across the field.  Match bit patterns
 167        of the forms
 168            0....01....1
 169            0..01..10..0
 170        and their inverses.  */
 171
 172     /* Make things easier below, by testing the form with msb clear. */
 173     if ((int64_t)val < 0) {
 174         val = ~val;
 175     }
 176     if (val == 0) {
 177         return false;
 178     }
 179     val += val & -val;
 180     return (val & (val - 1)) == 0;
 181 }
 182
 183 static int tcg_target_const_match(tcg_target_long val,
 184                                   const TCGArgConstraint *arg_ct)
 185 {
 186     int ct = arg_ct->ct;
 187
 188     if (ct & TCG_CT_CONST) {
 189         return 1;
 190     }
 191     if (ct & TCG_CT_CONST_IS32) {
 192         val = (int32_t)val;
 193     }
 194     if ((ct & TCG_CT_CONST_AIMM) && (is_aimm(val) || is_aimm(-val))) {
 195         return 1;
 196     }
 197     if ((ct & TCG_CT_CONST_LIMM) && is_limm(val)) {
 198         return 1;
 199     }
 200     if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
 201         return 1;
 202     }
 203     if ((ct & TCG_CT_CONST_MONE) && val == -1) {
 204         return 1;
 205     }
 206
 207     return 0;
 208 }
 209
 210 enum aarch64_cond_code {
 211     COND_EQ = 0x0,
 212     COND_NE = 0x1,
 213     COND_CS = 0x2,     /* Unsigned greater or equal */
 214     COND_HS = COND_CS, /* ALIAS greater or equal */
 215     COND_CC = 0x3,     /* Unsigned less than */
 216     COND_LO = COND_CC, /* ALIAS Lower */
 217     COND_MI = 0x4,     /* Negative */
 218     COND_PL = 0x5,     /* Zero or greater */
 219     COND_VS = 0x6,     /* Overflow */
 220     COND_VC = 0x7,     /* No overflow */
 221     COND_HI = 0x8,     /* Unsigned greater than */
 222     COND_LS = 0x9,     /* Unsigned less or equal */
 223     COND_GE = 0xa,
 224     COND_LT = 0xb,
 225     COND_GT = 0xc,
 226     COND_LE = 0xd,
 227     COND_AL = 0xe,
 228     COND_NV = 0xf, /* behaves like COND_AL here */
 229 };
 230
 231 static const enum aarch64_cond_code tcg_cond_to_aarch64[] = {
 232     [TCG_COND_EQ] = COND_EQ,
 233     [TCG_COND_NE] = COND_NE,
 234     [TCG_COND_LT] = COND_LT,
 235     [TCG_COND_GE] = COND_GE,
 236     [TCG_COND_LE] = COND_LE,
 237     [TCG_COND_GT] = COND_GT,
 238     /* unsigned */
 239     [TCG_COND_LTU] = COND_LO,
 240     [TCG_COND_GTU] = COND_HI,
 241     [TCG_COND_GEU] = COND_HS,
 242     [TCG_COND_LEU] = COND_LS,
 243 };
 244
 245 typedef enum {
 246     LDST_ST = 0,    /* store */
 247     LDST_LD = 1,    /* load */
 248     LDST_LD_S_X = 2,  /* load and sign-extend into Xt */
 249     LDST_LD_S_W = 3,  /* load and sign-extend into Wt */
 250 } AArch64LdstType;
 251
 252 /* We encode the format of the insn into the beginning of the name, so that
 253    we can have the preprocessor help "typecheck" the insn vs the output
 254    function.  Arm didn't provide us with nice names for the formats, so we
 255    use the section number of the architecture reference manual in which the
 256    instruction group is described.  */
 257 typedef enum {
 258     /* Compare and branch (immediate).  */
 259     I3201_CBZ       = 0x34000000,
 260     I3201_CBNZ      = 0x35000000,
 261
 262     /* Conditional branch (immediate).  */
 263     I3202_B_C       = 0x54000000,
 264
 265     /* Unconditional branch (immediate).  */
 266     I3206_B         = 0x14000000,
 267     I3206_BL        = 0x94000000,
 268
 269     /* Unconditional branch (register).  */
 270     I3207_BR        = 0xd61f0000,
 271     I3207_BLR       = 0xd63f0000,
 272     I3207_RET       = 0xd65f0000,
 273
 274     /* Load/store register.  Described here as 3.3.12, but the helper
 275        that emits them can transform to 3.3.10 or 3.3.13.  */
 276     I3312_STRB      = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
 277     I3312_STRH      = 0x38000000 | LDST_ST << 22 | MO_16 << 30,
 278     I3312_STRW      = 0x38000000 | LDST_ST << 22 | MO_32 << 30,
 279     I3312_STRX      = 0x38000000 | LDST_ST << 22 | MO_64 << 30,
 280
 281     I3312_LDRB      = 0x38000000 | LDST_LD << 22 | MO_8 << 30,
 282     I3312_LDRH      = 0x38000000 | LDST_LD << 22 | MO_16 << 30,
 283     I3312_LDRW      = 0x38000000 | LDST_LD << 22 | MO_32 << 30,
 284     I3312_LDRX      = 0x38000000 | LDST_LD << 22 | MO_64 << 30,
 285
 286     I3312_LDRSBW    = 0x38000000 | LDST_LD_S_W << 22 | MO_8 << 30,
 287     I3312_LDRSHW    = 0x38000000 | LDST_LD_S_W << 22 | MO_16 << 30,
 288
 289     I3312_LDRSBX    = 0x38000000 | LDST_LD_S_X << 22 | MO_8 << 30,
 290     I3312_LDRSHX    = 0x38000000 | LDST_LD_S_X << 22 | MO_16 << 30,
 291     I3312_LDRSWX    = 0x38000000 | LDST_LD_S_X << 22 | MO_32 << 30,
 292
 293     I3312_TO_I3310  = 0x00206800,
 294     I3312_TO_I3313  = 0x01000000,
 295
 296     /* Load/store register pair instructions.  */
 297     I3314_LDP       = 0x28400000,
 298     I3314_STP       = 0x28000000,
 299
 300     /* Add/subtract immediate instructions.  */
 301     I3401_ADDI      = 0x11000000,
 302     I3401_ADDSI     = 0x31000000,
 303     I3401_SUBI      = 0x51000000,
 304     I3401_SUBSI     = 0x71000000,
 305
 306     /* Bitfield instructions.  */
 307     I3402_BFM       = 0x33000000,
 308     I3402_SBFM      = 0x13000000,
 309     I3402_UBFM      = 0x53000000,
 310
 311     /* Extract instruction.  */
 312     I3403_EXTR      = 0x13800000,
 313
 314     /* Logical immediate instructions.  */
 315     I3404_ANDI      = 0x12000000,
 316     I3404_ORRI      = 0x32000000,
 317     I3404_EORI      = 0x52000000,
 318
 319     /* Move wide immediate instructions.  */
 320     I3405_MOVN      = 0x12800000,
 321     I3405_MOVZ      = 0x52800000,
 322     I3405_MOVK      = 0x72800000,
 323
 324     /* PC relative addressing instructions.  */
 325     I3406_ADR       = 0x10000000,
 326     I3406_ADRP      = 0x90000000,
 327
 328     /* Add/subtract shifted register instructions (without a shift).  */
 329     I3502_ADD       = 0x0b000000,
 330     I3502_ADDS      = 0x2b000000,
 331     I3502_SUB       = 0x4b000000,
 332     I3502_SUBS      = 0x6b000000,
 333
 334     /* Add/subtract shifted register instructions (with a shift).  */
 335     I3502S_ADD_LSL  = I3502_ADD,
 336
 337     /* Add/subtract with carry instructions.  */
 338     I3503_ADC       = 0x1a000000,
 339     I3503_SBC       = 0x5a000000,
 340
 341     /* Conditional select instructions.  */
 342     I3506_CSEL      = 0x1a800000,
 343     I3506_CSINC     = 0x1a800400,
 344
 345     /* Data-processing (1 source) instructions.  */
 346     I3507_REV16     = 0x5ac00400,
 347     I3507_REV32     = 0x5ac00800,
 348     I3507_REV64     = 0x5ac00c00,
 349
 350     /* Data-processing (2 source) instructions.  */
 351     I3508_LSLV      = 0x1ac02000,
 352     I3508_LSRV      = 0x1ac02400,
 353     I3508_ASRV      = 0x1ac02800,
 354     I3508_RORV      = 0x1ac02c00,
 355     I3508_SMULH     = 0x9b407c00,
 356     I3508_UMULH     = 0x9bc07c00,
 357     I3508_UDIV      = 0x1ac00800,
 358     I3508_SDIV      = 0x1ac00c00,
 359
 360     /* Data-processing (3 source) instructions.  */
 361     I3509_MADD      = 0x1b000000,
 362     I3509_MSUB      = 0x1b008000,
 363
 364     /* Logical shifted register instructions (without a shift).  */
 365     I3510_AND       = 0x0a000000,
 366     I3510_BIC       = 0x0a200000,
 367     I3510_ORR       = 0x2a000000,
 368     I3510_ORN       = 0x2a200000,
 369     I3510_EOR       = 0x4a000000,
 370     I3510_EON       = 0x4a200000,
 371     I3510_ANDS      = 0x6a000000,
 372 } AArch64Insn;
 373
 374 static inline uint32_t tcg_in32(TCGContext *s)
 375 {
 376     uint32_t v = *(uint32_t *)s->code_ptr;
 377     return v;
 378 }
 379
 380 /* Emit an opcode with "type-checking" of the format.  */
 381 #define tcg_out_insn(S, FMT, OP, ...) \
 382     glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
 383
 384 static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
 385                               TCGReg rt, int imm19)
 386 {
 387     tcg_out32(s, insn | ext << 31 | (imm19 & 0x7ffff) << 5 | rt);
 388 }
 389
 390 static void tcg_out_insn_3202(TCGContext *s, AArch64Insn insn,
 391                               TCGCond c, int imm19)
 392 {
 393     tcg_out32(s, insn | tcg_cond_to_aarch64[c] | (imm19 & 0x7ffff) << 5);
 394 }
 395
 396 static void tcg_out_insn_3206(TCGContext *s, AArch64Insn insn, int imm26)
 397 {
 398     tcg_out32(s, insn | (imm26 & 0x03ffffff));
 399 }
 400
 401 static void tcg_out_insn_3207(TCGContext *s, AArch64Insn insn, TCGReg rn)
 402 {
 403     tcg_out32(s, insn | rn << 5);
 404 }
 405
 406 static void tcg_out_insn_3314(TCGContext *s, AArch64Insn insn,
 407                               TCGReg r1, TCGReg r2, TCGReg rn,
 408                               tcg_target_long ofs, bool pre, bool w)
 409 {
 410     insn |= 1u << 31; /* ext */
 411     insn |= pre << 24;
 412     insn |= w << 23;
 413
 414     assert(ofs >= -0x200 && ofs < 0x200 && (ofs & 7) == 0);
 415     insn |= (ofs & (0x7f << 3)) << (15 - 3);
 416
 417     tcg_out32(s, insn | r2 << 10 | rn << 5 | r1);
 418 }
 419
 420 static void tcg_out_insn_3401(TCGContext *s, AArch64Insn insn, TCGType ext,
 421                               TCGReg rd, TCGReg rn, uint64_t aimm)
 422 {
 423     if (aimm > 0xfff) {
 424         assert((aimm & 0xfff) == 0);
 425         aimm >>= 12;
 426         assert(aimm <= 0xfff);
 427         aimm |= 1 << 12;  /* apply LSL 12 */
 428     }
 429     tcg_out32(s, insn | ext << 31 | aimm << 10 | rn << 5 | rd);
 430 }
 431
 432 /* This function can be used for both 3.4.2 (Bitfield) and 3.4.4
 433    (Logical immediate).  Both insn groups have N, IMMR and IMMS fields
 434    that feed the DecodeBitMasks pseudo function.  */
 435 static void tcg_out_insn_3402(TCGContext *s, AArch64Insn insn, TCGType ext,
 436                               TCGReg rd, TCGReg rn, int n, int immr, int imms)
 437 {
 438     tcg_out32(s, insn | ext << 31 | n << 22 | immr << 16 | imms << 10
 439               | rn << 5 | rd);
 440 }
 441
 442 #define tcg_out_insn_3404  tcg_out_insn_3402
 443
 444 static void tcg_out_insn_3403(TCGContext *s, AArch64Insn insn, TCGType ext,
 445                               TCGReg rd, TCGReg rn, TCGReg rm, int imms)
 446 {
 447     tcg_out32(s, insn | ext << 31 | ext << 22 | rm << 16 | imms << 10
 448               | rn << 5 | rd);
 449 }
 450
 451 /* This function is used for the Move (wide immediate) instruction group.
 452    Note that SHIFT is a full shift count, not the 2 bit HW field. */
 453 static void tcg_out_insn_3405(TCGContext *s, AArch64Insn insn, TCGType ext,
 454                               TCGReg rd, uint16_t half, unsigned shift)
 455 {
 456     assert((shift & ~0x30) == 0);
 457     tcg_out32(s, insn | ext << 31 | shift << (21 - 4) | half << 5 | rd);
 458 }
 459
 460 static void tcg_out_insn_3406(TCGContext *s, AArch64Insn insn,
 461                               TCGReg rd, int64_t disp)
 462 {
 463     tcg_out32(s, insn | (disp & 3) << 29 | (disp & 0x1ffffc) << (5 - 2) | rd);
 464 }
 465
 466 /* This function is for both 3.5.2 (Add/Subtract shifted register), for
 467    the rare occasion when we actually want to supply a shift amount.  */
 468 static inline void tcg_out_insn_3502S(TCGContext *s, AArch64Insn insn,
 469                                       TCGType ext, TCGReg rd, TCGReg rn,
 470                                       TCGReg rm, int imm6)
 471 {
 472     tcg_out32(s, insn | ext << 31 | rm << 16 | imm6 << 10 | rn << 5 | rd);
 473 }
 474
 475 /* This function is for 3.5.2 (Add/subtract shifted register),
 476    and 3.5.10 (Logical shifted register), for the vast majorty of cases
 477    when we don't want to apply a shift.  Thus it can also be used for
 478    3.5.3 (Add/subtract with carry) and 3.5.8 (Data processing 2 source).  */
 479 static void tcg_out_insn_3502(TCGContext *s, AArch64Insn insn, TCGType ext,
 480                               TCGReg rd, TCGReg rn, TCGReg rm)
 481 {
 482     tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd);
 483 }
 484
 485 #define tcg_out_insn_3503  tcg_out_insn_3502
 486 #define tcg_out_insn_3508  tcg_out_insn_3502
 487 #define tcg_out_insn_3510  tcg_out_insn_3502
 488
 489 static void tcg_out_insn_3506(TCGContext *s, AArch64Insn insn, TCGType ext,
 490                               TCGReg rd, TCGReg rn, TCGReg rm, TCGCond c)
 491 {
 492     tcg_out32(s, insn | ext << 31 | rm << 16 | rn << 5 | rd
 493               | tcg_cond_to_aarch64[c] << 12);
 494 }
 495
 496 static void tcg_out_insn_3507(TCGContext *s, AArch64Insn insn, TCGType ext,
 497                               TCGReg rd, TCGReg rn)
 498 {
 499     tcg_out32(s, insn | ext << 31 | rn << 5 | rd);
 500 }
 501
 502 static void tcg_out_insn_3509(TCGContext *s, AArch64Insn insn, TCGType ext,
 503                               TCGReg rd, TCGReg rn, TCGReg rm, TCGReg ra)
 504 {
 505     tcg_out32(s, insn | ext << 31 | rm << 16 | ra << 10 | rn << 5 | rd);
 506 }
 507
 508 static void tcg_out_insn_3310(TCGContext *s, AArch64Insn insn,
 509                               TCGReg rd, TCGReg base, TCGReg regoff)
 510 {
 511     /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
 512     tcg_out32(s, insn | I3312_TO_I3310 | regoff << 16 | base << 5 | rd);
 513 }
 514
 515
 516 static void tcg_out_insn_3312(TCGContext *s, AArch64Insn insn,
 517                               TCGReg rd, TCGReg rn, intptr_t offset)
 518 {
 519     tcg_out32(s, insn | (offset & 0x1ff) << 12 | rn << 5 | rd);
 520 }
 521
 522 static void tcg_out_insn_3313(TCGContext *s, AArch64Insn insn,
 523                               TCGReg rd, TCGReg rn, uintptr_t scaled_uimm)
 524 {
 525     /* Note the AArch64Insn constants above are for C3.3.12.  Adjust.  */
 526     tcg_out32(s, insn | I3312_TO_I3313 | scaled_uimm << 10 | rn << 5 | rd);
 527 }
 528
 529 /* Register to register move using ORR (shifted register with no shift). */
 530 static void tcg_out_movr(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rm)
 531 {
 532     tcg_out_insn(s, 3510, ORR, ext, rd, TCG_REG_XZR, rm);
 533 }
 534
 535 /* Register to register move using ADDI (move to/from SP).  */
 536 static void tcg_out_movr_sp(TCGContext *s, TCGType ext, TCGReg rd, TCGReg rn)
 537 {
 538     tcg_out_insn(s, 3401, ADDI, ext, rd, rn, 0);
 539 }
 540
 541 /* This function is used for the Logical (immediate) instruction group.
 542    The value of LIMM must satisfy IS_LIMM.  See the comment above about
 543    only supporting simplified logical immediates.  */
 544 static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
 545                              TCGReg rd, TCGReg rn, uint64_t limm)
 546 {
 547     unsigned h, l, r, c;
 548
 549     assert(is_limm(limm));
 550
 551     h = clz64(limm);
 552     l = ctz64(limm);
 553     if (l == 0) {
 554         r = 0;                  /* form 0....01....1 */
 555         c = ctz64(~limm) - 1;
 556         if (h == 0) {
 557             r = clz64(~limm);   /* form 1..10..01..1 */
 558             c += r;
 559         }
 560     } else {
 561         r = 64 - l;             /* form 1....10....0 or 0..01..10..0 */
 562         c = r - h - 1;
 563     }
 564     if (ext == TCG_TYPE_I32) {
 565         r &= 31;
 566         c &= 31;
 567     }
 568
 569     tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
 570 }
 571
 572 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
 573                          tcg_target_long value)
 574 {
 575     AArch64Insn insn;
 576     int i, wantinv, shift;
 577     tcg_target_long svalue = value;
 578     tcg_target_long ivalue = ~value;
 579     tcg_target_long imask;
 580
 581     /* For 32-bit values, discard potential garbage in value.  For 64-bit
 582        values within [2**31, 2**32-1], we can create smaller sequences by
 583        interpreting this as a negative 32-bit number, while ensuring that
 584        the high 32 bits are cleared by setting SF=0.  */
 585     if (type == TCG_TYPE_I32 || (value & ~0xffffffffull) == 0) {
 586         svalue = (int32_t)value;
 587         value = (uint32_t)value;
 588         ivalue = (uint32_t)ivalue;
 589         type = TCG_TYPE_I32;
 590     }
 591
 592     /* Speed things up by handling the common case of small positive
 593        and negative values specially.  */
 594     if ((value & ~0xffffull) == 0) {
 595         tcg_out_insn(s, 3405, MOVZ, type, rd, value, 0);
 596         return;
 597     } else if ((ivalue & ~0xffffull) == 0) {
 598         tcg_out_insn(s, 3405, MOVN, type, rd, ivalue, 0);
 599         return;
 600     }
 601
 602     /* Check for bitfield immediates.  For the benefit of 32-bit quantities,
 603        use the sign-extended value.  That lets us match rotated values such
 604        as 0xff0000ff with the same 64-bit logic matching 0xffffffffff0000ff. */
 605     if (is_limm(svalue)) {
 606         tcg_out_logicali(s, I3404_ORRI, type, rd, TCG_REG_XZR, svalue);
 607         return;
 608     }
 609
 610     /* Look for host pointer values within 4G of the PC.  This happens
 611        often when loading pointers to QEMU's own data structures.  */
 612     if (type == TCG_TYPE_I64) {
 613         tcg_target_long disp = (value >> 12) - ((intptr_t)s->code_ptr >> 12);
 614         if (disp == sextract64(disp, 0, 21)) {
 615             tcg_out_insn(s, 3406, ADRP, rd, disp);
 616             if (value & 0xfff) {
 617                 tcg_out_insn(s, 3401, ADDI, type, rd, rd, value & 0xfff);
 618             }
 619             return;
 620         }
 621     }
 622
 623     /* Would it take fewer insns to begin with MOVN?  For the value and its
 624        inverse, count the number of 16-bit lanes that are 0.  */
 625     for (i = wantinv = imask = 0; i < 64; i += 16) {
 626         tcg_target_long mask = 0xffffull << i;
 627         if ((value & mask) == 0) {
 628             wantinv -= 1;
 629         }
 630         if ((ivalue & mask) == 0) {
 631             wantinv += 1;
 632             imask |= mask;
 633         }
 634     }
 635
 636     /* If we had more 0xffff than 0x0000, invert VALUE and use MOVN.  */
 637     insn = I3405_MOVZ;
 638     if (wantinv > 0) {
 639         value = ivalue;
 640         insn = I3405_MOVN;
 641     }
 642
 643     /* Find the lowest lane that is not 0x0000.  */
 644     shift = ctz64(value) & (63 & -16);
 645     tcg_out_insn_3405(s, insn, type, rd, value >> shift, shift);
 646
 647     if (wantinv > 0) {
 648         /* Re-invert the value, so MOVK sees non-inverted bits.  */
 649         value = ~value;
 650         /* Clear out all the 0xffff lanes.  */
 651         value ^= imask;
 652     }
 653     /* Clear out the lane that we just set.  */
 654     value &= ~(0xffffUL << shift);
 655
 656     /* Iterate until all lanes have been set, and thus cleared from VALUE.  */
 657     while (value) {
 658         shift = ctz64(value) & (63 & -16);
 659         tcg_out_insn(s, 3405, MOVK, type, rd, value >> shift, shift);
 660         value &= ~(0xffffUL << shift);
 661     }
 662 }
 663
 664 /* Define something more legible for general use.  */
 665 #define tcg_out_ldst_r  tcg_out_insn_3310
 666
 667 static void tcg_out_ldst(TCGContext *s, AArch64Insn insn,
 668                          TCGReg rd, TCGReg rn, intptr_t offset)
 669 {
 670     TCGMemOp size = (uint32_t)insn >> 30;
 671
 672     /* If the offset is naturally aligned and in range, then we can
 673        use the scaled uimm12 encoding */
 674     if (offset >= 0 && !(offset & ((1 << size) - 1))) {
 675         uintptr_t scaled_uimm = offset >> size;
 676         if (scaled_uimm <= 0xfff) {
 677             tcg_out_insn_3313(s, insn, rd, rn, scaled_uimm);
 678             return;
 679         }
 680     }
 681
 682     /* Small signed offsets can use the unscaled encoding.  */
 683     if (offset >= -256 && offset < 256) {
 684         tcg_out_insn_3312(s, insn, rd, rn, offset);
 685         return;
 686     }
 687
 688     /* Worst-case scenario, move offset to temp register, use reg offset.  */
 689     tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, offset);
 690     tcg_out_ldst_r(s, insn, rd, rn, TCG_REG_TMP);
 691 }
 692
 693 static inline void tcg_out_mov(TCGContext *s,
 694                                TCGType type, TCGReg ret, TCGReg arg)
 695 {
 696     if (ret != arg) {
 697         tcg_out_movr(s, type, ret, arg);
 698     }
 699 }
 700
 701 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
 702                               TCGReg arg1, intptr_t arg2)
 703 {
 704     tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_LDRW : I3312_LDRX,
 705                  arg, arg1, arg2);
 706 }
 707
 708 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
 709                               TCGReg arg1, intptr_t arg2)
 710 {
 711     tcg_out_ldst(s, type == TCG_TYPE_I32 ? I3312_STRW : I3312_STRX,
 712                  arg, arg1, arg2);
 713 }
 714
 715 static inline void tcg_out_bfm(TCGContext *s, TCGType ext, TCGReg rd,
 716                                TCGReg rn, unsigned int a, unsigned int b)
 717 {
 718     tcg_out_insn(s, 3402, BFM, ext, rd, rn, ext, a, b);
 719 }
 720
 721 static inline void tcg_out_ubfm(TCGContext *s, TCGType ext, TCGReg rd,
 722                                 TCGReg rn, unsigned int a, unsigned int b)
 723 {
 724     tcg_out_insn(s, 3402, UBFM, ext, rd, rn, ext, a, b);
 725 }
 726
 727 static inline void tcg_out_sbfm(TCGContext *s, TCGType ext, TCGReg rd,
 728                                 TCGReg rn, unsigned int a, unsigned int b)
 729 {
 730     tcg_out_insn(s, 3402, SBFM, ext, rd, rn, ext, a, b);
 731 }
 732
 733 static inline void tcg_out_extr(TCGContext *s, TCGType ext, TCGReg rd,
 734                                 TCGReg rn, TCGReg rm, unsigned int a)
 735 {
 736     tcg_out_insn(s, 3403, EXTR, ext, rd, rn, rm, a);
 737 }
 738
 739 static inline void tcg_out_shl(TCGContext *s, TCGType ext,
 740                                TCGReg rd, TCGReg rn, unsigned int m)
 741 {
 742     int bits = ext ? 64 : 32;
 743     int max = bits - 1;
 744     tcg_out_ubfm(s, ext, rd, rn, bits - (m & max), max - (m & max));
 745 }
 746
 747 static inline void tcg_out_shr(TCGContext *s, TCGType ext,
 748                                TCGReg rd, TCGReg rn, unsigned int m)
 749 {
 750     int max = ext ? 63 : 31;
 751     tcg_out_ubfm(s, ext, rd, rn, m & max, max);
 752 }
 753
 754 static inline void tcg_out_sar(TCGContext *s, TCGType ext,
 755                                TCGReg rd, TCGReg rn, unsigned int m)
 756 {
 757     int max = ext ? 63 : 31;
 758     tcg_out_sbfm(s, ext, rd, rn, m & max, max);
 759 }
 760
 761 static inline void tcg_out_rotr(TCGContext *s, TCGType ext,
 762                                 TCGReg rd, TCGReg rn, unsigned int m)
 763 {
 764     int max = ext ? 63 : 31;
 765     tcg_out_extr(s, ext, rd, rn, rn, m & max);
 766 }
 767
 768 static inline void tcg_out_rotl(TCGContext *s, TCGType ext,
 769                                 TCGReg rd, TCGReg rn, unsigned int m)
 770 {
 771     int bits = ext ? 64 : 32;
 772     int max = bits - 1;
 773     tcg_out_extr(s, ext, rd, rn, rn, bits - (m & max));
 774 }
 775
 776 static inline void tcg_out_dep(TCGContext *s, TCGType ext, TCGReg rd,
 777                                TCGReg rn, unsigned lsb, unsigned width)
 778 {
 779     unsigned size = ext ? 64 : 32;
 780     unsigned a = (size - lsb) & (size - 1);
 781     unsigned b = width - 1;
 782     tcg_out_bfm(s, ext, rd, rn, a, b);
 783 }
 784
 785 static void tcg_out_cmp(TCGContext *s, TCGType ext, TCGReg a,
 786                         tcg_target_long b, bool const_b)
 787 {
 788     if (const_b) {
 789         /* Using CMP or CMN aliases.  */
 790         if (b >= 0) {
 791             tcg_out_insn(s, 3401, SUBSI, ext, TCG_REG_XZR, a, b);
 792         } else {
 793             tcg_out_insn(s, 3401, ADDSI, ext, TCG_REG_XZR, a, -b);
 794         }
 795     } else {
 796         /* Using CMP alias SUBS wzr, Wn, Wm */
 797         tcg_out_insn(s, 3502, SUBS, ext, TCG_REG_XZR, a, b);
 798     }
 799 }
 800
 801 static inline void tcg_out_goto(TCGContext *s, intptr_t target)
 802 {
 803     intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
 804
 805     if (offset < -0x02000000 || offset >= 0x02000000) {
 806         /* out of 26bit range */
 807         tcg_abort();
 808     }
 809
 810     tcg_out_insn(s, 3206, B, offset);
 811 }
 812
 813 static inline void tcg_out_goto_noaddr(TCGContext *s)
 814 {
 815     /* We pay attention here to not modify the branch target by reading from
 816        the buffer. This ensure that caches and memory are kept coherent during
 817        retranslation.  Mask away possible garbage in the high bits for the
 818        first translation, while keeping the offset bits for retranslation. */
 819     uint32_t old = tcg_in32(s);
 820     tcg_out_insn(s, 3206, B, old);
 821 }
 822
 823 static inline void tcg_out_goto_cond_noaddr(TCGContext *s, TCGCond c)
 824 {
 825     /* See comments in tcg_out_goto_noaddr.  */
 826     uint32_t old = tcg_in32(s) >> 5;
 827     tcg_out_insn(s, 3202, B_C, c, old);
 828 }
 829
 830 static inline void tcg_out_callr(TCGContext *s, TCGReg reg)
 831 {
 832     tcg_out_insn(s, 3207, BLR, reg);
 833 }
 834
 835 static inline void tcg_out_call(TCGContext *s, intptr_t target)
 836 {
 837     intptr_t offset = (target - (intptr_t)s->code_ptr) / 4;
 838
 839     if (offset < -0x02000000 || offset >= 0x02000000) { /* out of 26bit rng */
 840         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, target);
 841         tcg_out_callr(s, TCG_REG_TMP);
 842     } else {
 843         tcg_out_insn(s, 3206, BL, offset);
 844     }
 845 }
 846
 847 void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
 848 {
 849     intptr_t target = addr;
 850     intptr_t offset = (target - (intptr_t)jmp_addr) / 4;
 851
 852     if (offset < -0x02000000 || offset >= 0x02000000) {
 853         /* out of 26bit range */
 854         tcg_abort();
 855     }
 856
 857     patch_reloc((uint8_t *)jmp_addr, R_AARCH64_JUMP26, target, 0);
 858     flush_icache_range(jmp_addr, jmp_addr + 4);
 859 }
 860
 861 static inline void tcg_out_goto_label(TCGContext *s, int label_index)
 862 {
 863     TCGLabel *l = &s->labels[label_index];
 864
 865     if (!l->has_value) {
 866         tcg_out_reloc(s, s->code_ptr, R_AARCH64_JUMP26, label_index, 0);
 867         tcg_out_goto_noaddr(s);
 868     } else {
 869         tcg_out_goto(s, l->u.value);
 870     }
 871 }
 872
 873 static void tcg_out_brcond(TCGContext *s, TCGMemOp ext, TCGCond c, TCGArg a,
 874                            TCGArg b, bool b_const, int label)
 875 {
 876     TCGLabel *l = &s->labels[label];
 877     intptr_t offset;
 878     bool need_cmp;
 879
 880     if (b_const && b == 0 && (c == TCG_COND_EQ || c == TCG_COND_NE)) {
 881         need_cmp = false;
 882     } else {
 883         need_cmp = true;
 884         tcg_out_cmp(s, ext, a, b, b_const);
 885     }
 886
 887     if (!l->has_value) {
 888         tcg_out_reloc(s, s->code_ptr, R_AARCH64_CONDBR19, label, 0);
 889         offset = tcg_in32(s) >> 5;
 890     } else {
 891         offset = l->u.value - (uintptr_t)s->code_ptr;
 892         offset >>= 2;
 893         assert(offset >= -0x40000 && offset < 0x40000);
 894     }
 895
 896     if (need_cmp) {
 897         tcg_out_insn(s, 3202, B_C, c, offset);
 898     } else if (c == TCG_COND_EQ) {
 899         tcg_out_insn(s, 3201, CBZ, ext, a, offset);
 900     } else {
 901         tcg_out_insn(s, 3201, CBNZ, ext, a, offset);
 902     }
 903 }
 904
 905 static inline void tcg_out_rev64(TCGContext *s, TCGReg rd, TCGReg rn)
 906 {
 907     tcg_out_insn(s, 3507, REV64, TCG_TYPE_I64, rd, rn);
 908 }
 909
 910 static inline void tcg_out_rev32(TCGContext *s, TCGReg rd, TCGReg rn)
 911 {
 912     tcg_out_insn(s, 3507, REV32, TCG_TYPE_I32, rd, rn);
 913 }
 914
 915 static inline void tcg_out_rev16(TCGContext *s, TCGReg rd, TCGReg rn)
 916 {
 917     tcg_out_insn(s, 3507, REV16, TCG_TYPE_I32, rd, rn);
 918 }
 919
 920 static inline void tcg_out_sxt(TCGContext *s, TCGType ext, TCGMemOp s_bits,
 921                                TCGReg rd, TCGReg rn)
 922 {
 923     /* Using ALIASes SXTB, SXTH, SXTW, of SBFM Xd, Xn, #0, #7|15|31 */
 924     int bits = (8 << s_bits) - 1;
 925     tcg_out_sbfm(s, ext, rd, rn, 0, bits);
 926 }
 927
 928 static inline void tcg_out_uxt(TCGContext *s, TCGMemOp s_bits,
 929                                TCGReg rd, TCGReg rn)
 930 {
 931     /* Using ALIASes UXTB, UXTH of UBFM Wd, Wn, #0, #7|15 */
 932     int bits = (8 << s_bits) - 1;
 933     tcg_out_ubfm(s, 0, rd, rn, 0, bits);
 934 }
 935
 936 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
 937                             TCGReg rn, int64_t aimm)
 938 {
 939     if (aimm >= 0) {
 940         tcg_out_insn(s, 3401, ADDI, ext, rd, rn, aimm);
 941     } else {
 942         tcg_out_insn(s, 3401, SUBI, ext, rd, rn, -aimm);
 943     }
 944 }
 945
 946 static inline void tcg_out_addsub2(TCGContext *s, int ext, TCGReg rl,
 947                                    TCGReg rh, TCGReg al, TCGReg ah,
 948                                    tcg_target_long bl, tcg_target_long bh,
 949                                    bool const_bl, bool const_bh, bool sub)
 950 {
 951     TCGReg orig_rl = rl;
 952     AArch64Insn insn;
 953
 954     if (rl == ah || (!const_bh && rl == bh)) {
 955         rl = TCG_REG_TMP;
 956     }
 957
 958     if (const_bl) {
 959         insn = I3401_ADDSI;
 960         if ((bl < 0) ^ sub) {
 961             insn = I3401_SUBSI;
 962             bl = -bl;
 963         }
 964         tcg_out_insn_3401(s, insn, ext, rl, al, bl);
 965     } else {
 966         tcg_out_insn_3502(s, sub ? I3502_SUBS : I3502_ADDS, ext, rl, al, bl);
 967     }
 968
 969     insn = I3503_ADC;
 970     if (const_bh) {
 971         /* Note that the only two constants we support are 0 and -1, and
 972            that SBC = rn + ~rm + c, so adc -1 is sbc 0, and vice-versa.  */
 973         if ((bh != 0) ^ sub) {
 974             insn = I3503_SBC;
 975         }
 976         bh = TCG_REG_XZR;
 977     } else if (sub) {
 978         insn = I3503_SBC;
 979     }
 980     tcg_out_insn_3503(s, insn, ext, rh, ah, bh);
 981
 982     tcg_out_mov(s, ext, orig_rl, rl);
 983 }
 984
 985 #ifdef CONFIG_SOFTMMU
 986 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
 987  *                                     int mmu_idx, uintptr_t ra)
 988  */
 989 static const void * const qemu_ld_helpers[16] = {
 990     [MO_UB]   = helper_ret_ldub_mmu,
 991     [MO_LEUW] = helper_le_lduw_mmu,
 992     [MO_LEUL] = helper_le_ldul_mmu,
 993     [MO_LEQ]  = helper_le_ldq_mmu,
 994     [MO_BEUW] = helper_be_lduw_mmu,
 995     [MO_BEUL] = helper_be_ldul_mmu,
 996     [MO_BEQ]  = helper_be_ldq_mmu,
 997 };
 998
 999 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
1000  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
1001  */
1002 static const void * const qemu_st_helpers[16] = {
1003     [MO_UB]   = helper_ret_stb_mmu,
1004     [MO_LEUW] = helper_le_stw_mmu,
1005     [MO_LEUL] = helper_le_stl_mmu,
1006     [MO_LEQ]  = helper_le_stq_mmu,
1007     [MO_BEUW] = helper_be_stw_mmu,
1008     [MO_BEUL] = helper_be_stl_mmu,
1009     [MO_BEQ]  = helper_be_stq_mmu,
1010 };
1011
1012 static inline void tcg_out_adr(TCGContext *s, TCGReg rd, uintptr_t addr)
1013 {
1014     addr -= (uintptr_t)s->code_ptr;
1015     assert(addr == sextract64(addr, 0, 21));
1016     tcg_out_insn(s, 3406, ADR, rd, addr);
1017 }
1018
1019 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1020 {
1021     TCGMemOp opc = lb->opc;
1022     TCGMemOp size = opc & MO_SIZE;
1023
1024     reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1025
1026     tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1027     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1028     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, lb->mem_index);
1029     tcg_out_adr(s, TCG_REG_X3, (intptr_t)lb->raddr);
1030     tcg_out_call(s, (intptr_t)qemu_ld_helpers[opc & ~MO_SIGN]);
1031     if (opc & MO_SIGN) {
1032         tcg_out_sxt(s, TCG_TYPE_I64, size, lb->datalo_reg, TCG_REG_X0);
1033     } else {
1034         tcg_out_mov(s, size == MO_64, lb->datalo_reg, TCG_REG_X0);
1035     }
1036
1037     tcg_out_goto(s, (intptr_t)lb->raddr);
1038 }
1039
1040 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1041 {
1042     TCGMemOp opc = lb->opc;
1043     TCGMemOp size = opc & MO_SIZE;
1044
1045     reloc_pc19(lb->label_ptr[0], (intptr_t)s->code_ptr);
1046
1047     tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_X0, TCG_AREG0);
1048     tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
1049     tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
1050     tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, lb->mem_index);
1051     tcg_out_adr(s, TCG_REG_X4, (intptr_t)lb->raddr);
1052     tcg_out_call(s, (intptr_t)qemu_st_helpers[opc]);
1053     tcg_out_goto(s, (intptr_t)lb->raddr);
1054 }
1055
1056 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
1057                                 TCGReg data_reg, TCGReg addr_reg,
1058                                 int mem_index,
1059                                 uint8_t *raddr, uint8_t *label_ptr)
1060 {
1061     TCGLabelQemuLdst *label = new_ldst_label(s);
1062
1063     label->is_ld = is_ld;
1064     label->opc = opc;
1065     label->datalo_reg = data_reg;
1066     label->addrlo_reg = addr_reg;
1067     label->mem_index = mem_index;
1068     label->raddr = raddr;
1069     label->label_ptr[0] = label_ptr;
1070 }
1071
1072 /* Load and compare a TLB entry, emitting the conditional jump to the
1073    slow path for the failure case, which will be patched later when finalizing
1074    the slow path. Generated code returns the host addend in X1,
1075    clobbers X0,X2,X3,TMP. */
1076 static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp s_bits,
1077                              uint8_t **label_ptr, int mem_index, bool is_read)
1078 {
1079     TCGReg base = TCG_AREG0;
1080     int tlb_offset = is_read ?
1081         offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
1082         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
1083
1084     /* Extract the TLB index from the address into X0.
1085        X0<CPU_TLB_BITS:0> =
1086        addr_reg<TARGET_PAGE_BITS+CPU_TLB_BITS:TARGET_PAGE_BITS> */
1087     tcg_out_ubfm(s, TARGET_LONG_BITS == 64, TCG_REG_X0, addr_reg,
1088                  TARGET_PAGE_BITS, TARGET_PAGE_BITS + CPU_TLB_BITS);
1089
1090     /* Store the page mask part of the address and the low s_bits into X3.
1091        Later this allows checking for equality and alignment at the same time.
1092        X3 = addr_reg & (PAGE_MASK | ((1 << s_bits) - 1)) */
1093     tcg_out_logicali(s, I3404_ANDI, TARGET_LONG_BITS == 64, TCG_REG_X3,
1094                      addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
1095
1096     /* Add any "high bits" from the tlb offset to the env address into X2,
1097        to take advantage of the LSL12 form of the ADDI instruction.
1098        X2 = env + (tlb_offset & 0xfff000) */
1099     if (tlb_offset & 0xfff000) {
1100         tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_X2, base,
1101                      tlb_offset & 0xfff000);
1102         base = TCG_REG_X2;
1103     }
1104
1105     /* Merge the tlb index contribution into X2.
1106        X2 = X2 + (X0 << CPU_TLB_ENTRY_BITS) */
1107     tcg_out_insn(s, 3502S, ADD_LSL, TCG_TYPE_I64, TCG_REG_X2, base,
1108                  TCG_REG_X0, CPU_TLB_ENTRY_BITS);
1109
1110     /* Merge "low bits" from tlb offset, load the tlb comparator into X0.
1111        X0 = load [X2 + (tlb_offset & 0x000fff)] */
1112     tcg_out_ldst(s, TARGET_LONG_BITS == 32 ? I3312_LDRW : I3312_LDRX,
1113                  TCG_REG_X0, TCG_REG_X2, tlb_offset & 0xfff);
1114
1115     /* Load the tlb addend. Do that early to avoid stalling.
1116        X1 = load [X2 + (tlb_offset & 0xfff) + offsetof(addend)] */
1117     tcg_out_ldst(s, I3312_LDRX, TCG_REG_X1, TCG_REG_X2,
1118                  (tlb_offset & 0xfff) + (offsetof(CPUTLBEntry, addend)) -
1119                  (is_read ? offsetof(CPUTLBEntry, addr_read)
1120                   : offsetof(CPUTLBEntry, addr_write)));
1121
1122     /* Perform the address comparison. */
1123     tcg_out_cmp(s, (TARGET_LONG_BITS == 64), TCG_REG_X0, TCG_REG_X3, 0);
1124
1125     /* If not equal, we jump to the slow path. */
1126     *label_ptr = s->code_ptr;
1127     tcg_out_goto_cond_noaddr(s, TCG_COND_NE);
1128 }
1129
1130 #endif /* CONFIG_SOFTMMU */
1131
1132 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp memop,
1133                                    TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1134 {
1135     const TCGMemOp bswap = memop & MO_BSWAP;
1136
1137     switch (memop & MO_SSIZE) {
1138     case MO_UB:
1139         tcg_out_ldst_r(s, I3312_LDRB, data_r, addr_r, off_r);
1140         break;
1141     case MO_SB:
1142         tcg_out_ldst_r(s, I3312_LDRSBX, data_r, addr_r, off_r);
1143         break;
1144     case MO_UW:
1145         tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r);
1146         if (bswap) {
1147             tcg_out_rev16(s, data_r, data_r);
1148         }
1149         break;
1150     case MO_SW:
1151         if (bswap) {
1152             tcg_out_ldst_r(s, I3312_LDRH, data_r, addr_r, off_r);
1153             tcg_out_rev16(s, data_r, data_r);
1154             tcg_out_sxt(s, TCG_TYPE_I64, MO_16, data_r, data_r);
1155         } else {
1156             tcg_out_ldst_r(s, I3312_LDRSHX, data_r, addr_r, off_r);
1157         }
1158         break;
1159     case MO_UL:
1160         tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r);
1161         if (bswap) {
1162             tcg_out_rev32(s, data_r, data_r);
1163         }
1164         break;
1165     case MO_SL:
1166         if (bswap) {
1167             tcg_out_ldst_r(s, I3312_LDRW, data_r, addr_r, off_r);
1168             tcg_out_rev32(s, data_r, data_r);
1169             tcg_out_sxt(s, TCG_TYPE_I64, MO_32, data_r, data_r);
1170         } else {
1171             tcg_out_ldst_r(s, I3312_LDRSWX, data_r, addr_r, off_r);
1172         }
1173         break;
1174     case MO_Q:
1175         tcg_out_ldst_r(s, I3312_LDRX, data_r, addr_r, off_r);
1176         if (bswap) {
1177             tcg_out_rev64(s, data_r, data_r);
1178         }
1179         break;
1180     default:
1181         tcg_abort();
1182     }
1183 }
1184
1185 static void tcg_out_qemu_st_direct(TCGContext *s, TCGMemOp memop,
1186                                    TCGReg data_r, TCGReg addr_r, TCGReg off_r)
1187 {
1188     const TCGMemOp bswap = memop & MO_BSWAP;
1189
1190     switch (memop & MO_SIZE) {
1191     case MO_8:
1192         tcg_out_ldst_r(s, I3312_STRB, data_r, addr_r, off_r);
1193         break;
1194     case MO_16:
1195         if (bswap && data_r != TCG_REG_XZR) {
1196             tcg_out_rev16(s, TCG_REG_TMP, data_r);
1197             data_r = TCG_REG_TMP;
1198         }
1199         tcg_out_ldst_r(s, I3312_STRH, data_r, addr_r, off_r);
1200         break;
1201     case MO_32:
1202         if (bswap && data_r != TCG_REG_XZR) {
1203             tcg_out_rev32(s, TCG_REG_TMP, data_r);
1204             data_r = TCG_REG_TMP;
1205         }
1206         tcg_out_ldst_r(s, I3312_STRW, data_r, addr_r, off_r);
1207         break;
1208     case MO_64:
1209         if (bswap && data_r != TCG_REG_XZR) {
1210             tcg_out_rev64(s, TCG_REG_TMP, data_r);
1211             data_r = TCG_REG_TMP;
1212         }
1213         tcg_out_ldst_r(s, I3312_STRX, data_r, addr_r, off_r);
1214         break;
1215     default:
1216         tcg_abort();
1217     }
1218 }
1219
1220 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1221                             TCGMemOp memop, int mem_index)
1222 {
1223 #ifdef CONFIG_SOFTMMU
1224     TCGMemOp s_bits = memop & MO_SIZE;
1225     uint8_t *label_ptr;
1226
1227     tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 1);
1228     tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1229     add_qemu_ldst_label(s, 1, memop, data_reg, addr_reg,
1230                         mem_index, s->code_ptr, label_ptr);
1231 #else /* !CONFIG_SOFTMMU */
1232     tcg_out_qemu_ld_direct(s, memop, data_reg, addr_reg,
1233                            GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1234 #endif /* CONFIG_SOFTMMU */
1235 }
1236
1237 static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
1238                             TCGMemOp memop, int mem_index)
1239 {
1240 #ifdef CONFIG_SOFTMMU
1241     TCGMemOp s_bits = memop & MO_SIZE;
1242     uint8_t *label_ptr;
1243
1244     tcg_out_tlb_read(s, addr_reg, s_bits, &label_ptr, mem_index, 0);
1245     tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg, TCG_REG_X1);
1246     add_qemu_ldst_label(s, 0, memop, data_reg, addr_reg,
1247                         mem_index, s->code_ptr, label_ptr);
1248 #else /* !CONFIG_SOFTMMU */
1249     tcg_out_qemu_st_direct(s, memop, data_reg, addr_reg,
1250                            GUEST_BASE ? TCG_REG_GUEST_BASE : TCG_REG_XZR);
1251 #endif /* CONFIG_SOFTMMU */
1252 }
1253
1254 static uint8_t *tb_ret_addr;
1255
1256 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
1257                        const TCGArg args[TCG_MAX_OP_ARGS],
1258                        const int const_args[TCG_MAX_OP_ARGS])
1259 {
1260     /* 99% of the time, we can signal the use of extension registers
1261        by looking to see if the opcode handles 64-bit data.  */
1262     TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
1263
1264     /* Hoist the loads of the most common arguments.  */
1265     TCGArg a0 = args[0];
1266     TCGArg a1 = args[1];
1267     TCGArg a2 = args[2];
1268     int c2 = const_args[2];
1269
1270     /* Some operands are defined with "rZ" constraint, a register or
1271        the zero register.  These need not actually test args[I] == 0.  */
1272 #define REG0(I)  (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I])
1273
1274     switch (opc) {
1275     case INDEX_op_exit_tb:
1276         tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
1277         tcg_out_goto(s, (intptr_t)tb_ret_addr);
1278         break;
1279
1280     case INDEX_op_goto_tb:
1281 #ifndef USE_DIRECT_JUMP
1282 #error "USE_DIRECT_JUMP required for aarch64"
1283 #endif
1284         assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
1285         s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
1286         /* actual branch destination will be patched by
1287            aarch64_tb_set_jmp_target later, beware retranslation. */
1288         tcg_out_goto_noaddr(s);
1289         s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
1290         break;
1291
1292     case INDEX_op_call:
1293         if (const_args[0]) {
1294             tcg_out_call(s, a0);
1295         } else {
1296             tcg_out_callr(s, a0);
1297         }
1298         break;
1299
1300     case INDEX_op_br:
1301         tcg_out_goto_label(s, a0);
1302         break;
1303
1304     case INDEX_op_ld8u_i32:
1305     case INDEX_op_ld8u_i64:
1306         tcg_out_ldst(s, I3312_LDRB, a0, a1, a2);
1307         break;
1308     case INDEX_op_ld8s_i32:
1309         tcg_out_ldst(s, I3312_LDRSBW, a0, a1, a2);
1310         break;
1311     case INDEX_op_ld8s_i64:
1312         tcg_out_ldst(s, I3312_LDRSBX, a0, a1, a2);
1313         break;
1314     case INDEX_op_ld16u_i32:
1315     case INDEX_op_ld16u_i64:
1316         tcg_out_ldst(s, I3312_LDRH, a0, a1, a2);
1317         break;
1318     case INDEX_op_ld16s_i32:
1319         tcg_out_ldst(s, I3312_LDRSHW, a0, a1, a2);
1320         break;
1321     case INDEX_op_ld16s_i64:
1322         tcg_out_ldst(s, I3312_LDRSHX, a0, a1, a2);
1323         break;
1324     case INDEX_op_ld_i32:
1325     case INDEX_op_ld32u_i64:
1326         tcg_out_ldst(s, I3312_LDRW, a0, a1, a2);
1327         break;
1328     case INDEX_op_ld32s_i64:
1329         tcg_out_ldst(s, I3312_LDRSWX, a0, a1, a2);
1330         break;
1331     case INDEX_op_ld_i64:
1332         tcg_out_ldst(s, I3312_LDRX, a0, a1, a2);
1333         break;
1334
1335     case INDEX_op_st8_i32:
1336     case INDEX_op_st8_i64:
1337         tcg_out_ldst(s, I3312_STRB, REG0(0), a1, a2);
1338         break;
1339     case INDEX_op_st16_i32:
1340     case INDEX_op_st16_i64:
1341         tcg_out_ldst(s, I3312_STRH, REG0(0), a1, a2);
1342         break;
1343     case INDEX_op_st_i32:
1344     case INDEX_op_st32_i64:
1345         tcg_out_ldst(s, I3312_STRW, REG0(0), a1, a2);
1346         break;
1347     case INDEX_op_st_i64:
1348         tcg_out_ldst(s, I3312_STRX, REG0(0), a1, a2);
1349         break;
1350
1351     case INDEX_op_add_i32:
1352         a2 = (int32_t)a2;
1353         /* FALLTHRU */
1354     case INDEX_op_add_i64:
1355         if (c2) {
1356             tcg_out_addsubi(s, ext, a0, a1, a2);
1357         } else {
1358             tcg_out_insn(s, 3502, ADD, ext, a0, a1, a2);
1359         }
1360         break;
1361
1362     case INDEX_op_sub_i32:
1363         a2 = (int32_t)a2;
1364         /* FALLTHRU */
1365     case INDEX_op_sub_i64:
1366         if (c2) {
1367             tcg_out_addsubi(s, ext, a0, a1, -a2);
1368         } else {
1369             tcg_out_insn(s, 3502, SUB, ext, a0, a1, a2);
1370         }
1371         break;
1372
1373     case INDEX_op_neg_i64:
1374     case INDEX_op_neg_i32:
1375         tcg_out_insn(s, 3502, SUB, ext, a0, TCG_REG_XZR, a1);
1376         break;
1377
1378     case INDEX_op_and_i32:
1379         a2 = (int32_t)a2;
1380         /* FALLTHRU */
1381     case INDEX_op_and_i64:
1382         if (c2) {
1383             tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, a2);
1384         } else {
1385             tcg_out_insn(s, 3510, AND, ext, a0, a1, a2);
1386         }
1387         break;
1388
1389     case INDEX_op_andc_i32:
1390         a2 = (int32_t)a2;
1391         /* FALLTHRU */
1392     case INDEX_op_andc_i64:
1393         if (c2) {
1394             tcg_out_logicali(s, I3404_ANDI, ext, a0, a1, ~a2);
1395         } else {
1396             tcg_out_insn(s, 3510, BIC, ext, a0, a1, a2);
1397         }
1398         break;
1399
1400     case INDEX_op_or_i32:
1401         a2 = (int32_t)a2;
1402         /* FALLTHRU */
1403     case INDEX_op_or_i64:
1404         if (c2) {
1405             tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, a2);
1406         } else {
1407             tcg_out_insn(s, 3510, ORR, ext, a0, a1, a2);
1408         }
1409         break;
1410
1411     case INDEX_op_orc_i32:
1412         a2 = (int32_t)a2;
1413         /* FALLTHRU */
1414     case INDEX_op_orc_i64:
1415         if (c2) {
1416             tcg_out_logicali(s, I3404_ORRI, ext, a0, a1, ~a2);
1417         } else {
1418             tcg_out_insn(s, 3510, ORN, ext, a0, a1, a2);
1419         }
1420         break;
1421
1422     case INDEX_op_xor_i32:
1423         a2 = (int32_t)a2;
1424         /* FALLTHRU */
1425     case INDEX_op_xor_i64:
1426         if (c2) {
1427             tcg_out_logicali(s, I3404_EORI, ext, a0, a1, a2);
1428         } else {
1429             tcg_out_insn(s, 3510, EOR, ext, a0, a1, a2);
1430         }
1431         break;
1432
1433     case INDEX_op_eqv_i32:
1434         a2 = (int32_t)a2;
1435         /* FALLTHRU */
1436     case INDEX_op_eqv_i64:
1437         if (c2) {
1438             tcg_out_logicali(s, I3404_EORI, ext, a0, a1, ~a2);
1439         } else {
1440             tcg_out_insn(s, 3510, EON, ext, a0, a1, a2);
1441         }
1442         break;
1443
1444     case INDEX_op_not_i64:
1445     case INDEX_op_not_i32:
1446         tcg_out_insn(s, 3510, ORN, ext, a0, TCG_REG_XZR, a1);
1447         break;
1448
1449     case INDEX_op_mul_i64:
1450     case INDEX_op_mul_i32:
1451         tcg_out_insn(s, 3509, MADD, ext, a0, a1, a2, TCG_REG_XZR);
1452         break;
1453
1454     case INDEX_op_div_i64:
1455     case INDEX_op_div_i32:
1456         tcg_out_insn(s, 3508, SDIV, ext, a0, a1, a2);
1457         break;
1458     case INDEX_op_divu_i64:
1459     case INDEX_op_divu_i32:
1460         tcg_out_insn(s, 3508, UDIV, ext, a0, a1, a2);
1461         break;
1462
1463     case INDEX_op_rem_i64:
1464     case INDEX_op_rem_i32:
1465         tcg_out_insn(s, 3508, SDIV, ext, TCG_REG_TMP, a1, a2);
1466         tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1467         break;
1468     case INDEX_op_remu_i64:
1469     case INDEX_op_remu_i32:
1470         tcg_out_insn(s, 3508, UDIV, ext, TCG_REG_TMP, a1, a2);
1471         tcg_out_insn(s, 3509, MSUB, ext, a0, TCG_REG_TMP, a2, a1);
1472         break;
1473
1474     case INDEX_op_shl_i64:
1475     case INDEX_op_shl_i32:
1476         if (c2) {
1477             tcg_out_shl(s, ext, a0, a1, a2);
1478         } else {
1479             tcg_out_insn(s, 3508, LSLV, ext, a0, a1, a2);
1480         }
1481         break;
1482
1483     case INDEX_op_shr_i64:
1484     case INDEX_op_shr_i32:
1485         if (c2) {
1486             tcg_out_shr(s, ext, a0, a1, a2);
1487         } else {
1488             tcg_out_insn(s, 3508, LSRV, ext, a0, a1, a2);
1489         }
1490         break;
1491
1492     case INDEX_op_sar_i64:
1493     case INDEX_op_sar_i32:
1494         if (c2) {
1495             tcg_out_sar(s, ext, a0, a1, a2);
1496         } else {
1497             tcg_out_insn(s, 3508, ASRV, ext, a0, a1, a2);
1498         }
1499         break;
1500
1501     case INDEX_op_rotr_i64:
1502     case INDEX_op_rotr_i32:
1503         if (c2) {
1504             tcg_out_rotr(s, ext, a0, a1, a2);
1505         } else {
1506             tcg_out_insn(s, 3508, RORV, ext, a0, a1, a2);
1507         }
1508         break;
1509
1510     case INDEX_op_rotl_i64:
1511     case INDEX_op_rotl_i32:
1512         if (c2) {
1513             tcg_out_rotl(s, ext, a0, a1, a2);
1514         } else {
1515             tcg_out_insn(s, 3502, SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2);
1516             tcg_out_insn(s, 3508, RORV, ext, a0, a1, TCG_REG_TMP);
1517         }
1518         break;
1519
1520     case INDEX_op_brcond_i32:
1521         a1 = (int32_t)a1;
1522         /* FALLTHRU */
1523     case INDEX_op_brcond_i64:
1524         tcg_out_brcond(s, ext, a2, a0, a1, const_args[1], args[3]);
1525         break;
1526
1527     case INDEX_op_setcond_i32:
1528         a2 = (int32_t)a2;
1529         /* FALLTHRU */
1530     case INDEX_op_setcond_i64:
1531         tcg_out_cmp(s, ext, a1, a2, c2);
1532         /* Use CSET alias of CSINC Wd, WZR, WZR, invert(cond).  */
1533         tcg_out_insn(s, 3506, CSINC, TCG_TYPE_I32, a0, TCG_REG_XZR,
1534                      TCG_REG_XZR, tcg_invert_cond(args[3]));
1535         break;
1536
1537     case INDEX_op_movcond_i32:
1538         a2 = (int32_t)a2;
1539         /* FALLTHRU */
1540     case INDEX_op_movcond_i64:
1541         tcg_out_cmp(s, ext, a1, a2, c2);
1542         tcg_out_insn(s, 3506, CSEL, ext, a0, REG0(3), REG0(4), args[5]);
1543         break;
1544
1545     case INDEX_op_qemu_ld_i32:
1546     case INDEX_op_qemu_ld_i64:
1547         tcg_out_qemu_ld(s, a0, a1, a2, args[3]);
1548         break;
1549     case INDEX_op_qemu_st_i32:
1550     case INDEX_op_qemu_st_i64:
1551         tcg_out_qemu_st(s, REG0(0), a1, a2, args[3]);
1552         break;
1553
1554     case INDEX_op_bswap64_i64:
1555         tcg_out_rev64(s, a0, a1);
1556         break;
1557     case INDEX_op_bswap32_i64:
1558     case INDEX_op_bswap32_i32:
1559         tcg_out_rev32(s, a0, a1);
1560         break;
1561     case INDEX_op_bswap16_i64:
1562     case INDEX_op_bswap16_i32:
1563         tcg_out_rev16(s, a0, a1);
1564         break;
1565
1566     case INDEX_op_ext8s_i64:
1567     case INDEX_op_ext8s_i32:
1568         tcg_out_sxt(s, ext, MO_8, a0, a1);
1569         break;
1570     case INDEX_op_ext16s_i64:
1571     case INDEX_op_ext16s_i32:
1572         tcg_out_sxt(s, ext, MO_16, a0, a1);
1573         break;
1574     case INDEX_op_ext32s_i64:
1575         tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
1576         break;
1577     case INDEX_op_ext8u_i64:
1578     case INDEX_op_ext8u_i32:
1579         tcg_out_uxt(s, MO_8, a0, a1);
1580         break;
1581     case INDEX_op_ext16u_i64:
1582     case INDEX_op_ext16u_i32:
1583         tcg_out_uxt(s, MO_16, a0, a1);
1584         break;
1585     case INDEX_op_ext32u_i64:
1586         tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
1587         break;
1588
1589     case INDEX_op_deposit_i64:
1590     case INDEX_op_deposit_i32:
1591         tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
1592         break;
1593
1594     case INDEX_op_add2_i32:
1595         tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1596                         (int32_t)args[4], args[5], const_args[4],
1597                         const_args[5], false);
1598         break;
1599     case INDEX_op_add2_i64:
1600         tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1601                         args[5], const_args[4], const_args[5], false);
1602         break;
1603     case INDEX_op_sub2_i32:
1604         tcg_out_addsub2(s, TCG_TYPE_I32, a0, a1, REG0(2), REG0(3),
1605                         (int32_t)args[4], args[5], const_args[4],
1606                         const_args[5], true);
1607         break;
1608     case INDEX_op_sub2_i64:
1609         tcg_out_addsub2(s, TCG_TYPE_I64, a0, a1, REG0(2), REG0(3), args[4],
1610                         args[5], const_args[4], const_args[5], true);
1611         break;
1612
1613     case INDEX_op_muluh_i64:
1614         tcg_out_insn(s, 3508, UMULH, TCG_TYPE_I64, a0, a1, a2);
1615         break;
1616     case INDEX_op_mulsh_i64:
1617         tcg_out_insn(s, 3508, SMULH, TCG_TYPE_I64, a0, a1, a2);
1618         break;
1619
1620     case INDEX_op_mov_i64:
1621     case INDEX_op_mov_i32:
1622     case INDEX_op_movi_i64:
1623     case INDEX_op_movi_i32:
1624         /* Always implemented with tcg_out_mov/i, never with tcg_out_op.  */
1625     default:
1626         /* Opcode not implemented.  */
1627         tcg_abort();
1628     }
1629
1630 #undef REG0
1631 }
1632
1633 static const TCGTargetOpDef aarch64_op_defs[] = {
1634     { INDEX_op_exit_tb, { } },
1635     { INDEX_op_goto_tb, { } },
1636     { INDEX_op_call, { "ri" } },
1637     { INDEX_op_br, { } },
1638
1639     { INDEX_op_mov_i32, { "r", "r" } },
1640     { INDEX_op_mov_i64, { "r", "r" } },
1641
1642     { INDEX_op_movi_i32, { "r" } },
1643     { INDEX_op_movi_i64, { "r" } },
1644
1645     { INDEX_op_ld8u_i32, { "r", "r" } },
1646     { INDEX_op_ld8s_i32, { "r", "r" } },
1647     { INDEX_op_ld16u_i32, { "r", "r" } },
1648     { INDEX_op_ld16s_i32, { "r", "r" } },
1649     { INDEX_op_ld_i32, { "r", "r" } },
1650     { INDEX_op_ld8u_i64, { "r", "r" } },
1651     { INDEX_op_ld8s_i64, { "r", "r" } },
1652     { INDEX_op_ld16u_i64, { "r", "r" } },
1653     { INDEX_op_ld16s_i64, { "r", "r" } },
1654     { INDEX_op_ld32u_i64, { "r", "r" } },
1655     { INDEX_op_ld32s_i64, { "r", "r" } },
1656     { INDEX_op_ld_i64, { "r", "r" } },
1657
1658     { INDEX_op_st8_i32, { "rZ", "r" } },
1659     { INDEX_op_st16_i32, { "rZ", "r" } },
1660     { INDEX_op_st_i32, { "rZ", "r" } },
1661     { INDEX_op_st8_i64, { "rZ", "r" } },
1662     { INDEX_op_st16_i64, { "rZ", "r" } },
1663     { INDEX_op_st32_i64, { "rZ", "r" } },
1664     { INDEX_op_st_i64, { "rZ", "r" } },
1665
1666     { INDEX_op_add_i32, { "r", "r", "rwA" } },
1667     { INDEX_op_add_i64, { "r", "r", "rA" } },
1668     { INDEX_op_sub_i32, { "r", "r", "rwA" } },
1669     { INDEX_op_sub_i64, { "r", "r", "rA" } },
1670     { INDEX_op_mul_i32, { "r", "r", "r" } },
1671     { INDEX_op_mul_i64, { "r", "r", "r" } },
1672     { INDEX_op_div_i32, { "r", "r", "r" } },
1673     { INDEX_op_div_i64, { "r", "r", "r" } },
1674     { INDEX_op_divu_i32, { "r", "r", "r" } },
1675     { INDEX_op_divu_i64, { "r", "r", "r" } },
1676     { INDEX_op_rem_i32, { "r", "r", "r" } },
1677     { INDEX_op_rem_i64, { "r", "r", "r" } },
1678     { INDEX_op_remu_i32, { "r", "r", "r" } },
1679     { INDEX_op_remu_i64, { "r", "r", "r" } },
1680     { INDEX_op_and_i32, { "r", "r", "rwL" } },
1681     { INDEX_op_and_i64, { "r", "r", "rL" } },
1682     { INDEX_op_or_i32, { "r", "r", "rwL" } },
1683     { INDEX_op_or_i64, { "r", "r", "rL" } },
1684     { INDEX_op_xor_i32, { "r", "r", "rwL" } },
1685     { INDEX_op_xor_i64, { "r", "r", "rL" } },
1686     { INDEX_op_andc_i32, { "r", "r", "rwL" } },
1687     { INDEX_op_andc_i64, { "r", "r", "rL" } },
1688     { INDEX_op_orc_i32, { "r", "r", "rwL" } },
1689     { INDEX_op_orc_i64, { "r", "r", "rL" } },
1690     { INDEX_op_eqv_i32, { "r", "r", "rwL" } },
1691     { INDEX_op_eqv_i64, { "r", "r", "rL" } },
1692
1693     { INDEX_op_neg_i32, { "r", "r" } },
1694     { INDEX_op_neg_i64, { "r", "r" } },
1695     { INDEX_op_not_i32, { "r", "r" } },
1696     { INDEX_op_not_i64, { "r", "r" } },
1697
1698     { INDEX_op_shl_i32, { "r", "r", "ri" } },
1699     { INDEX_op_shr_i32, { "r", "r", "ri" } },
1700     { INDEX_op_sar_i32, { "r", "r", "ri" } },
1701     { INDEX_op_rotl_i32, { "r", "r", "ri" } },
1702     { INDEX_op_rotr_i32, { "r", "r", "ri" } },
1703     { INDEX_op_shl_i64, { "r", "r", "ri" } },
1704     { INDEX_op_shr_i64, { "r", "r", "ri" } },
1705     { INDEX_op_sar_i64, { "r", "r", "ri" } },
1706     { INDEX_op_rotl_i64, { "r", "r", "ri" } },
1707     { INDEX_op_rotr_i64, { "r", "r", "ri" } },
1708
1709     { INDEX_op_brcond_i32, { "r", "rwA" } },
1710     { INDEX_op_brcond_i64, { "r", "rA" } },
1711     { INDEX_op_setcond_i32, { "r", "r", "rwA" } },
1712     { INDEX_op_setcond_i64, { "r", "r", "rA" } },
1713     { INDEX_op_movcond_i32, { "r", "r", "rwA", "rZ", "rZ" } },
1714     { INDEX_op_movcond_i64, { "r", "r", "rA", "rZ", "rZ" } },
1715
1716     { INDEX_op_qemu_ld_i32, { "r", "l" } },
1717     { INDEX_op_qemu_ld_i64, { "r", "l" } },
1718     { INDEX_op_qemu_st_i32, { "lZ", "l" } },
1719     { INDEX_op_qemu_st_i64, { "lZ", "l" } },
1720
1721     { INDEX_op_bswap16_i32, { "r", "r" } },
1722     { INDEX_op_bswap32_i32, { "r", "r" } },
1723     { INDEX_op_bswap16_i64, { "r", "r" } },
1724     { INDEX_op_bswap32_i64, { "r", "r" } },
1725     { INDEX_op_bswap64_i64, { "r", "r" } },
1726
1727     { INDEX_op_ext8s_i32, { "r", "r" } },
1728     { INDEX_op_ext16s_i32, { "r", "r" } },
1729     { INDEX_op_ext8u_i32, { "r", "r" } },
1730     { INDEX_op_ext16u_i32, { "r", "r" } },
1731
1732     { INDEX_op_ext8s_i64, { "r", "r" } },
1733     { INDEX_op_ext16s_i64, { "r", "r" } },
1734     { INDEX_op_ext32s_i64, { "r", "r" } },
1735     { INDEX_op_ext8u_i64, { "r", "r" } },
1736     { INDEX_op_ext16u_i64, { "r", "r" } },
1737     { INDEX_op_ext32u_i64, { "r", "r" } },
1738
1739     { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
1740     { INDEX_op_deposit_i64, { "r", "0", "rZ" } },
1741
1742     { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1743     { INDEX_op_add2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1744     { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rwA", "rwMZ" } },
1745     { INDEX_op_sub2_i64, { "r", "r", "rZ", "rZ", "rA", "rMZ" } },
1746
1747     { INDEX_op_muluh_i64, { "r", "r", "r" } },
1748     { INDEX_op_mulsh_i64, { "r", "r", "r" } },
1749
1750     { -1 },
1751 };
1752
1753 static void tcg_target_init(TCGContext *s)
1754 {
1755     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I32], 0, 0xffffffff);
1756     tcg_regset_set32(tcg_target_available_regs[TCG_TYPE_I64], 0, 0xffffffff);
1757
1758     tcg_regset_set32(tcg_target_call_clobber_regs, 0,
1759                      (1 << TCG_REG_X0) | (1 << TCG_REG_X1) |
1760                      (1 << TCG_REG_X2) | (1 << TCG_REG_X3) |
1761                      (1 << TCG_REG_X4) | (1 << TCG_REG_X5) |
1762                      (1 << TCG_REG_X6) | (1 << TCG_REG_X7) |
1763                      (1 << TCG_REG_X8) | (1 << TCG_REG_X9) |
1764                      (1 << TCG_REG_X10) | (1 << TCG_REG_X11) |
1765                      (1 << TCG_REG_X12) | (1 << TCG_REG_X13) |
1766                      (1 << TCG_REG_X14) | (1 << TCG_REG_X15) |
1767                      (1 << TCG_REG_X16) | (1 << TCG_REG_X17) |
1768                      (1 << TCG_REG_X18) | (1 << TCG_REG_X30));
1769
1770     tcg_regset_clear(s->reserved_regs);
1771     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
1772     tcg_regset_set_reg(s->reserved_regs, TCG_REG_FP);
1773     tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP);
1774     tcg_regset_set_reg(s->reserved_regs, TCG_REG_X18); /* platform register */
1775
1776     tcg_add_target_add_op_defs(aarch64_op_defs);
1777 }
1778
1779 /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)).  */
1780 #define PUSH_SIZE  ((30 - 19 + 1) * 8)
1781
1782 #define FRAME_SIZE \
1783     ((PUSH_SIZE \
1784       + TCG_STATIC_CALL_ARGS_SIZE \
1785       + CPU_TEMP_BUF_NLONGS * sizeof(long) \
1786       + TCG_TARGET_STACK_ALIGN - 1) \
1787      & ~(TCG_TARGET_STACK_ALIGN - 1))
1788
1789 /* We're expecting a 2 byte uleb128 encoded value.  */
1790 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
1791
1792 /* We're expecting to use a single ADDI insn.  */
1793 QEMU_BUILD_BUG_ON(FRAME_SIZE - PUSH_SIZE > 0xfff);
1794
1795 static void tcg_target_qemu_prologue(TCGContext *s)
1796 {
1797     TCGReg r;
1798
1799     /* Push (FP, LR) and allocate space for all saved registers.  */
1800     tcg_out_insn(s, 3314, STP, TCG_REG_FP, TCG_REG_LR,
1801                  TCG_REG_SP, -PUSH_SIZE, 1, 1);
1802
1803     /* Set up frame pointer for canonical unwinding.  */
1804     tcg_out_movr_sp(s, TCG_TYPE_I64, TCG_REG_FP, TCG_REG_SP);
1805
1806     /* Store callee-preserved regs x19..x28.  */
1807     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1808         int ofs = (r - TCG_REG_X19 + 2) * 8;
1809         tcg_out_insn(s, 3314, STP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1810     }
1811
1812     /* Make stack space for TCG locals.  */
1813     tcg_out_insn(s, 3401, SUBI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1814                  FRAME_SIZE - PUSH_SIZE);
1815
1816     /* Inform TCG about how to find TCG locals with register, offset, size.  */
1817     tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE,
1818                   CPU_TEMP_BUF_NLONGS * sizeof(long));
1819
1820 #if defined(CONFIG_USE_GUEST_BASE)
1821     if (GUEST_BASE) {
1822         tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_GUEST_BASE, GUEST_BASE);
1823         tcg_regset_set_reg(s->reserved_regs, TCG_REG_GUEST_BASE);
1824     }
1825 #endif
1826
1827     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
1828     tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
1829
1830     tb_ret_addr = s->code_ptr;
1831
1832     /* Remove TCG locals stack space.  */
1833     tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_SP, TCG_REG_SP,
1834                  FRAME_SIZE - PUSH_SIZE);
1835
1836     /* Restore registers x19..x28.  */
1837     for (r = TCG_REG_X19; r <= TCG_REG_X27; r += 2) {
1838         int ofs = (r - TCG_REG_X19 + 2) * 8;
1839         tcg_out_insn(s, 3314, LDP, r, r + 1, TCG_REG_SP, ofs, 1, 0);
1840     }
1841
1842     /* Pop (FP, LR), restore SP to previous frame.  */
1843     tcg_out_insn(s, 3314, LDP, TCG_REG_FP, TCG_REG_LR,
1844                  TCG_REG_SP, PUSH_SIZE, 0, 1);
1845     tcg_out_insn(s, 3207, RET, TCG_REG_LR);
1846 }
1847
1848 typedef struct {
1849     DebugFrameCIE cie;
1850     DebugFrameFDEHeader fde;
1851     uint8_t fde_def_cfa[4];
1852     uint8_t fde_reg_ofs[24];
1853 } DebugFrame;
1854
1855 #define ELF_HOST_MACHINE EM_AARCH64
1856
1857 static DebugFrame debug_frame = {
1858     .cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
1859     .cie.id = -1,
1860     .cie.version = 1,
1861     .cie.code_align = 1,
1862     .cie.data_align = 0x78,             /* sleb128 -8 */
1863     .cie.return_column = TCG_REG_LR,
1864
1865     /* Total FDE size does not include the "len" member.  */
1866     .fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, fde.cie_offset),
1867
1868     .fde_def_cfa = {
1869         12, TCG_REG_SP,                 /* DW_CFA_def_cfa sp, ... */
1870         (FRAME_SIZE & 0x7f) | 0x80,     /* ... uleb128 FRAME_SIZE */
1871         (FRAME_SIZE >> 7)
1872     },
1873     .fde_reg_ofs = {
1874         0x80 + 28, 1,                   /* DW_CFA_offset, x28,  -8 */
1875         0x80 + 27, 2,                   /* DW_CFA_offset, x27, -16 */
1876         0x80 + 26, 3,                   /* DW_CFA_offset, x26, -24 */
1877         0x80 + 25, 4,                   /* DW_CFA_offset, x25, -32 */
1878         0x80 + 24, 5,                   /* DW_CFA_offset, x24, -40 */
1879         0x80 + 23, 6,                   /* DW_CFA_offset, x23, -48 */
1880         0x80 + 22, 7,                   /* DW_CFA_offset, x22, -56 */
1881         0x80 + 21, 8,                   /* DW_CFA_offset, x21, -64 */
1882         0x80 + 20, 9,                   /* DW_CFA_offset, x20, -72 */
1883         0x80 + 19, 10,                  /* DW_CFA_offset, x1p, -80 */
1884         0x80 + 30, 11,                  /* DW_CFA_offset,  lr, -88 */
1885         0x80 + 29, 12,                  /* DW_CFA_offset,  fp, -96 */
1886     }
1887 };
1888
1889 void tcg_register_jit(void *buf, size_t buf_size)
1890 {
1891     debug_frame.fde.func_start = (intptr_t)buf;
1892     debug_frame.fde.func_len = buf_size;
1893
1894     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
1895 }