tcg: Avoid undefined behaviour patching code at unaligned addresses

[qemu.git] / tcg / i386 / tcg-target.c
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c

index 5d4cf9386e55971ef7ede4fab68f2a8f512d0cb1..9a585ab5ecccca3dd3f1b61d5b55eb526715bef2 100644 (file)
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -88,6 +88,11 @@ static const int tcg_target_call_oarg_regs[] = {
  #endif
  };
  
+/* Constant operand classes we accept; flag bits kept in ct->ct.  */
+#define TCG_CT_CONST_S32 0x100
+#define TCG_CT_CONST_U32 0x200  /* 'Z': val == (uint32_t)val */
+#define TCG_CT_CONST_I32 0x400  /* 'I': ~val == (int32_t)~val */
+
  /* Registers used with L constraint, which are the first argument 
     registers on x86_64, and two random call clobbered registers on
     i386. */
@@ -110,7 +115,7 @@ static const int tcg_target_call_oarg_regs[] = {
     is available.  */
  #if TCG_TARGET_REG_BITS == 64
  # define have_cmov 1
-#elif defined(CONFIG_CPUID_H)
+#elif defined(CONFIG_CPUID_H) && defined(bit_CMOV)
  static bool have_cmov;
  #else
  # define have_cmov 0
@@ -124,6 +129,16 @@ static bool have_movbe;
  # define have_movbe 0
  #endif
  
+/* We need this symbol in tcg-target.h, and we can't properly conditionalize
+   it there.  Therefore we always define the variable.  */
+bool have_bmi1;                 /* assigned in tcg_target_init */
+/* have_bmi2 gates the SHLX/SHRX/SARX path and widens the 'C' constraint.  */
+#if defined(CONFIG_CPUID_H) && defined(bit_BMI2)
+static bool have_bmi2;          /* assigned in tcg_target_init */
+#else
+# define have_bmi2 0            /* constant: BMI2 paths are never taken */
+#endif
+
  static uint8_t *tb_ret_addr;
  
  static void patch_reloc(uint8_t *code_ptr, int type,
@@ -136,14 +151,14 @@ static void patch_reloc(uint8_t *code_ptr, int type,
          if (value != (int32_t)value) {
              tcg_abort();
          }
-        *(uint32_t *)code_ptr = value;
+        tcg_patch32(code_ptr, value);
          break;
      case R_386_PC8:
          value -= (uintptr_t)code_ptr;
          if (value != (int8_t)value) {
              tcg_abort();
          }
-        *(uint8_t *)code_ptr = value;
+        tcg_patch8(code_ptr, value);
          break;
      default:
          tcg_abort();
@@ -166,6 +181,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
          tcg_regset_set_reg(ct->u.regs, TCG_REG_EBX);
          break;
      case 'c':
+    case_c:
          ct->ct |= TCG_CT_REG;
          tcg_regset_set_reg(ct->u.regs, TCG_REG_ECX);
          break;
@@ -194,6 +210,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
          tcg_regset_set32(ct->u.regs, 0, 0xf);
          break;
      case 'r':
+    case_r:
          ct->ct |= TCG_CT_REG;
          if (TCG_TARGET_REG_BITS == 64) {
              tcg_regset_set32(ct->u.regs, 0, 0xffff);
@@ -201,6 +218,13 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
              tcg_regset_set32(ct->u.regs, 0, 0xff);
          }
          break;
+    case 'C':
+        /* With SHRX et al, we need not use ECX as shift count register.  */
+        if (have_bmi2) {
+            goto case_r;
+        } else {
+            goto case_c;
+        }
  
          /* qemu_ld/st address constraint */
      case 'L':
@@ -220,6 +244,9 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
      case 'Z':
          ct->ct |= TCG_CT_CONST_U32;
          break;
+    case 'I':
+        ct->ct |= TCG_CT_CONST_I32;
+        break;
  
      default:
          return -1;
@@ -230,7 +257,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
  }
  
  /* test if a constant matches the constraint */
-static inline int tcg_target_const_match(tcg_target_long val,
+static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
                                           const TCGArgConstraint *arg_ct)
  {
      int ct = arg_ct->ct;
@@ -243,6 +270,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
      if ((ct & TCG_CT_CONST_U32) && val == (uint32_t)val) {
          return 1;
      }
+    if ((ct & TCG_CT_CONST_I32) && ~val == (int32_t)~val) {
+        return 1;
+    }
      return 0;
  }
  
@@ -268,10 +298,13 @@ static inline int tcg_target_const_match(tcg_target_long val,
  # define P_REXB_RM     0
  # define P_GS           0
  #endif
+#define P_SIMDF3        0x10000         /* 0xf3 opcode prefix */
+#define P_SIMDF2        0x20000         /* 0xf2 opcode prefix */
  
  #define OPC_ARITH_EvIz (0x81)
  #define OPC_ARITH_EvIb (0x83)
  #define OPC_ARITH_GvEv (0x03)          /* ... plus (ARITH_FOO << 3) */
+#define OPC_ANDN        (0xf2 | P_EXT38)
  #define OPC_ADD_GvEv   (OPC_ARITH_GvEv | (ARITH_ADD << 3))
  #define OPC_BSWAP      (0xc8 | P_EXT)
  #define OPC_CALL_Jz    (0xe8)
@@ -309,6 +342,9 @@ static inline int tcg_target_const_match(tcg_target_long val,
  #define OPC_SHIFT_1    (0xd1)
  #define OPC_SHIFT_Ib   (0xc1)
  #define OPC_SHIFT_cl   (0xd3)
+#define OPC_SARX        (0xf7 | P_EXT38 | P_SIMDF3)
+#define OPC_SHLX        (0xf7 | P_EXT38 | P_DATA16)
+#define OPC_SHRX        (0xf7 | P_EXT38 | P_SIMDF2)
  #define OPC_TESTL      (0x85)
  #define OPC_XCHG_ax_r32        (0x90)
  
@@ -398,9 +434,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x)
  
      rex = 0;
      rex |= (opc & P_REXW) ? 0x8 : 0x0;  /* REX.W */
-    rex |= (r & 8) >> 1;               /* REX.R */
-    rex |= (x & 8) >> 2;               /* REX.X */
-    rex |= (rm & 8) >> 3;              /* REX.B */
+    rex |= (r & 8) >> 1;                /* REX.R */
+    rex |= (x & 8) >> 2;                /* REX.X */
+    rex |= (rm & 8) >> 3;               /* REX.B */
  
      /* P_REXB_{R,RM} indicates that the given register is the low byte.
         For %[abcd]l we need no REX prefix, but for %{si,di,bp,sp}l we do,
@@ -449,6 +485,48 @@ static void tcg_out_modrm(TCGContext *s, int opc, int r, int rm)
      tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
  }
  
+static void tcg_out_vex_modrm(TCGContext *s, int opc, int r, int v, int rm)
+{
+    int tmp;
+    /* Emit VEX prefix, opcode, ModRM; r -> reg, v -> vvvv, rm -> r/m.  */
+    if ((opc & (P_REXW | P_EXT | P_EXT38)) || (rm & 8)) {
+        /* Three byte VEX prefix: carries W, m-mmmm and B (bit 3 of rm).  */
+        tcg_out8(s, 0xc4);
+
+        /* VEX.m-mmmm: 2 for P_EXT38 opcodes, 1 for P_EXT opcodes.  */
+        if (opc & P_EXT38) {
+            tmp = 2;
+        } else if (opc & P_EXT) {
+            tmp = 1;
+        } else {
+            tcg_abort();                   /* neither map flag is set */
+        }
+        tmp |= 0x40;                       /* VEX.X, always set */
+        tmp |= (r & 8 ? 0 : 0x80);         /* VEX.R, stored inverted */
+        tmp |= (rm & 8 ? 0 : 0x20);        /* VEX.B, stored inverted */
+        tcg_out8(s, tmp);
+
+        tmp = (opc & P_REXW ? 0x80 : 0);   /* VEX.W, from P_REXW */
+    } else {
+        /* Two byte VEX prefix: only R, vvvv and pp are carried.  */
+        tcg_out8(s, 0xc5);
+        /* NOTE(review): reached only when P_EXT is clear; confirm.  */
+        tmp = (r & 8 ? 0 : 0x80);          /* VEX.R, stored inverted */
+    }
+    /* VEX.pp: implied prefix; the field stays 0 if no flag is set.  */
+    if (opc & P_DATA16) {
+        tmp |= 1;                          /* 0x66 */
+    } else if (opc & P_SIMDF3) {
+        tmp |= 2;                          /* 0xf3 */
+    } else if (opc & P_SIMDF2) {
+        tmp |= 3;                          /* 0xf2 */
+    }
+    tmp |= (~v & 15) << 3;                 /* VEX.vvvv, stored inverted */
+    tcg_out8(s, tmp);
+    tcg_out8(s, opc);                      /* presumably low 8 bits only */
+    tcg_out8(s, 0xc0 | (LOWREGMASK(r) << 3) | LOWREGMASK(rm));
+}
+
  /* Output an opcode with a full "rm + (index<<shift) + offset" address mode.
     We handle either RM and INDEX missing with a negative value.  In 64-bit
     mode for absolute addresses, ~RM is the size of the immediate operand
@@ -1166,7 +1244,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
   * Record the context of a call to the out of line helper code for the slow path
   * for a load or store, so that we can later generate the correct helper code
   */
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
                                  TCGReg datalo, TCGReg datahi,
                                  TCGReg addrlo, TCGReg addrhi,
                                  int mem_index, uint8_t *raddr,
@@ -1198,9 +1276,9 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
      uint8_t **label_ptr = &l->label_ptr[0];
  
      /* resolve label address */
-    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
      if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
      }
  
      if (TCG_TARGET_REG_BITS == 32) {
@@ -1282,9 +1360,9 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
      TCGReg retaddr;
  
      /* resolve label address */
-    *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
+    tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
      if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
-        *(uint32_t *)label_ptr[1] = (uint32_t)(s->code_ptr - label_ptr[1] - 4);
+        tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
      }
  
      if (TCG_TARGET_REG_BITS == 32) {
@@ -1476,7 +1554,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
      tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
  
      /* Record the current context of a load into ldst label */
-    add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
+    add_qemu_ldst_label(s, true, opc, datalo, datahi, addrlo, addrhi,
                          mem_index, s->code_ptr, label_ptr);
  #else
      {
@@ -1607,7 +1685,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
      tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
  
      /* Record the current context of a store into ldst label */
-    add_qemu_ldst_label(s, 0, opc, datalo, datahi, addrlo, addrhi,
+    add_qemu_ldst_label(s, false, opc, datalo, datahi, addrlo, addrhi,
                          mem_index, s->code_ptr, label_ptr);
  #else
      {
@@ -1638,7 +1716,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
                                const TCGArg *args, const int *const_args)
  {
-    int c, rexw = 0;
+    int c, vexop, rexw = 0;
  
  #if TCG_TARGET_REG_BITS == 64
  # define OP_32_64(x) \
@@ -1774,6 +1852,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
          }
          break;
  
+    OP_32_64(andc):
+        if (const_args[2]) {
+            tcg_out_mov(s, rexw ? TCG_TYPE_I64 : TCG_TYPE_I32,
+                        args[0], args[1]);
+            tgen_arithi(s, ARITH_AND + rexw, args[0], ~args[2], 0);
+        } else {
+            tcg_out_vex_modrm(s, OPC_ANDN + rexw, args[0], args[2], args[1]);
+        }
+        break;
+
      OP_32_64(mul):
          if (const_args[2]) {
              int32_t val;
@@ -1799,19 +1887,28 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
  
      OP_32_64(shl):
          c = SHIFT_SHL;
-        goto gen_shift;
+        vexop = OPC_SHLX;
+        goto gen_shift_maybe_vex;
      OP_32_64(shr):
          c = SHIFT_SHR;
-        goto gen_shift;
+        vexop = OPC_SHRX;
+        goto gen_shift_maybe_vex;
      OP_32_64(sar):
          c = SHIFT_SAR;
-        goto gen_shift;
+        vexop = OPC_SARX;
+        goto gen_shift_maybe_vex;
      OP_32_64(rotl):
          c = SHIFT_ROL;
          goto gen_shift;
      OP_32_64(rotr):
          c = SHIFT_ROR;
          goto gen_shift;
+    gen_shift_maybe_vex:
+        if (have_bmi2 && !const_args[2]) {
+            tcg_out_vex_modrm(s, vexop + rexw, args[0], args[2], args[1]);
+            break;
+        }
+        /* FALLTHRU */
      gen_shift:
          if (const_args[2]) {
              tcg_out_shifti(s, c + rexw, args[0], args[2]);
@@ -2002,10 +2099,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
      { INDEX_op_and_i32, { "r", "0", "ri" } },
      { INDEX_op_or_i32, { "r", "0", "ri" } },
      { INDEX_op_xor_i32, { "r", "0", "ri" } },
+    { INDEX_op_andc_i32, { "r", "r", "ri" } },
  
-    { INDEX_op_shl_i32, { "r", "0", "ci" } },
-    { INDEX_op_shr_i32, { "r", "0", "ci" } },
-    { INDEX_op_sar_i32, { "r", "0", "ci" } },
+    { INDEX_op_shl_i32, { "r", "0", "Ci" } },
+    { INDEX_op_shr_i32, { "r", "0", "Ci" } },
+    { INDEX_op_sar_i32, { "r", "0", "Ci" } },
      { INDEX_op_rotl_i32, { "r", "0", "ci" } },
      { INDEX_op_rotr_i32, { "r", "0", "ci" } },
  
@@ -2059,10 +2157,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
      { INDEX_op_and_i64, { "r", "0", "reZ" } },
      { INDEX_op_or_i64, { "r", "0", "re" } },
      { INDEX_op_xor_i64, { "r", "0", "re" } },
+    { INDEX_op_andc_i64, { "r", "r", "rI" } },
  
-    { INDEX_op_shl_i64, { "r", "0", "ci" } },
-    { INDEX_op_shr_i64, { "r", "0", "ci" } },
-    { INDEX_op_sar_i64, { "r", "0", "ci" } },
+    { INDEX_op_shl_i64, { "r", "0", "Ci" } },
+    { INDEX_op_shr_i64, { "r", "0", "Ci" } },
+    { INDEX_op_sar_i64, { "r", "0", "Ci" } },
      { INDEX_op_rotl_i64, { "r", "0", "ci" } },
      { INDEX_op_rotr_i64, { "r", "0", "ci" } },
  
@@ -2196,23 +2295,34 @@ static void tcg_target_qemu_prologue(TCGContext *s)
  
  static void tcg_target_init(TCGContext *s)
  {
-#if !(defined(have_cmov) && defined(have_movbe))
-    {
-        unsigned a, b, c, d;
-        int ret = __get_cpuid(1, &a, &b, &c, &d);
+#ifdef CONFIG_CPUID_H
+    unsigned a, b, c, d;
+    int max = __get_cpuid_max(0, 0);
  
-# ifndef have_cmov
+    if (max >= 1) {
+        __cpuid(1, a, b, c, d);
+#ifndef have_cmov
          /* For 32-bit, 99% certainty that we're running on hardware that
             supports cmov, but we still need to check.  In case cmov is not
             available, we'll use a small forward branch.  */
-        have_cmov = ret && (d & bit_CMOV);
-# endif
-
-# ifndef have_movbe
+        have_cmov = (d & bit_CMOV) != 0;
+#endif
+#ifndef have_movbe
          /* MOVBE is only available on Intel Atom and Haswell CPUs, so we
             need to probe for it.  */
-        have_movbe = ret && (c & bit_MOVBE);
-# endif
+        have_movbe = (c & bit_MOVBE) != 0;
+#endif
+    }
+
+    if (max >= 7) {
+        /* BMI1 is available on AMD Piledriver and Intel Haswell CPUs.  */
+        __cpuid_count(7, 0, a, b, c, d);
+#ifdef bit_BMI
+        have_bmi1 = (b & bit_BMI) != 0;
+#endif
+#ifndef have_bmi2
+        have_bmi2 = (b & bit_BMI2) != 0;
+#endif
      }
  #endif