#error "unsupported code generation mode"
#endif
+#include "../tcg-ldst.c.inc"
#include "../tcg-pool.c.inc"
#include "elf.h"
-/* ??? The translation blocks produced by TCG are generally small enough to
- be entirely reachable with a 16-bit displacement. Leaving the option for
- a 32-bit displacement here Just In Case. */
-#define USE_LONG_BRANCHES 0
-
#define TCG_CT_CONST_S16 0x100
#define TCG_CT_CONST_S32 0x200
#define TCG_CT_CONST_S33 0x400
/* A scratch register that may be used throughout the backend. */
#define TCG_TMP0 TCG_REG_R1
-/* A scratch register that holds a pointer to the beginning of the TB.
- We don't need this when we have pc-relative loads with the general
- instructions extension facility. */
-#define TCG_REG_TB TCG_REG_R12
-#define USE_REG_TB (!HAVE_FACILITY(GEN_INST_EXT))
-
#ifndef CONFIG_SOFTMMU
#define TCG_GUEST_BASE_REG TCG_REG_R13
#endif
RI_OIHL = 0xa509,
RI_OILH = 0xa50a,
RI_OILL = 0xa50b,
+ RI_TMLL = 0xa701,
RIE_CGIJ = 0xec7c,
RIE_CGRJ = 0xec64,
VRIb_VGM = 0xe746,
VRIc_VREP = 0xe74d,
+ VRRa_VLC = 0xe7de,
+ VRRa_VLP = 0xe7df,
VRRa_VLR = 0xe756,
VRRc_VA = 0xe7f3,
VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
VRRc_VCH = 0xe7fb, /* " */
VRRc_VCHL = 0xe7f9, /* " */
+ VRRc_VERLLV = 0xe773,
+ VRRc_VESLV = 0xe770,
+ VRRc_VESRAV = 0xe77a,
+ VRRc_VESRLV = 0xe778,
+ VRRc_VML = 0xe7a2,
+ VRRc_VMN = 0xe7fe,
+ VRRc_VMNL = 0xe7fc,
+ VRRc_VMX = 0xe7ff,
+ VRRc_VMXL = 0xe7fd,
VRRc_VN = 0xe768,
+ VRRc_VNC = 0xe769,
+ VRRc_VNN = 0xe76e,
+ VRRc_VNO = 0xe76b,
+ VRRc_VNX = 0xe76c,
VRRc_VO = 0xe76a,
+ VRRc_VOC = 0xe76f,
+ VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
VRRc_VS = 0xe7f7,
+ VRRa_VUPH = 0xe7d7,
+ VRRa_VUPL = 0xe7d6,
VRRc_VX = 0xe76d,
+ VRRe_VSEL = 0xe78d,
VRRf_VLVGP = 0xe762,
+ VRSa_VERLL = 0xe733,
+ VRSa_VESL = 0xe730,
+ VRSa_VESRA = 0xe73a,
+ VRSa_VESRL = 0xe738,
VRSb_VLVG = 0xe722,
VRSc_VLGV = 0xe721,
[MO_LESW] = helper_le_ldsw_mmu,
[MO_LEUL] = helper_le_ldul_mmu,
[MO_LESL] = helper_le_ldsl_mmu,
- [MO_LEQ] = helper_le_ldq_mmu,
+ [MO_LEUQ] = helper_le_ldq_mmu,
[MO_BEUW] = helper_be_lduw_mmu,
[MO_BESW] = helper_be_ldsw_mmu,
[MO_BEUL] = helper_be_ldul_mmu,
[MO_BESL] = helper_be_ldsl_mmu,
- [MO_BEQ] = helper_be_ldq_mmu,
+ [MO_BEUQ] = helper_be_ldq_mmu,
};
static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_UB] = helper_ret_stb_mmu,
[MO_LEUW] = helper_le_stw_mmu,
[MO_LEUL] = helper_le_stl_mmu,
- [MO_LEQ] = helper_le_stq_mmu,
+ [MO_LEUQ] = helper_le_stq_mmu,
[MO_BEUW] = helper_be_stw_mmu,
[MO_BEUL] = helper_be_stl_mmu,
- [MO_BEQ] = helper_be_stq_mmu,
+ [MO_BEUQ] = helper_be_stq_mmu,
};
#endif
tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
}
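+/* VRR-e format: four vector-register operands; used here for VSEL. */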
+static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
+ TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(is_vector_reg(v2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_debug_assert(is_vector_reg(v4));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
+ tcg_out16(s, v3 << 12);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
+}
+
static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
TCGReg v1, TCGReg r2, TCGReg r3)
{
tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
}
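+/* VRS-a format: vector v1/v3 operands, D2(B2) shift amount, m4 element size. */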
+static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
+ intptr_t d2, TCGReg b2, TCGReg v3, int m4)
+{
+ tcg_debug_assert(is_vector_reg(v1));
+ tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+ tcg_debug_assert(is_general_reg(b2));
+ tcg_debug_assert(is_vector_reg(v3));
+ tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
+ tcg_out16(s, b2 << 12 | d2);
+ tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
+}
+
static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
intptr_t d2, TCGReg b2, TCGReg r3, int m4)
{
}
for (i = 0; i < 4; i++) {
- tcg_target_long mask = 0xffffull << i*16;
+ tcg_target_long mask = 0xffffull << i * 16;
if ((uval & mask) == uval) {
- tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i*16);
+ tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i * 16);
return true;
}
}
}
/* load a register with an immediate value */
-static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
- tcg_target_long sval, bool in_prologue)
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ TCGReg ret, tcg_target_long sval)
{
tcg_target_ulong uval;
}
/* Try all 48-bit insns that can load it in one go. */
- if (HAVE_FACILITY(EXT_IMM)) {
- if (sval == (int32_t)sval) {
- tcg_out_insn(s, RIL, LGFI, ret, sval);
- return;
- }
- if (uval <= 0xffffffff) {
- tcg_out_insn(s, RIL, LLILF, ret, uval);
- return;
- }
- if ((uval & 0xffffffff) == 0) {
- tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
- return;
- }
+ if (sval == (int32_t)sval) {
+ tcg_out_insn(s, RIL, LGFI, ret, sval);
+ return;
+ }
+ if (uval <= 0xffffffff) {
+ tcg_out_insn(s, RIL, LLILF, ret, uval);
+ return;
+ }
+ if ((uval & 0xffffffff) == 0) {
+ tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
+ return;
}
/* Try for PC-relative address load. For odd addresses,
tcg_out_insn(s, RIL, LARL, ret, off);
return;
}
- } else if (USE_REG_TB && !in_prologue) {
- ptrdiff_t off = tcg_tbrel_diff(s, (void *)sval);
- if (off == sextract64(off, 0, 20)) {
- /* This is certain to be an address within TB, and therefore
- OFF will be negative; don't try RX_LA. */
- tcg_out_insn(s, RXY, LAY, ret, TCG_REG_TB, TCG_REG_NONE, off);
- return;
- }
- }
-
- /* A 32-bit unsigned value can be loaded in 2 insns. And given
- that LLILL, LLIHL, LLILF above did not succeed, we know that
- both insns are required. */
- if (uval <= 0xffffffff) {
- tcg_out_insn(s, RI, LLILL, ret, uval);
- tcg_out_insn(s, RI, IILH, ret, uval >> 16);
- return;
}
/* Otherwise, stuff it in the constant pool. */
- if (HAVE_FACILITY(GEN_INST_EXT)) {
- tcg_out_insn(s, RIL, LGRL, ret, 0);
- new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
- } else if (USE_REG_TB && !in_prologue) {
- tcg_out_insn(s, RXY, LG, ret, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, sval, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
- } else {
- TCGReg base = ret ? ret : TCG_TMP0;
- tcg_out_insn(s, RIL, LARL, base, 0);
- new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
- tcg_out_insn(s, RXY, LG, ret, base, TCG_REG_NONE, 0);
- }
-}
-
-static void tcg_out_movi(TCGContext *s, TCGType type,
- TCGReg ret, tcg_target_long sval)
-{
- tcg_out_movi_int(s, type, ret, sval, false);
+ tcg_out_insn(s, RIL, LGRL, ret, 0);
+ new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
}
/* Emit a load/store type instruction. Inputs are:
return false;
}
-/* load data from an absolute host address */
-static void tcg_out_ld_abs(TCGContext *s, TCGType type,
- TCGReg dest, const void *abs)
-{
- intptr_t addr = (intptr_t)abs;
-
- if (HAVE_FACILITY(GEN_INST_EXT) && !(addr & 1)) {
- ptrdiff_t disp = tcg_pcrel_diff(s, abs) >> 1;
- if (disp == (int32_t)disp) {
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RIL, LRL, dest, disp);
- } else {
- tcg_out_insn(s, RIL, LGRL, dest, disp);
- }
- return;
- }
- }
- if (USE_REG_TB) {
- ptrdiff_t disp = tcg_tbrel_diff(s, abs);
- if (disp == sextract64(disp, 0, 20)) {
- tcg_out_ld(s, type, dest, TCG_REG_TB, disp);
- return;
- }
- }
-
- tcg_out_movi(s, TCG_TYPE_PTR, dest, addr & ~0xffff);
- tcg_out_ld(s, type, dest, dest, addr & 0xffff);
-}
-
static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
int msb, int lsb, int ofs, int z)
{
static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (HAVE_FACILITY(EXT_IMM)) {
- tcg_out_insn(s, RRE, LGBR, dest, src);
- return;
- }
-
- if (type == TCG_TYPE_I32) {
- if (dest == src) {
- tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 24);
- } else {
- tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 24);
- }
- tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 24);
- } else {
- tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 56);
- tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 56);
- }
+ tcg_out_insn(s, RRE, LGBR, dest, src);
}
static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (HAVE_FACILITY(EXT_IMM)) {
- tcg_out_insn(s, RRE, LLGCR, dest, src);
- return;
- }
-
- if (dest == src) {
- tcg_out_movi(s, type, TCG_TMP0, 0xff);
- src = TCG_TMP0;
- } else {
- tcg_out_movi(s, type, dest, 0xff);
- }
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RR, NR, dest, src);
- } else {
- tcg_out_insn(s, RRE, NGR, dest, src);
- }
+ tcg_out_insn(s, RRE, LLGCR, dest, src);
}
static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (HAVE_FACILITY(EXT_IMM)) {
- tcg_out_insn(s, RRE, LGHR, dest, src);
- return;
- }
-
- if (type == TCG_TYPE_I32) {
- if (dest == src) {
- tcg_out_sh32(s, RS_SLL, dest, TCG_REG_NONE, 16);
- } else {
- tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 16);
- }
- tcg_out_sh32(s, RS_SRA, dest, TCG_REG_NONE, 16);
- } else {
- tcg_out_sh64(s, RSY_SLLG, dest, src, TCG_REG_NONE, 48);
- tcg_out_sh64(s, RSY_SRAG, dest, dest, TCG_REG_NONE, 48);
- }
+ tcg_out_insn(s, RRE, LGHR, dest, src);
}
static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
{
- if (HAVE_FACILITY(EXT_IMM)) {
- tcg_out_insn(s, RRE, LLGHR, dest, src);
- return;
- }
-
- if (dest == src) {
- tcg_out_movi(s, type, TCG_TMP0, 0xffff);
- src = TCG_TMP0;
- } else {
- tcg_out_movi(s, type, dest, 0xffff);
- }
- if (type == TCG_TYPE_I32) {
- tcg_out_insn(s, RR, NR, dest, src);
- } else {
- tcg_out_insn(s, RRE, NGR, dest, src);
- }
+ tcg_out_insn(s, RRE, LLGHR, dest, src);
}
static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
tgen_ext32u(s, dest, dest);
return;
}
- if (HAVE_FACILITY(EXT_IMM)) {
- if ((val & valid) == 0xff) {
- tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
- return;
- }
- if ((val & valid) == 0xffff) {
- tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
- return;
- }
+ if ((val & valid) == 0xff) {
+ tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
+ return;
+ }
+ if ((val & valid) == 0xffff) {
+ tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
+ return;
}
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = ~(0xffffull << i*16);
+ tcg_target_ulong mask = ~(0xffffull << i * 16);
if (((val | ~valid) & mask) == mask) {
- tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+ tcg_out_insn_RI(s, ni_insns[i], dest, val >> i * 16);
return;
}
}
/* Try all 48-bit insns that can perform it in one go. */
- if (HAVE_FACILITY(EXT_IMM)) {
- for (i = 0; i < 2; i++) {
- tcg_target_ulong mask = ~(0xffffffffull << i*32);
- if (((val | ~valid) & mask) == mask) {
- tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
- return;
- }
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = ~(0xffffffffull << i * 32);
+ if (((val | ~valid) & mask) == mask) {
+ tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i * 32);
+ return;
}
}
- if (HAVE_FACILITY(GEN_INST_EXT) && risbg_mask(val)) {
+ if (risbg_mask(val)) {
tgen_andi_risbg(s, dest, dest, val);
return;
}
- /* Use the constant pool if USE_REG_TB, but not for small constants. */
- if (USE_REG_TB) {
- if (!maybe_out_small_movi(s, type, TCG_TMP0, val)) {
- tcg_out_insn(s, RXY, NG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, val & valid, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
- return;
- }
- } else {
- tcg_out_movi(s, type, TCG_TMP0, val);
- }
+ tcg_out_movi(s, type, TCG_TMP0, val);
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
} else {
/* Try all 32-bit insns that can perform it in one go. */
for (i = 0; i < 4; i++) {
- tcg_target_ulong mask = (0xffffull << i*16);
+ tcg_target_ulong mask = (0xffffull << i * 16);
if ((val & mask) != 0 && (val & ~mask) == 0) {
- tcg_out_insn_RI(s, oi_insns[i], dest, val >> i*16);
+ tcg_out_insn_RI(s, oi_insns[i], dest, val >> i * 16);
return;
}
}
/* Try all 48-bit insns that can perform it in one go. */
- if (HAVE_FACILITY(EXT_IMM)) {
- for (i = 0; i < 2; i++) {
- tcg_target_ulong mask = (0xffffffffull << i*32);
- if ((val & mask) != 0 && (val & ~mask) == 0) {
- tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i*32);
- return;
- }
+ for (i = 0; i < 2; i++) {
+ tcg_target_ulong mask = (0xffffffffull << i * 32);
+ if ((val & mask) != 0 && (val & ~mask) == 0) {
+ tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i * 32);
+ return;
}
}
- /* Use the constant pool if USE_REG_TB, but not for small constants. */
if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
} else {
tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
}
- } else if (USE_REG_TB) {
- tcg_out_insn(s, RXY, OG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, val, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
} else {
/* Perform the OR via sequential modifications to the high and
low parts. Do this via recursion to handle 16-bit vs 32-bit
masks in each half. */
- tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
}
static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
{
/* Try all 48-bit insns that can perform it in one go. */
- if (HAVE_FACILITY(EXT_IMM)) {
- if ((val & 0xffffffff00000000ull) == 0) {
- tcg_out_insn(s, RIL, XILF, dest, val);
- return;
- }
- if ((val & 0x00000000ffffffffull) == 0) {
- tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
- return;
- }
+ if ((val & 0xffffffff00000000ull) == 0) {
+ tcg_out_insn(s, RIL, XILF, dest, val);
+ return;
+ }
+ if ((val & 0x00000000ffffffffull) == 0) {
+ tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
+ return;
}
- /* Use the constant pool if USE_REG_TB, but not for small constants. */
if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
if (type == TCG_TYPE_I32) {
tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
} else {
tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
}
- } else if (USE_REG_TB) {
- tcg_out_insn(s, RXY, XG, dest, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, val, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
} else {
/* Perform the xor by parts. */
- tcg_debug_assert(HAVE_FACILITY(EXT_IMM));
if (val & 0xffffffff) {
tcg_out_insn(s, RIL, XILF, dest, val);
}
goto exit;
}
- if (HAVE_FACILITY(EXT_IMM)) {
- if (type == TCG_TYPE_I32) {
- op = (is_unsigned ? RIL_CLFI : RIL_CFI);
- tcg_out_insn_RIL(s, op, r1, c2);
- goto exit;
- } else if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
- op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
- tcg_out_insn_RIL(s, op, r1, c2);
- goto exit;
- }
+ if (type == TCG_TYPE_I32) {
+ op = (is_unsigned ? RIL_CLFI : RIL_CFI);
+ tcg_out_insn_RIL(s, op, r1, c2);
+ goto exit;
+ }
+ if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
+ op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
+ tcg_out_insn_RIL(s, op, r1, c2);
+ goto exit;
}
/* Use the constant pool, but not for small constants. */
if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
c2 = TCG_TMP0;
/* fall through to reg-reg */
- } else if (USE_REG_TB) {
- if (type == TCG_TYPE_I32) {
- op = (is_unsigned ? RXY_CLY : RXY_CY);
- tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, (uint32_t)c2, R_390_20, s->code_ptr - 2,
- 4 - tcg_tbrel_diff(s, NULL));
- } else {
- op = (is_unsigned ? RXY_CLG : RXY_CG);
- tcg_out_insn_RXY(s, op, r1, TCG_REG_TB, TCG_REG_NONE, 0);
- new_pool_label(s, c2, R_390_20, s->code_ptr - 2,
- tcg_tbrel_diff(s, NULL));
- }
- goto exit;
} else {
- if (type == TCG_TYPE_I32) {
- op = (is_unsigned ? RIL_CLRL : RIL_CRL);
- tcg_out_insn_RIL(s, op, r1, 0);
- new_pool_label(s, (uint32_t)c2, R_390_PC32DBL,
- s->code_ptr - 2, 2 + 4);
- } else {
- op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
- tcg_out_insn_RIL(s, op, r1, 0);
- new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
- }
+ op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
+ tcg_out_insn_RIL(s, op, r1, 0);
+ new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
goto exit;
}
}
TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
{
int cc;
- bool have_loc;
/* With LOC2, we can always emit the minimum 3 insns. */
if (HAVE_FACILITY(LOAD_ON_COND2)) {
return;
}
- have_loc = HAVE_FACILITY(LOAD_ON_COND);
-
- /* For HAVE_LOC, only the paths through GTU/GT/LEU/LE are smaller. */
restart:
switch (cond) {
case TCG_COND_NE:
case TCG_COND_LT:
case TCG_COND_GE:
/* Swap operands so that we can use LEU/GTU/GT/LE. */
- if (c2const) {
- if (have_loc) {
- break;
- }
- tcg_out_movi(s, type, TCG_TMP0, c2);
- c2 = c1;
- c2const = 0;
- c1 = TCG_TMP0;
- } else {
+ if (!c2const) {
TCGReg t = c1;
c1 = c2;
c2 = t;
+ cond = tcg_swap_cond(cond);
+ goto restart;
}
- cond = tcg_swap_cond(cond);
- goto restart;
+ break;
default:
g_assert_not_reached();
}
cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
- if (have_loc) {
- /* Emit: d = 0, t = 1, d = (cc ? t : d). */
- tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
- tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
- tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
- } else {
- /* Emit: d = 1; if (cc) goto over; d = 0; over: */
- tcg_out_movi(s, type, dest, 1);
- tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
- tcg_out_movi(s, type, dest, 0);
- }
+ /* Emit: d = 0, t = 1, d = (cc ? t : d). */
+ tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
+ tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
}
static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
TCGReg c1, TCGArg c2, int c2const,
TCGArg v3, int v3const)
{
- int cc;
- if (HAVE_FACILITY(LOAD_ON_COND)) {
- cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
- if (v3const) {
- tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
- } else {
- tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
- }
+ int cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
+ if (v3const) {
+ tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
} else {
- c = tcg_invert_cond(c);
- cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
-
- /* Emit: if (cc) goto over; dest = r3; over: */
- tcg_out_insn(s, RI, BRC, cc, (4 + 4) >> 1);
- tcg_out_insn(s, RRE, LGR, dest, v3);
+ tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
}
}
} else {
tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
}
- if (HAVE_FACILITY(LOAD_ON_COND)) {
- /* Emit: if (one bit found) dest = r0. */
- tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
- } else {
- /* Emit: if (no one bit found) goto over; dest = r0; over: */
- tcg_out_insn(s, RI, BRC, 8, (4 + 4) >> 1);
- tcg_out_insn(s, RRE, LGR, dest, TCG_REG_R0);
- }
+ /* Emit: if (one bit found) dest = r0. */
+ tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
}
}
{
if (l->has_value) {
tgen_gotoi(s, cc, l->u.value_ptr);
- } else if (USE_LONG_BRANCHES) {
- tcg_out16(s, RIL_BRCL | (cc << 4));
- tcg_out_reloc(s, s->code_ptr, R_390_PC32DBL, l, 2);
- s->code_ptr += 2;
} else {
tcg_out16(s, RI_BRC | (cc << 4));
tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
{
int cc;
+ bool is_unsigned = is_unsigned_cond(c);
+ bool in_range;
+ S390Opcode opc;
- if (HAVE_FACILITY(GEN_INST_EXT)) {
- bool is_unsigned = is_unsigned_cond(c);
- bool in_range;
- S390Opcode opc;
-
- cc = tcg_cond_to_s390_cond[c];
+ cc = tcg_cond_to_s390_cond[c];
- if (!c2const) {
- opc = (type == TCG_TYPE_I32
- ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
- : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
- tgen_compare_branch(s, opc, cc, r1, c2, l);
- return;
- }
+ if (!c2const) {
+ opc = (type == TCG_TYPE_I32
+ ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
+ : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
+ tgen_compare_branch(s, opc, cc, r1, c2, l);
+ return;
+ }
- /* COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
- If the immediate we've been given does not fit that range, we'll
- fall back to separate compare and branch instructions using the
- larger comparison range afforded by COMPARE IMMEDIATE. */
- if (type == TCG_TYPE_I32) {
- if (is_unsigned) {
- opc = RIE_CLIJ;
- in_range = (uint32_t)c2 == (uint8_t)c2;
- } else {
- opc = RIE_CIJ;
- in_range = (int32_t)c2 == (int8_t)c2;
- }
+ /*
+ * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
+ * If the immediate we've been given does not fit that range, we'll
+ * fall back to separate compare and branch instructions using the
+ * larger comparison range afforded by COMPARE IMMEDIATE.
+ */
+ if (type == TCG_TYPE_I32) {
+ if (is_unsigned) {
+ opc = RIE_CLIJ;
+ in_range = (uint32_t)c2 == (uint8_t)c2;
} else {
- if (is_unsigned) {
- opc = RIE_CLGIJ;
- in_range = (uint64_t)c2 == (uint8_t)c2;
- } else {
- opc = RIE_CGIJ;
- in_range = (int64_t)c2 == (int8_t)c2;
- }
+ opc = RIE_CIJ;
+ in_range = (int32_t)c2 == (int8_t)c2;
}
- if (in_range) {
- tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
- return;
+ } else {
+ if (is_unsigned) {
+ opc = RIE_CLGIJ;
+ in_range = (uint64_t)c2 == (uint8_t)c2;
+ } else {
+ opc = RIE_CGIJ;
+ in_range = (int64_t)c2 == (int8_t)c2;
}
}
+ if (in_range) {
+ tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
+ return;
+ }
cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
tgen_branch(s, cc, l);
}
-static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest)
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
{
ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
if (off == (int32_t)off) {
}
}
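+/* The TCGHelperInfo argument is unused by this backend; just emit the call. */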
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
+ const TCGHelperInfo *info)
+{
+ tcg_out_call_int(s, dest);
+}
+
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
TCGReg base, TCGReg index, int disp)
{
tcg_out_insn(s, RXY, LGF, data, base, index, disp);
break;
- case MO_Q | MO_BSWAP:
+ case MO_UQ | MO_BSWAP:
tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
break;
- case MO_Q:
+ case MO_UQ:
tcg_out_insn(s, RXY, LG, data, base, index, disp);
break;
}
break;
- case MO_Q | MO_BSWAP:
+ case MO_UQ | MO_BSWAP:
tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
break;
- case MO_Q:
+ case MO_UQ:
tcg_out_insn(s, RXY, STG, data, base, index, disp);
break;
}
#if defined(CONFIG_SOFTMMU)
-#include "../tcg-ldst.c.inc"
-
/* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
cross pages using the address of the last byte of the access. */
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
- if (HAVE_FACILITY(GEN_INST_EXT) && a_off == 0) {
+ if (a_off == 0) {
tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
} else {
tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
}
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
- tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
+ tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
case MO_UL:
tgen_ext32u(s, TCG_REG_R4, data_reg);
break;
- case MO_Q:
+ case MO_UQ:
tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
break;
default:
}
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
- tcg_out_call(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+ tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
return true;
}
#else
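+/* Check alignment with TMLL; branch to the slow path if any low bit is set. */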
+static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
+ TCGReg addrlo, unsigned a_bits)
+{
+ unsigned a_mask = (1 << a_bits) - 1;
+ TCGLabelQemuLdst *l = new_ldst_label(s);
+
+ l->is_ld = is_ld;
+ l->addrlo_reg = addrlo;
+
+ /* We are expecting a_bits to max out at 7, much lower than TMLL. */
+ tcg_debug_assert(a_bits < 16);
+ tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
+
+ tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
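+    /* Reserve a halfword for the displacement, patched by the slow path. */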
+ l->label_ptr[0] = s->code_ptr;
+ s->code_ptr += 1;
+
+ l->raddr = tcg_splitwx_to_rx(s->code_ptr);
+}
+
+static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
+ (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
+ return false;
+ }
+
+ tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
+
+ /* "Tail call" to the helper, with the return address back inline. */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
+ tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
+ : helper_unaligned_st));
+ return true;
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ return tcg_out_fail_alignment(s, l);
+}
+
+static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+ return tcg_out_fail_alignment(s, l);
+}
+
static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
TCGReg *index_reg, tcg_target_long *disp)
{
#else
TCGReg index_reg;
tcg_target_long disp;
+ unsigned a_bits = get_alignment_bits(opc);
+ if (a_bits) {
+ tcg_out_test_alignment(s, true, addr_reg, a_bits);
+ }
tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
#endif
#else
TCGReg index_reg;
tcg_target_long disp;
+ unsigned a_bits = get_alignment_bits(opc);
+ if (a_bits) {
+ tcg_out_test_alignment(s, false, addr_reg, a_bits);
+ }
tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
#endif
case INDEX_op_goto_tb:
a0 = args[0];
- if (s->tb_jmp_insn_offset) {
- /*
- * branch displacement must be aligned for atomic patching;
- * see if we need to add extra nop before branch
- */
- if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
- tcg_out16(s, NOP);
- }
- tcg_debug_assert(!USE_REG_TB);
- tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
- s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
- s->code_ptr += 2;
- } else {
- /* load address stored at s->tb_jmp_target_addr + a0 */
- tcg_out_ld_abs(s, TCG_TYPE_PTR, TCG_REG_TB,
- tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0));
- /* and go there */
- tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
- }
+ /*
+ * branch displacement must be aligned for atomic patching;
+ * see if we need to add extra nop before branch
+ */
+ if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
+ tcg_out16(s, NOP);
+ }
+ tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ s->code_ptr += 2;
set_jmp_reset_offset(s, a0);
-
- /* For the unlinked path of goto_tb, we need to reset
- TCG_REG_TB to the beginning of this TB. */
- if (USE_REG_TB) {
- int ofs = -tcg_current_code_size(s);
- /* All TB are restricted to 64KiB by unwind info. */
- tcg_debug_assert(ofs == sextract64(ofs, 0, 20));
- tcg_out_insn(s, RXY, LAY, TCG_REG_TB,
- TCG_REG_TB, TCG_REG_NONE, ofs);
- }
break;
case INDEX_op_goto_ptr:
a0 = args[0];
- if (USE_REG_TB) {
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0);
- }
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
break;
tcg_out_insn(s, RI, AHI, a0, a2);
break;
}
- if (HAVE_FACILITY(EXT_IMM)) {
- tcg_out_insn(s, RIL, AFI, a0, a2);
- break;
- }
+ tcg_out_insn(s, RIL, AFI, a0, a2);
+ break;
}
tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
} else if (a0 == a1) {
break;
case INDEX_op_div2_i32:
- tcg_out_insn(s, RR, DR, TCG_REG_R2, args[4]);
+ tcg_debug_assert(args[0] == args[2]);
+ tcg_debug_assert(args[1] == args[3]);
+ tcg_debug_assert((args[1] & 1) == 0);
+ tcg_debug_assert(args[0] == args[1] + 1);
+ tcg_out_insn(s, RR, DR, args[1], args[4]);
break;
case INDEX_op_divu2_i32:
- tcg_out_insn(s, RRE, DLR, TCG_REG_R2, args[4]);
+ tcg_debug_assert(args[0] == args[2]);
+ tcg_debug_assert(args[1] == args[3]);
+ tcg_debug_assert((args[1] & 1) == 0);
+ tcg_debug_assert(args[0] == args[1] + 1);
+ tcg_out_insn(s, RRE, DLR, args[1], args[4]);
break;
case INDEX_op_shl_i32:
tcg_out_insn(s, RI, AGHI, a0, a2);
break;
}
- if (HAVE_FACILITY(EXT_IMM)) {
- if (a2 == (int32_t)a2) {
- tcg_out_insn(s, RIL, AGFI, a0, a2);
- break;
- } else if (a2 == (uint32_t)a2) {
- tcg_out_insn(s, RIL, ALGFI, a0, a2);
- break;
- } else if (-a2 == (uint32_t)-a2) {
- tcg_out_insn(s, RIL, SLGFI, a0, -a2);
- break;
- }
+ if (a2 == (int32_t)a2) {
+ tcg_out_insn(s, RIL, AGFI, a0, a2);
+ break;
+ }
+ if (a2 == (uint32_t)a2) {
+ tcg_out_insn(s, RIL, ALGFI, a0, a2);
+ break;
+ }
+ if (-a2 == (uint32_t)-a2) {
+ tcg_out_insn(s, RIL, SLGFI, a0, -a2);
+ break;
}
}
tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
if (const_args[2]) {
a2 = -a2;
goto do_addi_64;
- } else if (a0 == a1) {
- tcg_out_insn(s, RRE, SGR, a0, a2);
} else {
tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
}
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
- } else if (a0 == a1) {
- tcg_out_insn(s, RRE, NGR, args[0], args[2]);
} else {
tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
}
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen_ori(s, TCG_TYPE_I64, a0, a2);
- } else if (a0 == a1) {
- tcg_out_insn(s, RRE, OGR, a0, a2);
} else {
tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
}
if (const_args[2]) {
tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
tgen_xori(s, TCG_TYPE_I64, a0, a2);
- } else if (a0 == a1) {
- tcg_out_insn(s, RRE, XGR, a0, a2);
} else {
tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
}
break;
case INDEX_op_div2_i64:
- /* ??? We get an unnecessary sign-extension of the dividend
- into R3 with this definition, but as we do in fact always
- produce both quotient and remainder using INDEX_op_div_i64
- instead requires jumping through even more hoops. */
- tcg_out_insn(s, RRE, DSGR, TCG_REG_R2, args[4]);
+    /*
+     * ??? We get an unnecessary sign-extension of the dividend
+     * into op0 with this definition, but as we do in fact always
+     * produce both quotient and remainder, using INDEX_op_div_i64
+     * instead would require jumping through even more hoops.
+     */
+ tcg_debug_assert(args[0] == args[2]);
+ tcg_debug_assert(args[1] == args[3]);
+ tcg_debug_assert((args[1] & 1) == 0);
+ tcg_debug_assert(args[0] == args[1] + 1);
+ tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
break;
case INDEX_op_divu2_i64:
- tcg_out_insn(s, RRE, DLGR, TCG_REG_R2, args[4]);
+ tcg_debug_assert(args[0] == args[2]);
+ tcg_debug_assert(args[1] == args[3]);
+ tcg_debug_assert((args[1] & 1) == 0);
+ tcg_debug_assert(args[0] == args[1] + 1);
+ tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
break;
case INDEX_op_mulu2_i64:
- tcg_out_insn(s, RRE, MLGR, TCG_REG_R2, args[3]);
+ tcg_debug_assert(args[0] == args[2]);
+ tcg_debug_assert((args[1] & 1) == 0);
+ tcg_debug_assert(args[0] == args[1] + 1);
+ tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
break;
case INDEX_op_shl_i64:
/* The host memory model is quite strong; we simply need to
   serialize the instruction stream. */
if (args[0] & TCG_MO_ST_LD) {
- tcg_out_insn(s, RR, BCR, HAVE_FACILITY(FAST_BCR_SER) ? 14 : 15, 0);
+ /* fast-bcr-serialization facility (45) is present */
+ tcg_out_insn(s, RR, BCR, 14, 0);
}
break;
if (vece == MO_64) {
return true;
}
+ src = dst;
}
/*
msb = clz32(val);
lsb = 31 - ctz32(val);
}
- tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_32);
+ tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
return;
}
} else {
msb = clz64(val);
lsb = 63 - ctz64(val);
}
- tcg_out_insn(s, VRIb, VGM, dst, lsb, msb, MO_64);
+ tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
return;
}
}
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
unsigned vecl, unsigned vece,
- const TCGArg *args, const int *const_args)
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
TCGType type = vecl + TCG_TYPE_V64;
TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
break;
+ case INDEX_op_abs_vec:
+ tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
+ break;
+ case INDEX_op_neg_vec:
+ tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
+ break;
+ case INDEX_op_not_vec:
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
+ break;
+
case INDEX_op_add_vec:
tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
break;
case INDEX_op_and_vec:
tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
break;
+ case INDEX_op_andc_vec:
+ tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
+ break;
+ case INDEX_op_mul_vec:
+ tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
+ break;
case INDEX_op_or_vec:
tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
break;
+ case INDEX_op_orc_vec:
+ tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
+ break;
case INDEX_op_xor_vec:
tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
break;
+ case INDEX_op_nand_vec:
+ tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
+ break;
+ case INDEX_op_nor_vec:
+ tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
+ break;
+ case INDEX_op_eqv_vec:
+ tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
+ break;
+
+ case INDEX_op_shli_vec:
+ tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_shri_vec:
+ tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_sari_vec:
+ tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_rotli_vec:
+ tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
+ break;
+ case INDEX_op_shls_vec:
+ tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_shrs_vec:
+ tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_sars_vec:
+ tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_rotls_vec:
+ tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
+ break;
+ case INDEX_op_shlv_vec:
+ tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_shrv_vec:
+ tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_sarv_vec:
+ tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
+ break;
+ case INDEX_op_rotlv_vec:
+ tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
+ break;
+
+ case INDEX_op_smin_vec:
+ tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
+ break;
+ case INDEX_op_smax_vec:
+ tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
+ break;
+ case INDEX_op_umin_vec:
+ tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
+ break;
+ case INDEX_op_umax_vec:
+ tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
+ break;
+
+ case INDEX_op_bitsel_vec:
+ tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
+ break;
case INDEX_op_cmp_vec:
switch ((TCGCond)args[3]) {
}
break;
+ case INDEX_op_s390_vuph_vec:
+ tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
+ break;
+ case INDEX_op_s390_vupl_vec:
+ tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
+ break;
+ case INDEX_op_s390_vpks_vec:
+ tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
+ break;
+
case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
default:
int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
{
switch (opc) {
+ case INDEX_op_abs_vec:
case INDEX_op_add_vec:
case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
+ case INDEX_op_bitsel_vec:
+ case INDEX_op_eqv_vec:
+ case INDEX_op_nand_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_not_vec:
case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_rotls_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_sars_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
case INDEX_op_sub_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
case INDEX_op_xor_vec:
return 1;
case INDEX_op_cmp_vec:
+ case INDEX_op_cmpsel_vec:
+ case INDEX_op_rotrv_vec:
return -1;
+ case INDEX_op_mul_vec:
+ return vece < MO_64;
+ case INDEX_op_ssadd_vec:
+ case INDEX_op_sssub_vec:
+ return vece < MO_64 ? -1 : 0;
default:
return 0;
}
}
}
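+/* Emit the comparison as a mask, then select between v3 and v4 with bitsel. */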
+static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec c1, TCGv_vec c2,
+ TCGv_vec v3, TCGv_vec v4, TCGCond cond)
+{
+ TCGv_vec t = tcg_temp_new_vec(type);
+
+ if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
+ /* Invert the sense of the compare by swapping arguments. */
+ tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
+ } else {
+ tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
+ }
+ tcg_temp_free_vec(t);
+}
+
+static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
+ TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
+{
+ TCGv_vec h1 = tcg_temp_new_vec(type);
+ TCGv_vec h2 = tcg_temp_new_vec(type);
+ TCGv_vec l1 = tcg_temp_new_vec(type);
+ TCGv_vec l2 = tcg_temp_new_vec(type);
+
+    tcg_debug_assert(vece < MO_64);
+
+ /* Unpack with sign-extension. */
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+ tcgv_vec_arg(h1), tcgv_vec_arg(v1));
+ vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
+ tcgv_vec_arg(h2), tcgv_vec_arg(v2));
+
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+ tcgv_vec_arg(l1), tcgv_vec_arg(v1));
+ vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
+ tcgv_vec_arg(l2), tcgv_vec_arg(v2));
+
+ /* Arithmetic on a wider element size. */
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
+ tcgv_vec_arg(h1), tcgv_vec_arg(h2));
+ vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
+ tcgv_vec_arg(l1), tcgv_vec_arg(l2));
+
+ /* Pack with saturation. */
+ vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
+ tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
+
+ tcg_temp_free_vec(h1);
+ tcg_temp_free_vec(h2);
+ tcg_temp_free_vec(l1);
+ tcg_temp_free_vec(l2);
+}
+
void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
TCGArg a0, ...)
{
va_list va;
- TCGv_vec v0, v1, v2;
+ TCGv_vec v0, v1, v2, v3, v4, t0;
va_start(va, a0);
v0 = temp_tcgv_vec(arg_temp(a0));
expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
break;
+ case INDEX_op_cmpsel_vec:
+ v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
+ expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
+ break;
+
+ case INDEX_op_rotrv_vec:
+ t0 = tcg_temp_new_vec(type);
+ tcg_gen_neg_vec(vece, t0, v2);
+ tcg_gen_rotlv_vec(vece, v0, v1, t0);
+ tcg_temp_free_vec(t0);
+ break;
+
+ case INDEX_op_ssadd_vec:
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
+ break;
+ case INDEX_op_sssub_vec:
+ expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
+ break;
+
default:
g_assert_not_reached();
}
case INDEX_op_or_i64:
case INDEX_op_xor_i32:
case INDEX_op_xor_i64:
- return (HAVE_FACILITY(DISTINCT_OPS)
- ? C_O1_I2(r, r, ri)
- : C_O1_I2(r, 0, ri));
+ return C_O1_I2(r, r, ri);
case INDEX_op_mul_i32:
- /* If we have the general-instruction-extensions, then we have
- MULTIPLY SINGLE IMMEDIATE with a signed 32-bit, otherwise we
- have only MULTIPLY HALFWORD IMMEDIATE, with a signed 16-bit. */
- return (HAVE_FACILITY(GEN_INST_EXT)
- ? C_O1_I2(r, 0, ri)
- : C_O1_I2(r, 0, rI));
-
+ return C_O1_I2(r, 0, ri);
case INDEX_op_mul_i64:
- return (HAVE_FACILITY(GEN_INST_EXT)
- ? C_O1_I2(r, 0, rJ)
- : C_O1_I2(r, 0, rI));
+ return C_O1_I2(r, 0, rJ);
case INDEX_op_shl_i32:
case INDEX_op_shr_i32:
case INDEX_op_sar_i32:
- return (HAVE_FACILITY(DISTINCT_OPS)
- ? C_O1_I2(r, r, ri)
- : C_O1_I2(r, 0, ri));
+ return C_O1_I2(r, r, ri);
case INDEX_op_brcond_i32:
case INDEX_op_brcond_i64:
case INDEX_op_div2_i64:
case INDEX_op_divu2_i32:
case INDEX_op_divu2_i64:
- return C_O2_I3(b, a, 0, 1, r);
+ return C_O2_I3(o, m, 0, 1, r);
case INDEX_op_mulu2_i64:
- return C_O2_I2(b, a, 0, r);
+ return C_O2_I2(o, m, 0, r);
case INDEX_op_add2_i32:
case INDEX_op_sub2_i32:
- return (HAVE_FACILITY(EXT_IMM)
- ? C_O2_I4(r, r, 0, 1, ri, r)
- : C_O2_I4(r, r, 0, 1, r, r));
+ return C_O2_I4(r, r, 0, 1, ri, r);
case INDEX_op_add2_i64:
case INDEX_op_sub2_i64:
- return (HAVE_FACILITY(EXT_IMM)
- ? C_O2_I4(r, r, 0, 1, rA, r)
- : C_O2_I4(r, r, 0, 1, r, r));
+ return C_O2_I4(r, r, 0, 1, rA, r);
case INDEX_op_st_vec:
return C_O0_I2(v, r);
return C_O1_I1(v, r);
case INDEX_op_dup_vec:
return C_O1_I1(v, vr);
+ case INDEX_op_abs_vec:
+ case INDEX_op_neg_vec:
+ case INDEX_op_not_vec:
+ case INDEX_op_rotli_vec:
+ case INDEX_op_sari_vec:
+ case INDEX_op_shli_vec:
+ case INDEX_op_shri_vec:
+ case INDEX_op_s390_vuph_vec:
+ case INDEX_op_s390_vupl_vec:
+ return C_O1_I1(v, v);
case INDEX_op_add_vec:
case INDEX_op_sub_vec:
case INDEX_op_and_vec:
+ case INDEX_op_andc_vec:
case INDEX_op_or_vec:
+ case INDEX_op_orc_vec:
case INDEX_op_xor_vec:
+ case INDEX_op_nand_vec:
+ case INDEX_op_nor_vec:
+ case INDEX_op_eqv_vec:
case INDEX_op_cmp_vec:
+ case INDEX_op_mul_vec:
+ case INDEX_op_rotlv_vec:
+ case INDEX_op_rotrv_vec:
+ case INDEX_op_shlv_vec:
+ case INDEX_op_shrv_vec:
+ case INDEX_op_sarv_vec:
+ case INDEX_op_smax_vec:
+ case INDEX_op_smin_vec:
+ case INDEX_op_umax_vec:
+ case INDEX_op_umin_vec:
+ case INDEX_op_s390_vpks_vec:
return C_O1_I2(v, v, v);
+ case INDEX_op_rotls_vec:
+ case INDEX_op_shls_vec:
+ case INDEX_op_shrs_vec:
+ case INDEX_op_sars_vec:
+ return C_O1_I2(v, v, r);
+ case INDEX_op_bitsel_vec:
+ return C_O1_I3(v, v, v, v);
default:
g_assert_not_reached();
static void query_s390_facilities(void)
{
unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ const char *which;
/* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
is present on all 64-bit systems, but let's check for it anyway. */
if (!(hwcap & HWCAP_S390_VXRS)) {
s390_facilities[2] = 0;
}
+
+ /*
+ * Minimum supported cpu revision is z196.
+ * Check for all required facilities.
+ * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
+ */
+ if (!HAVE_FACILITY(LONG_DISP)) {
+ which = "long-displacement";
+ goto fail;
+ }
+ if (!HAVE_FACILITY(EXT_IMM)) {
+ which = "extended-immediate";
+ goto fail;
+ }
+ if (!HAVE_FACILITY(GEN_INST_EXT)) {
+ which = "general-instructions-extension";
+ goto fail;
+ }
+ /*
+ * Facility 45 is a big bin that contains: distinct-operands,
+ * fast-BCR-serialization, high-word, population-count,
+ * interlocked-access-1, and load/store-on-condition-1
+ */
+ if (!HAVE_FACILITY(45)) {
+ which = "45";
+ goto fail;
+ }
+ return;
+
+ fail:
+ error_report("%s: missing required facility %s", __func__, which);
+ exit(EXIT_FAILURE);
}
static void tcg_target_init(TCGContext *s)
/* XXX many insns can't be used with R0, so we'd better avoid it for now */
tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
- if (USE_REG_TB) {
- tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB);
- }
}
#define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
#ifndef CONFIG_SOFTMMU
if (guest_base >= 0x80000) {
- tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, true);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
}
#endif
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
- if (USE_REG_TB) {
- tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB,
- tcg_target_call_iarg_regs[1]);
- }
/* br %r3 (go to TB) */
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);