2 * Tiny Code Generator for QEMU
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27 /* We only support generating code for 64-bit mode. */
28 #if TCG_TARGET_REG_BITS != 64
29 #error "unsupported code generation mode"
32 #include "../tcg-ldst.c.inc"
33 #include "../tcg-pool.c.inc"
36 #define TCG_CT_CONST_S16 0x100
37 #define TCG_CT_CONST_S32 0x200
38 #define TCG_CT_CONST_S33 0x400
39 #define TCG_CT_CONST_ZERO 0x800
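/* A brief note on the S33 constraint (an assumption drawn from its range check
   below): it accepts any value whose magnitude fits in 32 bits, i.e.
   [-0xffffffff, 0xffffffff], so such an addend can be handled with either
   ALGFI or SLGFI (see the add handling in tcg_out_op). */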
41 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
42 #define ALL_VECTOR_REGS MAKE_64BIT_MASK(32, 32)
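/* Within the 64-bit register set, %r0-%r15 occupy bits 0-15, bits 16-31 are
   unused, and the vector registers occupy bits 32-63, so the two register
   classes never overlap. */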
45 * For softmmu, we need to avoid conflicts with the first 3
46 * argument registers to perform the tlb lookup, and to call
47 * the helper function.
50 #define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
52 #define SOFTMMU_RESERVE_REGS 0
56 /* In several places within the instruction set, 0 means "no register"
57 rather than TCG_REG_R0. */
58 #define TCG_REG_NONE 0
60 /* A scratch register that may be used throughout the backend. */
61 #define TCG_TMP0 TCG_REG_R1
63 #ifndef CONFIG_SOFTMMU
64 #define TCG_GUEST_BASE_REG TCG_REG_R13
67 /* All of the following instructions are prefixed with their instruction
68 format, and are defined as 8- or 16-bit quantities, even when the two
69 halves of the 16-bit quantity may appear 32 bits apart in the insn.
70 This makes it easy to copy the values from the tables in Appendix B. */
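/* For example, the high byte of RIE_RISBG is emitted in the first halfword of
   the instruction and its low byte in the last halfword, with the register and
   immediate fields in between (see tcg_out_risbg below). */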
71 typedef enum S390Opcode {
268 VRRc_VCEQ = 0xe7f8, /* we leave the m5 cs field 0 */
269 VRRc_VCH = 0xe7fb, /* " */
270 VRRc_VCHL = 0xe7f9, /* " */
271 VRRc_VERLLV = 0xe773,
273 VRRc_VESRAV = 0xe77a,
274 VRRc_VESRLV = 0xe778,
287 VRRc_VPKS = 0xe797, /* we leave the m5 cs field 0 */
312 #ifdef CONFIG_DEBUG_TCG
313 static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
314 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
315 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
316 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
317 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
318 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
319 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
320 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
324 /* Since R6 is a potential argument register, choose it last of the
325 call-saved registers. Likewise prefer the call-clobbered registers
326 in reverse order to maximize the chance of avoiding the arguments. */
327 static const int tcg_target_reg_alloc_order[] = {
328 /* Call saved registers. */
337 /* Call clobbered registers. */
341 /* Argument registers, in reverse order of allocation. */
347 /* V8-V15 are call saved, and omitted. */
374 static const int tcg_target_call_iarg_regs[] = {
382 static const int tcg_target_call_oarg_regs[] = {
390 #define S390_CC_NE (S390_CC_LT | S390_CC_GT)
391 #define S390_CC_LE (S390_CC_LT | S390_CC_EQ)
392 #define S390_CC_GE (S390_CC_GT | S390_CC_EQ)
393 #define S390_CC_NEVER 0
394 #define S390_CC_ALWAYS 15
396 /* Condition codes that result from a COMPARE and COMPARE LOGICAL. */
397 static const uint8_t tcg_cond_to_s390_cond[] = {
398 [TCG_COND_EQ] = S390_CC_EQ,
399 [TCG_COND_NE] = S390_CC_NE,
400 [TCG_COND_LT] = S390_CC_LT,
401 [TCG_COND_LE] = S390_CC_LE,
402 [TCG_COND_GT] = S390_CC_GT,
403 [TCG_COND_GE] = S390_CC_GE,
404 [TCG_COND_LTU] = S390_CC_LT,
405 [TCG_COND_LEU] = S390_CC_LE,
406 [TCG_COND_GTU] = S390_CC_GT,
407 [TCG_COND_GEU] = S390_CC_GE,
410 /* Condition codes that result from a LOAD AND TEST.  Here we have no
411 unsigned instruction variants; however, since the test is against zero,
412 we can remap the outcomes appropriately. */
413 static const uint8_t tcg_cond_to_ltr_cond[] = {
414 [TCG_COND_EQ] = S390_CC_EQ,
415 [TCG_COND_NE] = S390_CC_NE,
416 [TCG_COND_LT] = S390_CC_LT,
417 [TCG_COND_LE] = S390_CC_LE,
418 [TCG_COND_GT] = S390_CC_GT,
419 [TCG_COND_GE] = S390_CC_GE,
420 [TCG_COND_LTU] = S390_CC_NEVER,
421 [TCG_COND_LEU] = S390_CC_EQ,
422 [TCG_COND_GTU] = S390_CC_NE,
423 [TCG_COND_GEU] = S390_CC_ALWAYS,
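/* When testing against zero, an unsigned value is never below zero and always
   greater than or equal to it, is below-or-equal exactly when it is zero, and
   is above exactly when it is non-zero; hence the NEVER, ALWAYS, EQ and NE
   entries above. */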
426 #ifdef CONFIG_SOFTMMU
427 static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
428 [MO_UB] = helper_ret_ldub_mmu,
429 [MO_SB] = helper_ret_ldsb_mmu,
430 [MO_LEUW] = helper_le_lduw_mmu,
431 [MO_LESW] = helper_le_ldsw_mmu,
432 [MO_LEUL] = helper_le_ldul_mmu,
433 [MO_LESL] = helper_le_ldsl_mmu,
434 [MO_LEUQ] = helper_le_ldq_mmu,
435 [MO_BEUW] = helper_be_lduw_mmu,
436 [MO_BESW] = helper_be_ldsw_mmu,
437 [MO_BEUL] = helper_be_ldul_mmu,
438 [MO_BESL] = helper_be_ldsl_mmu,
439 [MO_BEUQ] = helper_be_ldq_mmu,
442 static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
443 [MO_UB] = helper_ret_stb_mmu,
444 [MO_LEUW] = helper_le_stw_mmu,
445 [MO_LEUL] = helper_le_stl_mmu,
446 [MO_LEUQ] = helper_le_stq_mmu,
447 [MO_BEUW] = helper_be_stw_mmu,
448 [MO_BEUL] = helper_be_stl_mmu,
449 [MO_BEUQ] = helper_be_stq_mmu,
453 static const tcg_insn_unit *tb_ret_addr;
454 uint64_t s390_facilities[3];
456 static inline bool is_general_reg(TCGReg r)
458 return r <= TCG_REG_R15;
461 static inline bool is_vector_reg(TCGReg r)
463 return r >= TCG_REG_V0 && r <= TCG_REG_V31;
466 static bool patch_reloc(tcg_insn_unit *src_rw, int type,
467 intptr_t value, intptr_t addend)
469 const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
474 pcrel2 = (tcg_insn_unit *)value - src_rx;
478 if (pcrel2 == (int16_t)pcrel2) {
479 tcg_patch16(src_rw, pcrel2);
484 if (pcrel2 == (int32_t)pcrel2) {
485 tcg_patch32(src_rw, pcrel2);
490 if (value == sextract64(value, 0, 20)) {
491 old = *(uint32_t *)src_rw & 0xf00000ff;
492 old |= ((value & 0xfff) << 16) | ((value & 0xff000) >> 4);
493 tcg_patch32(src_rw, old);
498 g_assert_not_reached();
503 /* Test if a constant matches the constraint. */
504 static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
506 if (ct & TCG_CT_CONST) {
510 if (type == TCG_TYPE_I32) {
514 /* The following are mutually exclusive. */
515 if (ct & TCG_CT_CONST_S16) {
516 return val == (int16_t)val;
517 } else if (ct & TCG_CT_CONST_S32) {
518 return val == (int32_t)val;
519 } else if (ct & TCG_CT_CONST_S33) {
520 return val >= -0xffffffffll && val <= 0xffffffffll;
521 } else if (ct & TCG_CT_CONST_ZERO) {
528 /* Emit instructions according to the given instruction format. */
530 static void tcg_out_insn_RR(TCGContext *s, S390Opcode op, TCGReg r1, TCGReg r2)
532 tcg_out16(s, (op << 8) | (r1 << 4) | r2);
535 static void tcg_out_insn_RRE(TCGContext *s, S390Opcode op,
536 TCGReg r1, TCGReg r2)
538 tcg_out32(s, (op << 16) | (r1 << 4) | r2);
541 static void tcg_out_insn_RRF(TCGContext *s, S390Opcode op,
542 TCGReg r1, TCGReg r2, int m3)
544 tcg_out32(s, (op << 16) | (m3 << 12) | (r1 << 4) | r2);
547 static void tcg_out_insn_RI(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
549 tcg_out32(s, (op << 16) | (r1 << 20) | (i2 & 0xffff));
552 static void tcg_out_insn_RIE(TCGContext *s, S390Opcode op, TCGReg r1,
555 tcg_out16(s, (op & 0xff00) | (r1 << 4) | m3);
556 tcg_out32(s, (i2 << 16) | (op & 0xff));
559 static void tcg_out_insn_RIL(TCGContext *s, S390Opcode op, TCGReg r1, int i2)
561 tcg_out16(s, op | (r1 << 4));
565 static void tcg_out_insn_RS(TCGContext *s, S390Opcode op, TCGReg r1,
566 TCGReg b2, TCGReg r3, int disp)
568 tcg_out32(s, (op << 24) | (r1 << 20) | (r3 << 16) | (b2 << 12)
572 static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
573 TCGReg b2, TCGReg r3, int disp)
575 tcg_out16(s, (op & 0xff00) | (r1 << 4) | r3);
576 tcg_out32(s, (op & 0xff) | (b2 << 28)
577 | ((disp & 0xfff) << 16) | ((disp & 0xff000) >> 4));
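/* The RSY/RXY formats carry a 20-bit signed displacement split into a 12-bit
   DL field and an 8-bit DH field placed next to the final opcode byte; the two
   masks above position DL and DH respectively. */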
580 #define tcg_out_insn_RX tcg_out_insn_RS
581 #define tcg_out_insn_RXY tcg_out_insn_RSY
583 static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
586 * Shift bit 4 of each regno to its corresponding bit of RXB.
587 * RXB itself begins at bit 8 of the instruction so 8 - 4 = 4
588 * is the left-shift of the 4th operand.
590 return ((v1 & 0x10) << (4 + 3))
591 | ((v2 & 0x10) << (4 + 2))
592 | ((v3 & 0x10) << (4 + 1))
593 | ((v4 & 0x10) << (4 + 0));
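/* For example, with v1 = %v17 and the other operands below %v16, only the
   first term is non-zero: 0x10 << 7 = 0x800, i.e. RXB = 0b1000, flagging
   operand 1 as one of %v16-%v31. */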
596 static void tcg_out_insn_VRIa(TCGContext *s, S390Opcode op,
597 TCGReg v1, uint16_t i2, int m3)
599 tcg_debug_assert(is_vector_reg(v1));
600 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
602 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
605 static void tcg_out_insn_VRIb(TCGContext *s, S390Opcode op,
606 TCGReg v1, uint8_t i2, uint8_t i3, int m4)
608 tcg_debug_assert(is_vector_reg(v1));
609 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4));
610 tcg_out16(s, (i2 << 8) | (i3 & 0xff));
611 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
614 static void tcg_out_insn_VRIc(TCGContext *s, S390Opcode op,
615 TCGReg v1, uint16_t i2, TCGReg v3, int m4)
617 tcg_debug_assert(is_vector_reg(v1));
618 tcg_debug_assert(is_vector_reg(v3));
619 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
621 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
624 static void tcg_out_insn_VRRa(TCGContext *s, S390Opcode op,
625 TCGReg v1, TCGReg v2, int m3)
627 tcg_debug_assert(is_vector_reg(v1));
628 tcg_debug_assert(is_vector_reg(v2));
629 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
630 tcg_out32(s, (op & 0x00ff) | RXB(v1, v2, 0, 0) | (m3 << 12));
633 static void tcg_out_insn_VRRc(TCGContext *s, S390Opcode op,
634 TCGReg v1, TCGReg v2, TCGReg v3, int m4)
636 tcg_debug_assert(is_vector_reg(v1));
637 tcg_debug_assert(is_vector_reg(v2));
638 tcg_debug_assert(is_vector_reg(v3));
639 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
640 tcg_out16(s, v3 << 12);
641 tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, 0) | (m4 << 12));
644 static void tcg_out_insn_VRRe(TCGContext *s, S390Opcode op,
645 TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
647 tcg_debug_assert(is_vector_reg(v1));
648 tcg_debug_assert(is_vector_reg(v2));
649 tcg_debug_assert(is_vector_reg(v3));
650 tcg_debug_assert(is_vector_reg(v4));
651 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v2 & 0xf));
652 tcg_out16(s, v3 << 12);
653 tcg_out16(s, (op & 0x00ff) | RXB(v1, v2, v3, v4) | (v4 << 12));
656 static void tcg_out_insn_VRRf(TCGContext *s, S390Opcode op,
657 TCGReg v1, TCGReg r2, TCGReg r3)
659 tcg_debug_assert(is_vector_reg(v1));
660 tcg_debug_assert(is_general_reg(r2));
661 tcg_debug_assert(is_general_reg(r3));
662 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r2);
663 tcg_out16(s, r3 << 12);
664 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0));
667 static void tcg_out_insn_VRSa(TCGContext *s, S390Opcode op, TCGReg v1,
668 intptr_t d2, TCGReg b2, TCGReg v3, int m4)
670 tcg_debug_assert(is_vector_reg(v1));
671 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
672 tcg_debug_assert(is_general_reg(b2));
673 tcg_debug_assert(is_vector_reg(v3));
674 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | (v3 & 0xf));
675 tcg_out16(s, b2 << 12 | d2);
676 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, v3, 0) | (m4 << 12));
679 static void tcg_out_insn_VRSb(TCGContext *s, S390Opcode op, TCGReg v1,
680 intptr_t d2, TCGReg b2, TCGReg r3, int m4)
682 tcg_debug_assert(is_vector_reg(v1));
683 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
684 tcg_debug_assert(is_general_reg(b2));
685 tcg_debug_assert(is_general_reg(r3));
686 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | r3);
687 tcg_out16(s, b2 << 12 | d2);
688 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m4 << 12));
691 static void tcg_out_insn_VRSc(TCGContext *s, S390Opcode op, TCGReg r1,
692 intptr_t d2, TCGReg b2, TCGReg v3, int m4)
694 tcg_debug_assert(is_general_reg(r1));
695 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
696 tcg_debug_assert(is_general_reg(b2));
697 tcg_debug_assert(is_vector_reg(v3));
698 tcg_out16(s, (op & 0xff00) | (r1 << 4) | (v3 & 0xf));
699 tcg_out16(s, b2 << 12 | d2);
700 tcg_out16(s, (op & 0x00ff) | RXB(0, 0, v3, 0) | (m4 << 12));
703 static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
704 TCGReg b2, TCGReg x2, intptr_t d2, int m3)
706 tcg_debug_assert(is_vector_reg(v1));
707 tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
708 tcg_debug_assert(is_general_reg(x2));
709 tcg_debug_assert(is_general_reg(b2));
710 tcg_out16(s, (op & 0xff00) | ((v1 & 0xf) << 4) | x2);
711 tcg_out16(s, (b2 << 12) | d2);
712 tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
715 /* Emit an opcode with "type-checking" of the format. */
716 #define tcg_out_insn(S, FMT, OP, ...) \
717 glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
720 /* emit 64-bit shifts */
721 static void tcg_out_sh64(TCGContext* s, S390Opcode op, TCGReg dest,
722 TCGReg src, TCGReg sh_reg, int sh_imm)
724 tcg_out_insn_RSY(s, op, dest, sh_reg, src, sh_imm);
727 /* emit 32-bit shifts */
728 static void tcg_out_sh32(TCGContext* s, S390Opcode op, TCGReg dest,
729 TCGReg sh_reg, int sh_imm)
731 tcg_out_insn_RS(s, op, dest, sh_reg, 0, sh_imm);
734 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg dst, TCGReg src)
741 if (likely(is_general_reg(dst) && is_general_reg(src))) {
742 tcg_out_insn(s, RR, LR, dst, src);
748 if (likely(is_general_reg(dst))) {
749 if (likely(is_general_reg(src))) {
750 tcg_out_insn(s, RRE, LGR, dst, src);
752 tcg_out_insn(s, VRSc, VLGV, dst, 0, 0, src, 3);
755 } else if (is_general_reg(src)) {
756 tcg_out_insn(s, VRSb, VLVG, dst, 0, 0, src, 3);
763 tcg_out_insn(s, VRRa, VLR, dst, src, 0);
767 g_assert_not_reached();
772 static const S390Opcode lli_insns[4] = {
773 RI_LLILL, RI_LLILH, RI_LLIHL, RI_LLIHH
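/* Each LOAD LOGICAL IMMEDIATE variant loads a 16-bit immediate into one
   halfword of the register and zeroes the rest: LL is bits 0-15, LH bits
   16-31, HL bits 32-47 and HH bits 48-63, matching i = 0..3 below. */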
776 static bool maybe_out_small_movi(TCGContext *s, TCGType type,
777 TCGReg ret, tcg_target_long sval)
779 tcg_target_ulong uval = sval;
782 if (type == TCG_TYPE_I32) {
783 uval = (uint32_t)sval;
784 sval = (int32_t)sval;
787 /* Try all 32-bit insns that can load it in one go. */
788 if (sval >= -0x8000 && sval < 0x8000) {
789 tcg_out_insn(s, RI, LGHI, ret, sval);
793 for (i = 0; i < 4; i++) {
794 tcg_target_long mask = 0xffffull << i * 16;
795 if ((uval & mask) == uval) {
796 tcg_out_insn_RI(s, lli_insns[i], ret, uval >> i * 16);
804 /* load a register with an immediate value */
805 static void tcg_out_movi(TCGContext *s, TCGType type,
806 TCGReg ret, tcg_target_long sval)
808 tcg_target_ulong uval;
810 /* Try all 32-bit insns that can load it in one go. */
811 if (maybe_out_small_movi(s, type, ret, sval)) {
816 if (type == TCG_TYPE_I32) {
817 uval = (uint32_t)sval;
818 sval = (int32_t)sval;
821 /* Try all 48-bit insns that can load it in one go. */
822 if (sval == (int32_t)sval) {
823 tcg_out_insn(s, RIL, LGFI, ret, sval);
826 if (uval <= 0xffffffff) {
827 tcg_out_insn(s, RIL, LLILF, ret, uval);
830 if ((uval & 0xffffffff) == 0) {
831 tcg_out_insn(s, RIL, LLIHF, ret, uval >> 32);
835 /* Try for PC-relative address load. For odd addresses,
836 attempt to use an offset from the start of the TB. */
837 if ((sval & 1) == 0) {
838 ptrdiff_t off = tcg_pcrel_diff(s, (void *)sval) >> 1;
839 if (off == (int32_t)off) {
840 tcg_out_insn(s, RIL, LARL, ret, off);
845 /* Otherwise, stuff it in the constant pool. */
846 tcg_out_insn(s, RIL, LGRL, ret, 0);
847 new_pool_label(s, sval, R_390_PC32DBL, s->code_ptr - 2, 2);
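/* The 32-bit I2 field of LGRL is the last two halfwords just emitted, hence
   the reloc at s->code_ptr - 2; the addend of 2 accounts for the field
   starting 2 bytes after the instruction address that the halved PC-relative
   offset is measured from. */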
850 /* Emit a load/store type instruction. Inputs are:
851 DATA: The register to be loaded or stored.
852 BASE+OFS: The effective address.
853 OPC_RX: If the operation has an RX format opcode (e.g. STC), otherwise 0.
854 OPC_RXY: The RXY format opcode for the operation (e.g. STCY). */
856 static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, S390Opcode opc_rxy,
857 TCGReg data, TCGReg base, TCGReg index,
860 if (ofs < -0x80000 || ofs >= 0x80000) {
861 /* Combine the low 20 bits of the offset with the actual load insn;
862 the high 44 bits must come from an immediate load. */
863 tcg_target_long low = ((ofs & 0xfffff) ^ 0x80000) - 0x80000;
864 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs - low);
867 /* If we were already given an index register, add it in. */
868 if (index != TCG_REG_NONE) {
869 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
874 if (opc_rx && ofs >= 0 && ofs < 0x1000) {
875 tcg_out_insn_RX(s, opc_rx, data, base, index, ofs);
877 tcg_out_insn_RXY(s, opc_rxy, data, base, index, ofs);
881 static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
882 TCGReg data, TCGReg base, TCGReg index,
883 tcg_target_long ofs, int m3)
885 if (ofs < 0 || ofs >= 0x1000) {
886 if (ofs >= -0x80000 && ofs < 0x80000) {
887 tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
889 index = TCG_REG_NONE;
892 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
893 if (index != TCG_REG_NONE) {
894 tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
900 tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
903 /* load data without address translation or endianness conversion */
904 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
905 TCGReg base, intptr_t ofs)
909 if (likely(is_general_reg(data))) {
910 tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
913 tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
917 if (likely(is_general_reg(data))) {
918 tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
924 tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
928 /* Hint quadword aligned. */
929 tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
933 g_assert_not_reached();
937 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
938 TCGReg base, intptr_t ofs)
942 if (likely(is_general_reg(data))) {
943 tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
945 tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
950 if (likely(is_general_reg(data))) {
951 tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
957 tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
961 /* Hint quadword aligned. */
962 tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
966 g_assert_not_reached();
970 static inline bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
971 TCGReg base, intptr_t ofs)
976 static inline void tcg_out_risbg(TCGContext *s, TCGReg dest, TCGReg src,
977 int msb, int lsb, int ofs, int z)
980 tcg_out16(s, (RIE_RISBG & 0xff00) | (dest << 4) | src);
981 tcg_out16(s, (msb << 8) | (z << 7) | lsb);
982 tcg_out16(s, (ofs << 8) | (RIE_RISBG & 0xff));
985 static void tgen_ext8s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
987 tcg_out_insn(s, RRE, LGBR, dest, src);
990 static void tgen_ext8u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
992 tcg_out_insn(s, RRE, LLGCR, dest, src);
995 static void tgen_ext16s(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
997 tcg_out_insn(s, RRE, LGHR, dest, src);
1000 static void tgen_ext16u(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
1002 tcg_out_insn(s, RRE, LLGHR, dest, src);
1005 static inline void tgen_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
1007 tcg_out_insn(s, RRE, LGFR, dest, src);
1010 static inline void tgen_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
1012 tcg_out_insn(s, RRE, LLGFR, dest, src);
1015 /* Accept bit patterns with a single contiguous run of ones, possibly
1020 wrapped around the word (e.g. 1..10..01..1).  Copied from gcc sources. */
1021 static inline bool risbg_mask(uint64_t c)
1024 /* We don't change the number of transitions by inverting,
1025 so make sure we start with the LSB zero. */
1029 /* Reject all zeros or all ones. */
1033 /* Find the first transition. */
1035 /* Invert to look for a second transition. */
1037 /* Erase the first transition. */
1039 /* Find the second transition, if any. */
1041 /* Match if all the bits are 1's, or if c is zero. */
1045 static void tgen_andi_risbg(TCGContext *s, TCGReg out, TCGReg in, uint64_t val)
1048 if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
1049 /* Achieve wraparound by swapping msb and lsb. */
1050 msb = 64 - ctz64(~val);
1051 lsb = clz64(~val) - 1;
1054 lsb = 63 - ctz64(val);
1056 tcg_out_risbg(s, out, in, msb, lsb, 0, 1);
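/* For a non-wrapping mask such as 0x00ffff00 this yields msb = 40 and
   lsb = 63 - ctz64(val) = 55 (IBM bit numbering, bit 0 = MSB), so RISBG keeps
   exactly bits 40-55 of the source and, with z = 1, zeroes the rest. */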
1059 static void tgen_andi(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1061 static const S390Opcode ni_insns[4] = {
1062 RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
1064 static const S390Opcode nif_insns[2] = {
1067 uint64_t valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
1070 /* Look for the zero-extensions. */
1071 if ((val & valid) == 0xffffffff) {
1072 tgen_ext32u(s, dest, dest);
1075 if ((val & valid) == 0xff) {
1076 tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
1079 if ((val & valid) == 0xffff) {
1080 tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
1084 /* Try all 32-bit insns that can perform it in one go. */
1085 for (i = 0; i < 4; i++) {
1086 tcg_target_ulong mask = ~(0xffffull << i * 16);
1087 if (((val | ~valid) & mask) == mask) {
1088 tcg_out_insn_RI(s, ni_insns[i], dest, val >> i * 16);
1093 /* Try all 48-bit insns that can perform it in one go. */
1094 for (i = 0; i < 2; i++) {
1095 tcg_target_ulong mask = ~(0xffffffffull << i * 32);
1096 if (((val | ~valid) & mask) == mask) {
1097 tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i * 32);
1101 if (risbg_mask(val)) {
1102 tgen_andi_risbg(s, dest, dest, val);
1106 tcg_out_movi(s, type, TCG_TMP0, val);
1107 if (type == TCG_TYPE_I32) {
1108 tcg_out_insn(s, RR, NR, dest, TCG_TMP0);
1110 tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
1114 static void tgen_ori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1116 static const S390Opcode oi_insns[4] = {
1117 RI_OILL, RI_OILH, RI_OIHL, RI_OIHH
1119 static const S390Opcode oif_insns[2] = {
1125 /* Look for no-op. */
1126 if (unlikely(val == 0)) {
1130 /* Try all 32-bit insns that can perform it in one go. */
1131 for (i = 0; i < 4; i++) {
1132 tcg_target_ulong mask = (0xffffull << i * 16);
1133 if ((val & mask) != 0 && (val & ~mask) == 0) {
1134 tcg_out_insn_RI(s, oi_insns[i], dest, val >> i * 16);
1139 /* Try all 48-bit insns that can perform it in one go. */
1140 for (i = 0; i < 2; i++) {
1141 tcg_target_ulong mask = (0xffffffffull << i * 32);
1142 if ((val & mask) != 0 && (val & ~mask) == 0) {
1143 tcg_out_insn_RIL(s, oif_insns[i], dest, val >> i * 32);
1148 if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1149 if (type == TCG_TYPE_I32) {
1150 tcg_out_insn(s, RR, OR, dest, TCG_TMP0);
1152 tcg_out_insn(s, RRE, OGR, dest, TCG_TMP0);
1155 /* Perform the OR via sequential modifications to the high and
1156 low parts. Do this via recursion to handle 16-bit vs 32-bit
1157 masks in each half. */
1158 tgen_ori(s, type, dest, val & 0x00000000ffffffffull);
1159 tgen_ori(s, type, dest, val & 0xffffffff00000000ull);
1163 static void tgen_xori(TCGContext *s, TCGType type, TCGReg dest, uint64_t val)
1165 /* Try all 48-bit insns that can perform it in one go. */
1166 if ((val & 0xffffffff00000000ull) == 0) {
1167 tcg_out_insn(s, RIL, XILF, dest, val);
1170 if ((val & 0x00000000ffffffffull) == 0) {
1171 tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1175 if (maybe_out_small_movi(s, type, TCG_TMP0, val)) {
1176 if (type == TCG_TYPE_I32) {
1177 tcg_out_insn(s, RR, XR, dest, TCG_TMP0);
1179 tcg_out_insn(s, RRE, XGR, dest, TCG_TMP0);
1182 /* Perform the xor by parts. */
1183 if (val & 0xffffffff) {
1184 tcg_out_insn(s, RIL, XILF, dest, val);
1186 if (val > 0xffffffff) {
1187 tcg_out_insn(s, RIL, XIHF, dest, val >> 32);
1192 static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
1193 TCGArg c2, bool c2const, bool need_carry)
1195 bool is_unsigned = is_unsigned_cond(c);
1200 if (!(is_unsigned && need_carry)) {
1201 if (type == TCG_TYPE_I32) {
1202 tcg_out_insn(s, RR, LTR, r1, r1);
1204 tcg_out_insn(s, RRE, LTGR, r1, r1);
1206 return tcg_cond_to_ltr_cond[c];
1210 if (!is_unsigned && c2 == (int16_t)c2) {
1211 op = (type == TCG_TYPE_I32 ? RI_CHI : RI_CGHI);
1212 tcg_out_insn_RI(s, op, r1, c2);
1216 if (type == TCG_TYPE_I32) {
1217 op = (is_unsigned ? RIL_CLFI : RIL_CFI);
1218 tcg_out_insn_RIL(s, op, r1, c2);
1221 if (c2 == (is_unsigned ? (TCGArg)(uint32_t)c2 : (TCGArg)(int32_t)c2)) {
1222 op = (is_unsigned ? RIL_CLGFI : RIL_CGFI);
1223 tcg_out_insn_RIL(s, op, r1, c2);
1227 /* Use the constant pool, but not for small constants. */
1228 if (maybe_out_small_movi(s, type, TCG_TMP0, c2)) {
1230 /* fall through to reg-reg */
1232 op = (is_unsigned ? RIL_CLGRL : RIL_CGRL);
1233 tcg_out_insn_RIL(s, op, r1, 0);
1234 new_pool_label(s, c2, R_390_PC32DBL, s->code_ptr - 2, 2);
1239 if (type == TCG_TYPE_I32) {
1240 op = (is_unsigned ? RR_CLR : RR_CR);
1241 tcg_out_insn_RR(s, op, r1, c2);
1243 op = (is_unsigned ? RRE_CLGR : RRE_CGR);
1244 tcg_out_insn_RRE(s, op, r1, c2);
1248 return tcg_cond_to_s390_cond[c];
1251 static void tgen_setcond(TCGContext *s, TCGType type, TCGCond cond,
1252 TCGReg dest, TCGReg c1, TCGArg c2, int c2const)
1256 /* With LOC2, we can always emit the minimum 3 insns. */
1257 if (HAVE_FACILITY(LOAD_ON_COND2)) {
1258 /* Emit: d = 0, d = (cc ? 1 : d). */
1259 cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1260 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1261 tcg_out_insn(s, RIE, LOCGHI, dest, 1, cc);
1268 /* X != 0 is X > 0. */
1269 if (c2const && c2 == 0) {
1270 cond = TCG_COND_GTU;
1278 /* The result of a compare has CC=2 for GT and CC=3 unused.
1279 ADD LOGICAL WITH CARRY considers (CC & 2) the carry bit. */
1280 tgen_cmp(s, type, cond, c1, c2, c2const, true);
1281 tcg_out_movi(s, type, dest, 0);
1282 tcg_out_insn(s, RRE, ALCGR, dest, dest);
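/* E.g. for a true GTU comparison the compare leaves CC = 2, so ALCGR computes
   0 + 0 + 1 = 1; for a false one CC is 0 or 1 and dest stays 0. */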
1286 /* X == 0 is X <= 0. */
1287 if (c2const && c2 == 0) {
1288 cond = TCG_COND_LEU;
1296 /* As above, but we're looking for borrow, or !carry.
1297 The second insn computes d - d - borrow, or -1 for true
1298 and 0 for false. So we must mask to 1 bit afterward. */
1299 tgen_cmp(s, type, cond, c1, c2, c2const, true);
1300 tcg_out_insn(s, RRE, SLBGR, dest, dest);
1301 tgen_andi(s, type, dest, 1);
1308 /* Swap operands so that we can use LEU/GTU/GT/LE. */
1313 cond = tcg_swap_cond(cond);
1319 g_assert_not_reached();
1322 cc = tgen_cmp(s, type, cond, c1, c2, c2const, false);
1323 /* Emit: d = 0, t = 1, d = (cc ? t : d). */
1324 tcg_out_movi(s, TCG_TYPE_I64, dest, 0);
1325 tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, 1);
1326 tcg_out_insn(s, RRF, LOCGR, dest, TCG_TMP0, cc);
1329 static void tgen_movcond(TCGContext *s, TCGType type, TCGCond c, TCGReg dest,
1330 TCGReg c1, TCGArg c2, int c2const,
1331 TCGArg v3, int v3const)
1333 int cc = tgen_cmp(s, type, c, c1, c2, c2const, false);
1335 tcg_out_insn(s, RIE, LOCGHI, dest, v3, cc);
1337 tcg_out_insn(s, RRF, LOCGR, dest, v3, cc);
1341 static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
1342 TCGArg a2, int a2const)
1344 /* Since this sets both R and R+1, we have no choice but to store the
1345 result into R0, allowing R1 == TCG_TMP0 to be clobbered as well. */
1346 QEMU_BUILD_BUG_ON(TCG_TMP0 != TCG_REG_R1);
1347 tcg_out_insn(s, RRE, FLOGR, TCG_REG_R0, a1);
1349 if (a2const && a2 == 64) {
1350 tcg_out_mov(s, TCG_TYPE_I64, dest, TCG_REG_R0);
1353 tcg_out_movi(s, TCG_TYPE_I64, dest, a2);
1355 tcg_out_mov(s, TCG_TYPE_I64, dest, a2);
1357 /* Emit: if (one bit found) dest = r0. */
1358 tcg_out_insn(s, RRF, LOCGR, dest, TCG_REG_R0, 2);
1362 static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
1363 int ofs, int len, int z)
1365 int lsb = (63 - ofs);
1366 int msb = lsb - (len - 1);
1367 tcg_out_risbg(s, dest, src, msb, lsb, ofs, z);
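/* E.g. for ofs = 8, len = 16: lsb = 55, msb = 40, so RISBG rotates src left
   by 8 and inserts bits 40-55 of the result (the low 16 bits of src) into
   dest, leaving the other bits of dest intact when z = 0. */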
1370 static void tgen_extract(TCGContext *s, TCGReg dest, TCGReg src,
1373 tcg_out_risbg(s, dest, src, 64 - len, 63, 64 - ofs, 1);
1376 static void tgen_gotoi(TCGContext *s, int cc, const tcg_insn_unit *dest)
1378 ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1379 if (off == (int16_t)off) {
1380 tcg_out_insn(s, RI, BRC, cc, off);
1381 } else if (off == (int32_t)off) {
1382 tcg_out_insn(s, RIL, BRCL, cc, off);
1384 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1385 tcg_out_insn(s, RR, BCR, cc, TCG_TMP0);
1389 static void tgen_branch(TCGContext *s, int cc, TCGLabel *l)
1392 tgen_gotoi(s, cc, l->u.value_ptr);
1394 tcg_out16(s, RI_BRC | (cc << 4));
1395 tcg_out_reloc(s, s->code_ptr, R_390_PC16DBL, l, 2);
1400 static void tgen_compare_branch(TCGContext *s, S390Opcode opc, int cc,
1401 TCGReg r1, TCGReg r2, TCGLabel *l)
1403 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1404 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | r2);
1406 tcg_out16(s, cc << 12 | (opc & 0xff));
1409 static void tgen_compare_imm_branch(TCGContext *s, S390Opcode opc, int cc,
1410 TCGReg r1, int i2, TCGLabel *l)
1412 tcg_out_reloc(s, s->code_ptr + 1, R_390_PC16DBL, l, 2);
1413 tcg_out16(s, (opc & 0xff00) | (r1 << 4) | cc);
1415 tcg_out16(s, (i2 << 8) | (opc & 0xff));
1418 static void tgen_brcond(TCGContext *s, TCGType type, TCGCond c,
1419 TCGReg r1, TCGArg c2, int c2const, TCGLabel *l)
1422 bool is_unsigned = is_unsigned_cond(c);
1426 cc = tcg_cond_to_s390_cond[c];
1429 opc = (type == TCG_TYPE_I32
1430 ? (is_unsigned ? RIE_CLRJ : RIE_CRJ)
1431 : (is_unsigned ? RIE_CLGRJ : RIE_CGRJ));
1432 tgen_compare_branch(s, opc, cc, r1, c2, l);
1437 * COMPARE IMMEDIATE AND BRANCH RELATIVE has an 8-bit immediate field.
1438 * If the immediate we've been given does not fit that range, we'll
1439 * fall back to separate compare and branch instructions using the
1440 * larger comparison range afforded by COMPARE IMMEDIATE.
1442 if (type == TCG_TYPE_I32) {
1445 in_range = (uint32_t)c2 == (uint8_t)c2;
1448 in_range = (int32_t)c2 == (int8_t)c2;
1453 in_range = (uint64_t)c2 == (uint8_t)c2;
1456 in_range = (int64_t)c2 == (int8_t)c2;
1460 tgen_compare_imm_branch(s, opc, cc, r1, c2, l);
1464 cc = tgen_cmp(s, type, c, r1, c2, c2const, false);
1465 tgen_branch(s, cc, l);
1468 static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *dest)
1470 ptrdiff_t off = tcg_pcrel_diff(s, dest) >> 1;
1471 if (off == (int32_t)off) {
1472 tcg_out_insn(s, RIL, BRASL, TCG_REG_R14, off);
1474 tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)dest);
1475 tcg_out_insn(s, RR, BASR, TCG_REG_R14, TCG_TMP0);
1479 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *dest,
1480 const TCGHelperInfo *info)
1482 tcg_out_call_int(s, dest);
1485 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg data,
1486 TCGReg base, TCGReg index, int disp)
1488 switch (opc & (MO_SSIZE | MO_BSWAP)) {
1490 tcg_out_insn(s, RXY, LLGC, data, base, index, disp);
1493 tcg_out_insn(s, RXY, LGB, data, base, index, disp);
1496 case MO_UW | MO_BSWAP:
1497 /* swapped unsigned halfword load with upper bits zeroed */
1498 tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1499 tgen_ext16u(s, TCG_TYPE_I64, data, data);
1502 tcg_out_insn(s, RXY, LLGH, data, base, index, disp);
1505 case MO_SW | MO_BSWAP:
1506 /* swapped sign-extended halfword load */
1507 tcg_out_insn(s, RXY, LRVH, data, base, index, disp);
1508 tgen_ext16s(s, TCG_TYPE_I64, data, data);
1511 tcg_out_insn(s, RXY, LGH, data, base, index, disp);
1514 case MO_UL | MO_BSWAP:
1515 /* swapped unsigned int load with upper bits zeroed */
1516 tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1517 tgen_ext32u(s, data, data);
1520 tcg_out_insn(s, RXY, LLGF, data, base, index, disp);
1523 case MO_SL | MO_BSWAP:
1524 /* swapped sign-extended int load */
1525 tcg_out_insn(s, RXY, LRV, data, base, index, disp);
1526 tgen_ext32s(s, data, data);
1529 tcg_out_insn(s, RXY, LGF, data, base, index, disp);
1532 case MO_UQ | MO_BSWAP:
1533 tcg_out_insn(s, RXY, LRVG, data, base, index, disp);
1536 tcg_out_insn(s, RXY, LG, data, base, index, disp);
1544 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
1545 TCGReg base, TCGReg index, int disp)
1547 switch (opc & (MO_SIZE | MO_BSWAP)) {
1549 if (disp >= 0 && disp < 0x1000) {
1550 tcg_out_insn(s, RX, STC, data, base, index, disp);
1552 tcg_out_insn(s, RXY, STCY, data, base, index, disp);
1556 case MO_UW | MO_BSWAP:
1557 tcg_out_insn(s, RXY, STRVH, data, base, index, disp);
1560 if (disp >= 0 && disp < 0x1000) {
1561 tcg_out_insn(s, RX, STH, data, base, index, disp);
1563 tcg_out_insn(s, RXY, STHY, data, base, index, disp);
1567 case MO_UL | MO_BSWAP:
1568 tcg_out_insn(s, RXY, STRV, data, base, index, disp);
1571 if (disp >= 0 && disp < 0x1000) {
1572 tcg_out_insn(s, RX, ST, data, base, index, disp);
1574 tcg_out_insn(s, RXY, STY, data, base, index, disp);
1578 case MO_UQ | MO_BSWAP:
1579 tcg_out_insn(s, RXY, STRVG, data, base, index, disp);
1582 tcg_out_insn(s, RXY, STG, data, base, index, disp);
1590 #if defined(CONFIG_SOFTMMU)
1591 /* We're expecting to use a 20-bit negative offset on the tlb memory ops. */
1592 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
1593 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
1595 /* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
1596 addend into R2.  Returns a register with the sanitized guest address.  */
1597 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
1598 int mem_index, bool is_ld)
1600 unsigned s_bits = opc & MO_SIZE;
1601 unsigned a_bits = get_alignment_bits(opc);
1602 unsigned s_mask = (1 << s_bits) - 1;
1603 unsigned a_mask = (1 << a_bits) - 1;
1604 int fast_off = TLB_MASK_TABLE_OFS(mem_index);
1605 int mask_off = fast_off + offsetof(CPUTLBDescFast, mask);
1606 int table_off = fast_off + offsetof(CPUTLBDescFast, table);
1610 tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
1611 TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
1612 tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
1613 tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
1615 /* For aligned accesses, we check the first byte and include the alignment
1616 bits within the address.  For unaligned accesses, we check that we don't
1617 cross pages using the address of the last byte of the access. */
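/* E.g. a byte-aligned 8-byte access has a_mask = 0 and a_off = 7, so the
   comparison below uses the address of the last byte; a naturally aligned
   access has a_off = 0 and instead folds a_mask into tlb_mask, sending any
   misaligned address to the slow path. */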
1618 a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
1619 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
1621 tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
1623 tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
1624 tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
1628 ofs = offsetof(CPUTLBEntry, addr_read);
1630 ofs = offsetof(CPUTLBEntry, addr_write);
1632 if (TARGET_LONG_BITS == 32) {
1633 tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1635 tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
1638 tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
1639 offsetof(CPUTLBEntry, addend));
1641 if (TARGET_LONG_BITS == 32) {
1642 tgen_ext32u(s, TCG_REG_R3, addr_reg);
1648 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
1649 TCGReg data, TCGReg addr,
1650 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
1652 TCGLabelQemuLdst *label = new_ldst_label(s);
1654 label->is_ld = is_ld;
1656 label->datalo_reg = data;
1657 label->addrlo_reg = addr;
1658 label->raddr = tcg_splitwx_to_rx(raddr);
1659 label->label_ptr[0] = label_ptr;
1662 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1664 TCGReg addr_reg = lb->addrlo_reg;
1665 TCGReg data_reg = lb->datalo_reg;
1666 MemOpIdx oi = lb->oi;
1667 MemOp opc = get_memop(oi);
1669 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1670 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1674 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1675 if (TARGET_LONG_BITS == 64) {
1676 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1678 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
1679 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
1680 tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
1681 tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
1683 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1687 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
1689 TCGReg addr_reg = lb->addrlo_reg;
1690 TCGReg data_reg = lb->datalo_reg;
1691 MemOpIdx oi = lb->oi;
1692 MemOp opc = get_memop(oi);
1694 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
1695 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1699 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1700 if (TARGET_LONG_BITS == 64) {
1701 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
1703 switch (opc & MO_SIZE) {
1705 tgen_ext8u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1708 tgen_ext16u(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1711 tgen_ext32u(s, TCG_REG_R4, data_reg);
1714 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, data_reg);
1719 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
1720 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
1721 tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
1723 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
1727 static void tcg_out_test_alignment(TCGContext *s, bool is_ld,
1728 TCGReg addrlo, unsigned a_bits)
1730 unsigned a_mask = (1 << a_bits) - 1;
1731 TCGLabelQemuLdst *l = new_ldst_label(s);
1734 l->addrlo_reg = addrlo;
1736 /* We are expecting a_bits to max out at 7, much lower than TMLL. */
1737 tcg_debug_assert(a_bits < 16);
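/* TMLL sets CC = 0 only when all of the address bits selected by a_mask are
   zero; the BRC below (mask 7, i.e. CC 1, 2 or 3) thus branches to the slow
   path for any misaligned address. */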
1738 tcg_out_insn(s, RI, TMLL, addrlo, a_mask);
1740 tcg_out16(s, RI_BRC | (7 << 4)); /* CC in {1,2,3} */
1741 l->label_ptr[0] = s->code_ptr;
1744 l->raddr = tcg_splitwx_to_rx(s->code_ptr);
1747 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
1749 if (!patch_reloc(l->label_ptr[0], R_390_PC16DBL,
1750 (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
1754 tcg_out_mov(s, TCG_TYPE_TL, TCG_REG_R3, l->addrlo_reg);
1755 tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
1757 /* "Tail call" to the helper, with the return address back inline. */
1758 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R14, (uintptr_t)l->raddr);
1759 tgen_gotoi(s, S390_CC_ALWAYS, (const void *)(l->is_ld ? helper_unaligned_ld
1760 : helper_unaligned_st));
1764 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1766 return tcg_out_fail_alignment(s, l);
1769 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
1771 return tcg_out_fail_alignment(s, l);
1774 static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
1775 TCGReg *index_reg, tcg_target_long *disp)
1777 if (TARGET_LONG_BITS == 32) {
1778 tgen_ext32u(s, TCG_TMP0, *addr_reg);
1779 *addr_reg = TCG_TMP0;
1781 if (guest_base < 0x80000) {
1782 *index_reg = TCG_REG_NONE;
1785 *index_reg = TCG_GUEST_BASE_REG;
1789 #endif /* CONFIG_SOFTMMU */
1791 static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1794 MemOp opc = get_memop(oi);
1795 #ifdef CONFIG_SOFTMMU
1796 unsigned mem_index = get_mmuidx(oi);
1797 tcg_insn_unit *label_ptr;
1800 base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
1802 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1803 label_ptr = s->code_ptr;
1806 tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1808 add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1811 tcg_target_long disp;
1812 unsigned a_bits = get_alignment_bits(opc);
1815 tcg_out_test_alignment(s, true, addr_reg, a_bits);
1817 tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1818 tcg_out_qemu_ld_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1822 static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
1825 MemOp opc = get_memop(oi);
1826 #ifdef CONFIG_SOFTMMU
1827 unsigned mem_index = get_mmuidx(oi);
1828 tcg_insn_unit *label_ptr;
1831 base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
1833 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
1834 label_ptr = s->code_ptr;
1837 tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
1839 add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
1842 tcg_target_long disp;
1843 unsigned a_bits = get_alignment_bits(opc);
1846 tcg_out_test_alignment(s, false, addr_reg, a_bits);
1848 tcg_prepare_user_ldst(s, &addr_reg, &index_reg, &disp);
1849 tcg_out_qemu_st_direct(s, opc, data_reg, addr_reg, index_reg, disp);
1853 # define OP_32_64(x) \
1854 case glue(glue(INDEX_op_,x),_i32): \
1855 case glue(glue(INDEX_op_,x),_i64)
1857 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
1858 const TCGArg args[TCG_MAX_OP_ARGS],
1859 const int const_args[TCG_MAX_OP_ARGS])
1865 case INDEX_op_exit_tb:
1866 /* Reuse the zeroing that exists for goto_ptr. */
1869 tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue);
1871 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0);
1872 tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr);
1876 case INDEX_op_goto_tb:
1879 * branch displacement must be aligned for atomic patching;
1880 * see if we need to add extra nop before branch
1882 if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) {
1885 tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4));
1886 s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
1888 set_jmp_reset_offset(s, a0);
1891 case INDEX_op_goto_ptr:
1893 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0);
1897 /* ??? LLC (RXY format) is only present with the extended-immediate
1898 facility, whereas LLGC is always present. */
1899 tcg_out_mem(s, 0, RXY_LLGC, args[0], args[1], TCG_REG_NONE, args[2]);
1903 /* ??? LB is no smaller than LGB, so no point to using it. */
1904 tcg_out_mem(s, 0, RXY_LGB, args[0], args[1], TCG_REG_NONE, args[2]);
1908 /* ??? LLH (RXY format) is only present with the extended-immediate
1909 facility, whereas LLGH is always present. */
1910 tcg_out_mem(s, 0, RXY_LLGH, args[0], args[1], TCG_REG_NONE, args[2]);
1913 case INDEX_op_ld16s_i32:
1914 tcg_out_mem(s, RX_LH, RXY_LHY, args[0], args[1], TCG_REG_NONE, args[2]);
1917 case INDEX_op_ld_i32:
1918 tcg_out_ld(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1922 tcg_out_mem(s, RX_STC, RXY_STCY, args[0], args[1],
1923 TCG_REG_NONE, args[2]);
1927 tcg_out_mem(s, RX_STH, RXY_STHY, args[0], args[1],
1928 TCG_REG_NONE, args[2]);
1931 case INDEX_op_st_i32:
1932 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
1935 case INDEX_op_add_i32:
1936 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
1937 if (const_args[2]) {
1940 if (a2 == (int16_t)a2) {
1941 tcg_out_insn(s, RI, AHI, a0, a2);
1944 tcg_out_insn(s, RIL, AFI, a0, a2);
1947 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
1948 } else if (a0 == a1) {
1949 tcg_out_insn(s, RR, AR, a0, a2);
1951 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
1954 case INDEX_op_sub_i32:
1955 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
1956 if (const_args[2]) {
1959 } else if (a0 == a1) {
1960 tcg_out_insn(s, RR, SR, a0, a2);
1962 tcg_out_insn(s, RRF, SRK, a0, a1, a2);
1966 case INDEX_op_and_i32:
1967 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
1968 if (const_args[2]) {
1969 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1970 tgen_andi(s, TCG_TYPE_I32, a0, a2);
1971 } else if (a0 == a1) {
1972 tcg_out_insn(s, RR, NR, a0, a2);
1974 tcg_out_insn(s, RRF, NRK, a0, a1, a2);
1977 case INDEX_op_or_i32:
1978 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
1979 if (const_args[2]) {
1980 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1981 tgen_ori(s, TCG_TYPE_I32, a0, a2);
1982 } else if (a0 == a1) {
1983 tcg_out_insn(s, RR, OR, a0, a2);
1985 tcg_out_insn(s, RRF, ORK, a0, a1, a2);
1988 case INDEX_op_xor_i32:
1989 a0 = args[0], a1 = args[1], a2 = (uint32_t)args[2];
1990 if (const_args[2]) {
1991 tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
1992 tgen_xori(s, TCG_TYPE_I32, a0, a2);
1993 } else if (a0 == a1) {
1994 tcg_out_insn(s, RR, XR, args[0], args[2]);
1996 tcg_out_insn(s, RRF, XRK, a0, a1, a2);
2000 case INDEX_op_neg_i32:
2001 tcg_out_insn(s, RR, LCR, args[0], args[1]);
2004 case INDEX_op_mul_i32:
2005 if (const_args[2]) {
2006 if ((int32_t)args[2] == (int16_t)args[2]) {
2007 tcg_out_insn(s, RI, MHI, args[0], args[2]);
2009 tcg_out_insn(s, RIL, MSFI, args[0], args[2]);
2012 tcg_out_insn(s, RRE, MSR, args[0], args[2]);
2016 case INDEX_op_div2_i32:
2017 tcg_debug_assert(args[0] == args[2]);
2018 tcg_debug_assert(args[1] == args[3]);
2019 tcg_debug_assert((args[1] & 1) == 0);
2020 tcg_debug_assert(args[0] == args[1] + 1);
2021 tcg_out_insn(s, RR, DR, args[1], args[4]);
2023 case INDEX_op_divu2_i32:
2024 tcg_debug_assert(args[0] == args[2]);
2025 tcg_debug_assert(args[1] == args[3]);
2026 tcg_debug_assert((args[1] & 1) == 0);
2027 tcg_debug_assert(args[0] == args[1] + 1);
2028 tcg_out_insn(s, RRE, DLR, args[1], args[4]);
2031 case INDEX_op_shl_i32:
2035 a0 = args[0], a1 = args[1], a2 = (int32_t)args[2];
2037 if (const_args[2]) {
2038 tcg_out_sh32(s, op, a0, TCG_REG_NONE, a2);
2040 tcg_out_sh32(s, op, a0, a2, 0);
2043 /* Using tcg_out_sh64 here for the format; it is a 32-bit shift. */
2044 if (const_args[2]) {
2045 tcg_out_sh64(s, op2, a0, a1, TCG_REG_NONE, a2);
2047 tcg_out_sh64(s, op2, a0, a1, a2, 0);
2051 case INDEX_op_shr_i32:
2055 case INDEX_op_sar_i32:
2060 case INDEX_op_rotl_i32:
2061 /* ??? Using tcg_out_sh64 here for the format; it is a 32-bit rol. */
2062 if (const_args[2]) {
2063 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_REG_NONE, args[2]);
2065 tcg_out_sh64(s, RSY_RLL, args[0], args[1], args[2], 0);
2068 case INDEX_op_rotr_i32:
2069 if (const_args[2]) {
2070 tcg_out_sh64(s, RSY_RLL, args[0], args[1],
2071 TCG_REG_NONE, (32 - args[2]) & 31);
2073 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2074 tcg_out_sh64(s, RSY_RLL, args[0], args[1], TCG_TMP0, 0);
2078 case INDEX_op_ext8s_i32:
2079 tgen_ext8s(s, TCG_TYPE_I32, args[0], args[1]);
2081 case INDEX_op_ext16s_i32:
2082 tgen_ext16s(s, TCG_TYPE_I32, args[0], args[1]);
2084 case INDEX_op_ext8u_i32:
2085 tgen_ext8u(s, TCG_TYPE_I32, args[0], args[1]);
2087 case INDEX_op_ext16u_i32:
2088 tgen_ext16u(s, TCG_TYPE_I32, args[0], args[1]);
2091 case INDEX_op_bswap16_i32:
2092 a0 = args[0], a1 = args[1], a2 = args[2];
2093 tcg_out_insn(s, RRE, LRVR, a0, a1);
2094 if (a2 & TCG_BSWAP_OS) {
2095 tcg_out_sh32(s, RS_SRA, a0, TCG_REG_NONE, 16);
2097 tcg_out_sh32(s, RS_SRL, a0, TCG_REG_NONE, 16);
2100 case INDEX_op_bswap16_i64:
2101 a0 = args[0], a1 = args[1], a2 = args[2];
2102 tcg_out_insn(s, RRE, LRVGR, a0, a1);
2103 if (a2 & TCG_BSWAP_OS) {
2104 tcg_out_sh64(s, RSY_SRAG, a0, a0, TCG_REG_NONE, 48);
2106 tcg_out_sh64(s, RSY_SRLG, a0, a0, TCG_REG_NONE, 48);
2110 case INDEX_op_bswap32_i32:
2111 tcg_out_insn(s, RRE, LRVR, args[0], args[1]);
2113 case INDEX_op_bswap32_i64:
2114 a0 = args[0], a1 = args[1], a2 = args[2];
2115 tcg_out_insn(s, RRE, LRVR, a0, a1);
2116 if (a2 & TCG_BSWAP_OS) {
2117 tgen_ext32s(s, a0, a0);
2118 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
2119 tgen_ext32u(s, a0, a0);
2123 case INDEX_op_add2_i32:
2124 if (const_args[4]) {
2125 tcg_out_insn(s, RIL, ALFI, args[0], args[4]);
2127 tcg_out_insn(s, RR, ALR, args[0], args[4]);
2129 tcg_out_insn(s, RRE, ALCR, args[1], args[5]);
2131 case INDEX_op_sub2_i32:
2132 if (const_args[4]) {
2133 tcg_out_insn(s, RIL, SLFI, args[0], args[4]);
2135 tcg_out_insn(s, RR, SLR, args[0], args[4]);
2137 tcg_out_insn(s, RRE, SLBR, args[1], args[5]);
2141 tgen_branch(s, S390_CC_ALWAYS, arg_label(args[0]));
2144 case INDEX_op_brcond_i32:
2145 tgen_brcond(s, TCG_TYPE_I32, args[2], args[0],
2146 args[1], const_args[1], arg_label(args[3]));
2148 case INDEX_op_setcond_i32:
2149 tgen_setcond(s, TCG_TYPE_I32, args[3], args[0], args[1],
2150 args[2], const_args[2]);
2152 case INDEX_op_movcond_i32:
2153 tgen_movcond(s, TCG_TYPE_I32, args[5], args[0], args[1],
2154 args[2], const_args[2], args[3], const_args[3]);
2157 case INDEX_op_qemu_ld_i32:
2158 /* ??? Technically we can use a non-extending instruction. */
2159 case INDEX_op_qemu_ld_i64:
2160 tcg_out_qemu_ld(s, args[0], args[1], args[2]);
2162 case INDEX_op_qemu_st_i32:
2163 case INDEX_op_qemu_st_i64:
2164 tcg_out_qemu_st(s, args[0], args[1], args[2]);
2167 case INDEX_op_ld16s_i64:
2168 tcg_out_mem(s, 0, RXY_LGH, args[0], args[1], TCG_REG_NONE, args[2]);
2170 case INDEX_op_ld32u_i64:
2171 tcg_out_mem(s, 0, RXY_LLGF, args[0], args[1], TCG_REG_NONE, args[2]);
2173 case INDEX_op_ld32s_i64:
2174 tcg_out_mem(s, 0, RXY_LGF, args[0], args[1], TCG_REG_NONE, args[2]);
2176 case INDEX_op_ld_i64:
2177 tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2180 case INDEX_op_st32_i64:
2181 tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
2183 case INDEX_op_st_i64:
2184 tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
2187 case INDEX_op_add_i64:
2188 a0 = args[0], a1 = args[1], a2 = args[2];
2189 if (const_args[2]) {
2192 if (a2 == (int16_t)a2) {
2193 tcg_out_insn(s, RI, AGHI, a0, a2);
2196 if (a2 == (int32_t)a2) {
2197 tcg_out_insn(s, RIL, AGFI, a0, a2);
2200 if (a2 == (uint32_t)a2) {
2201 tcg_out_insn(s, RIL, ALGFI, a0, a2);
2204 if (-a2 == (uint32_t)-a2) {
2205 tcg_out_insn(s, RIL, SLGFI, a0, -a2);
2209 tcg_out_mem(s, RX_LA, RXY_LAY, a0, a1, TCG_REG_NONE, a2);
2210 } else if (a0 == a1) {
2211 tcg_out_insn(s, RRE, AGR, a0, a2);
2213 tcg_out_insn(s, RX, LA, a0, a1, a2, 0);
2216 case INDEX_op_sub_i64:
2217 a0 = args[0], a1 = args[1], a2 = args[2];
2218 if (const_args[2]) {
2222 tcg_out_insn(s, RRF, SGRK, a0, a1, a2);
2226 case INDEX_op_and_i64:
2227 a0 = args[0], a1 = args[1], a2 = args[2];
2228 if (const_args[2]) {
2229 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2230 tgen_andi(s, TCG_TYPE_I64, args[0], args[2]);
2232 tcg_out_insn(s, RRF, NGRK, a0, a1, a2);
2235 case INDEX_op_or_i64:
2236 a0 = args[0], a1 = args[1], a2 = args[2];
2237 if (const_args[2]) {
2238 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2239 tgen_ori(s, TCG_TYPE_I64, a0, a2);
2241 tcg_out_insn(s, RRF, OGRK, a0, a1, a2);
2244 case INDEX_op_xor_i64:
2245 a0 = args[0], a1 = args[1], a2 = args[2];
2246 if (const_args[2]) {
2247 tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
2248 tgen_xori(s, TCG_TYPE_I64, a0, a2);
2250 tcg_out_insn(s, RRF, XGRK, a0, a1, a2);
2254 case INDEX_op_neg_i64:
2255 tcg_out_insn(s, RRE, LCGR, args[0], args[1]);
2257 case INDEX_op_bswap64_i64:
2258 tcg_out_insn(s, RRE, LRVGR, args[0], args[1]);
2261 case INDEX_op_mul_i64:
2262 if (const_args[2]) {
2263 if (args[2] == (int16_t)args[2]) {
2264 tcg_out_insn(s, RI, MGHI, args[0], args[2]);
2266 tcg_out_insn(s, RIL, MSGFI, args[0], args[2]);
2269 tcg_out_insn(s, RRE, MSGR, args[0], args[2]);
2273 case INDEX_op_div2_i64:
2275 * ??? This definition sign-extends the dividend into op0 unnecessarily,
2276 * but since we do in fact always produce both quotient and remainder,
2277 * using INDEX_op_div_i64 instead would require jumping through even
2278 * more hoops.
2280 tcg_debug_assert(args[0] == args[2]);
2281 tcg_debug_assert(args[1] == args[3]);
2282 tcg_debug_assert((args[1] & 1) == 0);
2283 tcg_debug_assert(args[0] == args[1] + 1);
2284 tcg_out_insn(s, RRE, DSGR, args[1], args[4]);
2286 case INDEX_op_divu2_i64:
2287 tcg_debug_assert(args[0] == args[2]);
2288 tcg_debug_assert(args[1] == args[3]);
2289 tcg_debug_assert((args[1] & 1) == 0);
2290 tcg_debug_assert(args[0] == args[1] + 1);
2291 tcg_out_insn(s, RRE, DLGR, args[1], args[4]);
2293 case INDEX_op_mulu2_i64:
2294 tcg_debug_assert(args[0] == args[2]);
2295 tcg_debug_assert((args[1] & 1) == 0);
2296 tcg_debug_assert(args[0] == args[1] + 1);
2297 tcg_out_insn(s, RRE, MLGR, args[1], args[3]);
2300 case INDEX_op_shl_i64:
2303 if (const_args[2]) {
2304 tcg_out_sh64(s, op, args[0], args[1], TCG_REG_NONE, args[2]);
2306 tcg_out_sh64(s, op, args[0], args[1], args[2], 0);
2309 case INDEX_op_shr_i64:
2312 case INDEX_op_sar_i64:
2316 case INDEX_op_rotl_i64:
2317 if (const_args[2]) {
2318 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2319 TCG_REG_NONE, args[2]);
2321 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], args[2], 0);
2324 case INDEX_op_rotr_i64:
2325 if (const_args[2]) {
2326 tcg_out_sh64(s, RSY_RLLG, args[0], args[1],
2327 TCG_REG_NONE, (64 - args[2]) & 63);
2329 /* We can use the smaller 32-bit negate because only the
2330 low 6 bits are examined for the rotate. */
2331 tcg_out_insn(s, RR, LCR, TCG_TMP0, args[2]);
2332 tcg_out_sh64(s, RSY_RLLG, args[0], args[1], TCG_TMP0, 0);
2336 case INDEX_op_ext8s_i64:
2337 tgen_ext8s(s, TCG_TYPE_I64, args[0], args[1]);
2339 case INDEX_op_ext16s_i64:
2340 tgen_ext16s(s, TCG_TYPE_I64, args[0], args[1]);
2342 case INDEX_op_ext_i32_i64:
2343 case INDEX_op_ext32s_i64:
2344 tgen_ext32s(s, args[0], args[1]);
2346 case INDEX_op_ext8u_i64:
2347 tgen_ext8u(s, TCG_TYPE_I64, args[0], args[1]);
2349 case INDEX_op_ext16u_i64:
2350 tgen_ext16u(s, TCG_TYPE_I64, args[0], args[1]);
2352 case INDEX_op_extu_i32_i64:
2353 case INDEX_op_ext32u_i64:
2354 tgen_ext32u(s, args[0], args[1]);
2357 case INDEX_op_add2_i64:
2358 if (const_args[4]) {
2359 if ((int64_t)args[4] >= 0) {
2360 tcg_out_insn(s, RIL, ALGFI, args[0], args[4]);
2362 tcg_out_insn(s, RIL, SLGFI, args[0], -args[4]);
2365 tcg_out_insn(s, RRE, ALGR, args[0], args[4]);
2367 tcg_out_insn(s, RRE, ALCGR, args[1], args[5]);
2369 case INDEX_op_sub2_i64:
2370 if (const_args[4]) {
2371 if ((int64_t)args[4] >= 0) {
2372 tcg_out_insn(s, RIL, SLGFI, args[0], args[4]);
2374 tcg_out_insn(s, RIL, ALGFI, args[0], -args[4]);
2377 tcg_out_insn(s, RRE, SLGR, args[0], args[4]);
2379 tcg_out_insn(s, RRE, SLBGR, args[1], args[5]);
2382 case INDEX_op_brcond_i64:
2383 tgen_brcond(s, TCG_TYPE_I64, args[2], args[0],
2384 args[1], const_args[1], arg_label(args[3]));
2386 case INDEX_op_setcond_i64:
2387 tgen_setcond(s, TCG_TYPE_I64, args[3], args[0], args[1],
2388 args[2], const_args[2]);
2390 case INDEX_op_movcond_i64:
2391 tgen_movcond(s, TCG_TYPE_I64, args[5], args[0], args[1],
2392 args[2], const_args[2], args[3], const_args[3]);
2396 a0 = args[0], a1 = args[1], a2 = args[2];
2397 if (const_args[1]) {
2398 tgen_deposit(s, a0, a2, args[3], args[4], 1);
2400 /* Since we can't support "0Z" as a constraint, we allow a1 in
2401 any register.  Fix things up as if we had a matching constraint. */
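/* Relies on TCG_TYPE_I32 == 0 and TCG_TYPE_I64 == 1. */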
2403 TCGType type = (opc == INDEX_op_deposit_i64);
2405 tcg_out_mov(s, type, TCG_TMP0, a2);
2408 tcg_out_mov(s, type, a0, a1);
2410 tgen_deposit(s, a0, a2, args[3], args[4], 0);
2415 tgen_extract(s, args[0], args[1], args[2], args[3]);
2418 case INDEX_op_clz_i64:
2419 tgen_clz(s, args[0], args[1], args[2], const_args[2]);
2423 /* The host memory model is quite strong; we simply need to
2424 serialize the instruction stream. */
2425 if (args[0] & TCG_MO_ST_LD) {
2426 /* fast-bcr-serialization facility (45) is present */
2427 tcg_out_insn(s, RR, BCR, 14, 0);
2431 case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
2432 case INDEX_op_mov_i64:
2433 case INDEX_op_call: /* Always emitted via tcg_out_call. */
2439 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
2440 TCGReg dst, TCGReg src)
2442 if (is_general_reg(src)) {
2443 /* Replicate general register into two MO_64. */
2444 tcg_out_insn(s, VRRf, VLVGP, dst, src, src);
2445 if (vece == MO_64) {
2452 * Recall that the "standard" integer, within a vector, is the
2453 * rightmost element of the leftmost doubleword, a-la VLLEZ.
2455 tcg_out_insn(s, VRIc, VREP, dst, (8 >> vece) - 1, src, vece);
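/* The index (8 >> vece) - 1 selects that element, e.g. byte 7 for MO_8 or
   halfword 3 for MO_16, and VREP broadcasts it to every element of dst. */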
2459 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
2460 TCGReg dst, TCGReg base, intptr_t offset)
2462 tcg_out_vrx_mem(s, VRX_VLREP, dst, base, TCG_REG_NONE, offset, vece);
2466 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
2467 TCGReg dst, int64_t val)
2469 int i, mask, msb, lsb;
2471 /* Look for int16_t elements. */
2472 if (vece <= MO_16 ||
2473 (vece == MO_32 ? (int32_t)val : val) == (int16_t)val) {
2474 tcg_out_insn(s, VRIa, VREPI, dst, val, vece);
2478 /* Look for bit masks. */
2479 if (vece == MO_32) {
2480 if (risbg_mask((int32_t)val)) {
2481 /* Handle wraparound by swapping msb and lsb. */
2482 if ((val & 0x80000001u) == 0x80000001u) {
2483 msb = 32 - ctz32(~val);
2484 lsb = clz32(~val) - 1;
2487 lsb = 31 - ctz32(val);
2489 tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_32);
2493 if (risbg_mask(val)) {
2494 /* Handle wraparound by swapping msb and lsb. */
2495 if ((val & 0x8000000000000001ull) == 0x8000000000000001ull) {
2497 msb = 64 - ctz64(~val);
2498 lsb = clz64(~val) - 1;
2501 lsb = 63 - ctz64(val);
2503 tcg_out_insn(s, VRIb, VGM, dst, msb, lsb, MO_64);
2508 /* Look for all bytes 0x00 or 0xff. */
2509 for (i = mask = 0; i < 8; i++) {
2510 uint8_t byte = val >> (i * 8);
2513 } else if (byte != 0) {
2518 tcg_out_insn(s, VRIa, VGBM, dst, mask * 0x0101, 0);
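/* VGBM sets each byte of dst to 0x00 or 0xff according to its 16-bit mask;
   mask covers the 8 bytes of val, and mask * 0x0101 duplicates it for both
   doublewords of the replicated element. */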
2522 /* Otherwise, stuff it in the constant pool. */
2523 tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
2524 new_pool_label(s, val, R_390_PC32DBL, s->code_ptr - 2, 2);
2525 tcg_out_insn(s, VRX, VLREP, dst, TCG_TMP0, TCG_REG_NONE, 0, MO_64);
2528 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
2529 unsigned vecl, unsigned vece,
2530 const TCGArg args[TCG_MAX_OP_ARGS],
2531 const int const_args[TCG_MAX_OP_ARGS])
2533 TCGType type = vecl + TCG_TYPE_V64;
2534 TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
2537 case INDEX_op_ld_vec:
2538 tcg_out_ld(s, type, a0, a1, a2);
2540 case INDEX_op_st_vec:
2541 tcg_out_st(s, type, a0, a1, a2);
2543 case INDEX_op_dupm_vec:
2544 tcg_out_dupm_vec(s, type, vece, a0, a1, a2);
2547 case INDEX_op_abs_vec:
2548 tcg_out_insn(s, VRRa, VLP, a0, a1, vece);
2550 case INDEX_op_neg_vec:
2551 tcg_out_insn(s, VRRa, VLC, a0, a1, vece);
2553 case INDEX_op_not_vec:
2554 tcg_out_insn(s, VRRc, VNO, a0, a1, a1, 0);
2557 case INDEX_op_add_vec:
2558 tcg_out_insn(s, VRRc, VA, a0, a1, a2, vece);
2560 case INDEX_op_sub_vec:
2561 tcg_out_insn(s, VRRc, VS, a0, a1, a2, vece);
2563 case INDEX_op_and_vec:
2564 tcg_out_insn(s, VRRc, VN, a0, a1, a2, 0);
2566 case INDEX_op_andc_vec:
2567 tcg_out_insn(s, VRRc, VNC, a0, a1, a2, 0);
2569 case INDEX_op_mul_vec:
2570 tcg_out_insn(s, VRRc, VML, a0, a1, a2, vece);
2572 case INDEX_op_or_vec:
2573 tcg_out_insn(s, VRRc, VO, a0, a1, a2, 0);
2575 case INDEX_op_orc_vec:
2576 tcg_out_insn(s, VRRc, VOC, a0, a1, a2, 0);
2578 case INDEX_op_xor_vec:
2579 tcg_out_insn(s, VRRc, VX, a0, a1, a2, 0);
2581 case INDEX_op_nand_vec:
2582 tcg_out_insn(s, VRRc, VNN, a0, a1, a2, 0);
2584 case INDEX_op_nor_vec:
2585 tcg_out_insn(s, VRRc, VNO, a0, a1, a2, 0);
2587 case INDEX_op_eqv_vec:
2588 tcg_out_insn(s, VRRc, VNX, a0, a1, a2, 0);
2591 case INDEX_op_shli_vec:
2592 tcg_out_insn(s, VRSa, VESL, a0, a2, TCG_REG_NONE, a1, vece);
2594 case INDEX_op_shri_vec:
2595 tcg_out_insn(s, VRSa, VESRL, a0, a2, TCG_REG_NONE, a1, vece);
2597 case INDEX_op_sari_vec:
2598 tcg_out_insn(s, VRSa, VESRA, a0, a2, TCG_REG_NONE, a1, vece);
2600 case INDEX_op_rotli_vec:
2601 tcg_out_insn(s, VRSa, VERLL, a0, a2, TCG_REG_NONE, a1, vece);
2603 case INDEX_op_shls_vec:
2604 tcg_out_insn(s, VRSa, VESL, a0, 0, a2, a1, vece);
2606 case INDEX_op_shrs_vec:
2607 tcg_out_insn(s, VRSa, VESRL, a0, 0, a2, a1, vece);
2609 case INDEX_op_sars_vec:
2610 tcg_out_insn(s, VRSa, VESRA, a0, 0, a2, a1, vece);
2612 case INDEX_op_rotls_vec:
2613 tcg_out_insn(s, VRSa, VERLL, a0, 0, a2, a1, vece);
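/* In the VRS-a format the shift count comes from the second-operand
   address D2(B2).  The *i (immediate) variants above therefore encode
   the count directly in the displacement with no base register, while
   the *s (scalar) variants pass the count register as the base with a
   zero displacement. */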
2615 case INDEX_op_shlv_vec:
2616 tcg_out_insn(s, VRRc, VESLV, a0, a1, a2, vece);
2618 case INDEX_op_shrv_vec:
2619 tcg_out_insn(s, VRRc, VESRLV, a0, a1, a2, vece);
2621 case INDEX_op_sarv_vec:
2622 tcg_out_insn(s, VRRc, VESRAV, a0, a1, a2, vece);
2624 case INDEX_op_rotlv_vec:
2625 tcg_out_insn(s, VRRc, VERLLV, a0, a1, a2, vece);
2628 case INDEX_op_smin_vec:
2629 tcg_out_insn(s, VRRc, VMN, a0, a1, a2, vece);
2631 case INDEX_op_smax_vec:
2632 tcg_out_insn(s, VRRc, VMX, a0, a1, a2, vece);
2634 case INDEX_op_umin_vec:
2635 tcg_out_insn(s, VRRc, VMNL, a0, a1, a2, vece);
2637 case INDEX_op_umax_vec:
2638 tcg_out_insn(s, VRRc, VMXL, a0, a1, a2, vece);
2641 case INDEX_op_bitsel_vec:
2642 tcg_out_insn(s, VRRe, VSEL, a0, a2, args[3], a1);
2645 case INDEX_op_cmp_vec:
2646 switch ((TCGCond)args[3]) {
2648 tcg_out_insn(s, VRRc, VCEQ, a0, a1, a2, vece);
2651 tcg_out_insn(s, VRRc, VCH, a0, a1, a2, vece);
2654 tcg_out_insn(s, VRRc, VCHL, a0, a1, a2, vece);
2657 g_assert_not_reached();
2661 case INDEX_op_s390_vuph_vec:
2662 tcg_out_insn(s, VRRa, VUPH, a0, a1, vece);
2664 case INDEX_op_s390_vupl_vec:
2665 tcg_out_insn(s, VRRa, VUPL, a0, a1, vece);
2667 case INDEX_op_s390_vpks_vec:
2668 tcg_out_insn(s, VRRc, VPKS, a0, a1, a2, vece);
2671 case INDEX_op_mov_vec: /* Always emitted via tcg_out_mov. */
2672 case INDEX_op_dup_vec: /* Always emitted via tcg_out_dup_vec. */
2674 g_assert_not_reached();
2678 int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
2681 case INDEX_op_abs_vec:
2682 case INDEX_op_add_vec:
2683 case INDEX_op_and_vec:
2684 case INDEX_op_andc_vec:
2685 case INDEX_op_bitsel_vec:
2686 case INDEX_op_eqv_vec:
2687 case INDEX_op_nand_vec:
2688 case INDEX_op_neg_vec:
2689 case INDEX_op_nor_vec:
2690 case INDEX_op_not_vec:
2691 case INDEX_op_or_vec:
2692 case INDEX_op_orc_vec:
2693 case INDEX_op_rotli_vec:
2694 case INDEX_op_rotls_vec:
2695 case INDEX_op_rotlv_vec:
2696 case INDEX_op_sari_vec:
2697 case INDEX_op_sars_vec:
2698 case INDEX_op_sarv_vec:
2699 case INDEX_op_shli_vec:
2700 case INDEX_op_shls_vec:
2701 case INDEX_op_shlv_vec:
2702 case INDEX_op_shri_vec:
2703 case INDEX_op_shrs_vec:
2704 case INDEX_op_shrv_vec:
2705 case INDEX_op_smax_vec:
2706 case INDEX_op_smin_vec:
2707 case INDEX_op_sub_vec:
2708 case INDEX_op_umax_vec:
2709 case INDEX_op_umin_vec:
2710 case INDEX_op_xor_vec:
2712 case INDEX_op_cmp_vec:
2713 case INDEX_op_cmpsel_vec:
2714 case INDEX_op_rotrv_vec:
2716 case INDEX_op_mul_vec:
2717 return vece < MO_64;
2718 case INDEX_op_ssadd_vec:
2719 case INDEX_op_sssub_vec:
2720 return vece < MO_64 ? -1 : 0;
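/* Per the tcg_can_emit_vec_op convention: 0 means unsupported, a
   positive value means the opcode is emitted directly, and -1 means
   it is supported only via expansion in tcg_expand_vec_op below
   (cmp, cmpsel, rotrv, ssadd and sssub here). */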
2726 static bool expand_vec_cmp_noinv(TCGType type, unsigned vece, TCGv_vec v0,
2727 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2729 bool need_swap = false, need_inv = false;
2747 need_swap = need_inv = true;
2750 g_assert_not_reached();
2754 cond = tcg_invert_cond(cond);
2758 t1 = v1, v1 = v2, v2 = t1;
2759 cond = tcg_swap_cond(cond);
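/* After the adjustments above the condition is one of EQ, GT or GTU,
   which map directly onto VCEQ, VCH and VCHL; any inversion still
   required is reported back to the caller via need_inv. */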
2762 vec_gen_4(INDEX_op_cmp_vec, type, vece, tcgv_vec_arg(v0),
2763 tcgv_vec_arg(v1), tcgv_vec_arg(v2), cond);
2768 static void expand_vec_cmp(TCGType type, unsigned vece, TCGv_vec v0,
2769 TCGv_vec v1, TCGv_vec v2, TCGCond cond)
2771 if (expand_vec_cmp_noinv(type, vece, v0, v1, v2, cond)) {
2772 tcg_gen_not_vec(vece, v0, v0);
2776 static void expand_vec_cmpsel(TCGType type, unsigned vece, TCGv_vec v0,
2777 TCGv_vec c1, TCGv_vec c2,
2778 TCGv_vec v3, TCGv_vec v4, TCGCond cond)
2780 TCGv_vec t = tcg_temp_new_vec(type);
2782 if (expand_vec_cmp_noinv(type, vece, t, c1, c2, cond)) {
2783 /* The compare computed the inverted condition; compensate by swapping the selected operands. */
2784 tcg_gen_bitsel_vec(vece, v0, t, v4, v3);
2786 tcg_gen_bitsel_vec(vece, v0, t, v3, v4);
2788 tcg_temp_free_vec(t);
2791 static void expand_vec_sat(TCGType type, unsigned vece, TCGv_vec v0,
2792 TCGv_vec v1, TCGv_vec v2, TCGOpcode add_sub_opc)
2794 TCGv_vec h1 = tcg_temp_new_vec(type);
2795 TCGv_vec h2 = tcg_temp_new_vec(type);
2796 TCGv_vec l1 = tcg_temp_new_vec(type);
2797 TCGv_vec l2 = tcg_temp_new_vec(type);
2799 tcg_debug_assert(vece < MO_64);
2801 /* Unpack with sign-extension. */
2802 vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
2803 tcgv_vec_arg(h1), tcgv_vec_arg(v1));
2804 vec_gen_2(INDEX_op_s390_vuph_vec, type, vece,
2805 tcgv_vec_arg(h2), tcgv_vec_arg(v2));
2807 vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
2808 tcgv_vec_arg(l1), tcgv_vec_arg(v1));
2809 vec_gen_2(INDEX_op_s390_vupl_vec, type, vece,
2810 tcgv_vec_arg(l2), tcgv_vec_arg(v2));
2812 /* Arithmetic on a wider element size. */
2813 vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(h1),
2814 tcgv_vec_arg(h1), tcgv_vec_arg(h2));
2815 vec_gen_3(add_sub_opc, type, vece + 1, tcgv_vec_arg(l1),
2816 tcgv_vec_arg(l1), tcgv_vec_arg(l2));
2818 /* Pack with saturation. */
2819 vec_gen_3(INDEX_op_s390_vpks_vec, type, vece + 1,
2820 tcgv_vec_arg(v0), tcgv_vec_arg(h1), tcgv_vec_arg(l1));
2822 tcg_temp_free_vec(h1);
2823 tcg_temp_free_vec(h2);
2824 tcg_temp_free_vec(l1);
2825 tcg_temp_free_vec(l2);
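/* For a MO_8 saturating add, the expansion above widens both inputs
   to MO_16 with VUPH/VUPL, performs the addition at MO_16 where it
   cannot overflow, and packs back to MO_8 with signed saturation via
   VPKS; subtraction follows the same shape. */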
2828 void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
2832 TCGv_vec v0, v1, v2, v3, v4, t0;
2835 v0 = temp_tcgv_vec(arg_temp(a0));
2836 v1 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2837 v2 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2840 case INDEX_op_cmp_vec:
2841 expand_vec_cmp(type, vece, v0, v1, v2, va_arg(va, TCGArg));
2844 case INDEX_op_cmpsel_vec:
2845 v3 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2846 v4 = temp_tcgv_vec(arg_temp(va_arg(va, TCGArg)));
2847 expand_vec_cmpsel(type, vece, v0, v1, v2, v3, v4, va_arg(va, TCGArg));
2850 case INDEX_op_rotrv_vec:
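/* Rotate right by N equals rotate left by (element width - N); since
   rotation is taken modulo the element width, negating the count
   vector is sufficient. */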
2851 t0 = tcg_temp_new_vec(type);
2852 tcg_gen_neg_vec(vece, t0, v2);
2853 tcg_gen_rotlv_vec(vece, v0, v1, t0);
2854 tcg_temp_free_vec(t0);
2857 case INDEX_op_ssadd_vec:
2858 expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_add_vec);
2860 case INDEX_op_sssub_vec:
2861 expand_vec_sat(type, vece, v0, v1, v2, INDEX_op_sub_vec);
2865 g_assert_not_reached();
2870 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
2873 case INDEX_op_goto_ptr:
2876 case INDEX_op_ld8u_i32:
2877 case INDEX_op_ld8u_i64:
2878 case INDEX_op_ld8s_i32:
2879 case INDEX_op_ld8s_i64:
2880 case INDEX_op_ld16u_i32:
2881 case INDEX_op_ld16u_i64:
2882 case INDEX_op_ld16s_i32:
2883 case INDEX_op_ld16s_i64:
2884 case INDEX_op_ld_i32:
2885 case INDEX_op_ld32u_i64:
2886 case INDEX_op_ld32s_i64:
2887 case INDEX_op_ld_i64:
2888 return C_O1_I1(r, r);
2890 case INDEX_op_st8_i32:
2891 case INDEX_op_st8_i64:
2892 case INDEX_op_st16_i32:
2893 case INDEX_op_st16_i64:
2894 case INDEX_op_st_i32:
2895 case INDEX_op_st32_i64:
2896 case INDEX_op_st_i64:
2897 return C_O0_I2(r, r);
2899 case INDEX_op_add_i32:
2900 case INDEX_op_add_i64:
2901 case INDEX_op_shl_i64:
2902 case INDEX_op_shr_i64:
2903 case INDEX_op_sar_i64:
2904 case INDEX_op_rotl_i32:
2905 case INDEX_op_rotl_i64:
2906 case INDEX_op_rotr_i32:
2907 case INDEX_op_rotr_i64:
2908 case INDEX_op_clz_i64:
2909 case INDEX_op_setcond_i32:
2910 case INDEX_op_setcond_i64:
2911 return C_O1_I2(r, r, ri);
2913 case INDEX_op_sub_i32:
2914 case INDEX_op_sub_i64:
2915 case INDEX_op_and_i32:
2916 case INDEX_op_and_i64:
2917 case INDEX_op_or_i32:
2918 case INDEX_op_or_i64:
2919 case INDEX_op_xor_i32:
2920 case INDEX_op_xor_i64:
2921 return C_O1_I2(r, r, ri);
2923 case INDEX_op_mul_i32:
2924 return C_O1_I2(r, 0, ri);
2925 case INDEX_op_mul_i64:
2926 return C_O1_I2(r, 0, rJ);
2928 case INDEX_op_shl_i32:
2929 case INDEX_op_shr_i32:
2930 case INDEX_op_sar_i32:
2931 return C_O1_I2(r, r, ri);
2933 case INDEX_op_brcond_i32:
2934 case INDEX_op_brcond_i64:
2935 return C_O0_I2(r, ri);
2937 case INDEX_op_bswap16_i32:
2938 case INDEX_op_bswap16_i64:
2939 case INDEX_op_bswap32_i32:
2940 case INDEX_op_bswap32_i64:
2941 case INDEX_op_bswap64_i64:
2942 case INDEX_op_neg_i32:
2943 case INDEX_op_neg_i64:
2944 case INDEX_op_ext8s_i32:
2945 case INDEX_op_ext8s_i64:
2946 case INDEX_op_ext8u_i32:
2947 case INDEX_op_ext8u_i64:
2948 case INDEX_op_ext16s_i32:
2949 case INDEX_op_ext16s_i64:
2950 case INDEX_op_ext16u_i32:
2951 case INDEX_op_ext16u_i64:
2952 case INDEX_op_ext32s_i64:
2953 case INDEX_op_ext32u_i64:
2954 case INDEX_op_ext_i32_i64:
2955 case INDEX_op_extu_i32_i64:
2956 case INDEX_op_extract_i32:
2957 case INDEX_op_extract_i64:
2958 return C_O1_I1(r, r);
2960 case INDEX_op_qemu_ld_i32:
2961 case INDEX_op_qemu_ld_i64:
2962 return C_O1_I1(r, L);
2963 case INDEX_op_qemu_st_i64:
2964 case INDEX_op_qemu_st_i32:
2965 return C_O0_I2(L, L);
2967 case INDEX_op_deposit_i32:
2968 case INDEX_op_deposit_i64:
2969 return C_O1_I2(r, rZ, r);
2971 case INDEX_op_movcond_i32:
2972 case INDEX_op_movcond_i64:
2973 return (HAVE_FACILITY(LOAD_ON_COND2)
2974 ? C_O1_I4(r, r, ri, rI, 0)
2975 : C_O1_I4(r, r, ri, r, 0));
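/* With load-on-condition-2, LOCGHI allows the conditionally loaded
   value to be a signed 16-bit immediate, hence the rI alternative.
   The trailing "0" ties the remaining input to the output register,
   so only the conditional load itself needs to be emitted. */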
2977 case INDEX_op_div2_i32:
2978 case INDEX_op_div2_i64:
2979 case INDEX_op_divu2_i32:
2980 case INDEX_op_divu2_i64:
2981 return C_O2_I3(o, m, 0, 1, r);
2983 case INDEX_op_mulu2_i64:
2984 return C_O2_I2(o, m, 0, r);
2986 case INDEX_op_add2_i32:
2987 case INDEX_op_sub2_i32:
2988 return C_O2_I4(r, r, 0, 1, ri, r);
2990 case INDEX_op_add2_i64:
2991 case INDEX_op_sub2_i64:
2992 return C_O2_I4(r, r, 0, 1, rA, r);
2994 case INDEX_op_st_vec:
2995 return C_O0_I2(v, r);
2996 case INDEX_op_ld_vec:
2997 case INDEX_op_dupm_vec:
2998 return C_O1_I1(v, r);
2999 case INDEX_op_dup_vec:
3000 return C_O1_I1(v, vr);
3001 case INDEX_op_abs_vec:
3002 case INDEX_op_neg_vec:
3003 case INDEX_op_not_vec:
3004 case INDEX_op_rotli_vec:
3005 case INDEX_op_sari_vec:
3006 case INDEX_op_shli_vec:
3007 case INDEX_op_shri_vec:
3008 case INDEX_op_s390_vuph_vec:
3009 case INDEX_op_s390_vupl_vec:
3010 return C_O1_I1(v, v);
3011 case INDEX_op_add_vec:
3012 case INDEX_op_sub_vec:
3013 case INDEX_op_and_vec:
3014 case INDEX_op_andc_vec:
3015 case INDEX_op_or_vec:
3016 case INDEX_op_orc_vec:
3017 case INDEX_op_xor_vec:
3018 case INDEX_op_nand_vec:
3019 case INDEX_op_nor_vec:
3020 case INDEX_op_eqv_vec:
3021 case INDEX_op_cmp_vec:
3022 case INDEX_op_mul_vec:
3023 case INDEX_op_rotlv_vec:
3024 case INDEX_op_rotrv_vec:
3025 case INDEX_op_shlv_vec:
3026 case INDEX_op_shrv_vec:
3027 case INDEX_op_sarv_vec:
3028 case INDEX_op_smax_vec:
3029 case INDEX_op_smin_vec:
3030 case INDEX_op_umax_vec:
3031 case INDEX_op_umin_vec:
3032 case INDEX_op_s390_vpks_vec:
3033 return C_O1_I2(v, v, v);
3034 case INDEX_op_rotls_vec:
3035 case INDEX_op_shls_vec:
3036 case INDEX_op_shrs_vec:
3037 case INDEX_op_sars_vec:
3038 return C_O1_I2(v, v, r);
3039 case INDEX_op_bitsel_vec:
3040 return C_O1_I3(v, v, v, v);
3043 g_assert_not_reached();
3048 * Mainline glibc added HWCAP_S390_VX before it was part of the kernel ABI.
3049 * Some distros have fixed this up locally, others have not.
3051 #ifndef HWCAP_S390_VXRS
3052 #define HWCAP_S390_VXRS 2048
3055 static void query_s390_facilities(void)
3057 unsigned long hwcap = qemu_getauxval(AT_HWCAP);
3060 /* Is STORE FACILITY LIST EXTENDED available? Honestly, I believe this
3061 is present on all 64-bit systems, but let's check for it anyway. */
3062 if (hwcap & HWCAP_S390_STFLE) {
3063 register int r0 __asm__("0") = ARRAY_SIZE(s390_facilities) - 1;
3064 register void *r1 __asm__("1") = s390_facilities;
3067 asm volatile(".word 0xb2b0,0x1000"
3068 : "=r"(r0) : "r"(r0), "r"(r1) : "memory", "cc");
3072 * Use of vector registers requires OS support beyond the facility bit.
3073 * If the kernel does not advertise support, disable the facility bits.
3074 * There is nothing else we currently care about in the third word, so
3075 * disable VECTOR with one store.
3077 if (!(hwcap & HWCAP_S390_VXRS)) {
3078 s390_facilities[2] = 0;
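/* The vector facility is facility bit 129, which falls in the third
   doubleword (bits 128-191) of the facility list. */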
3082 * Minimum supported CPU revision is z196.
3083 * Check for all required facilities.
3084 * ZARCH_ACTIVE is done via preprocessor check for 64-bit.
3086 if (!HAVE_FACILITY(LONG_DISP)) {
3087 which = "long-displacement";
3090 if (!HAVE_FACILITY(EXT_IMM)) {
3091 which = "extended-immediate";
3094 if (!HAVE_FACILITY(GEN_INST_EXT)) {
3095 which = "general-instructions-extension";
3099 * Facility 45 is a big bin that contains: distinct-operands,
3100 * fast-BCR-serialization, high-word, population-count,
3101 * interlocked-access-1, and load/store-on-condition-1
3103 if (!HAVE_FACILITY(45)) {
3110 error_report("%s: missing required facility %s", __func__, which);
3114 static void tcg_target_init(TCGContext *s)
3116 query_s390_facilities();
3118 tcg_target_available_regs[TCG_TYPE_I32] = 0xffff;
3119 tcg_target_available_regs[TCG_TYPE_I64] = 0xffff;
3120 if (HAVE_FACILITY(VECTOR)) {
3121 tcg_target_available_regs[TCG_TYPE_V64] = 0xffffffff00000000ull;
3122 tcg_target_available_regs[TCG_TYPE_V128] = 0xffffffff00000000ull;
3125 tcg_target_call_clobber_regs = 0;
3126 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R0);
3127 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R1);
3128 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R2);
3129 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R3);
3130 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R4);
3131 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R5);
3132 /* The r6 register is technically call-saved, but it's also a parameter
3133 register, so it can get killed by setup for the qemu_st helper. */
3134 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R6);
3135 /* The return register can be considered call-clobbered. */
3136 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_R14);
3138 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V0);
3139 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V1);
3140 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V2);
3141 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V3);
3142 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V4);
3143 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V5);
3144 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V6);
3145 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V7);
3146 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V16);
3147 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V17);
3148 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V18);
3149 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V19);
3150 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V20);
3151 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V21);
3152 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V22);
3153 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V23);
3154 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V24);
3155 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V25);
3156 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V26);
3157 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V27);
3158 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V28);
3159 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V29);
3160 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V30);
3161 tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_V31);
3163 s->reserved_regs = 0;
3164 tcg_regset_set_reg(s->reserved_regs, TCG_TMP0);
3165 /* XXX many insns can't be used with R0, so it is best avoided for now. */
3166 tcg_regset_set_reg(s->reserved_regs, TCG_REG_R0);
3167 tcg_regset_set_reg(s->reserved_regs, TCG_REG_CALL_STACK);
3170 #define FRAME_SIZE ((int)(TCG_TARGET_CALL_STACK_OFFSET \
3171 + TCG_STATIC_CALL_ARGS_SIZE \
3172 + CPU_TEMP_BUF_NLONGS * sizeof(long)))
3174 static void tcg_target_qemu_prologue(TCGContext *s)
3176 /* stmg %r6,%r15,48(%r15) (save registers) */
3177 tcg_out_insn(s, RXY, STMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15, 48);
3179 /* aghi %r15,-frame_size */
3180 tcg_out_insn(s, RI, AGHI, TCG_REG_R15, -FRAME_SIZE);
3182 tcg_set_frame(s, TCG_REG_CALL_STACK,
3183 TCG_STATIC_CALL_ARGS_SIZE + TCG_TARGET_CALL_STACK_OFFSET,
3184 CPU_TEMP_BUF_NLONGS * sizeof(long));
3186 #ifndef CONFIG_SOFTMMU
3187 if (guest_base >= 0x80000) {
3188 tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
3189 tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
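/* guest_base values below 0x80000 fit in the signed 20-bit
   displacement of the long-displacement memory forms, so only larger
   bases need a dedicated register reserved here. */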
3193 tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
3195 /* br %r3 (go to TB) */
3196 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, tcg_target_call_iarg_regs[1]);
3199 * Return path for goto_ptr. Set return value to 0, as exit_tb does,
3200 * and fall through to the rest of the epilogue.
3202 tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
3203 tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, 0);
3206 tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
3208 /* lmg %r6,%r15,fs+48(%r15) (restore registers) */
3209 tcg_out_insn(s, RXY, LMG, TCG_REG_R6, TCG_REG_R15, TCG_REG_R15,
3212 /* br %r14 (return) */
3213 tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_R14);
3216 static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
3218 memset(p, 0x07, count * sizeof(tcg_insn_unit));
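/* The 0x07 fill bytes pair up into "bcr 0,%r7" halfwords, which never
   branch; since tcg_insn_unit is 2 bytes on this target, each unit
   becomes one such no-op. */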
3223 uint8_t fde_def_cfa[4];
3224 uint8_t fde_reg_ofs[18];
3227 /* We're expecting a 2-byte uleb128-encoded value. */
3228 QEMU_BUILD_BUG_ON(FRAME_SIZE >= (1 << 14));
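/* A two-byte uleb128 holds values up to (1 << 14) - 1: seven bits in
   the first byte with the continuation bit set, seven more in the
   second.  This matches the fixed two-byte encoding of FRAME_SIZE in
   fde_def_cfa below. */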
3230 #define ELF_HOST_MACHINE EM_S390
3232 static const DebugFrame debug_frame = {
3233 .h.cie.len = sizeof(DebugFrameCIE)-4, /* length after .len member */
3236 .h.cie.code_align = 1,
3237 .h.cie.data_align = 8, /* sleb128 8 */
3238 .h.cie.return_column = TCG_REG_R14,
3240 /* Total FDE size does not include the "len" member. */
3241 .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
3244 12, TCG_REG_CALL_STACK, /* DW_CFA_def_cfa %r15, ... */
3245 (FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
3249 0x86, 6, /* DW_CFA_offset, %r6, 48 */
3250 0x87, 7, /* DW_CFA_offset, %r7, 56 */
3251 0x88, 8, /* DW_CFA_offset, %r8, 64 */
3252 0x89, 9, /* DW_CFA_offset, %r9, 72 */
3253 0x8a, 10, /* DW_CFA_offset, %r10, 80 */
3254 0x8b, 11, /* DW_CFA_offset, %r11, 88 */
3255 0x8c, 12, /* DW_CFA_offset, %r12, 96 */
3256 0x8d, 13, /* DW_CFA_offset, %r13, 104 */
3257 0x8e, 14, /* DW_CFA_offset, %r14, 112 */
3261 void tcg_register_jit(const void *buf, size_t buf_size)
3263 tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));