]> Git Repo - qemu.git/commitdiff
tcg: Add opcode for ctpop
authorRichard Henderson <[email protected]>
Mon, 21 Nov 2016 10:13:39 +0000 (11:13 +0100)
committerRichard Henderson <[email protected]>
Tue, 10 Jan 2017 16:48:56 +0000 (08:48 -0800)
The number of actual invocations of ctpop itself does not warrent
an opcode, but it is very helpful for POWER7 to use in generating
an expansion for ctz.

Reviewed-by: Alex BennĂ©e <[email protected]>
Signed-off-by: Richard Henderson <[email protected]>
16 files changed:
tcg-runtime.c
tcg/aarch64/tcg-target.h
tcg/arm/tcg-target.h
tcg/i386/tcg-target.h
tcg/ia64/tcg-target.h
tcg/mips/tcg-target.h
tcg/optimize.c
tcg/ppc/tcg-target.h
tcg/s390/tcg-target.h
tcg/sparc/tcg-target.h
tcg/tcg-op.c
tcg/tcg-op.h
tcg/tcg-opc.h
tcg/tcg-runtime.h
tcg/tcg.h
tcg/tci/tcg-target.h

index c8b98dfd51dd21f23d884238a20560e3c053a8bf..4c60c9665886c94511b9bcc13bc4b22672006f84 100644 (file)
@@ -131,6 +131,16 @@ uint64_t HELPER(clrsb_i64)(uint64_t arg)
     return clrsb64(arg);
 }
 
+uint32_t HELPER(ctpop_i32)(uint32_t arg)
+{
+    return ctpop32(arg);
+}
+
+uint64_t HELPER(ctpop_i64)(uint64_t arg)
+{
+    return ctpop64(arg);
+}
+
 void HELPER(exit_atomic)(CPUArchState *env)
 {
     cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
index 9d6b00fbba36a730b2e80b70ca9a35f6b3afcc1c..1a5ea23844e93121a3256364fc9a1127228cf05f 100644 (file)
@@ -64,6 +64,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
+#define TCG_TARGET_HAS_ctpop_i32        0
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     1
@@ -98,6 +99,7 @@ typedef enum {
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_clz_i64          1
 #define TCG_TARGET_HAS_ctz_i64          1
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     1
index 4cb94dc103d25d8ffe1f2828213f4b047530a3e2..09a19c6f35d7f0c1f23bf95f5f7d2a0c4b02def6 100644 (file)
@@ -112,6 +112,7 @@ extern bool use_idiv_instructions;
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_clz_i32          use_armv5t_instructions
 #define TCG_TARGET_HAS_ctz_i32          use_armv7_instructions
+#define TCG_TARGET_HAS_ctpop_i32        0
 #define TCG_TARGET_HAS_deposit_i32      use_armv7_instructions
 #define TCG_TARGET_HAS_extract_i32      use_armv7_instructions
 #define TCG_TARGET_HAS_sextract_i32     use_armv7_instructions
index 8fff287caa6e80ec4e518ee74011cc82134459de..b8f73f53825edca0b0330f3d62b01c820e46e98c 100644 (file)
@@ -95,6 +95,7 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          1
+#define TCG_TARGET_HAS_ctpop_i32        0
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     1
@@ -129,6 +130,7 @@ extern bool have_bmi1;
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_clz_i64          1
 #define TCG_TARGET_HAS_ctz_i64          1
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
index 9a829ae751e9037f6b05d06ddc23ec4ae9e9cd30..42aea03a8be093dcf2c559a6164d7795bdd9bd88 100644 (file)
@@ -144,6 +144,8 @@ typedef enum {
 #define TCG_TARGET_HAS_clz_i64          0
 #define TCG_TARGET_HAS_ctz_i32          0
 #define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i32        0
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_nor_i64          1
 #define TCG_TARGET_HAS_orc_i32          1
 #define TCG_TARGET_HAS_orc_i64          1
index a680f16e4716ac91e8a73aa7fa6345e8053e731d..f46d64a3a7de5b1a7dce94f20dd3ff3e44299363 100644 (file)
@@ -165,6 +165,7 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_rot_i32          use_mips32r2_instructions
 #define TCG_TARGET_HAS_clz_i32          use_mips32r2_instructions
 #define TCG_TARGET_HAS_ctz_i32          0
+#define TCG_TARGET_HAS_ctpop_i32        0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_movcond_i64      use_movnz_instructions
@@ -179,6 +180,7 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_rot_i64          use_mips32r2_instructions
 #define TCG_TARGET_HAS_clz_i64          use_mips32r2_instructions
 #define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i64        0
 #endif
 
 /* optional instructions automatically implemented */
index e7ecce478e2dde6833596604ae64b83a710ac960..adfc56ce62023ac77a0b3d62fdfd28be6089ae27 100644 (file)
@@ -308,6 +308,12 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
     case INDEX_op_ctz_i64:
         return x ? ctz64(x) : y;
 
+    case INDEX_op_ctpop_i32:
+        return ctpop32(x);
+
+    case INDEX_op_ctpop_i64:
+        return ctpop64(x);
+
     CASE_OP_32_64(ext8s):
         return (int8_t)x;
 
@@ -918,6 +924,13 @@ void tcg_optimize(TCGContext *s)
             mask = temps[args[2]].mask | 63;
             break;
 
+        case INDEX_op_ctpop_i32:
+            mask = 32 | 31;
+            break;
+        case INDEX_op_ctpop_i64:
+            mask = 64 | 63;
+            break;
+
         CASE_OP_32_64(setcond):
         case INDEX_op_setcond2_i32:
             mask = 1;
@@ -1031,6 +1044,7 @@ void tcg_optimize(TCGContext *s)
         CASE_OP_32_64(ext8u):
         CASE_OP_32_64(ext16s):
         CASE_OP_32_64(ext16u):
+        CASE_OP_32_64(ctpop):
         case INDEX_op_ext32s_i64:
         case INDEX_op_ext32u_i64:
         case INDEX_op_ext_i32_i64:
index c798c9c85d50f1fd20e24d0bbb1bfb5d2239e51f..57e66cf3ed9ea2eb0154b5a95548f9e241e59c90 100644 (file)
@@ -72,6 +72,7 @@ extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_clz_i32          1
 #define TCG_TARGET_HAS_ctz_i32          have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i32        0
 #define TCG_TARGET_HAS_deposit_i32      1
 #define TCG_TARGET_HAS_extract_i32      1
 #define TCG_TARGET_HAS_sextract_i32     0
@@ -107,6 +108,7 @@ extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_nor_i64          1
 #define TCG_TARGET_HAS_clz_i64          1
 #define TCG_TARGET_HAS_ctz_i64          have_isa_3_00
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_deposit_i64      1
 #define TCG_TARGET_HAS_extract_i64      1
 #define TCG_TARGET_HAS_sextract_i64     0
index 22500ba85844736b51ddb3925dd2bc5cfbeb7526..cbdd2a627512f9a6480cb304ce2f72c80af66978 100644 (file)
@@ -79,6 +79,7 @@ extern uint64_t s390_facilities;
 #define TCG_TARGET_HAS_nor_i32        0
 #define TCG_TARGET_HAS_clz_i32        0
 #define TCG_TARGET_HAS_ctz_i32        0
+#define TCG_TARGET_HAS_ctpop_i32      0
 #define TCG_TARGET_HAS_deposit_i32    (s390_facilities & FACILITY_GEN_INST_EXT)
 #define TCG_TARGET_HAS_extract_i32    (s390_facilities & FACILITY_GEN_INST_EXT)
 #define TCG_TARGET_HAS_sextract_i32   0
@@ -112,6 +113,7 @@ extern uint64_t s390_facilities;
 #define TCG_TARGET_HAS_nor_i64        0
 #define TCG_TARGET_HAS_clz_i64        (s390_facilities & FACILITY_EXT_IMM)
 #define TCG_TARGET_HAS_ctz_i64        0
+#define TCG_TARGET_HAS_ctpop_i64      0
 #define TCG_TARGET_HAS_deposit_i64    (s390_facilities & FACILITY_GEN_INST_EXT)
 #define TCG_TARGET_HAS_extract_i64    (s390_facilities & FACILITY_GEN_INST_EXT)
 #define TCG_TARGET_HAS_sextract_i64   0
index 340837ae9a85a3b02c888087b983d48b1c4a26b9..b8b74f96ffba3afff1ae97e943ed4bc4c9e63f51 100644 (file)
@@ -112,6 +112,7 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_clz_i32          0
 #define TCG_TARGET_HAS_ctz_i32          0
+#define TCG_TARGET_HAS_ctpop_i32        0
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_extract_i32      0
 #define TCG_TARGET_HAS_sextract_i32     0
@@ -146,6 +147,7 @@ extern bool use_vis3_instructions;
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_clz_i64          0
 #define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_extract_i64      0
 #define TCG_TARGET_HAS_sextract_i64     0
index 620e26897fb5665d299da046916b32c0ecfe10d5..6f4b1b62cbc229f0d154ac6ba6e444cbc48ef5f9 100644 (file)
@@ -550,6 +550,21 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
     }
 }
 
+void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
+{
+    if (TCG_TARGET_HAS_ctpop_i32) {
+        tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
+    } else if (TCG_TARGET_HAS_ctpop_i64) {
+        TCGv_i64 t = tcg_temp_new_i64();
+        tcg_gen_extu_i32_i64(t, arg1);
+        tcg_gen_ctpop_i64(t, t);
+        tcg_gen_extrl_i64_i32(ret, t);
+        tcg_temp_free_i64(t);
+    } else {
+        gen_helper_ctpop_i32(ret, arg1);
+    }
+}
+
 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
     if (TCG_TARGET_HAS_rot_i32) {
@@ -1874,6 +1889,20 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
     }
 }
 
+void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
+{
+    if (TCG_TARGET_HAS_ctpop_i64) {
+        tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
+    } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
+        tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
+        tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
+        tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
+        tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
+    } else {
+        gen_helper_ctpop_i64(ret, arg1);
+    }
+}
+
 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
 {
     if (TCG_TARGET_HAS_rot_i64) {
index c2f3db9288ef58b791233ea4177111e04becb46e..c68e300a6827a5a0e18aa8de90d77555ac9f5ad6 100644 (file)
@@ -291,6 +291,7 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
 void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
 void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
+void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
 void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
 void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
@@ -479,6 +480,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
 void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
 void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
+void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
 void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
 void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
@@ -973,6 +975,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 #define tcg_gen_clzi_tl tcg_gen_clzi_i64
 #define tcg_gen_ctzi_tl tcg_gen_ctzi_i64
 #define tcg_gen_clrsb_tl tcg_gen_clrsb_i64
+#define tcg_gen_ctpop_tl tcg_gen_ctpop_i64
 #define tcg_gen_rotl_tl tcg_gen_rotl_i64
 #define tcg_gen_rotli_tl tcg_gen_rotli_i64
 #define tcg_gen_rotr_tl tcg_gen_rotr_i64
@@ -1069,6 +1072,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 #define tcg_gen_clzi_tl tcg_gen_clzi_i32
 #define tcg_gen_ctzi_tl tcg_gen_ctzi_i32
 #define tcg_gen_clrsb_tl tcg_gen_clrsb_i32
+#define tcg_gen_ctpop_tl tcg_gen_ctpop_i32
 #define tcg_gen_rotl_tl tcg_gen_rotl_i32
 #define tcg_gen_rotli_tl tcg_gen_rotli_i32
 #define tcg_gen_rotr_tl tcg_gen_rotr_i32
index d00db4f47ca9b6230b9a9f332fdd593d70a5c442..f06f89405e09218cdde5cc3e0306b5538f2a1067 100644 (file)
@@ -106,6 +106,7 @@ DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
 DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
 DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
 DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
+DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
 
 DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
 DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
@@ -175,6 +176,7 @@ DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
 DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
 DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
 DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
+DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
 
 DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
 DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
index 0d30f1a90c1673a545802fd80ce3dd83819a48f3..114ea6fecf9be3aabea54785d386ba3095c007cf 100644 (file)
@@ -21,6 +21,8 @@ DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
 DEF_HELPER_FLAGS_1(clrsb_i32, TCG_CALL_NO_RWG_SE, i32, i32)
 DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
+DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
 
 DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
 
index e0262822cb8323a0a07844284e962ffeaadb75da..631c6f69b1f36a5a9e7495249aafd0c91eb8e43d 100644 (file)
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -113,6 +113,7 @@ typedef uint64_t TCGRegSet;
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_clz_i64          0
 #define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_deposit_i64      0
 #define TCG_TARGET_HAS_extract_i64      0
 #define TCG_TARGET_HAS_sextract_i64     0
index 0646444b873315a42593a2126ea82a18122d8029..838bf3a858f419eaf79e6e7d3d34a8c0e0982e04 100644 (file)
@@ -76,6 +76,7 @@
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_clz_i32          0
 #define TCG_TARGET_HAS_ctz_i32          0
+#define TCG_TARGET_HAS_ctpop_i32        0
 #define TCG_TARGET_HAS_neg_i32          1
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_clz_i64          0
 #define TCG_TARGET_HAS_ctz_i64          0
+#define TCG_TARGET_HAS_ctpop_i64        0
 #define TCG_TARGET_HAS_neg_i64          1
 #define TCG_TARGET_HAS_not_i64          1
 #define TCG_TARGET_HAS_orc_i64          0
This page took 0.048196 seconds and 4 git commands to generate.