[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v4 54/64] tcg: Add opcode for ctpop
From: |
Alex Bennée |
Subject: |
Re: [Qemu-devel] [PATCH v4 54/64] tcg: Add opcode for ctpop |
Date: |
Fri, 09 Dec 2016 09:57:49 +0000 |
User-agent: |
mu4e 0.9.18; emacs 25.1.90.2 |
Richard Henderson <address@hidden> writes:
> The number of actual invocations of ctpop itself does not warrant
> an opcode, but it is very helpful for POWER7 to use in generating
> an expansion for ctz.
>
> Signed-off-by: Richard Henderson <address@hidden>
Reviewed-by: Alex Bennée <address@hidden>
> ---
> tcg-runtime.c | 10 ++++++++++
> tcg/aarch64/tcg-target.h | 2 ++
> tcg/arm/tcg-target.h | 1 +
> tcg/i386/tcg-target.h | 2 ++
> tcg/ia64/tcg-target.h | 2 ++
> tcg/mips/tcg-target.h | 1 +
> tcg/optimize.c | 14 ++++++++++++++
> tcg/ppc/tcg-target.h | 2 ++
> tcg/s390/tcg-target.h | 2 ++
> tcg/sparc/tcg-target.h | 2 ++
> tcg/tcg-op.c | 29 +++++++++++++++++++++++++++++
> tcg/tcg-op.h | 4 ++++
> tcg/tcg-opc.h | 2 ++
> tcg/tcg-runtime.h | 2 ++
> tcg/tcg.h | 1 +
> tcg/tci/tcg-target.h | 2 ++
> 16 files changed, 78 insertions(+)
>
> diff --git a/tcg-runtime.c b/tcg-runtime.c
> index c8b98df..4c60c96 100644
> --- a/tcg-runtime.c
> +++ b/tcg-runtime.c
> @@ -131,6 +131,16 @@ uint64_t HELPER(clrsb_i64)(uint64_t arg)
> return clrsb64(arg);
> }
>
> +uint32_t HELPER(ctpop_i32)(uint32_t arg)
> +{
> + return ctpop32(arg);
> +}
> +
> +uint64_t HELPER(ctpop_i64)(uint64_t arg)
> +{
> + return ctpop64(arg);
> +}
> +
> void HELPER(exit_atomic)(CPUArchState *env)
> {
> cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC());
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 9d6b00f..1a5ea23 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -64,6 +64,7 @@ typedef enum {
> #define TCG_TARGET_HAS_nor_i32 0
> #define TCG_TARGET_HAS_clz_i32 1
> #define TCG_TARGET_HAS_ctz_i32 1
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_deposit_i32 1
> #define TCG_TARGET_HAS_extract_i32 1
> #define TCG_TARGET_HAS_sextract_i32 1
> @@ -98,6 +99,7 @@ typedef enum {
> #define TCG_TARGET_HAS_nor_i64 0
> #define TCG_TARGET_HAS_clz_i64 1
> #define TCG_TARGET_HAS_ctz_i64 1
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_deposit_i64 1
> #define TCG_TARGET_HAS_extract_i64 1
> #define TCG_TARGET_HAS_sextract_i64 1
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 4cb94dc..09a19c6 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -112,6 +112,7 @@ extern bool use_idiv_instructions;
> #define TCG_TARGET_HAS_nor_i32 0
> #define TCG_TARGET_HAS_clz_i32 use_armv5t_instructions
> #define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
> #define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
> #define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 8fff287..b8f73f5 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -95,6 +95,7 @@ extern bool have_bmi1;
> #define TCG_TARGET_HAS_nor_i32 0
> #define TCG_TARGET_HAS_clz_i32 1
> #define TCG_TARGET_HAS_ctz_i32 1
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_deposit_i32 1
> #define TCG_TARGET_HAS_extract_i32 1
> #define TCG_TARGET_HAS_sextract_i32 1
> @@ -129,6 +130,7 @@ extern bool have_bmi1;
> #define TCG_TARGET_HAS_nor_i64 0
> #define TCG_TARGET_HAS_clz_i64 1
> #define TCG_TARGET_HAS_ctz_i64 1
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_deposit_i64 1
> #define TCG_TARGET_HAS_extract_i64 1
> #define TCG_TARGET_HAS_sextract_i64 0
> diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
> index 9a829ae..42aea03 100644
> --- a/tcg/ia64/tcg-target.h
> +++ b/tcg/ia64/tcg-target.h
> @@ -144,6 +144,8 @@ typedef enum {
> #define TCG_TARGET_HAS_clz_i64 0
> #define TCG_TARGET_HAS_ctz_i32 0
> #define TCG_TARGET_HAS_ctz_i64 0
> +#define TCG_TARGET_HAS_ctpop_i32 0
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_nor_i64 1
> #define TCG_TARGET_HAS_orc_i32 1
> #define TCG_TARGET_HAS_orc_i64 1
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index 0526018..aa7c2b2 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -130,6 +130,7 @@ extern bool use_mips32r2_instructions;
> #define TCG_TARGET_HAS_rot_i32 use_mips32r2_instructions
> #define TCG_TARGET_HAS_clz_i32 use_mips32r2_instructions
> #define TCG_TARGET_HAS_ctz_i32 0
> +#define TCG_TARGET_HAS_ctpop_i32 0
>
> /* optional instructions automatically implemented */
> #define TCG_TARGET_HAS_neg_i32 0 /* sub rd, zero, rt */
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index e7ecce4..adfc56c 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -308,6 +308,12 @@ static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
> case INDEX_op_ctz_i64:
> return x ? ctz64(x) : y;
>
> + case INDEX_op_ctpop_i32:
> + return ctpop32(x);
> +
> + case INDEX_op_ctpop_i64:
> + return ctpop64(x);
> +
> CASE_OP_32_64(ext8s):
> return (int8_t)x;
>
> @@ -918,6 +924,13 @@ void tcg_optimize(TCGContext *s)
> mask = temps[args[2]].mask | 63;
> break;
>
> + case INDEX_op_ctpop_i32:
> + mask = 32 | 31;
> + break;
> + case INDEX_op_ctpop_i64:
> + mask = 64 | 63;
> + break;
> +
> CASE_OP_32_64(setcond):
> case INDEX_op_setcond2_i32:
> mask = 1;
> @@ -1031,6 +1044,7 @@ void tcg_optimize(TCGContext *s)
> CASE_OP_32_64(ext8u):
> CASE_OP_32_64(ext16s):
> CASE_OP_32_64(ext16u):
> + CASE_OP_32_64(ctpop):
> case INDEX_op_ext32s_i64:
> case INDEX_op_ext32u_i64:
> case INDEX_op_ext_i32_i64:
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index c798c9c..57e66cf 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -72,6 +72,7 @@ extern bool have_isa_3_00;
> #define TCG_TARGET_HAS_nor_i32 1
> #define TCG_TARGET_HAS_clz_i32 1
> #define TCG_TARGET_HAS_ctz_i32 have_isa_3_00
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_deposit_i32 1
> #define TCG_TARGET_HAS_extract_i32 1
> #define TCG_TARGET_HAS_sextract_i32 0
> @@ -107,6 +108,7 @@ extern bool have_isa_3_00;
> #define TCG_TARGET_HAS_nor_i64 1
> #define TCG_TARGET_HAS_clz_i64 1
> #define TCG_TARGET_HAS_ctz_i64 have_isa_3_00
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_deposit_i64 1
> #define TCG_TARGET_HAS_extract_i64 1
> #define TCG_TARGET_HAS_sextract_i64 0
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 22500ba..cbdd2a6 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -79,6 +79,7 @@ extern uint64_t s390_facilities;
> #define TCG_TARGET_HAS_nor_i32 0
> #define TCG_TARGET_HAS_clz_i32 0
> #define TCG_TARGET_HAS_ctz_i32 0
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_deposit_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
> #define TCG_TARGET_HAS_extract_i32 (s390_facilities & FACILITY_GEN_INST_EXT)
> #define TCG_TARGET_HAS_sextract_i32 0
> @@ -112,6 +113,7 @@ extern uint64_t s390_facilities;
> #define TCG_TARGET_HAS_nor_i64 0
> #define TCG_TARGET_HAS_clz_i64 (s390_facilities & FACILITY_EXT_IMM)
> #define TCG_TARGET_HAS_ctz_i64 0
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_deposit_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
> #define TCG_TARGET_HAS_extract_i64 (s390_facilities & FACILITY_GEN_INST_EXT)
> #define TCG_TARGET_HAS_sextract_i64 0
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index 340837a..b8b74f96f 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -112,6 +112,7 @@ extern bool use_vis3_instructions;
> #define TCG_TARGET_HAS_nor_i32 0
> #define TCG_TARGET_HAS_clz_i32 0
> #define TCG_TARGET_HAS_ctz_i32 0
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_deposit_i32 0
> #define TCG_TARGET_HAS_extract_i32 0
> #define TCG_TARGET_HAS_sextract_i32 0
> @@ -146,6 +147,7 @@ extern bool use_vis3_instructions;
> #define TCG_TARGET_HAS_nor_i64 0
> #define TCG_TARGET_HAS_clz_i64 0
> #define TCG_TARGET_HAS_ctz_i64 0
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_deposit_i64 0
> #define TCG_TARGET_HAS_extract_i64 0
> #define TCG_TARGET_HAS_sextract_i64 0
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 620e268..6f4b1b6 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -550,6 +550,21 @@ void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg)
> }
> }
>
> +void tcg_gen_ctpop_i32(TCGv_i32 ret, TCGv_i32 arg1)
> +{
> + if (TCG_TARGET_HAS_ctpop_i32) {
> + tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg1);
> + } else if (TCG_TARGET_HAS_ctpop_i64) {
> + TCGv_i64 t = tcg_temp_new_i64();
> + tcg_gen_extu_i32_i64(t, arg1);
> + tcg_gen_ctpop_i64(t, t);
> + tcg_gen_extrl_i64_i32(ret, t);
> + tcg_temp_free_i64(t);
> + } else {
> + gen_helper_ctpop_i32(ret, arg1);
> + }
> +}
> +
> void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
> {
> if (TCG_TARGET_HAS_rot_i32) {
> @@ -1874,6 +1889,20 @@ void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg)
> }
> }
>
> +void tcg_gen_ctpop_i64(TCGv_i64 ret, TCGv_i64 arg1)
> +{
> + if (TCG_TARGET_HAS_ctpop_i64) {
> + tcg_gen_op2_i64(INDEX_op_ctpop_i64, ret, arg1);
> + } else if (TCG_TARGET_REG_BITS == 32 && TCG_TARGET_HAS_ctpop_i32) {
> + tcg_gen_ctpop_i32(TCGV_HIGH(ret), TCGV_HIGH(arg1));
> + tcg_gen_ctpop_i32(TCGV_LOW(ret), TCGV_LOW(arg1));
> + tcg_gen_add_i32(TCGV_LOW(ret), TCGV_LOW(ret), TCGV_HIGH(ret));
> + tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
> + } else {
> + gen_helper_ctpop_i64(ret, arg1);
> + }
> +}
> +
> void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
> {
> if (TCG_TARGET_HAS_rot_i64) {
> diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
> index c2f3db9..c68e300 100644
> --- a/tcg/tcg-op.h
> +++ b/tcg/tcg-op.h
> @@ -291,6 +291,7 @@ void tcg_gen_ctz_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> void tcg_gen_clzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
> void tcg_gen_ctzi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2);
> void tcg_gen_clrsb_i32(TCGv_i32 ret, TCGv_i32 arg);
> +void tcg_gen_ctpop_i32(TCGv_i32 a1, TCGv_i32 a2);
> void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
> void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
> @@ -479,6 +480,7 @@ void tcg_gen_ctz_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> void tcg_gen_clzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
> void tcg_gen_ctzi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2);
> void tcg_gen_clrsb_i64(TCGv_i64 ret, TCGv_i64 arg);
> +void tcg_gen_ctpop_i64(TCGv_i64 a1, TCGv_i64 a2);
> void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
> void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
> @@ -973,6 +975,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> #define tcg_gen_clzi_tl tcg_gen_clzi_i64
> #define tcg_gen_ctzi_tl tcg_gen_ctzi_i64
> #define tcg_gen_clrsb_tl tcg_gen_clrsb_i64
> +#define tcg_gen_ctpop_tl tcg_gen_ctpop_i64
> #define tcg_gen_rotl_tl tcg_gen_rotl_i64
> #define tcg_gen_rotli_tl tcg_gen_rotli_i64
> #define tcg_gen_rotr_tl tcg_gen_rotr_i64
> @@ -1069,6 +1072,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
> #define tcg_gen_clzi_tl tcg_gen_clzi_i32
> #define tcg_gen_ctzi_tl tcg_gen_ctzi_i32
> #define tcg_gen_clrsb_tl tcg_gen_clrsb_i32
> +#define tcg_gen_ctpop_tl tcg_gen_ctpop_i32
> #define tcg_gen_rotl_tl tcg_gen_rotl_i32
> #define tcg_gen_rotli_tl tcg_gen_rotli_i32
> #define tcg_gen_rotr_tl tcg_gen_rotr_i32
> diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
> index d00db4f..f06f894 100644
> --- a/tcg/tcg-opc.h
> +++ b/tcg/tcg-opc.h
> @@ -106,6 +106,7 @@ DEF(nand_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nand_i32))
> DEF(nor_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_nor_i32))
> DEF(clz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_clz_i32))
> DEF(ctz_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_ctz_i32))
> +DEF(ctpop_i32, 1, 1, 0, IMPL(TCG_TARGET_HAS_ctpop_i32))
>
> DEF(mov_i64, 1, 1, 0, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
> DEF(movi_i64, 1, 0, 1, TCG_OPF_64BIT | TCG_OPF_NOT_PRESENT)
> @@ -175,6 +176,7 @@ DEF(nand_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nand_i64))
> DEF(nor_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_nor_i64))
> DEF(clz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_clz_i64))
> DEF(ctz_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctz_i64))
> +DEF(ctpop_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ctpop_i64))
>
> DEF(add2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_add2_i64))
> DEF(sub2_i64, 2, 4, 0, IMPL64 | IMPL(TCG_TARGET_HAS_sub2_i64))
> diff --git a/tcg/tcg-runtime.h b/tcg/tcg-runtime.h
> index 0d30f1a..114ea6f 100644
> --- a/tcg/tcg-runtime.h
> +++ b/tcg/tcg-runtime.h
> @@ -21,6 +21,8 @@ DEF_HELPER_FLAGS_2(clz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> DEF_HELPER_FLAGS_2(ctz_i64, TCG_CALL_NO_RWG_SE, i64, i64, i64)
> DEF_HELPER_FLAGS_1(clrsb_i32, TCG_CALL_NO_RWG_SE, i32, i32)
> DEF_HELPER_FLAGS_1(clrsb_i64, TCG_CALL_NO_RWG_SE, i64, i64)
> +DEF_HELPER_FLAGS_1(ctpop_i32, TCG_CALL_NO_RWG_SE, i32, i32)
> +DEF_HELPER_FLAGS_1(ctpop_i64, TCG_CALL_NO_RWG_SE, i64, i64)
>
> DEF_HELPER_FLAGS_1(exit_atomic, TCG_CALL_NO_WG, noreturn, env)
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index e026282..631c6f6 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -113,6 +113,7 @@ typedef uint64_t TCGRegSet;
> #define TCG_TARGET_HAS_nor_i64 0
> #define TCG_TARGET_HAS_clz_i64 0
> #define TCG_TARGET_HAS_ctz_i64 0
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_deposit_i64 0
> #define TCG_TARGET_HAS_extract_i64 0
> #define TCG_TARGET_HAS_sextract_i64 0
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index 0646444..838bf3a 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -76,6 +76,7 @@
> #define TCG_TARGET_HAS_nor_i32 0
> #define TCG_TARGET_HAS_clz_i32 0
> #define TCG_TARGET_HAS_ctz_i32 0
> +#define TCG_TARGET_HAS_ctpop_i32 0
> #define TCG_TARGET_HAS_neg_i32 1
> #define TCG_TARGET_HAS_not_i32 1
> #define TCG_TARGET_HAS_orc_i32 0
> @@ -108,6 +109,7 @@
> #define TCG_TARGET_HAS_nor_i64 0
> #define TCG_TARGET_HAS_clz_i64 0
> #define TCG_TARGET_HAS_ctz_i64 0
> +#define TCG_TARGET_HAS_ctpop_i64 0
> #define TCG_TARGET_HAS_neg_i64 1
> #define TCG_TARGET_HAS_not_i64 1
> #define TCG_TARGET_HAS_orc_i64 0
--
Alex Bennée
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [Qemu-devel] [PATCH v4 54/64] tcg: Add opcode for ctpop,
Alex Bennée <=