[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 11/38] target-i386: Use mulu2 and muls2
From: Blue Swirl
Subject: Re: [Qemu-devel] [PATCH 11/38] target-i386: Use mulu2 and muls2
Date: Sat, 23 Feb 2013 16:39:41 +0000
Applying: target-i386: Use mulu2 and muls2
error: patch failed: target-i386/helper.h:14
error: target-i386/helper.h: patch does not apply
error: patch failed: target-i386/int_helper.c:374
error: target-i386/int_helper.c: patch does not apply
error: patch failed: target-i386/translate.c:4111
error: target-i386/translate.c: patch does not apply
Patch failed at 0011 target-i386: Use mulu2 and muls2
On Wed, Feb 20, 2013 at 7:51 AM, Richard Henderson <address@hidden> wrote:
> These correspond very closely to the insns that we're emulating.
>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> target-i386/helper.h | 4 --
> target-i386/int_helper.c | 40 ------------
> target-i386/translate.c | 167 ++++++++++++++++-------------------------------
> 3 files changed, 56 insertions(+), 155 deletions(-)
>
> diff --git a/target-i386/helper.h b/target-i386/helper.h
> index 26a0cc8..d6974df 100644
> --- a/target-i386/helper.h
> +++ b/target-i386/helper.h
> @@ -14,12 +14,8 @@ DEF_HELPER_2(idivw_AX, void, env, tl)
> DEF_HELPER_2(divl_EAX, void, env, tl)
> DEF_HELPER_2(idivl_EAX, void, env, tl)
> #ifdef TARGET_X86_64
> -DEF_HELPER_2(mulq_EAX_T0, void, env, tl)
> -DEF_HELPER_2(imulq_EAX_T0, void, env, tl)
> -DEF_HELPER_3(imulq_T0_T1, tl, env, tl, tl)
> DEF_HELPER_2(divq_EAX, void, env, tl)
> DEF_HELPER_2(idivq_EAX, void, env, tl)
> -DEF_HELPER_FLAGS_2(umulh, TCG_CALL_NO_RWG_SE, tl, tl, tl)
> #endif
>
> DEF_HELPER_2(aam, void, env, int)
> diff --git a/target-i386/int_helper.c b/target-i386/int_helper.c
> index 3b56075..74c7c36 100644
> --- a/target-i386/int_helper.c
> +++ b/target-i386/int_helper.c
> @@ -374,46 +374,6 @@ static int idiv64(uint64_t *plow, uint64_t *phigh, int64_t b)
> return 0;
> }
>
> -void helper_mulq_EAX_T0(CPUX86State *env, target_ulong t0)
> -{
> - uint64_t r0, r1;
> -
> - mulu64(&r0, &r1, EAX, t0);
> - EAX = r0;
> - EDX = r1;
> - CC_DST = r0;
> - CC_SRC = r1;
> -}
> -
> -target_ulong helper_umulh(target_ulong t0, target_ulong t1)
> -{
> - uint64_t h, l;
> - mulu64(&l, &h, t0, t1);
> - return h;
> -}
> -
> -void helper_imulq_EAX_T0(CPUX86State *env, target_ulong t0)
> -{
> - uint64_t r0, r1;
> -
> - muls64(&r0, &r1, EAX, t0);
> - EAX = r0;
> - EDX = r1;
> - CC_DST = r0;
> - CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
> -}
> -
> -target_ulong helper_imulq_T0_T1(CPUX86State *env, target_ulong t0,
> - target_ulong t1)
> -{
> - uint64_t r0, r1;
> -
> - muls64(&r0, &r1, t0, t1);
> - CC_DST = r0;
> - CC_SRC = ((int64_t)r1 != ((int64_t)r0 >> 63));
> - return r0;
> -}
> -
> void helper_divq_EAX(CPUX86State *env, target_ulong t0)
> {
> uint64_t r0, r1;
> diff --git a/target-i386/translate.c b/target-i386/translate.c
> index 439d19e..1545e3f 100644
> --- a/target-i386/translate.c
> +++ b/target-i386/translate.c
> @@ -4111,31 +4111,18 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
> ot = s->dflag == 2 ? OT_QUAD : OT_LONG;
> gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0);
> switch (ot) {
> - TCGv_i64 t0, t1;
> default:
> - t0 = tcg_temp_new_i64();
> - t1 = tcg_temp_new_i64();
> -#ifdef TARGET_X86_64
> - tcg_gen_ext32u_i64(t0, cpu_T[0]);
> - tcg_gen_ext32u_i64(t1, cpu_regs[R_EDX]);
> -#else
> - tcg_gen_extu_i32_i64(t0, cpu_T[0]);
> - tcg_gen_extu_i32_i64(t0, cpu_regs[R_EDX]);
> -#endif
> - tcg_gen_mul_i64(t0, t0, t1);
> - tcg_gen_trunc_i64_tl(cpu_T[0], t0);
> - tcg_gen_shri_i64(t0, t0, 32);
> - tcg_gen_trunc_i64_tl(cpu_T[1], t0);
> - tcg_temp_free_i64(t0);
> - tcg_temp_free_i64(t1);
> - gen_op_mov_reg_T0(OT_LONG, s->vex_v);
> - gen_op_mov_reg_T1(OT_LONG, reg);
> + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EDX]);
> + tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
> + cpu_tmp2_i32, cpu_tmp3_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], cpu_tmp2_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp3_i32);
> break;
> #ifdef TARGET_X86_64
> case OT_QUAD:
> - tcg_gen_mov_tl(cpu_T[1], cpu_regs[R_EDX]);
> - tcg_gen_mul_tl(cpu_regs[s->vex_v], cpu_T[0], cpu_T[1]);
> - gen_helper_umulh(cpu_regs[reg], cpu_T[0], cpu_T[1]);
> + tcg_gen_mulu2_i64(cpu_regs[s->vex_v], cpu_regs[reg],
> + cpu_T[0], cpu_regs[R_EDX]);
> break;
> #endif
> }
> @@ -5034,39 +5021,22 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
> break;
> default:
> case OT_LONG:
> -#ifdef TARGET_X86_64
> - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
> - tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
> - tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
> - tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> - gen_op_mov_reg_T0(OT_LONG, R_EAX);
> - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
> - gen_op_mov_reg_T0(OT_LONG, R_EDX);
> - tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
> -#else
> - {
> - TCGv_i64 t0, t1;
> - t0 = tcg_temp_new_i64();
> - t1 = tcg_temp_new_i64();
> - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
> - tcg_gen_extu_i32_i64(t0, cpu_T[0]);
> - tcg_gen_extu_i32_i64(t1, cpu_T[1]);
> - tcg_gen_mul_i64(t0, t0, t1);
> - tcg_gen_trunc_i64_i32(cpu_T[0], t0);
> - gen_op_mov_reg_T0(OT_LONG, R_EAX);
> - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> - tcg_gen_shri_i64(t0, t0, 32);
> - tcg_gen_trunc_i64_i32(cpu_T[0], t0);
> - gen_op_mov_reg_T0(OT_LONG, R_EDX);
> - tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
> - }
> -#endif
> + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
> + tcg_gen_mulu2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
> + cpu_tmp2_i32, cpu_tmp3_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
> + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
> + tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
> set_cc_op(s, CC_OP_MULL);
> break;
> #ifdef TARGET_X86_64
> case OT_QUAD:
> - gen_helper_mulq_EAX_T0(cpu_env, cpu_T[0]);
> + tcg_gen_mulu2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
> + cpu_T[0], cpu_regs[R_EAX]);
> + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
> + tcg_gen_mov_tl(cpu_cc_src, cpu_regs[R_EDX]);
> set_cc_op(s, CC_OP_MULQ);
> break;
> #endif
> @@ -5102,41 +5072,25 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
> break;
> default:
> case OT_LONG:
> -#ifdef TARGET_X86_64
> - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
> - tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
> - tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
> - tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> - gen_op_mov_reg_T0(OT_LONG, R_EAX);
> - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> - tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
> - tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
> - tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
> - gen_op_mov_reg_T0(OT_LONG, R_EDX);
> -#else
> - {
> - TCGv_i64 t0, t1;
> - t0 = tcg_temp_new_i64();
> - t1 = tcg_temp_new_i64();
> - gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
> - tcg_gen_ext_i32_i64(t0, cpu_T[0]);
> - tcg_gen_ext_i32_i64(t1, cpu_T[1]);
> - tcg_gen_mul_i64(t0, t0, t1);
> - tcg_gen_trunc_i64_i32(cpu_T[0], t0);
> - gen_op_mov_reg_T0(OT_LONG, R_EAX);
> - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> - tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
> - tcg_gen_shri_i64(t0, t0, 32);
> - tcg_gen_trunc_i64_i32(cpu_T[0], t0);
> - gen_op_mov_reg_T0(OT_LONG, R_EDX);
> - tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
> - }
> -#endif
> + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_regs[R_EAX]);
> + tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
> + cpu_tmp2_i32, cpu_tmp3_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[R_EAX], cpu_tmp2_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[R_EDX], cpu_tmp3_i32);
> + tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
> + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
> + tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
> + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
> set_cc_op(s, CC_OP_MULL);
> break;
> #ifdef TARGET_X86_64
> case OT_QUAD:
> - gen_helper_imulq_EAX_T0(cpu_env, cpu_T[0]);
> + tcg_gen_muls2_i64(cpu_regs[R_EAX], cpu_regs[R_EDX],
> + cpu_T[0], cpu_regs[R_EAX]);
> + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[R_EAX]);
> + tcg_gen_sari_tl(cpu_cc_src, cpu_regs[R_EAX], 63);
> + tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_regs[R_EDX]);
> set_cc_op(s, CC_OP_MULQ);
> break;
> #endif
> @@ -5391,37 +5345,27 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
> } else {
> gen_op_mov_TN_reg(ot, 1, reg);
> }
> -
> -#ifdef TARGET_X86_64
> - if (ot == OT_QUAD) {
> - gen_helper_imulq_T0_T1(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]);
> - } else
> -#endif
> - if (ot == OT_LONG) {
> + switch (ot) {
> #ifdef TARGET_X86_64
> - tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
> - tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
> - tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
> - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> - tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
> - tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
> -#else
> - {
> - TCGv_i64 t0, t1;
> - t0 = tcg_temp_new_i64();
> - t1 = tcg_temp_new_i64();
> - tcg_gen_ext_i32_i64(t0, cpu_T[0]);
> - tcg_gen_ext_i32_i64(t1, cpu_T[1]);
> - tcg_gen_mul_i64(t0, t0, t1);
> - tcg_gen_trunc_i64_i32(cpu_T[0], t0);
> - tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> - tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
> - tcg_gen_shri_i64(t0, t0, 32);
> - tcg_gen_trunc_i64_i32(cpu_T[1], t0);
> - tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
> - }
> + case OT_QUAD:
> + tcg_gen_muls2_i64(cpu_regs[reg], cpu_T[1], cpu_T[0], cpu_T[1]);
> + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
> + tcg_gen_sari_tl(cpu_cc_src, cpu_cc_dst, 63);
> + tcg_gen_sub_tl(cpu_cc_src, cpu_cc_src, cpu_T[1]);
> + break;
> #endif
> - } else {
> + case OT_LONG:
> + tcg_gen_trunc_tl_i32(cpu_tmp2_i32, cpu_T[0]);
> + tcg_gen_trunc_tl_i32(cpu_tmp3_i32, cpu_T[1]);
> + tcg_gen_muls2_i32(cpu_tmp2_i32, cpu_tmp3_i32,
> + cpu_tmp2_i32, cpu_tmp3_i32);
> + tcg_gen_extu_i32_tl(cpu_regs[reg], cpu_tmp2_i32);
> + tcg_gen_sari_i32(cpu_tmp2_i32, cpu_tmp2_i32, 31);
> + tcg_gen_mov_tl(cpu_cc_dst, cpu_regs[reg]);
> + tcg_gen_sub_i32(cpu_tmp2_i32, cpu_tmp2_i32, cpu_tmp3_i32);
> + tcg_gen_extu_i32_tl(cpu_cc_src, cpu_tmp2_i32);
> + break;
> + default:
> tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
> tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
> /* XXX: use 32 bit mul which could be faster */
> @@ -5429,8 +5373,9 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
> tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
> tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
> tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
> + gen_op_mov_reg_T0(ot, reg);
> + break;
> }
> - gen_op_mov_reg_T0(ot, reg);
> set_cc_op(s, CC_OP_MULB + ot);
> break;
> case 0x1c0:
> --
> 1.8.1.2
>
- [Qemu-devel] [PATCH 05/38] tcg: Add signed multiword multiplication operations, (continued)
- [Qemu-devel] [PATCH 05/38] tcg: Add signed multiword multiplication operations, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 07/38] tcg: Implement multiword multiply helpers, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 06/38] tcg: Implement a 64-bit to 32-bit extraction helper, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 08/38] tcg: Implement multiword addition helpers, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 12/38] target-i386: Use add2 to implement the ADX extension, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 10/38] tcg-arm: Implement muls2_i32, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 09/38] tcg-i386: Implement multiword arithmetic ops, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 14/38] tcg: Apply life analysis to 64-bit multiword arithmetic ops, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 16/38] target-s390x: Use mulu2 for mlgr insn, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 11/38] target-i386: Use mulu2 and muls2, Richard Henderson, 2013/02/20
- Re: [Qemu-devel] [PATCH 11/38] target-i386: Use mulu2 and muls2, Blue Swirl <=
- [Qemu-devel] [PATCH 13/38] tcg: Implement muls2 with mulu2, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 18/38] target-arm: Use mul[us]2 and add2 in umlal et al, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 15/38] target-alpha: Use mulu2 for umulh insn, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 28/38] target-ppc: Compute addition carry with setcond, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 32/38] target-ppc: Compute mullwo without branches, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 34/38] target-sparc: Use mul*2 for multiply, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 33/38] target-sparc: Use official add2/sub2 interfaces for addx/subx, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 30/38] target-ppc: Implement neg in terms of subf, Richard Henderson, 2013/02/20
- [Qemu-devel] [PATCH 36/38] target-unicore32: Use mul*2 for do_mult, Richard Henderson, 2013/02/20