[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 07/11] target-mips: optimize gen_muldiv()
From: |
Aurelien Jarno |
Subject: |
[Qemu-devel] [PATCH 07/11] target-mips: optimize gen_muldiv() |
Date: |
Sat, 8 Nov 2008 09:37:16 +0100 |
User-agent: |
Mutt/1.5.18 (2008-05-17) |
Optimize code generation in gen_muldiv():
- Don't do sign extension when the operands are already guaranteed to be
sign-extended (if they are not, the architecture defines the results as
UNPREDICTABLE).
- Access the LO and HI registers directly instead of writing them through
a temporary variable.
Signed-off-by: Aurelien Jarno <address@hidden>
---
target-mips/translate.c | 162 ++++++++++++++---------------------------------
1 files changed, 47 insertions(+), 115 deletions(-)
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f1cb7ca..544e5c8 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -604,27 +604,7 @@ static inline void gen_store_gpr (TCGv t, int reg)
tcg_gen_mov_tl(cpu_gpr[reg], t);
}
-/* Moves to/from HI and LO registers. */
-static inline void gen_load_HI (TCGv t, int reg)
-{
- tcg_gen_mov_tl(t, cpu_HI[reg]);
-}
-
-static inline void gen_store_HI (TCGv t, int reg)
-{
- tcg_gen_mov_tl(cpu_HI[reg], t);
-}
-
-static inline void gen_load_LO (TCGv t, int reg)
-{
- tcg_gen_mov_tl(t, cpu_LO[reg]);
-}
-
-static inline void gen_store_LO (TCGv t, int reg)
-{
- tcg_gen_mov_tl(cpu_LO[reg], t);
-}
-
+/* Moves to/from ACX register. */
static inline void gen_load_ACX (TCGv t, int reg)
{
tcg_gen_mov_tl(t, cpu_ACX[reg]);
@@ -1850,23 +1830,23 @@ static void gen_HILO (DisasContext *ctx, uint32_t opc, int reg)
}
switch (opc) {
case OPC_MFHI:
- gen_load_HI(t0, 0);
+ tcg_gen_mov_tl(t0, cpu_HI[0]);
gen_store_gpr(t0, reg);
opn = "mfhi";
break;
case OPC_MFLO:
- gen_load_LO(t0, 0);
+ tcg_gen_mov_tl(t0, cpu_LO[0]);
gen_store_gpr(t0, reg);
opn = "mflo";
break;
case OPC_MTHI:
gen_load_gpr(t0, reg);
- gen_store_HI(t0, 0);
+ tcg_gen_mov_tl(cpu_HI[0], t0);
opn = "mthi";
break;
case OPC_MTLO:
gen_load_gpr(t0, reg);
- gen_store_LO(t0, 0);
+ tcg_gen_mov_tl(cpu_LO[0], t0);
opn = "mtlo";
break;
default:
@@ -1893,27 +1873,28 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
{
int l1 = gen_new_label();
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
{
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp3 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_ext_tl_i64(r_tmp1, t0);
- tcg_gen_ext_tl_i64(r_tmp2, t1);
- tcg_gen_div_i64(r_tmp3, r_tmp1, r_tmp2);
- tcg_gen_rem_i64(r_tmp2, r_tmp1, r_tmp2);
- tcg_gen_trunc_i64_tl(t0, r_tmp3);
- tcg_gen_trunc_i64_tl(t1, r_tmp2);
+ int l2 = gen_new_label();
+ TCGv r_tmp1 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp2 = tcg_temp_local_new(TCG_TYPE_I32);
+ TCGv r_tmp3 = tcg_temp_local_new(TCG_TYPE_I32);
+
+ tcg_gen_trunc_tl_i32(r_tmp1, t0);
+ tcg_gen_trunc_tl_i32(r_tmp2, t1);
+ tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp1, -1 << 31, l2);
+ tcg_gen_brcondi_i32(TCG_COND_NE, r_tmp2, -1, l2);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_movi_tl(cpu_HI[0], 0);
+ tcg_gen_br(l1);
+ gen_set_label(l2);
+ tcg_gen_div_i32(r_tmp3, r_tmp1, r_tmp2);
+ tcg_gen_rem_i32(r_tmp2, r_tmp1, r_tmp2);
+ tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+ tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp2);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
tcg_temp_free(r_tmp3);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
}
gen_set_label(l1);
}
@@ -1934,13 +1915,11 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_trunc_tl_i32(r_tmp2, t1);
tcg_gen_divu_i32(r_tmp3, r_tmp1, r_tmp2);
tcg_gen_remu_i32(r_tmp1, r_tmp1, r_tmp2);
- tcg_gen_ext_i32_tl(t0, r_tmp3);
- tcg_gen_ext_i32_tl(t1, r_tmp1);
+ tcg_gen_ext_i32_tl(cpu_LO[0], r_tmp3);
+ tcg_gen_ext_i32_tl(cpu_HI[0], r_tmp1);
tcg_temp_free(r_tmp1);
tcg_temp_free(r_tmp2);
tcg_temp_free(r_tmp3);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
}
gen_set_label(l1);
}
@@ -1951,8 +1930,6 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
@@ -1961,10 +1938,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "mult";
break;
@@ -1983,10 +1958,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "multu";
break;
@@ -2001,24 +1974,12 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_brcondi_tl(TCG_COND_NE, t0, -1LL << 63, l2);
tcg_gen_brcondi_tl(TCG_COND_NE, t1, -1LL, l2);
- {
- tcg_gen_movi_tl(t1, 0);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
- tcg_gen_br(l1);
- }
+ tcg_gen_mov_tl(cpu_LO[0], t0);
+ tcg_gen_movi_tl(cpu_HI[0], 0);
+ tcg_gen_br(l1);
gen_set_label(l2);
- {
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_div_i64(r_tmp1, t0, t1);
- tcg_gen_rem_i64(r_tmp2, t0, t1);
- gen_store_LO(r_tmp1, 0);
- gen_store_HI(r_tmp2, 0);
- tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- }
+ tcg_gen_div_i64(cpu_LO[0], t0, t1);
+ tcg_gen_rem_i64(cpu_HI[0], t0, t1);
}
gen_set_label(l1);
}
@@ -2029,17 +1990,8 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
int l1 = gen_new_label();
tcg_gen_brcondi_tl(TCG_COND_EQ, t1, 0, l1);
- {
- TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
- TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
-
- tcg_gen_divu_i64(r_tmp1, t0, t1);
- tcg_gen_remu_i64(r_tmp2, t0, t1);
- tcg_temp_free(r_tmp1);
- tcg_temp_free(r_tmp2);
- gen_store_LO(r_tmp1, 0);
- gen_store_HI(r_tmp2, 0);
- }
+ tcg_gen_divu_i64(cpu_LO[0], t0, t1);
+ tcg_gen_remu_i64(cpu_HI[0], t0, t1);
gen_set_label(l1);
}
opn = "ddivu";
@@ -2058,24 +2010,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "madd";
break;
@@ -2089,19 +2035,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_extu_tl_i64(r_tmp1, t0);
tcg_gen_extu_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_add_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "maddu";
break;
@@ -2110,24 +2052,18 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
TCGv r_tmp1 = tcg_temp_new(TCG_TYPE_I64);
TCGv r_tmp2 = tcg_temp_new(TCG_TYPE_I64);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
tcg_gen_ext_tl_i64(r_tmp1, t0);
tcg_gen_ext_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "msub";
break;
@@ -2141,19 +2077,15 @@ static void gen_muldiv (DisasContext *ctx, uint32_t opc,
tcg_gen_extu_tl_i64(r_tmp1, t0);
tcg_gen_extu_tl_i64(r_tmp2, t1);
tcg_gen_mul_i64(r_tmp1, r_tmp1, r_tmp2);
- gen_load_LO(t0, 0);
- gen_load_HI(t1, 0);
- tcg_gen_concat_tl_i64(r_tmp2, t0, t1);
+ tcg_gen_concat_tl_i64(r_tmp2, cpu_LO[0], cpu_HI[0]);
tcg_gen_sub_i64(r_tmp1, r_tmp1, r_tmp2);
tcg_temp_free(r_tmp2);
tcg_gen_trunc_i64_tl(t0, r_tmp1);
tcg_gen_shri_i64(r_tmp1, r_tmp1, 32);
tcg_gen_trunc_i64_tl(t1, r_tmp1);
tcg_temp_free(r_tmp1);
- tcg_gen_ext32s_tl(t0, t0);
- tcg_gen_ext32s_tl(t1, t1);
- gen_store_LO(t0, 0);
- gen_store_HI(t1, 0);
+ tcg_gen_ext32s_tl(cpu_LO[0], t0);
+ tcg_gen_ext32s_tl(cpu_HI[0], t1);
}
opn = "msubu";
break;
--
1.5.6.5
--
.''`. Aurelien Jarno | GPG: 1024D/F1BCDB73
: :' : Debian developer | Electrical Engineer
`. `' address@hidden | address@hidden
`- people.debian.org/~aurel32 | www.aurel32.net
- [Qemu-devel] [PATCH 0/11] target-mips: optimizations, Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 01/11] target-mips: optimize gen_save_pc(), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 02/11] target-mips: optimize gen_op_addr_add() (1/2), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 03/11] target-mips: optimize gen_op_addr_add() (2/2), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 04/11] target-mips: convert bitfield ops to TCG, Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 05/11] target-mips: convert bit shuffle ops to TCG, Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 06/11] target-mips: optimize gen_arith()/gen_arith_imm(), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 07/11] target-mips: optimize gen_muldiv(),
Aurelien Jarno <=
- [Qemu-devel] [PATCH 08/11] target-mips: optimize gen_farith(), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 09/11] target-mips: optimize movc*(), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 10/11] target-mips: gen_compute_branch1(), Aurelien Jarno, 2008/11/08
- [Qemu-devel] [PATCH 11/11] target-mips: fix temporary variable freeing in op_ldst_##insn(), Aurelien Jarno, 2008/11/08