qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [4508] converted MUL/IMUL to TCG


From: Fabrice Bellard
Subject: [Qemu-devel] [4508] converted MUL/IMUL to TCG
Date: Wed, 21 May 2008 10:12:54 +0000

Revision: 4508
          http://svn.sv.gnu.org/viewvc/?view=rev&root=qemu&revision=4508
Author:   bellard
Date:     2008-05-21 10:12:54 +0000 (Wed, 21 May 2008)

Log Message:
-----------
converted MUL/IMUL to TCG

Modified Paths:
--------------
    trunk/target-i386/helper.c
    trunk/target-i386/op.c
    trunk/target-i386/translate.c

Modified: trunk/target-i386/helper.c
===================================================================
--- trunk/target-i386/helper.c  2008-05-21 02:04:15 UTC (rev 4507)
+++ trunk/target-i386/helper.c  2008-05-21 10:12:54 UTC (rev 4508)
@@ -1609,22 +1609,6 @@
 #endif /* !CONFIG_USER_ONLY */
 
 
-#ifdef BUGGY_GCC_DIV64
-/* gcc 2.95.4 on PowerPC does not seem to like using __udivdi3, so we
-   call it from another function */
-uint32_t div32(uint64_t *q_ptr, uint64_t num, uint32_t den)
-{
-    *q_ptr = num / den;
-    return num % den;
-}
-
-int32_t idiv32(int64_t *q_ptr, int64_t num, int32_t den)
-{
-    *q_ptr = num / den;
-    return num % den;
-}
-#endif
-
 /* division, flags are undefined */
 
 void helper_divb_AL(target_ulong t0)
@@ -1707,12 +1691,8 @@
     if (den == 0) {
         raise_exception(EXCP00_DIVZ);
     }
-#ifdef BUGGY_GCC_DIV64
-    r = div32(&q, num, den);
-#else
     q = (num / den);
     r = (num % den);
-#endif
     if (q > 0xffffffff)
         raise_exception(EXCP00_DIVZ);
     EAX = (uint32_t)q;
@@ -1729,12 +1709,8 @@
     if (den == 0) {
         raise_exception(EXCP00_DIVZ);
     }
-#ifdef BUGGY_GCC_DIV64
-    r = idiv32(&q, num, den);
-#else
     q = (num / den);
     r = (num % den);
-#endif
     if (q != (int32_t)q)
         raise_exception(EXCP00_DIVZ);
     EAX = (uint32_t)q;

Modified: trunk/target-i386/op.c
===================================================================
--- trunk/target-i386/op.c      2008-05-21 02:04:15 UTC (rev 4507)
+++ trunk/target-i386/op.c      2008-05-21 10:12:54 UTC (rev 4508)
@@ -123,104 +123,6 @@
 
 #endif
 
-/* multiply/divide */
-
-/* XXX: add eflags optimizations */
-/* XXX: add non P4 style flags */
-
-void OPPROTO op_mulb_AL_T0(void)
-{
-    unsigned int res;
-    res = (uint8_t)EAX * (uint8_t)T0;
-    EAX = (EAX & ~0xffff) | res;
-    CC_DST = res;
-    CC_SRC = (res & 0xff00);
-}
-
-void OPPROTO op_imulb_AL_T0(void)
-{
-    int res;
-    res = (int8_t)EAX * (int8_t)T0;
-    EAX = (EAX & ~0xffff) | (res & 0xffff);
-    CC_DST = res;
-    CC_SRC = (res != (int8_t)res);
-}
-
-void OPPROTO op_mulw_AX_T0(void)
-{
-    unsigned int res;
-    res = (uint16_t)EAX * (uint16_t)T0;
-    EAX = (EAX & ~0xffff) | (res & 0xffff);
-    EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
-    CC_DST = res;
-    CC_SRC = res >> 16;
-}
-
-void OPPROTO op_imulw_AX_T0(void)
-{
-    int res;
-    res = (int16_t)EAX * (int16_t)T0;
-    EAX = (EAX & ~0xffff) | (res & 0xffff);
-    EDX = (EDX & ~0xffff) | ((res >> 16) & 0xffff);
-    CC_DST = res;
-    CC_SRC = (res != (int16_t)res);
-}
-
-void OPPROTO op_mull_EAX_T0(void)
-{
-    uint64_t res;
-    res = (uint64_t)((uint32_t)EAX) * (uint64_t)((uint32_t)T0);
-    EAX = (uint32_t)res;
-    EDX = (uint32_t)(res >> 32);
-    CC_DST = (uint32_t)res;
-    CC_SRC = (uint32_t)(res >> 32);
-}
-
-void OPPROTO op_imull_EAX_T0(void)
-{
-    int64_t res;
-    res = (int64_t)((int32_t)EAX) * (int64_t)((int32_t)T0);
-    EAX = (uint32_t)(res);
-    EDX = (uint32_t)(res >> 32);
-    CC_DST = res;
-    CC_SRC = (res != (int32_t)res);
-}
-
-void OPPROTO op_imulw_T0_T1(void)
-{
-    int res;
-    res = (int16_t)T0 * (int16_t)T1;
-    T0 = res;
-    CC_DST = res;
-    CC_SRC = (res != (int16_t)res);
-}
-
-void OPPROTO op_imull_T0_T1(void)
-{
-    int64_t res;
-    res = (int64_t)((int32_t)T0) * (int64_t)((int32_t)T1);
-    T0 = res;
-    CC_DST = res;
-    CC_SRC = (res != (int32_t)res);
-}
-
-#ifdef TARGET_X86_64
-void OPPROTO op_mulq_EAX_T0(void)
-{
-    helper_mulq_EAX_T0(T0);
-}
-
-void OPPROTO op_imulq_EAX_T0(void)
-{
-    helper_imulq_EAX_T0(T0);
-}
-
-void OPPROTO op_imulq_T0_T1(void)
-{
-    T0 = helper_imulq_T0_T1(T0, T1);
-}
-#endif
-
 /* constant load & misc op */
 
 /* XXX: consistent names */

Modified: trunk/target-i386/translate.c
===================================================================
--- trunk/target-i386/translate.c       2008-05-21 02:04:15 UTC (rev 4507)
+++ trunk/target-i386/translate.c       2008-05-21 10:12:54 UTC (rev 4508)
@@ -3799,21 +3799,64 @@
         case 4: /* mul */
             switch(ot) {
             case OT_BYTE:
-                gen_op_mulb_AL_T0();
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_andi_tl(cpu_cc_src, cpu_T[0], 0xff00);
                 s->cc_op = CC_OP_MULB;
                 break;
             case OT_WORD:
-                gen_op_mulw_AX_T0();
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16u_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
                 s->cc_op = CC_OP_MULW;
                 break;
             default:
             case OT_LONG:
-                gen_op_mull_EAX_T0();
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32u_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32u_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_extu_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_extu_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_mov_tl(cpu_cc_src, cpu_T[0]);
+                }
+#endif
                 s->cc_op = CC_OP_MULL;
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_op_mulq_EAX_T0();
+                tcg_gen_helper_0_1(helper_mulq_EAX_T0, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
@@ -3822,21 +3865,68 @@
         case 5: /* imul */
             switch(ot) {
             case OT_BYTE:
-                gen_op_imulb_AL_T0();
+                gen_op_mov_TN_reg(OT_BYTE, 1, R_EAX);
+                tcg_gen_ext8s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext8s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
                 s->cc_op = CC_OP_MULB;
                 break;
             case OT_WORD:
-                gen_op_imulw_AX_T0();
+                gen_op_mov_TN_reg(OT_WORD, 1, R_EAX);
+                tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+                /* XXX: use 32 bit mul which could be faster */
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_WORD, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 16);
+                gen_op_mov_reg_T0(OT_WORD, R_EDX);
                 s->cc_op = CC_OP_MULW;
                 break;
             default:
             case OT_LONG:
-                gen_op_imull_EAX_T0();
+#ifdef TARGET_X86_64
+                gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                tcg_gen_shri_tl(cpu_T[0], cpu_T[0], 32);
+                gen_op_mov_reg_T0(OT_LONG, R_EDX);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    gen_op_mov_TN_reg(OT_LONG, 1, R_EAX);
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EAX);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    gen_op_mov_reg_T0(OT_LONG, R_EDX);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+                }
+#endif
                 s->cc_op = CC_OP_MULL;
                 break;
 #ifdef TARGET_X86_64
             case OT_QUAD:
-                gen_op_imulq_EAX_T0();
+                tcg_gen_helper_0_1(helper_imulq_EAX_T0, cpu_T[0]);
                 s->cc_op = CC_OP_MULQ;
                 break;
 #endif
@@ -4104,13 +4194,41 @@
 
 #ifdef TARGET_X86_64
         if (ot == OT_QUAD) {
-            gen_op_imulq_T0_T1();
+            tcg_gen_helper_1_2(helper_imulq_T0_T1, cpu_T[0], cpu_T[0], cpu_T[1]);
         } else
 #endif
         if (ot == OT_LONG) {
-            gen_op_imull_T0_T1();
+#ifdef TARGET_X86_64
+                tcg_gen_ext32s_tl(cpu_T[0], cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_T[1], cpu_T[1]);
+                tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+                tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                tcg_gen_ext32s_tl(cpu_tmp0, cpu_T[0]);
+                tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
+#else
+                {
+                    TCGv t0, t1;
+                    t0 = tcg_temp_new(TCG_TYPE_I64);
+                    t1 = tcg_temp_new(TCG_TYPE_I64);
+                    tcg_gen_ext_i32_i64(t0, cpu_T[0]);
+                    tcg_gen_ext_i32_i64(t1, cpu_T[1]);
+                    tcg_gen_mul_i64(t0, t0, t1);
+                    tcg_gen_trunc_i64_i32(cpu_T[0], t0);
+                    tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+                    tcg_gen_sari_tl(cpu_tmp0, cpu_T[0], 31);
+                    tcg_gen_shri_i64(t0, t0, 32);
+                    tcg_gen_trunc_i64_i32(cpu_T[1], t0);
+                    tcg_gen_sub_tl(cpu_cc_src, cpu_T[1], cpu_tmp0);
+                }
+#endif
         } else {
-            gen_op_imulw_T0_T1();
+            tcg_gen_ext16s_tl(cpu_T[0], cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_T[1], cpu_T[1]);
+            /* XXX: use 32 bit mul which could be faster */
+            tcg_gen_mul_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
+            tcg_gen_mov_tl(cpu_cc_dst, cpu_T[0]);
+            tcg_gen_ext16s_tl(cpu_tmp0, cpu_T[0]);
+            tcg_gen_sub_tl(cpu_cc_src, cpu_T[0], cpu_tmp0);
         }
         gen_op_mov_reg_T0(ot, reg);
         s->cc_op = CC_OP_MULB + ot;






reply via email to

[Prev in Thread] Current Thread [Next in Thread]