qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH for-4.1 v2 12/13] tcg/ppc: Update vector support to


From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-4.1 v2 12/13] tcg/ppc: Update vector support to v2.07
Date: Sun, 17 Mar 2019 02:08:33 -0700

This includes single-word loads and stores, lots of double-word
arithmetic, and a few extra logical operations.

Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/ppc/tcg-target.h     |   3 +-
 tcg/ppc/tcg-target.inc.c | 155 +++++++++++++++++++++++++++++++++------
 2 files changed, 134 insertions(+), 24 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5797ad35d5..4bbb33df7e 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -60,6 +60,7 @@ typedef enum {
 
 extern bool have_isa_altivec;
 extern bool have_isa_2_06;
+extern bool have_isa_2_07_vsx;
 extern bool have_isa_3_00;
 
 /* optional instructions automatically implemented */
@@ -142,7 +143,7 @@ extern bool have_isa_3_00;
 #endif
 
 #define TCG_TARGET_HAS_andc_vec         1
-#define TCG_TARGET_HAS_orc_vec          0
+#define TCG_TARGET_HAS_orc_vec          have_isa_2_07_vsx
 #define TCG_TARGET_HAS_not_vec          1
 #define TCG_TARGET_HAS_neg_vec          0
 #define TCG_TARGET_HAS_shi_vec          0
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index e6f3dca394..b8df9b55cf 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -67,6 +67,7 @@ static tcg_insn_unit *tb_ret_addr;
 bool have_isa_altivec;
 bool have_isa_2_06;
 bool have_isa_2_06_vsx;
+bool have_isa_2_07_vsx;
 bool have_isa_3_00;
 
 #define HAVE_ISA_2_06  have_isa_2_06
@@ -473,10 +474,12 @@ static int tcg_target_const_match(tcg_target_long val, 
TCGType type,
 #define LVEWX      XO31(71)
 #define LXSDX      XO31(588)      /* v2.06 */
 #define LXVDSX     XO31(332)      /* v2.06 */
+#define LXSIWZX    XO31(12)       /* v2.07 */
 
 #define STVX       XO31(231)
 #define STVEWX     XO31(199)
 #define STXSDX     XO31(716)      /* v2.06 */
+#define STXSIWX    XO31(140)      /* v2.07 */
 
 #define VADDSBS    VX4(768)
 #define VADDUBS    VX4(512)
@@ -487,6 +490,7 @@ static int tcg_target_const_match(tcg_target_long val, 
TCGType type,
 #define VADDSWS    VX4(896)
 #define VADDUWS    VX4(640)
 #define VADDUWM    VX4(128)
+#define VADDUDM    VX4(192)       /* v2.07 */
 
 #define VSUBSBS    VX4(1792)
 #define VSUBUBS    VX4(1536)
@@ -497,47 +501,62 @@ static int tcg_target_const_match(tcg_target_long val, 
TCGType type,
 #define VSUBSWS    VX4(1920)
 #define VSUBUWS    VX4(1664)
 #define VSUBUWM    VX4(1152)
+#define VSUBUDM    VX4(1216)      /* v2.07 */
 
 #define VMAXSB     VX4(258)
 #define VMAXSH     VX4(322)
 #define VMAXSW     VX4(386)
+#define VMAXSD     VX4(450)       /* v2.07 */
 #define VMAXUB     VX4(2)
 #define VMAXUH     VX4(66)
 #define VMAXUW     VX4(130)
+#define VMAXUD     VX4(194)       /* v2.07 */
 #define VMINSB     VX4(770)
 #define VMINSH     VX4(834)
 #define VMINSW     VX4(898)
+#define VMINSD     VX4(962)       /* v2.07 */
 #define VMINUB     VX4(514)
 #define VMINUH     VX4(578)
 #define VMINUW     VX4(642)
+#define VMINUD     VX4(706)       /* v2.07 */
 
 #define VCMPEQUB   VX4(6)
 #define VCMPEQUH   VX4(70)
 #define VCMPEQUW   VX4(134)
+#define VCMPEQUD   VX4(199)       /* v2.07 */
 #define VCMPGTSB   VX4(774)
 #define VCMPGTSH   VX4(838)
 #define VCMPGTSW   VX4(902)
+#define VCMPGTSD   VX4(967)       /* v2.07 */
 #define VCMPGTUB   VX4(518)
 #define VCMPGTUH   VX4(582)
 #define VCMPGTUW   VX4(646)
+#define VCMPGTUD   VX4(711)       /* v2.07 */
 
 #define VSLB       VX4(260)
 #define VSLH       VX4(324)
 #define VSLW       VX4(388)
+#define VSLD       VX4(1476)      /* v2.07 */
 #define VSRB       VX4(516)
 #define VSRH       VX4(580)
 #define VSRW       VX4(644)
+#define VSRD       VX4(1732)      /* v2.07 */
 #define VSRAB      VX4(772)
 #define VSRAH      VX4(836)
 #define VSRAW      VX4(900)
+#define VSRAD      VX4(964)       /* v2.07 */
 #define VRLB       VX4(4)
 #define VRLH       VX4(68)
 #define VRLW       VX4(132)
+#define VRLD       VX4(196)       /* v2.07 */
 
 #define VMULEUB    VX4(520)
 #define VMULEUH    VX4(584)
+#define VMULEUW    VX4(648)       /* v2.07 */
 #define VMULOUB    VX4(8)
 #define VMULOUH    VX4(72)
+#define VMULOUW    VX4(136)       /* v2.07 */
+#define VMULUWM    VX4(137)       /* v2.07 */
 #define VMSUMUHM   VX4(38)
 
 #define VMRGHB     VX4(12)
@@ -555,6 +574,9 @@ static int tcg_target_const_match(tcg_target_long val, 
TCGType type,
 #define VNOR       VX4(1284)
 #define VOR        VX4(1156)
 #define VXOR       VX4(1220)
+#define VEQV       VX4(1668)      /* v2.07 */
+#define VNAND      VX4(1412)      /* v2.07 */
+#define VORC       VX4(1348)      /* v2.07 */
 
 #define VSPLTB     VX4(524)
 #define VSPLTH     VX4(588)
@@ -568,6 +590,11 @@ static int tcg_target_const_match(tcg_target_long val, 
TCGType type,
 
 #define XXPERMDI   (OPCD(60) | (10 << 3))   /* 2.06 */
 
+#define MFVSRD     XO31(51)       /* v2.07 */
+#define MFVSRWZ    XO31(115)      /* v2.07 */
+#define MTVSRD     XO31(179)      /* v2.07 */
+#define MTVSRWZ    XO31(179)      /* v2.07 */
+
 #define RT(r) ((r)<<21)
 #define RS(r) ((r)<<21)
 #define RA(r) ((r)<<16)
@@ -691,7 +718,15 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, 
TCGReg ret, TCGReg arg)
         if (ret < 32 && arg < 32) {
             tcg_out32(s, OR | SAB(arg, ret, arg));
             break;
-        } else if (ret < 32 || arg < 32) {
+        } else if (ret < 32 && have_isa_2_07_vsx) {
+            tcg_out32(s, (type == TCG_TYPE_I32 ? MFVSRWZ : MFVSRD)
+                      | VRT(arg) | RA(ret) | 1);
+            break;
+        } else if (arg < 32 && have_isa_2_07_vsx) {
+            tcg_out32(s, (type == TCG_TYPE_I32 ? MTVSRWZ : MTVSRD)
+                      | VRT(ret) | RA(arg) | 1);
+            break;
+        } else {
             /* Altivec does not support vector/integer moves.  */
             return false;
         }
@@ -1113,6 +1148,10 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg ret,
             tcg_out_mem_long(s, LWZ, LWZX, ret, base, offset);
             break;
         }
+        if (have_isa_2_07_vsx) {
+            tcg_out_mem_long(s, 0, LXSIWZX | 1, ret & 31, base, offset);
+            break;
+        }
         assert((offset & 3) == 0);
         tcg_out_mem_long(s, 0, LVEWX, ret & 31, base, offset);
         shift = (offset - 4) & 0xc;
@@ -1159,6 +1198,10 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
             tcg_out_mem_long(s, STW, STWX, arg, base, offset);
             break;
         }
+        if (have_isa_2_07_vsx) {
+            tcg_out_mem_long(s, 0, STXSIWX | 1, arg & 31, base, offset);
+            break;
+        }
         assert((offset & 3) == 0);
         shift = (offset - 4) & 0xc;
         if (shift) {
@@ -2891,26 +2934,39 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, 
unsigned vece)
     case INDEX_op_not_vec:
     case INDEX_op_dupm_vec:
         return 1;
+    case INDEX_op_orc_vec:
+        return have_isa_2_07_vsx;
     case INDEX_op_add_vec:
     case INDEX_op_sub_vec:
     case INDEX_op_smax_vec:
     case INDEX_op_smin_vec:
     case INDEX_op_umax_vec:
     case INDEX_op_umin_vec:
+    case INDEX_op_shlv_vec:
+    case INDEX_op_shrv_vec:
+    case INDEX_op_sarv_vec:
+        return vece <= MO_32 || have_isa_2_07_vsx;
     case INDEX_op_ssadd_vec:
     case INDEX_op_sssub_vec:
     case INDEX_op_usadd_vec:
     case INDEX_op_ussub_vec:
-    case INDEX_op_shlv_vec:
-    case INDEX_op_shrv_vec:
-    case INDEX_op_sarv_vec:
         return vece <= MO_32;
     case INDEX_op_cmp_vec:
-    case INDEX_op_mul_vec:
     case INDEX_op_shli_vec:
     case INDEX_op_shri_vec:
     case INDEX_op_sari_vec:
-        return vece <= MO_32 ? -1 : 0;
+        return vece <= MO_32 || have_isa_2_07_vsx ? -1 : 0;
+    case INDEX_op_mul_vec:
+        switch (vece) {
+        case MO_8:
+        case MO_16:
+            return -1;
+        case MO_32:
+            return have_isa_2_07_vsx ? 1 : -1;
+        case MO_64:
+            return have_isa_2_07_vsx ? -1 : 0;
+        }
+        return 0;
     default:
         return 0;
     }
@@ -2974,28 +3030,28 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                            const TCGArg *args, const int *const_args)
 {
     static const uint32_t
-        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, 0 },
-        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, 0 },
-        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, 0 },
-        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, 0 },
-        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, 0 },
+        add_op[4] = { VADDUBM, VADDUHM, VADDUWM, VADDUDM },
+        sub_op[4] = { VSUBUBM, VSUBUHM, VSUBUWM, VSUBUDM },
+        eq_op[4]  = { VCMPEQUB, VCMPEQUH, VCMPEQUW, VCMPEQUD },
+        gts_op[4] = { VCMPGTSB, VCMPGTSH, VCMPGTSW, VCMPGTSD },
+        gtu_op[4] = { VCMPGTUB, VCMPGTUH, VCMPGTUW, VCMPGTUD },
         ssadd_op[4] = { VADDSBS, VADDSHS, VADDSWS, 0 },
         usadd_op[4] = { VADDUBS, VADDUHS, VADDUWS, 0 },
         sssub_op[4] = { VSUBSBS, VSUBSHS, VSUBSWS, 0 },
         ussub_op[4] = { VSUBUBS, VSUBUHS, VSUBUWS, 0 },
-        umin_op[4] = { VMINUB, VMINUH, VMINUW, 0 }, 
-        smin_op[4] = { VMINSB, VMINSH, VMINSW, 0 }, 
-        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, 0 }, 
-        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, 0 },
-        shlv_op[4] = { VSLB, VSLH, VSLW, 0 },
-        shrv_op[4] = { VSRB, VSRH, VSRW, 0 },
-        sarv_op[4] = { VSRAB, VSRAH, VSRAW, 0 },
+        umin_op[4] = { VMINUB, VMINUH, VMINUW, VMINUD }, 
+        smin_op[4] = { VMINSB, VMINSH, VMINSW, VMINSD }, 
+        umax_op[4] = { VMAXUB, VMAXUH, VMAXUW, VMAXUD }, 
+        smax_op[4] = { VMAXSB, VMAXSH, VMAXSW, VMAXSD },
+        shlv_op[4] = { VSLB, VSLH, VSLW, VSLD },
+        shrv_op[4] = { VSRB, VSRH, VSRW, VSRD },
+        sarv_op[4] = { VSRAB, VSRAH, VSRAW, VSRAD },
         mrgh_op[4] = { VMRGHB, VMRGHH, VMRGHW, 0 },
         mrgl_op[4] = { VMRGLB, VMRGLH, VMRGLW, 0 },
-        muleu_op[4] = { VMULEUB, VMULEUH, 0, 0 },
-        mulou_op[4] = { VMULOUB, VMULOUH, 0, 0 },
+        muleu_op[4] = { VMULEUB, VMULEUH, VMULEUW, 0 },
+        mulou_op[4] = { VMULOUB, VMULOUH, VMULOUW, 0 },
         pkum_op[4] = { VPKUHUM, VPKUWUM, 0, 0 },
-        rotl_op[4] = { VRLB, VRLH, VRLW, 0 };
+        rotl_op[4] = { VRLB, VRLH, VRLW, VRLD };
 
     TCGType type = vecl + TCG_TYPE_V64;
     TCGArg a0 = args[0], a1 = args[1], a2 = args[2];
@@ -3017,6 +3073,10 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
     case INDEX_op_sub_vec:
         insn = sub_op[vece];
         break;
+    case INDEX_op_mul_vec:
+        tcg_debug_assert(vece == MO_32 && have_isa_2_07_vsx);
+        insn = VMULUWM;
+        break;
     case INDEX_op_ssadd_vec:
         insn = ssadd_op[vece];
         break;
@@ -3066,8 +3126,28 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
         insn = VNOR;
         a2 = a1;
         break;
+    case INDEX_op_orc_vec:
+        insn = VORC;
+        break;
 
     case INDEX_op_dup_vec:
+        if (a1 < 32) {
+            bool ok;
+            switch (vece) {
+            case MO_64:
+                ok = tcg_out_mov(s, TCG_TYPE_I64, a0, a1);
+                break;
+            case MO_32:
+            case MO_16:
+            case MO_8:
+                ok = tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
+                break;
+            default:
+                g_assert_not_reached();
+            }
+            tcg_debug_assert(ok);
+            a1 = a0;
+        }
         /* Recall we use VSX integer loads, so the integer is right
            justified within the left (zero-index) double-word.  */
         switch (vece) {
@@ -3165,7 +3245,7 @@ static void expand_vec_cmp(TCGType type, unsigned vece, 
TCGv_vec v0,
 {
     bool need_swap = false, need_inv = false;
 
-    tcg_debug_assert(vece <= MO_32);
+    tcg_debug_assert(vece <= MO_32 || have_isa_2_07_vsx);
 
     switch (cond) {
     case TCG_COND_EQ:
@@ -3229,6 +3309,7 @@ static void expand_vec_mul(TCGType type, unsigned vece, 
TCGv_vec v0,
        break;
 
     case MO_32:
+        tcg_debug_assert(!have_isa_2_07_vsx);
         t3 = tcg_temp_new_vec(type);
         t4 = tcg_temp_new_vec(type);
         tcg_gen_dupi_vec(MO_8, t4, -16);
@@ -3246,6 +3327,27 @@ static void expand_vec_mul(TCGType type, unsigned vece, 
TCGv_vec v0,
         tcg_temp_free_vec(t4);
         break;
 
+    case MO_64:
+        tcg_debug_assert(have_isa_2_07_vsx);
+        t3 = tcg_temp_new_vec(type);
+        t4 = tcg_temp_new_vec(type);
+        tcg_gen_dupi_vec(MO_8, t4, 32);
+        vec_gen_3(INDEX_op_ppc_rotl_vec, type, MO_64, tcgv_vec_arg(t2),
+                  tcgv_vec_arg(v2), tcgv_vec_arg(t4));
+        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_32, tcgv_vec_arg(t3),
+                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+        vec_gen_3(INDEX_op_ppc_muleu_vec, type, MO_32, tcgv_vec_arg(t2),
+                  tcgv_vec_arg(v1), tcgv_vec_arg(t2));
+        vec_gen_3(INDEX_op_ppc_mulou_vec, type, MO_32, tcgv_vec_arg(t1),
+                  tcgv_vec_arg(v1), tcgv_vec_arg(v2));
+        tcg_gen_add_vec(MO_64, t2, t2, t3);
+        vec_gen_3(INDEX_op_shlv_vec, type, MO_32, tcgv_vec_arg(t2),
+                  tcgv_vec_arg(t2), tcgv_vec_arg(t4));
+        tcg_gen_add_vec(MO_64, v0, t1, t2);
+        tcg_temp_free_vec(t3);
+        tcg_temp_free_vec(t4);
+        break;
+
     default:
         g_assert_not_reached();
     }
@@ -3327,6 +3429,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
     static const TCGTargetOpDef sub2
         = { .args_ct_str = { "r", "r", "rI", "rZM", "r", "r" } };
     static const TCGTargetOpDef v_r = { .args_ct_str = { "v", "r" } };
+    static const TCGTargetOpDef v_vr = { .args_ct_str = { "v", "vr" } };
     static const TCGTargetOpDef v_v = { .args_ct_str = { "v", "v" } };
     static const TCGTargetOpDef v_v_v = { .args_ct_str = { "v", "v", "v" } };
     static const TCGTargetOpDef v_v_v_v
@@ -3494,8 +3597,9 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode 
op)
     case INDEX_op_ppc_rotl_vec:
         return &v_v_v;
     case INDEX_op_not_vec:
-    case INDEX_op_dup_vec:
         return &v_v;
+    case INDEX_op_dup_vec:
+        return have_isa_2_07_vsx ? &v_vr : &v_v;
     case INDEX_op_ld_vec:
     case INDEX_op_st_vec:
     case INDEX_op_dupm_vec:
@@ -3522,6 +3626,11 @@ static void tcg_target_init(TCGContext *s)
             have_isa_2_06_vsx = true;
         }
     }
+    if (hwcap2 & PPC_FEATURE2_ARCH_2_07) {
+        if (hwcap & PPC_FEATURE_HAS_VSX) {
+            have_isa_2_07_vsx = true;
+        }
+    }
 #ifdef PPC_FEATURE2_ARCH_3_00
     if (hwcap2 & PPC_FEATURE2_ARCH_3_00) {
         have_isa_3_00 = true;
-- 
2.17.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]