[PATCH v2 33/42] i386: Implement VMASKMOV

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 33/42] i386: Implement VMASKMOV

From:	Paul Brook
Subject:	[PATCH v2 33/42] i386: Implement VMASKMOV
Date:	Sun, 24 Apr 2022 23:01:55 +0100

Decoding these is a bit messy, but at least the integer and float variants
have the same semantics once decoded.

We don't try to be clever with the load forms; instead we load the whole
vector and then zero the elements whose mask bit is clear.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/ops_sse.h        | 48 ++++++++++++++++++++++++++++++++++++
 target/i386/ops_sse_header.h |  4 +++
 target/i386/tcg/translate.c  | 34 +++++++++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index edf14a25d7..ffcba3d02c 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3240,6 +3240,54 @@ void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg 
*d, Reg *s)
     CC_SRC = ((zf >> 63) ? 0 : CC_Z) | ((cf >> 63) ? 0 : CC_C);
 }
 
+void glue(helper_vpmaskmovd_st, SUFFIX)(CPUX86State *env,
+                                        Reg *s, Reg *v, target_ulong a0)
+{
+    int i;
+
+    for (i = 0; i < (2 << SHIFT); i++) {
+        if (v->L(i) >> 31) {
+            cpu_stl_data_ra(env, a0 + i * 4, s->L(i), GETPC());
+        }
+    }
+}
+
+void glue(helper_vpmaskmovq_st, SUFFIX)(CPUX86State *env,
+                                        Reg *s, Reg *v, target_ulong a0)
+{
+    int i;
+
+    for (i = 0; i < (1 << SHIFT); i++) {
+        if (v->Q(i) >> 63) {
+            cpu_stq_data_ra(env, a0 + i * 8, s->Q(i), GETPC());
+        }
+    }
+}
+
+void glue(helper_vpmaskmovd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+    d->L(0) = (v->L(0) >> 31) ? s->L(0) : 0;
+    d->L(1) = (v->L(1) >> 31) ? s->L(1) : 0;
+    d->L(2) = (v->L(2) >> 31) ? s->L(2) : 0;
+    d->L(3) = (v->L(3) >> 31) ? s->L(3) : 0;
+#if SHIFT == 2
+    d->L(4) = (v->L(4) >> 31) ? s->L(4) : 0;
+    d->L(5) = (v->L(5) >> 31) ? s->L(5) : 0;
+    d->L(6) = (v->L(6) >> 31) ? s->L(6) : 0;
+    d->L(7) = (v->L(7) >> 31) ? s->L(7) : 0;
+#endif
+}
+
+void glue(helper_vpmaskmovq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s)
+{
+    d->Q(0) = (v->Q(0) >> 63) ? s->Q(0) : 0;
+    d->Q(1) = (v->Q(1) >> 63) ? s->Q(1) : 0;
+#if SHIFT == 2
+    d->Q(2) = (v->Q(2) >> 63) ? s->Q(2) : 0;
+    d->Q(3) = (v->Q(3) >> 63) ? s->Q(3) : 0;
+#endif
+}
+
 #if SHIFT == 2
 void glue(helper_vbroadcastdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
 {
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 8b93b8e6d6..a7a6bf6b10 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -429,6 +429,10 @@ DEF_HELPER_4(glue(vpsravq, SUFFIX), void, env, Reg, Reg, 
Reg)
 DEF_HELPER_4(glue(vpsllvq, SUFFIX), void, env, Reg, Reg, Reg)
 DEF_HELPER_3(glue(vtestps, SUFFIX), void, env, Reg, Reg)
 DEF_HELPER_3(glue(vtestpd, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(vpmaskmovd_st, SUFFIX), void, env, Reg, Reg, tl)
+DEF_HELPER_4(glue(vpmaskmovq_st, SUFFIX), void, env, Reg, Reg, tl)
+DEF_HELPER_4(glue(vpmaskmovd, SUFFIX), void, env, Reg, Reg, Reg)
+DEF_HELPER_4(glue(vpmaskmovq, SUFFIX), void, env, Reg, Reg, Reg)
 #if SHIFT == 2
 DEF_HELPER_3(glue(vbroadcastdq, SUFFIX), void, env, Reg, Reg)
 DEF_HELPER_1(vzeroall, void, env)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 2fbb7bfcad..e00195d301 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3277,6 +3277,10 @@ static const struct SSEOpHelper_table6 
sse_op_table6[256] = {
     [0x29] = BINARY_OP(pcmpeqq, SSE41, SSE_OPF_MMX),
     [0x2a] = SPECIAL_OP(SSE41), /* movntqda */
     [0x2b] = BINARY_OP(packusdw, SSE41, SSE_OPF_MMX),
+    [0x2c] = BINARY_OP(vpmaskmovd, AVX, 0), /* vmaskmovps */
+    [0x2d] = BINARY_OP(vpmaskmovq, AVX, 0), /* vmaskmovpd */
+    [0x2e] = SPECIAL_OP(AVX), /* vmaskmovps */
+    [0x2f] = SPECIAL_OP(AVX), /* vmaskmovpd */
     [0x30] = UNARY_OP(pmovzxbw, SSE41, SSE_OPF_MMX),
     [0x31] = UNARY_OP(pmovzxbd, SSE41, SSE_OPF_MMX),
     [0x32] = UNARY_OP(pmovzxbq, SSE41, SSE_OPF_MMX),
@@ -3308,6 +3312,9 @@ static const struct SSEOpHelper_table6 sse_op_table6[256] 
= {
     [0x78] = UNARY_OP(vbroadcastb, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
     /* vpbroadcastw */
     [0x79] = UNARY_OP(vbroadcastw, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+    /* vpmaskmovd, vpmaskmovq */
+    [0x8c] = BINARY_OP(vpmaskmovd, AVX, SSE_OPF_AVX2),
+    [0x8e] = SPECIAL_OP(AVX), /* vpmaskmovd, vpmaskmovq */
 #define gen_helper_aesimc_ymm NULL
     [0xdb] = UNARY_OP(aesimc, AES, 0),
     [0xdc] = BINARY_OP(aesenc, AES, 0),
@@ -3369,6 +3376,11 @@ static const SSEFunc_0_eppp sse_op_table8[3][2] = {
     SSE_OP(vpsravq),
     SSE_OP(vpsllvq),
 };
+
+static const SSEFunc_0_eppt sse_op_table9[2][2] = {
+    SSE_OP(vpmaskmovd_st),
+    SSE_OP(vpmaskmovq_st),
+};
 #undef SSE_OP
 
 /* VEX prefix not allowed */
@@ -4394,6 +4406,22 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
                             gen_clear_ymmh(s, reg);
                         }
                         return;
+                    case 0x2e: /* maskmovps */
+                        b1 = 0;
+                        goto vpmaskmov;
+                    case 0x2f: /* maskmovpd */
+                        b1 = 1;
+                        goto vpmaskmov;
+                    case 0x8e: /* vpmaskmovd, vpmaskmovq */
+                        CHECK_AVX2(s);
+                        b1 = REX_W(s);
+                    vpmaskmov:
+                        tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset);
+                        v_offset = ZMM_OFFSET(reg_v);
+                        tcg_gen_addi_ptr(s->ptr2, cpu_env, v_offset);
+                        sse_op_table9[b1][s->vex_l](cpu_env,
+                                s->ptr0, s->ptr2, s->A0);
+                        return;
                     default:
                         size = 128;
                     }
@@ -4456,6 +4484,12 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
                         if (REX_W(s)) {
                             if (b >= 0x45 && b <= 0x47) {
                                 fn = sse_op_table8[b - 0x45][b1 - 1];
+                            } else if (b == 0x8c) {
+                                if (s->vex_l) {
+                                    fn = gen_helper_vpmaskmovq_ymm;
+                                } else {
+                                    fn = gen_helper_vpmaskmovq_xmm;
+                                }
                             }
                         }
                         fn(cpu_env, s->ptr0, s->ptr2, s->ptr1);
-- 
2.36.0

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v2 02/42] i386: DPPS rounding fix, (continued)
- [PATCH v2 02/42] i386: DPPS rounding fix, Paul Brook, 2022/04/24
- [PATCH v2 09/42] i386: Helper macro for 256 bit AVX helpers, Paul Brook, 2022/04/24
- [PATCH v2 07/42] Enforce VEX encoding restrictions, Paul Brook, 2022/04/24
- [PATCH v2 08/42] i386: Add ZMM_OFFSET macro, Paul Brook, 2022/04/24
- [PATCH v2 04/42] i386: Rework sse_op_table1, Paul Brook, 2022/04/24
- [PATCH v2 05/42] i386: Rework sse_op_table6/7, Paul Brook, 2022/04/24
- [PATCH v2 03/42] Add AVX_EN hflag, Paul Brook, 2022/04/24
- [PATCH v2 10/42] i386: Rewrite vector shift helper, Paul Brook, 2022/04/24
- [PATCH v2 17/42] i386: Destructive FP helpers for AVX, Paul Brook, 2022/04/24
- [PATCH v2 30/42] i386: Implement VPERMIL, Paul Brook, 2022/04/24
- [PATCH v2 33/42] i386: Implement VMASKMOV, Paul Brook <=
- [PATCH v2 29/42] i386: Implement VBROADCAST, Paul Brook, 2022/04/24
- [PATCH v2 41/42] AVX tests, Paul Brook, 2022/04/24
- [PATCH v2 16/42] i386: Dot product AVX helper prep, Paul Brook, 2022/04/24
- [PATCH v2 37/42] i386: Implement VBLENDV, Paul Brook, 2022/04/24
- [PATCH v2 39/42] i386: Enable AVX cpuid bits when using TCG, Paul Brook, 2022/04/24
- [PATCH v2 25/42] i386: VEX.V encodings (3 operand), Paul Brook, 2022/04/24
- [PATCH v2 11/42] i386: Rewrite simple integer vector helpers, Paul Brook, 2022/04/24
- [PATCH v2 14/42] i386: Add size suffix to vector FP helpers, Paul Brook, 2022/04/24
- [PATCH v2 38/42] i386: Implement VPBLENDD, Paul Brook, 2022/04/24
- [PATCH v2 24/42] i386: Move 3DNOW decoder, Paul Brook, 2022/04/24

Prev by Date: [PATCH v2 30/42] i386: Implement VPERMIL
Next by Date: [PATCH v2 29/42] i386: Implement VBROADCAST
Previous by thread: [PATCH v2 30/42] i386: Implement VPERMIL
Next by thread: [PATCH v2 29/42] i386: Implement VBROADCAST
Index(es):
- Date
- Thread