qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128


From: Paul Brook
Subject: [PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128
Date: Sun, 24 Apr 2022 23:01:58 +0100

128-bit vinsert/vextract instructions. The integer and floating-point variants
have the same semantics.

This is where we encounter an instruction encoded with VEX.L == 1 and
a 128 bit (xmm) destination operand.

Signed-off-by: Paul Brook <paul@nowt.org>
---
 target/i386/tcg/translate.c | 78 +++++++++++++++++++++++++++++++++++++
 1 file changed, 78 insertions(+)

diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 5a11d3c083..4072fa28d3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2814,6 +2814,24 @@ static inline void gen_op_movo_ymmh(DisasContext *s, int 
d_offset, int s_offset)
     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, 
ZMM_Q(3)));
 }
 
+static inline void gen_op_movo_ymm_l2h(DisasContext *s,
+                                       int d_offset, int s_offset)
+{
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, 
ZMM_Q(0)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, 
ZMM_Q(2)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, 
ZMM_Q(1)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, 
ZMM_Q(3)));
+}
+
+static inline void gen_op_movo_ymm_h2l(DisasContext *s,
+                                       int d_offset, int s_offset)
+{
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, 
ZMM_Q(2)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, 
ZMM_Q(0)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, 
ZMM_Q(3)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, 
ZMM_Q(1)));
+}
+
 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
 {
     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
@@ -3353,9 +3371,13 @@ static const struct SSEOpHelper_table7 
sse_op_table7[256] = {
     [0x15] = SPECIAL_OP(SSE41), /* pextrw */
     [0x16] = SPECIAL_OP(SSE41), /* pextrd/pextrq */
     [0x17] = SPECIAL_OP(SSE41), /* extractps */
+    [0x18] = SPECIAL_OP(AVX), /* vinsertf128 */
+    [0x19] = SPECIAL_OP(AVX), /* vextractf128 */
     [0x20] = SPECIAL_OP(SSE41), /* pinsrb */
     [0x21] = SPECIAL_OP(SSE41), /* insertps */
     [0x22] = SPECIAL_OP(SSE41), /* pinsrd/pinsrq */
+    [0x38] = SPECIAL_OP(AVX), /* vinserti128 */
+    [0x39] = SPECIAL_OP(AVX), /* vextracti128 */
     [0x40] = BINARY_OP(dpps, SSE41, 0),
 #define gen_helper_dppd_ymm NULL
     [0x41] = BINARY_OP(dppd, SSE41, 0),
@@ -5145,6 +5167,62 @@ static void gen_sse(CPUX86State *env, DisasContext *s, 
int b,
                     }
                     gen_clear_ymmh(s, reg);
                     break;
+                case 0x38: /* vinserti128 */
+                    CHECK_AVX2_256(s);
+                    /* fall through */
+                case 0x18: /* vinsertf128 */
+                    CHECK_AVX(s);
+                    if ((s->prefix & PREFIX_VEX) == 0 || s->vex_l == 0) {
+                        goto illegal_op;
+                    }
+                    if (mod == 3) {
+                        if (val & 1) {
+                            gen_op_movo_ymm_l2h(s, ZMM_OFFSET(reg),
+                                                ZMM_OFFSET(rm));
+                        } else {
+                            gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm));
+                        }
+                    } else {
+                        if (val & 1) {
+                            gen_ldo_env_A0_ymmh(s, ZMM_OFFSET(reg));
+                        } else {
+                            gen_ldo_env_A0(s, ZMM_OFFSET(reg));
+                        }
+                    }
+                    if (reg != reg_v) {
+                        if (val & 1) {
+                            gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(reg_v));
+                        } else {
+                            gen_op_movo_ymmh(s, ZMM_OFFSET(reg),
+                                             ZMM_OFFSET(reg_v));
+                        }
+                    }
+                    break;
+                case 0x39: /* vextracti128 */
+                    CHECK_AVX2_256(s);
+                    /* fall through */
+                case 0x19: /* vextractf128 */
+                    CHECK_AVX_V0(s);
+                    if ((s->prefix & PREFIX_VEX) == 0 || s->vex_l == 0) {
+                        goto illegal_op;
+                    }
+                    if (mod == 3) {
+                        op1_offset = ZMM_OFFSET(rm);
+                        if (val & 1) {
+                            gen_op_movo_ymm_h2l(s, ZMM_OFFSET(rm),
+                                                ZMM_OFFSET(reg));
+                        } else {
+                            gen_op_movo(s, ZMM_OFFSET(rm), ZMM_OFFSET(reg));
+                        }
+                        gen_clear_ymmh(s, rm);
+                    } else{
+                        if (val & 1) {
+                            gen_sto_env_A0_ymmh(s, ZMM_OFFSET(reg));
+                        } else {
+                            gen_sto_env_A0(s, ZMM_OFFSET(reg));
+                        }
+                    }
+                    break;
                 }
                 return;
             }
-- 
2.36.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]