[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128
From: |
Paul Brook |
Subject: |
[PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128 |
Date: |
Sun, 24 Apr 2022 23:01:58 +0100 |
128-bit vinsert/vextract instructions. The integer and floating point variants
have the same semantics.
This is where we encounter an instruction encoded with VEX.L == 1 and
a 128 bit (xmm) destination operand.
Signed-off-by: Paul Brook <paul@nowt.org>
---
target/i386/tcg/translate.c | 78 +++++++++++++++++++++++++++++++++++++
1 file changed, 78 insertions(+)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 5a11d3c083..4072fa28d3 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -2814,6 +2814,24 @@ static inline void gen_op_movo_ymmh(DisasContext *s, int d_offset, int s_offset)
     tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(3)));
 }
+static inline void gen_op_movo_ymm_l2h(DisasContext *s,
+                                       int d_offset, int s_offset)
+{
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(2)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(3)));
+}
+
+static inline void gen_op_movo_ymm_h2l(DisasContext *s,
+                                       int d_offset, int s_offset)
+{
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(2)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0)));
+    tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(3)));
+    tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1)));
+}
+
 static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset)
 {
     tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset);
@@ -3353,9 +3371,13 @@ static const struct SSEOpHelper_table7 sse_op_table7[256] = {
     [0x15] = SPECIAL_OP(SSE41), /* pextrw */
     [0x16] = SPECIAL_OP(SSE41), /* pextrd/pextrq */
     [0x17] = SPECIAL_OP(SSE41), /* extractps */
+    [0x18] = SPECIAL_OP(AVX), /* vinsertf128 */
+    [0x19] = SPECIAL_OP(AVX), /* vextractf128 */
     [0x20] = SPECIAL_OP(SSE41), /* pinsrb */
     [0x21] = SPECIAL_OP(SSE41), /* insertps */
     [0x22] = SPECIAL_OP(SSE41), /* pinsrd/pinsrq */
+    [0x38] = SPECIAL_OP(AVX), /* vinserti128 */
+    [0x39] = SPECIAL_OP(AVX), /* vextracti128 */
     [0x40] = BINARY_OP(dpps, SSE41, 0),
 #define gen_helper_dppd_ymm NULL
     [0x41] = BINARY_OP(dppd, SSE41, 0),
@@ -5145,6 +5167,62 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
                 }
                 gen_clear_ymmh(s, reg);
                 break;
+            case 0x38: /* vinserti128 */
+                CHECK_AVX2_256(s);
+                /* fall through */
+            case 0x18: /* vinsertf128 */
+                CHECK_AVX(s);
+                if ((s->prefix & PREFIX_VEX) == 0 || s->vex_l == 0) {
+                    goto illegal_op;
+                }
+                if (mod == 3) {
+                    if (val & 1) {
+                        gen_op_movo_ymm_l2h(s, ZMM_OFFSET(reg),
+                                            ZMM_OFFSET(rm));
+                    } else {
+                        gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(rm));
+                    }
+                } else {
+                    if (val & 1) {
+                        gen_ldo_env_A0_ymmh(s, ZMM_OFFSET(reg));
+                    } else {
+                        gen_ldo_env_A0(s, ZMM_OFFSET(reg));
+                    }
+                }
+                if (reg != reg_v) {
+                    if (val & 1) {
+                        gen_op_movo(s, ZMM_OFFSET(reg), ZMM_OFFSET(reg_v));
+                    } else {
+                        gen_op_movo_ymmh(s, ZMM_OFFSET(reg),
+                                         ZMM_OFFSET(reg_v));
+                    }
+                }
+                break;
+            case 0x39: /* vextracti128 */
+                CHECK_AVX2_256(s);
+                /* fall through */
+            case 0x19: /* vextractf128 */
+                CHECK_AVX_V0(s);
+                if ((s->prefix & PREFIX_VEX) == 0 || s->vex_l == 0) {
+                    goto illegal_op;
+                }
+                if (mod == 3) {
+                    op1_offset = ZMM_OFFSET(rm);
+                    if (val & 1) {
+                        gen_op_movo_ymm_h2l(s, ZMM_OFFSET(rm),
+                                            ZMM_OFFSET(reg));
+                    } else {
+                        gen_op_movo(s, ZMM_OFFSET(rm), ZMM_OFFSET(reg));
+                    }
+                    gen_clear_ymmh(s, rm);
+                } else{
+                    if (val & 1) {
+                        gen_sto_env_A0_ymmh(s, ZMM_OFFSET(reg));
+                    } else {
+                        gen_sto_env_A0(s, ZMM_OFFSET(reg));
+                    }
+                }
+                break;
             }
             return;
         }
--
2.36.0
- [PATCH v2 20/42] i386: AVX pclmulqdq, (continued)
- [PATCH v2 20/42] i386: AVX pclmulqdq, Paul Brook, 2022/04/24
- [PATCH v2 40/42] Enable all x86-64 cpu features in user mode, Paul Brook, 2022/04/24
- [PATCH v2 34/42] i386: Implement VGATHER, Paul Brook, 2022/04/24
- [PATCH v2 18/42] i386: Misc AVX helper prep, Paul Brook, 2022/04/24
- [PATCH v2 23/42] i386: AVX comparison helpers, Paul Brook, 2022/04/24
- [PATCH v2 12/42] i386: Misc integer AVX helper prep, Paul Brook, 2022/04/24
- [PATCH v2 21/42] i386: AVX+AES helpers, Paul Brook, 2022/04/24
- [PATCH v2 42/42] i386: Add sha512-avx test, Paul Brook, 2022/04/24
- [PATCH v2 31/42] i386: Implement AVX variable shifts, Paul Brook, 2022/04/24
- [PATCH v2 26/42] i386: Utility function for 128 bit AVX, Paul Brook, 2022/04/24
- [PATCH v2 36/42] i386: Implement VINSERT128/VEXTRACT128,
Paul Brook <=
- [PATCH v2 27/42] i386: Translate 256 bit AVX instructions, Paul Brook, 2022/04/24
- [PATCH v2 19/42] i386: Rewrite blendv helpers, Paul Brook, 2022/04/24
- [PATCH v2 35/42] i386: Implement VPERM, Paul Brook, 2022/04/24
- [PATCH v2 32/42] i386: Implement VTEST, Paul Brook, 2022/04/24
- [PATCH v2 15/42] i386: Floating point atithmetic helper AVX prep, Paul Brook, 2022/04/24