[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 29/42] i386: Implement VBROADCAST
From: |
Paul Brook |
Subject: |
[PATCH v2 29/42] i386: Implement VBROADCAST |
Date: |
Sun, 24 Apr 2022 23:01:51 +0100 |
The catch here is that these are whole vector operations (not independent 128
bit lanes). We abuse the SSE_OPF_SCALAR flag to select the memory operand
width appropriately.
Signed-off-by: Paul Brook <paul@nowt.org>
---
target/i386/ops_sse.h | 51 ++++++++++++++++++++++++++++++++++++
target/i386/ops_sse_header.h | 8 ++++++
target/i386/tcg/translate.c | 42 ++++++++++++++++++++++++++++-
3 files changed, 100 insertions(+), 1 deletion(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index a1f50f0c8b..4115c9a257 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -3071,7 +3071,57 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State
*env, Reg *d, Reg *s,
#endif
#endif
+#if SHIFT >= 1
+void glue(helper_vbroadcastb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Latch byte 0 of s, then store it to all 16 * SHIFT bytes of d. */
+    const uint8_t byte = s->B(0);
+    int lane = 16 * SHIFT;
+    while (lane-- > 0) {
+        d->B(lane) = byte;
+    }
+}
+
+void glue(helper_vbroadcastw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Latch word 0 of s, then store it to all 8 * SHIFT words of d. */
+    const uint16_t word = s->W(0);
+    int lane = 8 * SHIFT;
+    while (lane-- > 0) {
+        d->W(lane) = word;
+    }
+}
+
+void glue(helper_vbroadcastl, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Copy dword 0 of s to all of d: 4 dwords per 128 bits of vector. */
+    uint32_t val = s->L(0);
+    int i;
+    for (i = 0; i < 4 * SHIFT; i++) {
+        d->L(i) = val;
+    }
+}
+
+void glue(helper_vbroadcastq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Latch quadword 0 of s, then store it to every quadword of d. */
+    const uint64_t qword = s->Q(0);
+    d->Q(0) = d->Q(1) = qword;
#if SHIFT == 2
+    /* Only the SHIFT == 2 build fills quadwords 2 and 3 as well. */
+    d->Q(2) = d->Q(3) = qword;
+#endif
+}
+
+#if SHIFT == 2
+void glue(helper_vbroadcastdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
+{
+    /* Copy quadwords 0 and 1 of s into quadword pairs (0, 1) and (2, 3) of d. */
+    const uint64_t lo = s->Q(0), hi = s->Q(1);
+    d->Q(0) = d->Q(2) = lo;
+    d->Q(1) = d->Q(3) = hi;
+}
+
void helper_vzeroall(CPUX86State *env)
{
int i;
@@ -3118,6 +3168,7 @@ void helper_vzeroupper_hi8(CPUX86State *env)
}
#endif
#endif
+#endif
#undef SSE_HELPER_S
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 48f0945917..51e02cd4fa 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -411,7 +411,14 @@ DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env,
Reg, Reg, i32)
DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32)
#endif
+/* AVX helpers */
+#if SHIFT >= 1
+DEF_HELPER_3(glue(vbroadcastb, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(vbroadcastw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(vbroadcastl, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_3(glue(vbroadcastq, SUFFIX), void, env, Reg, Reg)
#if SHIFT == 2
+DEF_HELPER_3(glue(vbroadcastdq, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_1(vzeroall, void, env)
DEF_HELPER_1(vzeroupper, void, env)
#ifdef TARGET_X86_64
@@ -419,6 +426,7 @@ DEF_HELPER_1(vzeroall_hi8, void, env)
DEF_HELPER_1(vzeroupper_hi8, void, env)
#endif
#endif
+#endif
#undef SHIFT
#undef Reg
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index ba70aeb039..59ab1dc562 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -3255,6 +3255,11 @@ static const struct SSEOpHelper_table6
sse_op_table6[256] = {
[0x14] = BLENDV_OP(blendvps, SSE41, 0),
[0x15] = BLENDV_OP(blendvpd, SSE41, 0),
[0x17] = CMP_OP(ptest, SSE41),
+ /* TODO: Some vbroadcast variants require AVX2 */
+ [0x18] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR), /* vbroadcastss */
+ [0x19] = UNARY_OP(vbroadcastq, AVX, SSE_OPF_SCALAR), /* vbroadcastsd */
+#define gen_helper_vbroadcastdq_xmm NULL
+ [0x1a] = UNARY_OP(vbroadcastdq, AVX, SSE_OPF_SCALAR), /* vbroadcastf128 */
[0x1c] = UNARY_OP_MMX(pabsb, SSSE3),
[0x1d] = UNARY_OP_MMX(pabsw, SSSE3),
[0x1e] = UNARY_OP_MMX(pabsd, SSSE3),
@@ -3286,6 +3291,16 @@ static const struct SSEOpHelper_table6
sse_op_table6[256] = {
[0x40] = BINARY_OP(pmulld, SSE41, SSE_OPF_MMX),
#define gen_helper_phminposuw_ymm NULL
[0x41] = UNARY_OP(phminposuw, SSE41, 0),
+ /* vpbroadcastd */
+ [0x58] = UNARY_OP(vbroadcastl, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+ /* vpbroadcastq */
+ [0x59] = UNARY_OP(vbroadcastq, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+ /* vbroadcasti128 */
+ [0x5a] = UNARY_OP(vbroadcastdq, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+ /* vpbroadcastb */
+ [0x78] = UNARY_OP(vbroadcastb, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
+ /* vpbroadcastw */
+ [0x79] = UNARY_OP(vbroadcastw, AVX, SSE_OPF_SCALAR | SSE_OPF_MMX),
#define gen_helper_aesimc_ymm NULL
[0xdb] = UNARY_OP(aesimc, AES, 0),
[0xdc] = BINARY_OP(aesenc, AES, 0),
@@ -4323,6 +4338,24 @@ static void gen_sse(CPUX86State *env, DisasContext *s,
int b,
op2_offset = offsetof(CPUX86State, xmm_t0);
gen_lea_modrm(env, s, modrm);
switch (b) {
+ case 0x78: /* vpbroadcastb */
+ size = 8;
+ break;
+ case 0x79: /* vpbroadcastw */
+ size = 16;
+ break;
+ case 0x18: /* vbroadcastss */
+ case 0x58: /* vpbroadcastd */
+ size = 32;
+ break;
+ case 0x19: /* vbroadcastsd */
+ case 0x59: /* vpbroadcastq */
+ size = 64;
+ break;
+ case 0x1a: /* vbroadcastf128 */
+ case 0x5a: /* vbroadcasti128 */
+ size = 128;
+ break;
case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */
case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */
case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */
@@ -4346,10 +4379,17 @@ static void gen_sse(CPUX86State *env, DisasContext *s,
int b,
default:
size = 128;
}
- if (s->vex_l) {
+ /* A 256-bit vbroadcast still loads only a single element. */
+ if ((op6.flags & SSE_OPF_SCALAR) == 0 && s->vex_l) {
size *= 2;
}
switch (size) {
+ case 8: /* vpbroadcastb: one byte loaded, so store exactly one byte */
+ tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
+ s->mem_index, MO_UB);
+ tcg_gen_st8_tl(s->tmp0, cpu_env, op2_offset +
+ offsetof(ZMMReg, ZMM_B(0)));
+ break;
case 16:
tcg_gen_qemu_ld_tl(s->tmp0, s->A0,
s->mem_index, MO_LEUW);
--
2.36.0
- [PATCH v2 09/42] i386: Helper macro for 256 bit AVX helpers, (continued)
- [PATCH v2 09/42] i386: Helper macro for 256 bit AVX helpers, Paul Brook, 2022/04/24
- [PATCH v2 07/42] Enforce VEX encoding restrictions, Paul Brook, 2022/04/24
- [PATCH v2 08/42] i386: Add ZMM_OFFSET macro, Paul Brook, 2022/04/24
- [PATCH v2 04/42] i386: Rework sse_op_table1, Paul Brook, 2022/04/24
- [PATCH v2 05/42] i386: Rework sse_op_table6/7, Paul Brook, 2022/04/24
- [PATCH v2 03/42] Add AVX_EN hflag, Paul Brook, 2022/04/24
- [PATCH v2 10/42] i386: Rewrite vector shift helper, Paul Brook, 2022/04/24
- [PATCH v2 17/42] i386: Destructive FP helpers for AVX, Paul Brook, 2022/04/24
- [PATCH v2 30/42] i386: Implement VPERMIL, Paul Brook, 2022/04/24
- [PATCH v2 33/42] i386: Implement VMASKMOV, Paul Brook, 2022/04/24
- [PATCH v2 29/42] i386: Implement VBROADCAST,
Paul Brook <=
- [PATCH v2 41/42] AVX tests, Paul Brook, 2022/04/24
- [PATCH v2 16/42] i386: Dot product AVX helper prep, Paul Brook, 2022/04/24
- [PATCH v2 37/42] i386: Implement VBLENDV, Paul Brook, 2022/04/24
- [PATCH v2 39/42] i386: Enable AVX cpuid bits when using TCG, Paul Brook, 2022/04/24
- [PATCH v2 25/42] i386: VEX.V encodings (3 operand), Paul Brook, 2022/04/24
- [PATCH v2 11/42] i386: Rewrite simple integer vector helpers, Paul Brook, 2022/04/24
- [PATCH v2 14/42] i386: Add size suffix to vector FP helpers, Paul Brook, 2022/04/24
- [PATCH v2 38/42] i386: Implement VPBLENDD, Paul Brook, 2022/04/24
- [PATCH v2 24/42] i386: Move 3DNOW decoder, Paul Brook, 2022/04/24
- [PATCH v2 28/42] i386: Implement VZEROALL and VZEROUPPER, Paul Brook, 2022/04/24