[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 17/42] i386: Destructive FP helpers for AVX
From: |
Paul Brook |
Subject: |
[PATCH v2 17/42] i386: Destructive FP helpers for AVX |
Date: |
Sun, 24 Apr 2022 23:01:39 +0100 |
Prepare the horizontal arithmetic vector helpers for AVX
These currently use a dummy Reg-typed variable to store the result, then
assign the whole register. This will cause 128-bit operations to corrupt
the upper half of the register, so replace it with explicit temporaries
and element assignments.
Signed-off-by: Paul Brook <paul@nowt.org>
---
target/i386/ops_sse.h | 96 +++++++++++++++++++++++++++++++------------
1 file changed, 70 insertions(+), 26 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 4137e6e1fa..d128af6cc8 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -1196,44 +1196,88 @@ void helper_insertq_i(CPUX86State *env, ZMMReg *d, int index, int length)
d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length);
}
-void glue(helper_haddps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_haddps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
+ Reg *v = d;
+ float32 r0, r1, r2, r3;
+
+ r0 = float32_add(v->ZMM_S(0), v->ZMM_S(1), &env->sse_status);
+ r1 = float32_add(v->ZMM_S(2), v->ZMM_S(3), &env->sse_status);
+ r2 = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
+ r3 = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
+ d->ZMM_S(0) = r0;
+ d->ZMM_S(1) = r1;
+ d->ZMM_S(2) = r2;
+ d->ZMM_S(3) = r3;
+#if SHIFT == 2
+ r0 = float32_add(v->ZMM_S(4), v->ZMM_S(5), &env->sse_status);
+ r1 = float32_add(v->ZMM_S(6), v->ZMM_S(7), &env->sse_status);
+ r2 = float32_add(s->ZMM_S(4), s->ZMM_S(5), &env->sse_status);
+ r3 = float32_add(s->ZMM_S(6), s->ZMM_S(7), &env->sse_status);
+ d->ZMM_S(4) = r0;
+ d->ZMM_S(5) = r1;
+ d->ZMM_S(6) = r2;
+ d->ZMM_S(7) = r3;
+#endif
}
-void glue(helper_haddpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_haddpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
- ZMMReg r;
+ Reg *v = d;
+ float64 r0, r1;
- r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
+ r0 = float64_add(v->ZMM_D(0), v->ZMM_D(1), &env->sse_status);
+ r1 = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
+ d->ZMM_D(0) = r0;
+ d->ZMM_D(1) = r1;
+#if SHIFT == 2
+ r0 = float64_add(v->ZMM_D(2), v->ZMM_D(3), &env->sse_status);
+ r1 = float64_add(s->ZMM_D(2), s->ZMM_D(3), &env->sse_status);
+ d->ZMM_D(2) = r0;
+ d->ZMM_D(3) = r1;
+#endif
}
-void glue(helper_hsubps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_hsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
- ZMMReg r;
-
- r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status);
- r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status);
- r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
- r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
- MOVE(*d, r);
+ Reg *v = d;
+ float32 r0, r1, r2, r3;
+
+ r0 = float32_sub(v->ZMM_S(0), v->ZMM_S(1), &env->sse_status);
+ r1 = float32_sub(v->ZMM_S(2), v->ZMM_S(3), &env->sse_status);
+ r2 = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status);
+ r3 = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status);
+ d->ZMM_S(0) = r0;
+ d->ZMM_S(1) = r1;
+ d->ZMM_S(2) = r2;
+ d->ZMM_S(3) = r3;
+#if SHIFT == 2
+ r0 = float32_sub(v->ZMM_S(4), v->ZMM_S(5), &env->sse_status);
+ r1 = float32_sub(v->ZMM_S(6), v->ZMM_S(7), &env->sse_status);
+ r2 = float32_sub(s->ZMM_S(4), s->ZMM_S(5), &env->sse_status);
+ r3 = float32_sub(s->ZMM_S(6), s->ZMM_S(7), &env->sse_status);
+ d->ZMM_S(4) = r0;
+ d->ZMM_S(5) = r1;
+ d->ZMM_S(6) = r2;
+ d->ZMM_S(7) = r3;
+#endif
}
-void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s)
+void glue(helper_hsubpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
{
- ZMMReg r;
+ Reg *v = d;
+ float64 r0, r1;
- r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status);
- r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
- MOVE(*d, r);
+ r0 = float64_sub(v->ZMM_D(0), v->ZMM_D(1), &env->sse_status);
+ r1 = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status);
+ d->ZMM_D(0) = r0;
+ d->ZMM_D(1) = r1;
+#if SHIFT == 2
+ r0 = float64_sub(v->ZMM_D(2), v->ZMM_D(3), &env->sse_status);
+ r1 = float64_sub(s->ZMM_D(2), s->ZMM_D(3), &env->sse_status);
+ d->ZMM_D(2) = r0;
+ d->ZMM_D(3) = r1;
+#endif
}
void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s)
--
2.36.0
- Re: [PATCH v2 07/42] Enforce VEX encoding restrictions, (continued)
- [PATCH v2 08/42] i386: Add ZMM_OFFSET macro, Paul Brook, 2022/04/24
- [PATCH v2 04/42] i386: Rework sse_op_table1, Paul Brook, 2022/04/24
- [PATCH v2 05/42] i386: Rework sse_op_table6/7, Paul Brook, 2022/04/24
- [PATCH v2 03/42] Add AVX_EN hflag, Paul Brook, 2022/04/24
- [PATCH v2 10/42] i386: Rewrite vector shift helper, Paul Brook, 2022/04/24
- [PATCH v2 17/42] i386: Destructive FP helpers for AVX,
Paul Brook <=
- [PATCH v2 30/42] i386: Implement VPERMIL, Paul Brook, 2022/04/24
- [PATCH v2 33/42] i386: Implement VMASKMOV, Paul Brook, 2022/04/24
- [PATCH v2 29/42] i386: Implement VBROADCAST, Paul Brook, 2022/04/24
- [PATCH v2 41/42] AVX tests, Paul Brook, 2022/04/24
- [PATCH v2 16/42] i386: Dot product AVX helper prep, Paul Brook, 2022/04/24
- [PATCH v2 37/42] i386: Implement VBLENDV, Paul Brook, 2022/04/24
- [PATCH v2 39/42] i386: Enable AVX cpuid bits when using TCG, Paul Brook, 2022/04/24
- [PATCH v2 25/42] i386: VEX.V encodings (3 operand), Paul Brook, 2022/04/24
- [PATCH v2 11/42] i386: Rewrite simple integer vector helpers, Paul Brook, 2022/04/24
- [PATCH v2 14/42] i386: Add size suffix to vector FP helpers, Paul Brook, 2022/04/24