[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 37/69] target/arm: Handle FPCR.AH in negation steps in SVE FCA
From: |
Peter Maydell |
Subject: |
[PATCH v2 37/69] target/arm: Handle FPCR.AH in negation steps in SVE FCADD |
Date: |
Sat, 1 Feb 2025 16:39:40 +0000 |
The negation steps in FCADD must honour FPCR.AH's "don't change the
sign of a NaN" semantics. Implement this in the same way we did for
the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field
passed to the helper and using that to decide whether to negate the
values.
The construction of neg_imag and neg_real were done to make it easy
to apply both in parallel with two simple logical operations. This
changed with FPCR.AH, which is more complex than that. Switch to
an approach that follows the pseudocode more closely, by extracting
the 'rot=1' parameter from the SIMD data field and changing the
sign of the appropriate input value.
Note that there was a naming issue with neg_imag and neg_real.
They were named backward, with neg_imag being non-zero for rot=1,
and vice versa. This was combined with reversed usage within the
loop, so that the negation in the end turned out correct.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
v2: Squashed in changes from RTH's patchset
---
target/arm/tcg/vec_internal.h | 17 ++++++++++++++
target/arm/tcg/sve_helper.c | 42 ++++++++++++++++++++++++----------
target/arm/tcg/translate-sve.c | 2 +-
3 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
index 094f5c169ca..826791523a6 100644
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -20,6 +20,8 @@
#ifndef TARGET_ARM_VEC_INTERNAL_H
#define TARGET_ARM_VEC_INTERNAL_H
+#include "fpu/softfloat.h"
+
/*
* Note that vector data is stored in host-endian 64-bit chunks,
* so addressing units smaller than that needs a host-endian fixup.
@@ -265,4 +267,19 @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
*/
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
+static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
+{
+ return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah)
+{
+ return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
+{
+ return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
+}
+
#endif /* TARGET_ARM_VEC_INTERNAL_H */
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 8527a7495a6..770945a2c6a 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5131,8 +5131,8 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm,
void *vg,
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
- float16 neg_real = float16_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5144,9 +5144,15 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm,
void *vg,
i -= 2 * sizeof(float16);
e0 = *(float16 *)(vn + H1_2(i));
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float16 *)(vm + H1_2(j));
e2 = *(float16 *)(vn + H1_2(j));
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float16 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
*(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
@@ -5163,8 +5169,8 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm,
void *vg,
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
- float32 neg_real = float32_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5176,9 +5182,15 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm,
void *vg,
i -= 2 * sizeof(float32);
e0 = *(float32 *)(vn + H1_2(i));
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float32 *)(vm + H1_2(j));
e2 = *(float32 *)(vn + H1_2(j));
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float32 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
*(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
@@ -5195,8 +5207,8 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm,
void *vg,
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
- float64 neg_real = float64_chs(neg_imag);
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5208,9 +5220,15 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm,
void *vg,
i -= 2 * sizeof(float64);
e0 = *(float64 *)(vn + H1_2(i));
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float64 *)(vm + H1_2(j));
e2 = *(float64 *)(vn + H1_2(j));
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float64 *)(vm + H1_2(i));
+
+ if (rot) {
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
+ }
if (likely((pg >> (i & 63)) & 1)) {
*(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 0d8bd1a49c4..7816b5801af 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3916,7 +3916,7 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
- a->rd, a->rn, a->rm, a->pg, a->rot,
+ a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
#define DO_FMLA(NAME, name) \
--
2.34.1
- [PATCH v2 28/69] target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV, (continued)
- [PATCH v2 28/69] target/arm: Implement FPCR.AH semantics for SVE FMAXV and FMINV, Peter Maydell, 2025/02/01
- [PATCH v2 30/69] target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector, Peter Maydell, 2025/02/01
- [PATCH v2 26/69] target/arm: Implement FPCR.AH semantics for FMAXV and FMINV, Peter Maydell, 2025/02/01
- [PATCH v2 29/69] target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX immediate, Peter Maydell, 2025/02/01
- [PATCH v2 31/69] target/arm: Implement FPCR.AH handling of negation of NaN, Peter Maydell, 2025/02/01
- [PATCH v2 32/69] target/arm: Implement FPCR.AH handling for scalar FABS and FABD, Peter Maydell, 2025/02/01
- [PATCH v2 33/69] target/arm: Handle FPCR.AH in vector FABD, Peter Maydell, 2025/02/01
- [PATCH v2 34/69] target/arm: Handle FPCR.AH in SVE FNEG, Peter Maydell, 2025/02/01
- [PATCH v2 35/69] target/arm: Handle FPCR.AH in SVE FABS, Peter Maydell, 2025/02/01
- [PATCH v2 36/69] target/arm: Handle FPCR.AH in SVE FABD, Peter Maydell, 2025/02/01
- [PATCH v2 37/69] target/arm: Handle FPCR.AH in negation steps in SVE FCADD,
Peter Maydell <=
- [PATCH v2 39/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns, Peter Maydell, 2025/02/01
- [PATCH v2 40/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns, Peter Maydell, 2025/02/01
- [PATCH v2 38/69] target/arm: Handle FPCR.AH in negation steps in FCADD, Peter Maydell, 2025/02/01
- [PATCH v2 41/69] target/arm: Handle FPCR.AH in negation step in FMLS (indexed), Peter Maydell, 2025/02/01
- [PATCH v2 42/69] target/arm: Handle FPCR.AH in negation in FMLS (vector), Peter Maydell, 2025/02/01
- [PATCH v2 43/69] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector), Peter Maydell, 2025/02/01
- [PATCH v2 44/69] target/arm: Handle FPCR.AH in SVE FTSSEL, Peter Maydell, 2025/02/01
- [PATCH v2 47/69] target/arm: Handle FPCR.AH in FCMLA by index, Peter Maydell, 2025/02/01
- [PATCH v2 48/69] target/arm: Handle FPCR.AH in SVE FCMLA, Peter Maydell, 2025/02/01
- [PATCH v2 45/69] target/arm: Handle FPCR.AH in SVE FTMAD, Peter Maydell, 2025/02/01