[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 43/69] target/arm: Handle FPCR.AH in negation step in SVE FMLS
From: |
Peter Maydell |
Subject: |
[PATCH v2 43/69] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) |
Date: |
Sat, 1 Feb 2025 16:39:46 +0000 |
Handle the FPCR.AH "don't negate the sign of a NaN" semantics fro the
SVE FMLS (vector) insns, by providing new helpers for the AH=1 case
which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions
that do the work.
The float*_muladd functions have a flags argument that can
perform optional negation of various operand. We don't use
that for "normal" arm fmla, because the muladd flags are not
applied when an input is a NaN. But since FEAT_AFP does not
negate NaNs, this behaviour is exactly what we need.
The non-AH helpers pass in a zero flags argument and control the
negation via the neg1 and neg3 arguments; the AH helpers always pass
in neg1 and neg3 as zero and control the negation via the flags
argument. This allows us to avoid conditional branches within the
inner loop.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-sve.h | 21 ++++++++
target/arm/tcg/sve_helper.c | 99 +++++++++++++++++++++++++++-------
target/arm/tcg/translate-sve.c | 18 ++++---
3 files changed, 114 insertions(+), 24 deletions(-)
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
index a2e96a498dd..0b1b5887834 100644
--- a/target/arm/tcg/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -1475,6 +1475,27 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 770945a2c6a..90d4defc0d5 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -4802,7 +4802,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
+ uint16_t neg1, uint16_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4817,7 +4817,7 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm,
void *va, void *vg,
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
e2 = *(uint16_t *)(vm + H1_2(i));
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
- r = float16_muladd(e1, e2, e3, 0, status);
+ r = float16_muladd(e1, e2, e3, flags, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
} while (i & 63);
@@ -4827,30 +4827,51 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void
*vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
+ uint32_t neg1, uint32_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4865,7 +4886,7 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm,
void *va, void *vg,
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
e2 = *(uint32_t *)(vm + H1_4(i));
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
- r = float32_muladd(e1, e2, e3, 0, status);
+ r = float32_muladd(e1, e2, e3, flags, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
} while (i & 63);
@@ -4875,30 +4896,51 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void
*vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000,
0);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
+ uint64_t neg1, uint64_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4913,7 +4955,7 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm,
void *va, void *vg,
e1 = *(uint64_t *)(vn + i) ^ neg1;
e2 = *(uint64_t *)(vm + i);
e3 = *(uint64_t *)(va + i) ^ neg3;
- r = float64_muladd(e1, e2, e3, 0, status);
+ r = float64_muladd(e1, e2, e3, flags, status);
*(uint64_t *)(vd + i) = r;
}
} while (i & 63);
@@ -4923,25 +4965,46 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void
*vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
/* Two operand floating-point comparison controlled by a predicate.
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index e81e996c56e..17016854d87 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3927,19 +3927,25 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp,
fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-#define DO_FMLA(NAME, name) \
+#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
NULL, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
#undef DO_FMLA
--
2.34.1
- [PATCH v2 33/69] target/arm: Handle FPCR.AH in vector FABD, (continued)
- [PATCH v2 33/69] target/arm: Handle FPCR.AH in vector FABD, Peter Maydell, 2025/02/01
- [PATCH v2 34/69] target/arm: Handle FPCR.AH in SVE FNEG, Peter Maydell, 2025/02/01
- [PATCH v2 35/69] target/arm: Handle FPCR.AH in SVE FABS, Peter Maydell, 2025/02/01
- [PATCH v2 36/69] target/arm: Handle FPCR.AH in SVE FABD, Peter Maydell, 2025/02/01
- [PATCH v2 37/69] target/arm: Handle FPCR.AH in negation steps in SVE FCADD, Peter Maydell, 2025/02/01
- [PATCH v2 39/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns, Peter Maydell, 2025/02/01
- [PATCH v2 40/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns, Peter Maydell, 2025/02/01
- [PATCH v2 38/69] target/arm: Handle FPCR.AH in negation steps in FCADD, Peter Maydell, 2025/02/01
- [PATCH v2 41/69] target/arm: Handle FPCR.AH in negation step in FMLS (indexed), Peter Maydell, 2025/02/01
- [PATCH v2 42/69] target/arm: Handle FPCR.AH in negation in FMLS (vector), Peter Maydell, 2025/02/01
- [PATCH v2 43/69] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector),
Peter Maydell <=
- [PATCH v2 44/69] target/arm: Handle FPCR.AH in SVE FTSSEL, Peter Maydell, 2025/02/01
- [PATCH v2 47/69] target/arm: Handle FPCR.AH in FCMLA by index, Peter Maydell, 2025/02/01
- [PATCH v2 48/69] target/arm: Handle FPCR.AH in SVE FCMLA, Peter Maydell, 2025/02/01
- [PATCH v2 45/69] target/arm: Handle FPCR.AH in SVE FTMAD, Peter Maydell, 2025/02/01
- [PATCH v2 46/69] target/arm: Handle FPCR.AH in vector FCMLA, Peter Maydell, 2025/02/01
- [PATCH v2 49/69] target/arm: Handle FPCR.AH in FMLSL (by element and vector), Peter Maydell, 2025/02/01
- [PATCH v2 52/69] target/arm: Enable FEAT_AFP for '-cpu max', Peter Maydell, 2025/02/01
- [PATCH v2 50/69] target/arm: Handle FPCR.AH in SVE FMLSL (indexed), Peter Maydell, 2025/02/01
- [PATCH v2 54/69] target/arm: Implement increased precision FRECPE, Peter Maydell, 2025/02/01
- [PATCH v2 53/69] target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper, Peter Maydell, 2025/02/01