[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 41/69] target/arm: Handle FPCR.AH in negation step in FMLS (in
From: |
Peter Maydell |
Subject: |
[PATCH v2 41/69] target/arm: Handle FPCR.AH in negation step in FMLS (indexed) |
Date: |
Sat, 1 Feb 2025 16:39:44 +0000 |
Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS
(indexed). We do this by creating 6 new helpers, which allow us to
do the negation either by XOR (for AH=0) or by muladd flags
(for AH=1).
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: Mostly from RTH's patch; error in index order into fns[][]
fixed]
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 14 ++++++++++++++
target/arm/tcg/translate-a64.c | 17 +++++++++++------
target/arm/tcg/translate-sve.c | 31 +++++++++++++++++--------------
target/arm/tcg/vec_helper.c | 24 +++++++++++++++---------
4 files changed, 57 insertions(+), 29 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 43505d5fedc..be47edff896 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -813,6 +813,20 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 03b629e0ba0..2509a29528e 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6737,10 +6737,16 @@ TRANS(FMULX_vi, do_fp3_vector_idx, a,
f_vector_idx_fmulx)
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
{
- static gen_helper_gvec_4_ptr * const fns[3] = {
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
+ static gen_helper_gvec_4_ptr * const fns[3][3] = {
+ { gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d },
+ { gen_helper_gvec_fmls_idx_h,
+ gen_helper_gvec_fmls_idx_s,
+ gen_helper_gvec_fmls_idx_d },
+ { gen_helper_gvec_ah_fmls_idx_h,
+ gen_helper_gvec_ah_fmls_idx_s,
+ gen_helper_gvec_ah_fmls_idx_d },
};
MemOp esz = a->esz;
int check = fp_access_check_vector_hsd(s, a->q, esz);
@@ -6751,8 +6757,7 @@ static bool do_fmla_vector_idx(DisasContext *s,
arg_qrrx_e *a, bool neg)
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
esz == MO_16 ? FPST_A64_F16 : FPST_A64,
- (a->idx << 1) | neg,
- fns[esz - 1]);
+ a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
return true;
}
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 50f16d5affa..e81e996c56e 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3524,21 +3524,24 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d,
gen_helper_sve2_cdot_idx_d)
*** SVE Floating Point Multiply-Add Indexed Group
*/
-static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
-{
- static gen_helper_gvec_4_ptr * const fns[4] = {
- NULL,
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
- };
- return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
- (a->index << 1) | sub,
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
-}
+static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
+ NULL, gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
+};
+TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
+ fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
-TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
+static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
+ { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
+ { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
+};
+TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
+ fmls_idx_fns[a->esz][s->fpcr_ah],
+ a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Multiply Indexed Group
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index e4c519f9e33..ae3cb50fa24 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -1680,29 +1680,35 @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub,
float32_mul, float32, H4)
#undef DO_FMUL_IDX
-#define DO_FMLA_IDX(NAME, TYPE, H) \
+#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
float_status *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
- TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
- intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
+ intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
- op1_neg <<= (8 * sizeof(TYPE) - 1); \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
TYPE mm = m[H(i + idx)]; \
for (j = 0; j < segment; j++) { \
- d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
- mm, a[i + j], 0, stat); \
+ d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
+ a[i + j], NEGF, stat); \
} \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
-DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
+
+DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
+DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
+DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
+
+DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
#undef DO_FMLA_IDX
--
2.34.1
- [PATCH v2 31/69] target/arm: Implement FPCR.AH handling of negation of NaN, (continued)
- [PATCH v2 31/69] target/arm: Implement FPCR.AH handling of negation of NaN, Peter Maydell, 2025/02/01
- [PATCH v2 32/69] target/arm: Implement FPCR.AH handling for scalar FABS and FABD, Peter Maydell, 2025/02/01
- [PATCH v2 33/69] target/arm: Handle FPCR.AH in vector FABD, Peter Maydell, 2025/02/01
- [PATCH v2 34/69] target/arm: Handle FPCR.AH in SVE FNEG, Peter Maydell, 2025/02/01
- [PATCH v2 35/69] target/arm: Handle FPCR.AH in SVE FABS, Peter Maydell, 2025/02/01
- [PATCH v2 36/69] target/arm: Handle FPCR.AH in SVE FABD, Peter Maydell, 2025/02/01
- [PATCH v2 37/69] target/arm: Handle FPCR.AH in negation steps in SVE FCADD, Peter Maydell, 2025/02/01
- [PATCH v2 39/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns, Peter Maydell, 2025/02/01
- [PATCH v2 40/69] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS vector insns, Peter Maydell, 2025/02/01
- [PATCH v2 38/69] target/arm: Handle FPCR.AH in negation steps in FCADD, Peter Maydell, 2025/02/01
- [PATCH v2 41/69] target/arm: Handle FPCR.AH in negation step in FMLS (indexed),
Peter Maydell <=
- [PATCH v2 42/69] target/arm: Handle FPCR.AH in negation in FMLS (vector), Peter Maydell, 2025/02/01
- [PATCH v2 43/69] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector), Peter Maydell, 2025/02/01
- [PATCH v2 44/69] target/arm: Handle FPCR.AH in SVE FTSSEL, Peter Maydell, 2025/02/01
- [PATCH v2 47/69] target/arm: Handle FPCR.AH in FCMLA by index, Peter Maydell, 2025/02/01
- [PATCH v2 48/69] target/arm: Handle FPCR.AH in SVE FCMLA, Peter Maydell, 2025/02/01
- [PATCH v2 45/69] target/arm: Handle FPCR.AH in SVE FTMAD, Peter Maydell, 2025/02/01
- [PATCH v2 46/69] target/arm: Handle FPCR.AH in vector FCMLA, Peter Maydell, 2025/02/01
- [PATCH v2 49/69] target/arm: Handle FPCR.AH in FMLSL (by element and vector), Peter Maydell, 2025/02/01
- [PATCH v2 52/69] target/arm: Enable FEAT_AFP for '-cpu max', Peter Maydell, 2025/02/01
- [PATCH v2 50/69] target/arm: Handle FPCR.AH in SVE FMLSL (indexed), Peter Maydell, 2025/02/01