[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 10/61] target/arm: Use FPST_ZA for sme_fmopa_[hsd]
From: |
Richard Henderson |
Subject: |
[PATCH 10/61] target/arm: Use FPST_ZA for sme_fmopa_[hsd] |
Date: |
Thu, 6 Feb 2025 11:56:24 -0800 |
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sme_helper.c | 37 ++++++++--------------------------
target/arm/tcg/translate-sme.c | 4 ++--
2 files changed, 10 insertions(+), 31 deletions(-)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index dcc48e43db..d4562502dd 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -904,20 +904,11 @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
}
void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, float_status *fpst_in, uint32_t desc)
+ void *vpm, float_status *fpst, uint32_t desc)
{
intptr_t row, col, oprsz = simd_maxsz(desc);
uint32_t neg = simd_data(desc) << 31;
uint16_t *pn = vpn, *pm = vpm;
- float_status fpst;
-
- /*
- * Make a copy of float_status because this operation does not
- * update the cumulative fp exception status. It also produces
- * default nans.
- */
- fpst = *fpst_in;
- set_default_nan_mode(true, &fpst);
for (row = 0; row < oprsz; ) {
uint16_t pa = pn[H2(row >> 4)];
@@ -932,7 +923,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm,
void *vpn,
if (pb & 1) {
uint32_t *a = vza_row + H1_4(col);
uint32_t *m = vzm + H1_4(col);
- *a = float32_muladd(n, *m, *a, 0, &fpst);
+ *a = float32_muladd(n, *m, *a, 0, fpst);
}
col += 4;
pb >>= 4;
@@ -946,15 +937,12 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm,
void *vpn,
}
void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
- void *vpm, float_status *fpst_in, uint32_t desc)
+ void *vpm, float_status *fpst, uint32_t desc)
{
intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
uint64_t neg = (uint64_t)simd_data(desc) << 63;
uint64_t *za = vza, *zn = vzn, *zm = vzm;
uint8_t *pn = vpn, *pm = vpm;
- float_status fpst = *fpst_in;
-
- set_default_nan_mode(true, &fpst);
for (row = 0; row < oprsz; ++row) {
if (pn[H1(row)] & 1) {
@@ -964,7 +952,7 @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm,
void *vpn,
for (col = 0; col < oprsz; ++col) {
if (pm[H1(col)] & 1) {
uint64_t *a = &za_row[col];
- *a = float64_muladd(n, zm[col], *a, 0, &fpst);
+ *a = float64_muladd(n, zm[col], *a, 0, fpst);
}
}
}
@@ -1035,19 +1023,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void
*vzm, void *vpn,
intptr_t row, col, oprsz = simd_maxsz(desc);
uint32_t neg = simd_data(desc) * 0x80008000u;
uint16_t *pn = vpn, *pm = vpm;
- float_status fpst_odd, fpst_std, fpst_f16;
+ float_status fpst_odd = env->vfp.fp_status[FPST_ZA];
- /*
- * Make copies of the fp status fields we use, because this operation
- * does not update the cumulative fp exception status. It also
- * produces default NaNs. We also need a second copy of fp_status with
- * round-to-odd -- see above.
- */
- fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
- fpst_std = env->vfp.fp_status[FPST_A64];
- set_default_nan_mode(true, &fpst_std);
- set_default_nan_mode(true, &fpst_f16);
- fpst_odd = fpst_std;
set_float_rounding_mode(float_round_to_odd, &fpst_odd);
for (row = 0; row < oprsz; ) {
@@ -1067,7 +1044,9 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm,
void *vpn,
m = f16mop_adj_pair(m, pcol, 0);
*a = f16_dotadd(*a, n, m,
- &fpst_f16, &fpst_std, &fpst_odd);
+ &env->vfp.fp_status[FPST_ZA_F16],
+ &env->vfp.fp_status[FPST_ZA],
+ &fpst_odd);
}
col += 4;
pcol >>= 4;
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index fcbb350016..51175c923e 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -358,9 +358,9 @@ static bool do_outprod_env(DisasContext *s, arg_op *a,
MemOp esz,
TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
MO_32, gen_helper_sme_fmopa_h)
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
- MO_32, FPST_A64, gen_helper_sme_fmopa_s)
+ MO_32, FPST_ZA, gen_helper_sme_fmopa_s)
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
- MO_64, FPST_A64, gen_helper_sme_fmopa_d)
+ MO_64, FPST_ZA, gen_helper_sme_fmopa_d)
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa)
--
2.43.0
- [PATCH 00/61] target/arm: Implement FEAT_SME2, Richard Henderson, 2025/02/06
- [PATCH 01/61] tcg: Add dbase argument to do_dup_store, Richard Henderson, 2025/02/06
- [PATCH 05/61] tcg: Split out tcg_gen_gvec_2_var, Richard Henderson, 2025/02/06
- [PATCH 06/61] tcg: Split out tcg_gen_gvec_3_var, Richard Henderson, 2025/02/06
- [PATCH 08/61] tcg: Split out tcg_gen_gvec_{add,sub}_var, Richard Henderson, 2025/02/06
- [PATCH 09/61] target/arm: Introduce FPST_ZA, FPST_ZA_F16, Richard Henderson, 2025/02/06
- [PATCH 02/61] tcg: Add dbase argument to do_dup, Richard Henderson, 2025/02/06
- [PATCH 03/61] tcg: Add dbase argument to expand_clr, Richard Henderson, 2025/02/06
- [PATCH 04/61] tcg: Add base arguments to check_overlap_[234], Richard Henderson, 2025/02/06
- [PATCH 07/61] tcg: Split out tcg_gen_gvec_mov_var, Richard Henderson, 2025/02/06
- [PATCH 10/61] target/arm: Use FPST_ZA for sme_fmopa_[hsd],
Richard Henderson <=
- [PATCH 14/61] target/arm: Add zt0_excp_el to DisasContext, Richard Henderson, 2025/02/06
- [PATCH 12/61] target/arm: Add isar_feature_aa64_sme2*, Richard Henderson, 2025/02/06
- [PATCH 17/61] target/arm: Implement SME2 MOVT, Richard Henderson, 2025/02/06
- [PATCH 11/61] target/arm: Rename zarray to za_state.za, Richard Henderson, 2025/02/06
- [PATCH 13/61] target/arm: Add ZT0, Richard Henderson, 2025/02/06
- [PATCH 16/61] target/arm: Implement SME2 LDR/STR ZT0, Richard Henderson, 2025/02/06
- [PATCH 15/61] target/arm: Implement SME2 ZERO ZT0, Richard Henderson, 2025/02/06
- [PATCH 21/61] target/arm: Split out get_zarray, Richard Henderson, 2025/02/06
- [PATCH 20/61] target/arm: Implement SME2 MOVA to/from tile, multiple registers, Richard Henderson, 2025/02/06
- [PATCH 19/61] target/arm: Rename MOVA for translate, Richard Henderson, 2025/02/06