[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 07/69] target/arm: Implement FPCR.FIZ handling
From: |
Peter Maydell |
Subject: |
[PATCH v2 07/69] target/arm: Implement FPCR.FIZ handling |
Date: |
Sat, 1 Feb 2025 16:39:10 +0000 |
Part of FEAT_AFP is the new control bit FPCR.FIZ. This bit affects
flushing of single and double precision denormal inputs to zero for
AArch64 floating point instructions. (For half-precision, the
existing FPCR.FZ16 control remains the only one.)
FPCR.FIZ differs from FPCR.FZ in that if we flush an input denormal
only because of FPCR.FIZ then we should *not* set the cumulative
exception bit FPSR.IDC.
FEAT_AFP also defines that in AArch64 the existing FPCR.FZ only
applies when FPCR.AH is 0.
We can implement this by setting the "flush inputs to zero" state
appropriately when FPCR is written, and by not reflecting the
float_flag_input_denormal status flag into FPSR reads when it is the
result only of FPSR.FIZ.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/vfp_helper.c | 60 ++++++++++++++++++++++++++++++++++-------
1 file changed, 50 insertions(+), 10 deletions(-)
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 8c79ab4fc8a..30c170ecee5 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -61,19 +61,31 @@ static inline uint32_t vfp_exceptbits_from_host(int
host_bits)
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
- uint32_t i = 0;
+ uint32_t a32_flags = 0, a64_flags = 0;
- i |= get_float_exception_flags(&env->vfp.fp_status_a32);
- i |= get_float_exception_flags(&env->vfp.fp_status_a64);
- i |= get_float_exception_flags(&env->vfp.standard_fp_status);
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
+ a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status);
/* FZ16 does not generate an input denormal exception. */
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
& ~float_flag_input_denormal_flushed);
- i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
+ a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
& ~float_flag_input_denormal_flushed);
- i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
+
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
& ~float_flag_input_denormal_flushed);
- return vfp_exceptbits_from_host(i);
+ /*
+ * Flushing an input denormal *only* because FPCR.FIZ == 1 does
+ * not set FPSR.IDC; if FPCR.FZ is also set then this takes
+ * precedence and IDC is set (see the FPUnpackBase pseudocode).
+ * So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
+ * We only do this for the a64 flags because FIZ has no effect
+ * on AArch32 even if it is set.
+ */
+ if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
+ a64_flags &= ~float_flag_input_denormal_flushed;
+ }
+ return vfp_exceptbits_from_host(a32_flags | a64_flags);
}
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
@@ -91,6 +103,17 @@ static void vfp_clear_float_status_exc_flags(CPUARMState
*env)
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
}
+static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
+{
+ /*
+ * Synchronize any pending exception-flag information in the
+ * float_status values into env->vfp.fpsr, and then clear out
+ * the float_status data.
+ */
+ env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
+ vfp_clear_float_status_exc_flags(env);
+}
+
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
{
uint64_t changed = env->vfp.fpcr;
@@ -130,9 +153,18 @@ static void vfp_set_fpcr_to_host(CPUARMState *env,
uint32_t val, uint32_t mask)
if (changed & FPCR_FZ) {
bool ftz_enabled = val & FPCR_FZ;
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
+ /* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
+ }
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
+ /*
+ * A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
+ * both FPCR.AH = 0 and FPCR.FZ = 1.
+ */
+ bool fitz_enabled = (val & FPCR_FIZ) ||
+ (val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64);
}
if (changed & FPCR_DN) {
bool dnan_enabled = val & FPCR_DN;
@@ -141,6 +173,14 @@ static void vfp_set_fpcr_to_host(CPUARMState *env,
uint32_t val, uint32_t mask)
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
}
+ /*
+ * If any bits changed that we look at in vfp_get_fpsr_from_host(),
+ * we must sync the float_status flags into vfp.fpsr now (under the
+ * old regime) before we update vfp.fpcr.
+ */
+ if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
+ vfp_sync_and_clear_float_status_exc_flags(env);
+ }
}
#else
--
2.34.1
- [PATCH v2 00/69] target/arm: FEAT_AFP and FEAT_RPRES, Peter Maydell, 2025/02/01
- [PATCH v2 01/69] target/i386: Do not raise Invalid for 0 * Inf + QNaN, Peter Maydell, 2025/02/01
- [PATCH v2 02/69] tests/tcg/x86_64/fma: Test some x86 fused-multiply-add cases, Peter Maydell, 2025/02/01
- [PATCH v2 03/69] fpu: Add float_class_denormal, Peter Maydell, 2025/02/01
- [PATCH v2 05/69] fpu: allow flushing of output denormals to be after rounding, Peter Maydell, 2025/02/01
- [PATCH v2 06/69] target/arm: Define FPCR AH, FIZ, NEP bits, Peter Maydell, 2025/02/01
- [PATCH v2 04/69] fpu: Implement float_flag_input_denormal_used, Peter Maydell, 2025/02/01
- [PATCH v2 11/69] target/arm: Set up float_status to use for FPCR.AH=1 behaviour, Peter Maydell, 2025/02/01
- [PATCH v2 07/69] target/arm: Implement FPCR.FIZ handling,
Peter Maydell <=
- [PATCH v2 14/69] target/arm: Use FPST_FPCR_AH for BFMLAL*, BFMLSL* insns, Peter Maydell, 2025/02/01
- [PATCH v2 08/69] target/arm: Adjust FP behaviour for FPCR.AH = 1, Peter Maydell, 2025/02/01
- [PATCH v2 09/69] target/arm: Adjust exception flag handling for AH = 1, Peter Maydell, 2025/02/01
- [PATCH v2 10/69] target/arm: Add FPCR.AH to tbflags, Peter Maydell, 2025/02/01
- [PATCH v2 13/69] target/arm: Use FPST_FPCR_AH for BFCVT* insns, Peter Maydell, 2025/02/01
- [PATCH v2 19/69] target/arm: Handle FPCR.NEP for 1-input scalar operations, Peter Maydell, 2025/02/01
- [PATCH v2 17/69] target/arm: Handle FPCR.NEP for 3-input scalar operations, Peter Maydell, 2025/02/01
- [PATCH v2 16/69] target/arm: Define and use new write_fp_*reg_merging() functions, Peter Maydell, 2025/02/01
- [PATCH v2 15/69] target/arm: Add FPCR.NEP to TBFLAGS, Peter Maydell, 2025/02/01