[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 47/61] target/arm: Implement SME2 FCVT (widening), FCVTL
From: Richard Henderson
Subject: [PATCH 47/61] target/arm: Implement SME2 FCVT (widening), FCVTL
Date: Thu, 6 Feb 2025 11:57:01 -0800
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-sme.h | 2 ++
target/arm/tcg/sme_helper.c | 38 ++++++++++++++++++++++++++++++++++
target/arm/tcg/translate-sme.c | 5 +++++
target/arm/tcg/sme.decode | 5 +++++
4 files changed, 50 insertions(+)
diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h
index cb81f89fb3..04db920299 100644
--- a/target/arm/tcg/helper-sme.h
+++ b/target/arm/tcg/helper-sme.h
@@ -218,3 +218,5 @@ DEF_HELPER_FLAGS_4(sme2_bfcvt, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sme2_bfcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sme2_fcvt_n, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_4(sme2_fcvtn, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvt_w, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_4(sme2_fcvtl, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 6d2b83d26a..8289a02bfa 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1562,3 +1562,41 @@ void HELPER(sme2_fcvtn)(void *vd, void *vs, float_status *fpst, uint32_t desc)
d[H2(i * 2 + 1)] = d1;
}
}
+
+/* Expand and convert: float16 s[0..n) -> d0 (at vd), s[n..2n) -> d1 (after vd). */
+void HELPER(sme2_fcvt_w)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+ ARMVectorReg scratch __attribute__((uninitialized));
+ size_t oprsz = simd_oprsz(desc);
+ size_t i, n = oprsz / 4; /* number of elements stored to each of d0 and d1 */
+ float16 *s = vs;
+ float32 *d0 = vd;
+ float32 *d1 = vd + sizeof(ARMVectorReg); /* one ARMVectorReg past vd */
+
+ if (vd == vs) { /* d0 stores would clobber float16 inputs not yet read */
+ s = memcpy(&scratch, s, oprsz); /* so read the inputs from a private copy */
+ }
+
+ for (i = 0; i < n; ++i) { /* first n inputs fill d0 */
+ d0[H4(i)] = float16_to_float32(s[H2(i)], true, fpst);
+ }
+ for (i = 0; i < n; ++i) { /* next n inputs fill d1 */
+ d1[H4(i)] = float16_to_float32(s[H2(n + i)], true, fpst);
+ }
+}
+
+/* Deinterleave and convert: even float16 elements -> d0, odd elements -> d1. */
+void HELPER(sme2_fcvtl)(void *vd, void *vs, float_status *fpst, uint32_t desc)
+{
+ size_t i, n = simd_oprsz(desc) / 4; /* elements stored to each of d0 and d1 */
+ float16 *s = vs;
+ float32 *d0 = vd;
+ float32 *d1 = vd + sizeof(ARMVectorReg); /* one ARMVectorReg past vd */
+
+ for (i = 0; i < n; ++i) {
+ float32 v0 = float16_to_float32(s[H2(i * 2 + 0)], true, fpst);
+ float32 v1 = float16_to_float32(s[H2(i * 2 + 1)], true, fpst);
+ d0[H4(i)] = v0; /* both inputs of this pair are read before either store */
+ d1[H4(i)] = v1;
+ }
+}
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 777ea15a80..2b45244e23 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -1288,3 +1288,8 @@ TRANS_FEAT(FCVT_n, aa64_sme2, do_zz_fpst, a, 0,
FPST_A64, gen_helper_sme2_fcvt_n)
TRANS_FEAT(FCVTN, aa64_sme2, do_zz_fpst, a, 0,
FPST_A64, gen_helper_sme2_fcvtn)
+
+TRANS_FEAT(FCVT_w, aa64_sme2_f16f16, do_zz_fpst, a, 0,
+ FPST_A64_F16, gen_helper_sme2_fcvt_w)
+TRANS_FEAT(FCVTL, aa64_sme2_f16f16, do_zz_fpst, a, 0,
+ FPST_A64_F16, gen_helper_sme2_fcvtl)
diff --git a/target/arm/tcg/sme.decode b/target/arm/tcg/sme.decode
index 8cca7d0d46..644794bdc1 100644
--- a/target/arm/tcg/sme.decode
+++ b/target/arm/tcg/sme.decode
@@ -725,9 +725,14 @@ FMLS_nx_d 11000001 1101 .... 1 .. 00. ...00 10 ... @azx_4x1_i1_o3
&zz_n zd zn n
@zz_1x2 ........ ... ..... ...... ..... zd:5 \
&zz_n n=1 zn=%zn_ax2
+@zz_2x1 ........ ... ..... ...... zn:5 ..... \
+ &zz_n n=1 zd=%zd_ax2
BFCVT 11000001 011 00000 111000 ....0 ..... @zz_1x2
BFCVTN 11000001 011 00000 111000 ....1 ..... @zz_1x2
FCVT_n 11000001 001 00000 111000 ....0 ..... @zz_1x2
FCVTN 11000001 001 00000 111000 ....1 ..... @zz_1x2
+
+FCVT_w 11000001 101 00000 111000 ..... ....0 @zz_2x1
+FCVTL 11000001 101 00000 111000 ..... ....1 @zz_2x1
--
2.43.0
- [PATCH 54/61] target/arm: Implement SME2 SUNPK, UUNPK, (continued)
- [PATCH 54/61] target/arm: Implement SME2 SUNPK, UUNPK, Richard Henderson, 2025/02/06
- [PATCH 48/61] target/arm: Implement SME2 FCVTZS, FCVTZU, Richard Henderson, 2025/02/06
- [PATCH 51/61] target/arm: Introduce do_[us]sat_[bhs] macros, Richard Henderson, 2025/02/06
- [PATCH 45/61] target/arm: Remove CPUARMState.vfp.scratch, Richard Henderson, 2025/02/06
- [PATCH 49/61] target/arm: Implement SME2 SCVTF, UCVTF, Richard Henderson, 2025/02/06
- [PATCH 50/61] target/arm: Implement SME2 FRINTN, FRINTP, FRINTM, FRINTA, Richard Henderson, 2025/02/06
- [PATCH 60/61] target/arm: Implement SME2 SEL, Richard Henderson, 2025/02/06
- [PATCH 61/61] target/arm: Enable FEAT_SME2, FEAT_SME_F16F16, FEAT_SVE_B16B16 on -cpu max, Richard Henderson, 2025/02/06
- [PATCH 55/61] target/arm: Implement SME2 ZIP, UZP (four registers), Richard Henderson, 2025/02/06
- [PATCH 44/61] target/arm: Implement SME2 FADD, FSUB, BFADD, BFSUB, Richard Henderson, 2025/02/06
- [PATCH 47/61] target/arm: Implement SME2 FCVT (widening), FCVTL,
Richard Henderson <=
- [PATCH 53/61] target/arm: Implement SME2 SQCVT, UQCVT, SQCVTU, Richard Henderson, 2025/02/06
- [PATCH 58/61] target/arm: Implement SME2 ZIP, UZP (two registers), Richard Henderson, 2025/02/06
- [PATCH 59/61] target/arm: Implement SME2 FCLAMP, SCLAMP, UCLAMP, Richard Henderson, 2025/02/06
- [PATCH 52/61] target/arm: Use do_[us]sat_[bhs] in sve_helper.c, Richard Henderson, 2025/02/06
- [PATCH 56/61] target/arm: Move do_urshr, do_srshr to vec_internal.h, Richard Henderson, 2025/02/06
- [PATCH 57/61] target/arm: Implement SME2 SQRSHR, UQRSHR, SQRSHRN, Richard Henderson, 2025/02/06
- Re: [PATCH 00/61] target/arm: Implement FEAT_SME2, Richard Henderson, 2025/02/24
- Prev by Date:
[PATCH 44/61] target/arm: Implement SME2 FADD, FSUB, BFADD, BFSUB
- Next by Date:
[PATCH 53/61] target/arm: Implement SME2 SQCVT, UQCVT, SQCVTU
- Previous by thread:
[PATCH 44/61] target/arm: Implement SME2 FADD, FSUB, BFADD, BFSUB
- Next by thread:
[PATCH 53/61] target/arm: Implement SME2 SQCVT, UQCVT, SQCVTU
- Index(es):