[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 06/21] target/arm: Convert Neon VCVT f16/f32 insns to decodetree
From: Peter Maydell
Subject: [PATCH 06/21] target/arm: Convert Neon VCVT f16/f32 insns to decodetree
Date: Tue, 16 Jun 2020 18:08:29 +0100
Convert the Neon insns in the 2-reg-misc group which are
VCVT between f32 and f16 to decodetree.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/neon-dp.decode | 3 ++
target/arm/translate-neon.inc.c | 96 +++++++++++++++++++++++++++++++++
target/arm/translate.c | 65 ++--------------------
3 files changed, 102 insertions(+), 62 deletions(-)
diff --git a/target/arm/neon-dp.decode b/target/arm/neon-dp.decode
index 0102aa7254b..8174f2f92f4 100644
--- a/target/arm/neon-dp.decode
+++ b/target/arm/neon-dp.decode
@@ -461,6 +461,9 @@ Vimm_1r 1111 001 . 1 . 000 ... .... cmode:4 0 . op:1 1 .... @1reg_imm
VQMOVN_U 1111 001 11 . 11 .. 10 .... 0 0101 1 . 0 .... @2misc_q0
VSHLL 1111 001 11 . 11 .. 10 .... 0 0110 0 . 0 .... @2misc_q0
+
+ VCVT_F16_F32 1111 001 11 . 11 .. 10 .... 0 1100 0 . 0 .... @2misc_q0
+ VCVT_F32_F16 1111 001 11 . 11 .. 10 .... 0 1110 0 . 0 .... @2misc_q0
]
# Subgroup for size != 0b11
diff --git a/target/arm/translate-neon.inc.c b/target/arm/translate-neon.inc.c
index 78239ec1c1b..d37be597cf4 100644
--- a/target/arm/translate-neon.inc.c
+++ b/target/arm/translate-neon.inc.c
@@ -3354,3 +3354,99 @@ static bool trans_VSHLL(DisasContext *s, arg_2misc *a)
tcg_temp_free_i32(rm1);
return true;
}
+
+static bool trans_VCVT_F16_F32(DisasContext *s, arg_2misc *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp, tmp, tmp2, tmp3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_fp16_spconv, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vm & 1) || (a->size != 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = get_fpstatus_ptr(true);
+ ahp = get_ahp_flag();
+ tmp = neon_load_reg(a->vm, 0);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+ tmp2 = neon_load_reg(a->vm, 1);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
+ tcg_gen_shli_i32(tmp2, tmp2, 16);
+ tcg_gen_or_i32(tmp2, tmp2, tmp);
+ tcg_temp_free_i32(tmp);
+ tmp = neon_load_reg(a->vm, 2);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
+ tmp3 = neon_load_reg(a->vm, 3);
+ neon_store_reg(a->vd, 0, tmp2);
+ gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
+ tcg_gen_shli_i32(tmp3, tmp3, 16);
+ tcg_gen_or_i32(tmp3, tmp3, tmp);
+ neon_store_reg(a->vd, 1, tmp3);
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(ahp);
+ tcg_temp_free_ptr(fpst);
+
+ return true;
+}
+
+static bool trans_VCVT_F32_F16(DisasContext *s, arg_2misc *a)
+{
+ TCGv_ptr fpst;
+ TCGv_i32 ahp, tmp, tmp2, tmp3;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_NEON) ||
+ !dc_isar_feature(aa32_fp16_spconv, s)) {
+ return false;
+ }
+
+ /* UNDEF accesses to D16-D31 if they don't exist. */
+ if (!dc_isar_feature(aa32_simd_r32, s) &&
+ ((a->vd | a->vm) & 0x10)) {
+ return false;
+ }
+
+ if ((a->vd & 1) || (a->size != 1)) {
+ return false;
+ }
+
+ if (!vfp_access_check(s)) {
+ return true;
+ }
+
+ fpst = get_fpstatus_ptr(true);
+ ahp = get_ahp_flag();
+ tmp3 = tcg_temp_new_i32();
+ tmp = neon_load_reg(a->vm, 0);
+ tmp2 = neon_load_reg(a->vm, 1);
+ tcg_gen_ext16u_i32(tmp3, tmp);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+ neon_store_reg(a->vd, 0, tmp3);
+ tcg_gen_shri_i32(tmp, tmp, 16);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
+ neon_store_reg(a->vd, 1, tmp);
+ tmp3 = tcg_temp_new_i32();
+ tcg_gen_ext16u_i32(tmp3, tmp2);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
+ neon_store_reg(a->vd, 2, tmp3);
+ tcg_gen_shri_i32(tmp2, tmp2, 16);
+ gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
+ neon_store_reg(a->vd, 3, tmp2);
+ tcg_temp_free_i32(ahp);
+ tcg_temp_free_ptr(fpst);
+
+ return true;
+}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 94d5e34fff4..1ea09695546 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -4860,7 +4860,7 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
int pass;
int u;
int vec_size;
- TCGv_i32 tmp, tmp2, tmp3;
+ TCGv_i32 tmp, tmp2;
if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
return 1;
@@ -4927,6 +4927,8 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
case NEON_2RM_VZIP:
case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
case NEON_2RM_VSHLL:
+ case NEON_2RM_VCVT_F16_F32:
+ case NEON_2RM_VCVT_F32_F16:
/* handled by decodetree */
return 1;
case NEON_2RM_VTRN:
@@ -4942,67 +4944,6 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
goto elementwise;
}
break;
- case NEON_2RM_VCVT_F16_F32:
- {
- TCGv_ptr fpst;
- TCGv_i32 ahp;
-
- if (!dc_isar_feature(aa32_fp16_spconv, s) ||
- q || (rm & 1)) {
- return 1;
- }
- fpst = get_fpstatus_ptr(true);
- ahp = get_ahp_flag();
- tmp = neon_load_reg(rm, 0);
- gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
- tmp2 = neon_load_reg(rm, 1);
- gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
- tcg_gen_shli_i32(tmp2, tmp2, 16);
- tcg_gen_or_i32(tmp2, tmp2, tmp);
- tcg_temp_free_i32(tmp);
- tmp = neon_load_reg(rm, 2);
- gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
- tmp3 = neon_load_reg(rm, 3);
- neon_store_reg(rd, 0, tmp2);
- gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
- tcg_gen_shli_i32(tmp3, tmp3, 16);
- tcg_gen_or_i32(tmp3, tmp3, tmp);
- neon_store_reg(rd, 1, tmp3);
- tcg_temp_free_i32(tmp);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
- case NEON_2RM_VCVT_F32_F16:
- {
- TCGv_ptr fpst;
- TCGv_i32 ahp;
- if (!dc_isar_feature(aa32_fp16_spconv, s) ||
- q || (rd & 1)) {
- return 1;
- }
- fpst = get_fpstatus_ptr(true);
- ahp = get_ahp_flag();
- tmp3 = tcg_temp_new_i32();
- tmp = neon_load_reg(rm, 0);
- tmp2 = neon_load_reg(rm, 1);
- tcg_gen_ext16u_i32(tmp3, tmp);
- gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
- neon_store_reg(rd, 0, tmp3);
- tcg_gen_shri_i32(tmp, tmp, 16);
- gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
- neon_store_reg(rd, 1, tmp);
- tmp3 = tcg_temp_new_i32();
- tcg_gen_ext16u_i32(tmp3, tmp2);
- gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
- neon_store_reg(rd, 2, tmp3);
- tcg_gen_shri_i32(tmp2, tmp2, 16);
- gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
- neon_store_reg(rd, 3, tmp2);
- tcg_temp_free_i32(ahp);
- tcg_temp_free_ptr(fpst);
- break;
- }
case NEON_2RM_AESE: case NEON_2RM_AESMC:
if (!dc_isar_feature(aa32_aes, s) || ((rm | rd) & 1)) {
return 1;
--
2.20.1
- [PATCH 01/21] target/arm: Convert Neon 2-reg-misc VREV64 to decodetree, (continued)
- [PATCH 01/21] target/arm: Convert Neon 2-reg-misc VREV64 to decodetree, Peter Maydell, 2020/06/16
- [PATCH 02/21] target/arm: Convert Neon 2-reg-misc pairwise ops to decodetree, Peter Maydell, 2020/06/16
- [PATCH 03/21] target/arm: Convert VZIP, VUZP to decodetree, Peter Maydell, 2020/06/16
- [PATCH 04/21] target/arm: Convert Neon narrowing moves to decodetree, Peter Maydell, 2020/06/16
- [PATCH 05/21] target/arm: Convert Neon 2-reg-misc VSHLL to decodetree, Peter Maydell, 2020/06/16
- [PATCH 06/21] target/arm: Convert Neon VCVT f16/f32 insns to decodetree, Peter Maydell <=
- [PATCH 07/21] target/arm: Convert vectorised 2-reg-misc Neon ops to decodetree, Peter Maydell, 2020/06/16
- [PATCH 08/21] target/arm: Convert Neon 2-reg-misc crypto operations to decodetree, Peter Maydell, 2020/06/16
- [PATCH 09/21] target/arm: Rename NeonGenOneOpFn to NeonGenOne64OpFn, Peter Maydell, 2020/06/16
- [PATCH 10/21] target/arm: Fix capitalization in NeonGenTwo{Single, Double}OPFn typedefs, Peter Maydell, 2020/06/16
- [PATCH 12/21] target/arm: Convert Neon 2-reg-misc VREV32 and VREV16 to decodetree, Peter Maydell, 2020/06/16