[PATCH v2 042/100] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN

qemu-arm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 042/100] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN

From:	Richard Henderson
Subject:	[PATCH v2 042/100] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN
Date:	Wed, 17 Jun 2020 21:25:46 -0700

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/arm/helper-sve.h    | 16 +++++++
 target/arm/sve.decode      |  4 ++
 target/arm/sve_helper.c    | 24 ++++++++++
 target/arm/translate-sve.c | 98 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 3a7d7ff66d..371a1b02e0 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -1972,6 +1972,22 @@ DEF_HELPER_FLAGS_3(sve2_rshrnt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve2_rshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 DEF_HELPER_FLAGS_3(sve2_rshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 7cc4b6cc43..cade628cfd 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1288,6 +1288,10 @@ SQXTUNT         01000101 .. 1 ..... 010 101 ..... .....  @rd_rn_tszimm_shl
 ## SVE2 bitwise shift right narrow
 
 # Bit 23 == 0 is handled by esz > 0 in the translator.
+SQSHRUNB        01000101 .. 1 ..... 00 0000 ..... .....  @rd_rn_tszimm_shr
+SQSHRUNT        01000101 .. 1 ..... 00 0001 ..... .....  @rd_rn_tszimm_shr
+SQRSHRUNB       01000101 .. 1 ..... 00 0010 ..... .....  @rd_rn_tszimm_shr
+SQRSHRUNT       01000101 .. 1 ..... 00 0011 ..... .....  @rd_rn_tszimm_shr
 SHRNB           01000101 .. 1 ..... 00 0100 ..... .....  @rd_rn_tszimm_shr
 SHRNT           01000101 .. 1 ..... 00 0101 ..... .....  @rd_rn_tszimm_shr
 RSHRNB          01000101 .. 1 ..... 00 0110 ..... .....  @rd_rn_tszimm_shr
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 9b3d0d2ddd..01c717e27e 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1926,6 +1926,30 @@ DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1, DO_RSHR)
 DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, DO_RSHR)
 DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t,     , H1_4, DO_RSHR)
 
+#define DO_SQSHRUN_H(x, sh) MIN(MAX(x >> sh, 0), UINT8_MAX)
+#define DO_SQSHRUN_S(x, sh) MIN(MAX(x >> sh, 0), UINT16_MAX)
+#define DO_SQSHRUN_D(x, sh) MIN(MAX(x >> sh, 0), UINT32_MAX)
+
+DO_SHRNB(sve2_sqshrunb_h, int16_t, uint8_t, DO_SQSHRUN_H)
+DO_SHRNB(sve2_sqshrunb_s, int32_t, uint16_t, DO_SQSHRUN_S)
+DO_SHRNB(sve2_sqshrunb_d, int64_t, uint32_t, DO_SQSHRUN_D)
+
+DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H)
+DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S)
+DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t,     , H1_4, DO_SQSHRUN_D)
+
+#define DO_SQRSHRUN_H(x, sh) MIN(MAX(DO_RSHR(x, sh), 0), UINT8_MAX)
+#define DO_SQRSHRUN_S(x, sh) MIN(MAX(DO_RSHR(x, sh), 0), UINT16_MAX)
+#define DO_SQRSHRUN_D(x, sh) MIN(MAX(DO_RSHR(x, sh), 0), UINT32_MAX)
+
+DO_SHRNB(sve2_sqrshrunb_h, int16_t, uint8_t, DO_SQRSHRUN_H)
+DO_SHRNB(sve2_sqrshrunb_s, int32_t, uint16_t, DO_SQRSHRUN_S)
+DO_SHRNB(sve2_sqrshrunb_d, int64_t, uint32_t, DO_SQRSHRUN_D)
+
+DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H)
+DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S)
+DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t,     , H1_4, DO_SQRSHRUN_D)
+
 #undef DO_SHRNB
 #undef DO_SHRNT
 
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 81e44bb818..7e89d7b9a8 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -6424,6 +6424,104 @@ static bool trans_RSHRNT(DisasContext *s, arg_rri_esz *a)
     return do_sve2_shr_narrow(s, a, ops);
 }
 
+static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
+                             TCGv_vec n, int64_t shr)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+    int halfbits = 4 << vece;
+
+    tcg_gen_sari_vec(vece, n, n, shr);
+    tcg_gen_dupi_vec(vece, t, 0);
+    tcg_gen_smax_vec(vece, n, n, t);
+    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+    tcg_gen_umin_vec(vece, d, n, t);
+    tcg_temp_free_vec(t);
+}
+
+static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
+{
+    static const TCGOpcode vec_list[] = {
+        INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+    };
+    static const GVecGen2i ops[3] = {
+        { .fniv = gen_sqshrunb_vec,
+          .opt_opc = vec_list,
+          .fno = gen_helper_sve2_sqshrunb_h,
+          .vece = MO_16 },
+        { .fniv = gen_sqshrunb_vec,
+          .opt_opc = vec_list,
+          .fno = gen_helper_sve2_sqshrunb_s,
+          .vece = MO_32 },
+        { .fniv = gen_sqshrunb_vec,
+          .opt_opc = vec_list,
+          .fno = gen_helper_sve2_sqshrunb_d,
+          .vece = MO_64 },
+    };
+    return do_sve2_shr_narrow(s, a, ops);
+}
+
+static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
+                             TCGv_vec n, int64_t shr)
+{
+    TCGv_vec t = tcg_temp_new_vec_matching(d);
+    int halfbits = 4 << vece;
+
+    tcg_gen_sari_vec(vece, n, n, shr);
+    tcg_gen_dupi_vec(vece, t, 0);
+    tcg_gen_smax_vec(vece, n, n, t);
+    tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+    tcg_gen_umin_vec(vece, n, n, t);
+    tcg_gen_shli_vec(vece, n, n, halfbits);
+    tcg_gen_bitsel_vec(vece, d, t, d, n);
+    tcg_temp_free_vec(t);
+}
+
+static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
+{
+    static const TCGOpcode vec_list[] = {
+        INDEX_op_shli_vec, INDEX_op_sari_vec,
+        INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+    };
+    static const GVecGen2i ops[3] = {
+        { .fniv = gen_sqshrunt_vec,
+          .opt_opc = vec_list,
+          .load_dest = true,
+          .fno = gen_helper_sve2_sqshrunt_h,
+          .vece = MO_16 },
+        { .fniv = gen_sqshrunt_vec,
+          .opt_opc = vec_list,
+          .load_dest = true,
+          .fno = gen_helper_sve2_sqshrunt_s,
+          .vece = MO_32 },
+        { .fniv = gen_sqshrunt_vec,
+          .opt_opc = vec_list,
+          .load_dest = true,
+          .fno = gen_helper_sve2_sqshrunt_d,
+          .vece = MO_64 },
+    };
+    return do_sve2_shr_narrow(s, a, ops);
+}
+
+static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
+{
+    static const GVecGen2i ops[3] = {
+        { .fno = gen_helper_sve2_sqrshrunb_h },
+        { .fno = gen_helper_sve2_sqrshrunb_s },
+        { .fno = gen_helper_sve2_sqrshrunb_d },
+    };
+    return do_sve2_shr_narrow(s, a, ops);
+}
+
+static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
+{
+    static const GVecGen2i ops[3] = {
+        { .fno = gen_helper_sve2_sqrshrunt_h },
+        { .fno = gen_helper_sve2_sqrshrunt_s },
+        { .fno = gen_helper_sve2_sqrshrunt_d },
+    };
+    return do_sve2_shr_narrow(s, a, ops);
+}
+
 static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
                             gen_helper_gvec_4_ptr *fn)
 {
-- 
2.25.1

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v2 032/100] target/arm: Implement SVE2 bitwise permute, (continued)
- [PATCH v2 032/100] target/arm: Implement SVE2 bitwise permute, Richard Henderson, 2020/06/18
- [PATCH v2 035/100] target/arm: Implement SVE2 integer add/subtract long with carry, Richard Henderson, 2020/06/18
- [PATCH v2 036/100] target/arm: Implement SVE2 bitwise shift right and accumulate, Richard Henderson, 2020/06/18
- [PATCH v2 037/100] target/arm: Implement SVE2 bitwise shift and insert, Richard Henderson, 2020/06/18
- [PATCH v2 033/100] target/arm: Implement SVE2 complex integer add, Richard Henderson, 2020/06/18
- [PATCH v2 034/100] target/arm: Implement SVE2 integer absolute difference and accumulate long, Richard Henderson, 2020/06/18
- [PATCH v2 038/100] target/arm: Implement SVE2 integer absolute difference and accumulate, Richard Henderson, 2020/06/18
- [PATCH v2 039/100] target/arm: Implement SVE2 saturating extract narrow, Richard Henderson, 2020/06/18
- [PATCH v2 040/100] target/arm: Implement SVE2 floating-point pairwise, Richard Henderson, 2020/06/18
- [PATCH v2 041/100] target/arm: Implement SVE2 SHRN, RSHRN, Richard Henderson, 2020/06/18
- [PATCH v2 042/100] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN, Richard Henderson <=
- [PATCH v2 043/100] target/arm: Implement SVE2 UQSHRN, UQRSHRN, Richard Henderson, 2020/06/18
- [PATCH v2 044/100] target/arm: Implement SVE2 SQSHRN, SQRSHRN, Richard Henderson, 2020/06/18
- [PATCH v2 045/100] target/arm: Implement SVE2 WHILEGT, WHILEGE, WHILEHI, WHILEHS, Richard Henderson, 2020/06/18
- [PATCH v2 046/100] target/arm: Implement SVE2 WHILERW, WHILEWR, Richard Henderson, 2020/06/18
- [PATCH v2 048/100] target/arm: Implement SVE2 MATCH, NMATCH, Richard Henderson, 2020/06/18
- [PATCH v2 047/100] target/arm: Implement SVE2 bitwise ternary operations, Richard Henderson, 2020/06/18
- [PATCH v2 049/100] target/arm: Implement SVE2 saturating multiply-add long, Richard Henderson, 2020/06/18
- [PATCH v2 050/100] target/arm: Generalize inl_qrdmlah_* helper functions, Richard Henderson, 2020/06/18
- [PATCH v2 051/100] target/arm: Implement SVE2 saturating multiply-add high, Richard Henderson, 2020/06/18
- [PATCH v2 052/100] target/arm: Implement SVE2 integer multiply-add long, Richard Henderson, 2020/06/18

Prev by Date: [PATCH v2 041/100] target/arm: Implement SVE2 SHRN, RSHRN
Next by Date: [PATCH v2 043/100] target/arm: Implement SVE2 UQSHRN, UQRSHRN
Previous by thread: [PATCH v2 041/100] target/arm: Implement SVE2 SHRN, RSHRN
Next by thread: [PATCH v2 043/100] target/arm: Implement SVE2 UQSHRN, UQRSHRN
Index(es):
- Date
- Thread