qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v5 09/60] target/riscv: vector single-width integer add and s


From: LIU Zhiwei
Subject: Re: [PATCH v5 09/60] target/riscv: vector single-width integer add and subtract
Date: Mon, 23 Mar 2020 16:10:06 +0800
User-agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.6.0



On 2020/3/14 13:25, Richard Henderson wrote:
On 3/12/20 7:58 AM, LIU Zhiwei wrote:
+    if (a->vm && s->vl_eq_vlmax) {                                 \
+        tcg_gen_gvec_##GVSUF(8 << s->sew, vreg_ofs(s, a->rd),      \
+            vreg_ofs(s, a->rs2), vreg_ofs(s, a->rs1),              \
+            MAXSZ(s), MAXSZ(s));                                   \
The first argument here should be just s->sew.
You should have see the assert fire:

    tcg_debug_assert(vece <= MO_64);

It would be nice to pull out the bulk of GEN_OPIVV_GVEC_TRANS as a function,
and pass in tcg_gen_gvec_* as a function pointer, and fns as a pointer.

In general, I prefer the functions that are generated by macros like this to
have exactly one executable statement -- the call to the helper that does all
of the work using the arguments provided.  That way a maximum number of lines
are available for stepping with the debugger.

+        data = "" VDATA, MLEN, s->mlen);                        \
+        data = "" VDATA, VM, a->vm);                            \
+        data = "" VDATA, LMUL, s->lmul);                        \
Why are these replicated in each trans_* function, and not done in opiv?_trans,
where the rest of the descriptor is created?

+/* OPIVX without GVEC IR */
+#define GEN_OPIVX_TRANS(NAME, CHECK)                                     \
+static bool trans_##NAME(DisasContext *s, arg_rmrr *a)                   \
+{                                                                        \
+    if (CHECK(s, a)) {                                                   \
+        uint32_t data = ""                                               \
+        static gen_helper_opivx const fns[4] = {                         \
+            gen_helper_##NAME##_b, gen_helper_##NAME##_h,                \
+            gen_helper_##NAME##_w, gen_helper_##NAME##_d,                \
+        };                                                               \
+                                                                         \
+        data = "" VDATA, MLEN, s->mlen);                   \
+        data = "" VDATA, VM, a->vm);                       \
+        data = "" VDATA, LMUL, s->lmul);                   \
+        return opivx_trans(a->rd, a->rs1, a->rs2, data, fns[s->sew], s); \
+    }                                                                    \
+    return false;                                                        \
+}
+
+GEN_OPIVX_TRANS(vrsub_vx, opivx_check)
Note that you *can* generate vector code for this,
you just have to write your own helpers.

E.g.

static void gen_vec_rsub8_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 a)
{
    tcg_gen_vec_sub8_i64(d, b, a);
}
// etc, reversing the arguments and passing on to sub.

static const GVecGen2s rsub_op[4] = {
    { .fni8 = tcg_gen_vec_rsub8_i64,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs8,
      .opt_opc = vecop_list_sub,
      .vece = MO_8 },
    { .fni8 = tcg_gen_vec_rsub16_i64,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs16,
      .opt_opc = vecop_list_sub,
      .vece = MO_16 },
    { .fni4 = tcg_gen_rsub_i32,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs32,
      .opt_opc = vecop_list_sub,
      .vece = MO_32 },
    { .fni8 = tcg_gen_rsub_i64,
      .fniv = tcg_gen_rsub_vec,
      .fno = gen_helper_gvec_rsubs64,
      .opt_opc = vecop_list_sub,
      .prefer_i64 = TCG_TARGET_REG_BITS == 64,
      .vece = MO_64 },
};

static void gen_gvec_rsubs(unsigned vece, uint32_t dofs,
    uint32_t aofs, TCGv_i64 c,
    uint32_t oprsz, uint32_t maxsz)
{
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2s(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
}

static void gen_gvec_rsubi(unsigned vece, uint32_t dofs,
    uint32_t aofs, int64_t c,
    uint32_t oprsz, uint32_t maxsz)
{
    tcg_debug_assert(vece <= MO_64);
    tcg_gen_gvec_2i(dofs, aofs, oprsz, maxsz, c, &rsub_op[vece]);
}
Hi Richard,

When I try to add GVEC IR rsubs,I find it is some difficult to keep it separate from tcg-runtime-gvec.c.

The .fno functions, e.g.,  gen_helper_gvec_rsubs8  need to be defined like

void HELPER(gvec_subs8)(void *d, void *a, uint64_t b, uint32_t desc)
{
    intptr_t oprsz = simd_oprsz(desc);
    vec8 vecb = (vec8)DUP16(b);
    intptr_t i;
    for (i = 0; i < oprsz; i += sizeof(vec8)) {
        *(vec8 *)(d + i) = vecb - *(vec8 *)(a + i);
    }
    clear_high(d, oprsz, desc);
}
   
The vec8 and DUP are defined in tcg-runtime-gvec.c. 

Should I declare them  in somewhere else, or just put HELPER(gvec_subs8) into tcg-runtime-gvec.c?

Zhiwei


      
+/* generate the helpers for OPIVV */
+#define GEN_VEXT_VV(NAME, ESZ, DSZ, CLEAR_FN)             \
+void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
+        void *vs2, CPURISCVState *env, uint32_t desc)     \
+{                                                         \
+    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
+    uint32_t mlen = vext_mlen(desc);                      \
+    uint32_t vm = vext_vm(desc);                          \
+    uint32_t vl = env->vl;                                \
+    uint32_t i;                                           \
+    for (i = 0; i < vl; i++) {                            \
+        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
+            continue;                                     \
+        }                                                 \
+        do_##NAME(vd, vs1, vs2, i);                       \
+    }                                                     \
+    if (i != 0) {                                         \
+        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
+    }                                                     \
+}
+
+GEN_VEXT_VV(vadd_vv_b, 1, 1, clearb)
+GEN_VEXT_VV(vadd_vv_h, 2, 2, clearh)
+GEN_VEXT_VV(vadd_vv_w, 4, 4, clearl)
+GEN_VEXT_VV(vadd_vv_d, 8, 8, clearq)
+GEN_VEXT_VV(vsub_vv_b, 1, 1, clearb)
+GEN_VEXT_VV(vsub_vv_h, 2, 2, clearh)
+GEN_VEXT_VV(vsub_vv_w, 4, 4, clearl)
+GEN_VEXT_VV(vsub_vv_d, 8, 8, clearq)
The body of GEN_VEXT_VV can be an inline function, calling the helper functions
that you generated above.

+/*
+ * If XLEN < SEW, the value from the x register is sign-extended to SEW bits.
+ * So (target_long)s1 is need. (T1)(target_long)s1 gives the real operator type.
+ * (TX1)(T1)(target_long)s1 expands the operator type of widen operations
+ * or narrow operations
+ */
+#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP)             \
+static void do_##NAME(void *vd, target_ulong s1, void *vs2, int i)  \
+{                                                                   \
+    TX2 s2 = *((T2 *)vs2 + HS2(i));                                 \
+    *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)(target_long)s1);         \
+}
Why not just make the type of s1 be target_long in the parameter?

+/* generate the helpers for instructions with one vector and one sclar */
+#define GEN_VEXT_VX(NAME, ESZ, DSZ, CLEAR_FN)             \
+void HELPER(NAME)(void *vd, void *v0, target_ulong s1,    \
+        void *vs2, CPURISCVState *env, uint32_t desc)     \
+{                                                         \
+    uint32_t vlmax = vext_maxsz(desc) / ESZ;              \
+    uint32_t mlen = vext_mlen(desc);                      \
+    uint32_t vm = vext_vm(desc);                          \
+    uint32_t vl = env->vl;                                \
+    uint32_t i;                                           \
+                                                          \
+    for (i = 0; i < vl; i++) {                            \
+        if (!vm && !vext_elem_mask(v0, mlen, i)) {        \
+            continue;                                     \
+        }                                                 \
+        do_##NAME(vd, s1, vs2, i);                        \
+    }                                                     \
+    if (i != 0) {                                         \
+        CLEAR_FN(vd, vl, vl * DSZ,  vlmax * DSZ);         \
+    }                                                     \
+}
Likewise an inline function.


r~


reply via email to

[Prev in Thread] Current Thread [Next in Thread]