qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v5 24/60] target/riscv: vector single-width averaging add and subtract


From: LIU Zhiwei
Subject: Re: [PATCH v5 24/60] target/riscv: vector single-width averaging add and subtract
Date: Sun, 15 Mar 2020 07:12:54 +0800
User-agent: Mozilla/5.0 (Windows NT 10.0; WOW64; rv:68.0) Gecko/20100101 Thunderbird/68.5.0



On 2020/3/14 16:25, Richard Henderson wrote:
On 3/14/20 1:14 AM, Richard Henderson wrote:
I think you should have 4 versions of aadd8, for each of the rounding modes,

+RVVCALL(OPIVV2_ENV, vaadd_vv_b, OP_SSS_B, H1, H1, H1, aadd8)
then use this, or something like it, to define 4 functions containing main
loops, which will get the helper above inlined.
Alternately, a set of inlines, where a (constant) vxrm is passed down from 
above.

I am not sure I have understood this correctly. As I read it, the code should be modified like this:

/*
 * Averaging add of two signed bytes, "rnu" variant.
 *
 * The operands are widened to 16 bits so the sum cannot overflow, the sum
 * is halved, and the bit shifted out (bit 0 of the sum) is added back, so
 * an exact half is rounded upwards.
 *
 * env is not used by this variant.
 * NOTE(review): presumably env is kept so that all four rounding variants
 * share the callback signature expected by OPIVV2_ENV -- confirm.
 *
 * The halving relies on >> of a negative value being an arithmetic shift,
 * which ISO C leaves implementation-defined.
 */
static inline int8_t aadd8_rnu(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)a + (int16_t)b;
    uint8_t dropped_bit = sum & 0x1;

    return (sum >> 1) + dropped_bit;
}

/*
 * Averaging add of two signed bytes, "rne" variant.
 *
 * The operands are widened to 16 bits, summed and halved. One is added to
 * the halved value only when the two low bits of the sum are both set:
 * bit 0 set means the discarded part is exactly one half, and bit 1 set
 * means the truncated result would be odd, so the tie goes to the even
 * neighbour.
 *
 * env is not used by this variant.
 * The halving relies on >> of a negative value being an arithmetic shift,
 * which ISO C leaves implementation-defined.
 */
static inline int8_t aadd8_rne(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)a + (int16_t)b;
    int16_t low_bits = sum & 0x3;
    uint8_t bump = (low_bits == 0x3);

    return (sum >> 1) + bump;
}

/*
 * Averaging add of two signed bytes, "rdn" variant.
 *
 * The operands are widened to 16 bits, summed and halved; the bit shifted
 * out is simply discarded, with no rounding increment.
 *
 * env is not used by this variant.
 * The halving relies on >> of a negative value being an arithmetic shift,
 * which ISO C leaves implementation-defined.
 */
static inline int8_t aadd8_rdn(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)a + (int16_t)b;

    return sum >> 1;
}

/*
 * Averaging add of two signed bytes, "rod" variant.
 *
 * The operands are widened to 16 bits, summed and halved. One is added to
 * the halved value only when the low two bits of the sum are 0b01: bit 0
 * set means the result is inexact, and bit 1 clear means the truncated
 * result is even, so the increment makes it odd.
 *
 * env is not used by this variant.
 * The halving relies on >> of a negative value being an arithmetic shift,
 * which ISO C leaves implementation-defined.
 */
static inline int8_t aadd8_rod(CPURISCVState *env, int8_t a, int8_t b)
{
    int16_t sum = (int16_t)a + (int16_t)b;
    int16_t low_bits = sum & 0x3;
    uint8_t bump = (low_bits == 0x1);

    return (sum >> 1) + bump;
}

/*
 * One RVVCALL instantiation per aadd8 rounding variant (rnu, rne, rdn,
 * rod), each naming a vaadd_vv_b_<mode> operation built on the matching
 * aadd8_<mode> function.
 *
 * NOTE(review): RVVCALL, OPIVV2_ENV, OP_SSS_B and H1 are defined outside
 * this excerpt. Confirm that the symbols they generate are exactly
 * vaadd_vv_b_<mode>, since GEN_VEXT_VV_ENV builds its dispatch table from
 * NAME##_rnu, NAME##_rne, NAME##_rdn and NAME##_rod.
 */
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rnu, OP_SSS_B, H1, H1, H1, aadd8_rnu)
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rne, OP_SSS_B, H1, H1, H1, aadd8_rne)
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rdn, OP_SSS_B, H1, H1, H1, aadd8_rdn)
RVVCALL(OPIVV2_ENV, vaadd_vv_b_rod, OP_SSS_B, H1, H1, H1, aadd8_rod)

/*
 * Shared element loop for vector-vector operations whose per-element
 * callback also receives env.
 *
 * vd, v0, vs1, vs2: register operands, passed through untouched to fn,
 *                   vext_elem_mask and clearfn.
 * env:              CPU state; supplies vl and is forwarded to fn.
 * desc:             descriptor decoded by vext_maxsz, vext_mlen, vext_vm.
 * esz:              divisor applied to vext_maxsz(desc) to obtain vlmax.
 * dsz:              multiplier applied to vl and vlmax for clearfn.
 * fn:               per-element operation, called as fn(vd, vs1, vs2, i, env).
 * clearfn:          called once after the loop when vl is non-zero.
 *
 * NOTE(review): esz/dsz look like source/destination element sizes in
 * bytes and clearfn looks like a tail-clearing routine -- confirm against
 * their definitions, which are outside this excerpt.
 */
void do_vext_vv_env(void *vd, void *v0, void *vs1,
                    void *vs2, CPURISCVState *env, uint32_t desc,
                    uint32_t esz, uint32_t dsz,
                    opivv2_fn *fn, clear_fn *clearfn)
{
    uint32_t vlmax = vext_maxsz(desc) / esz;
    uint32_t mlen = vext_mlen(desc);
    uint32_t vm = vext_vm(desc);
    uint32_t vl = env->vl;
    uint32_t i;

    for (i = 0; i < vl; i++) {
        /* When vm is zero, elements whose mask test fails are skipped. */
        if (!vm && !vext_elem_mask(v0, mlen, i)) {
            continue;
        }
        fn(vd, vs1, vs2, i, env);
    }

    /*
     * The loop never breaks early, so i == vl here; testing vl is the same
     * as the original "i != 0" check. The call must go through the clearfn
     * parameter: clear_fn is the function *type*, not a callable object.
     */
    if (vl != 0) {
        clearfn(vd, vl, vl * dsz, vlmax * dsz);
    }
}

/*
 * Define HELPER(NAME): pick one of four per-element functions by
 * env->vxrm and run it through do_vext_vv_env.
 *
 * NAME:     base name; NAME##_rnu, NAME##_rne, NAME##_rdn and NAME##_rod
 *           must already be declared with type opivv2_fn.
 * ESZ, DSZ: forwarded as the esz/dsz arguments of do_vext_vv_env.
 * CLEAR_FN: forwarded as the clearfn argument of do_vext_vv_env.
 *
 * The table order (rnu, rne, rdn, rod) is the index order used for
 * env->vxrm.
 * NOTE(review): env->vxrm is used as an index without a range check;
 * confirm it can only hold 0..3.
 *
 * The helper returns void, so do_vext_vv_env is called as a plain
 * statement rather than through "return".
 */
#define GEN_VEXT_VV_ENV(NAME, ESZ, DSZ, CLEAR_FN)         \
void HELPER(NAME)(void *vd, void *v0, void *vs1,          \
                  void *vs2, CPURISCVState *env,          \
                  uint32_t desc)                          \
{                                                         \
    static opivv2_fn * const fns[4] = {                   \
        NAME##_rnu, NAME##_rne,                           \
        NAME##_rdn, NAME##_rod                            \
    };                                                    \
    do_vext_vv_env(vd, v0, vs1, vs2, env, desc,           \
                   ESZ, DSZ, fns[env->vxrm],              \
                   CLEAR_FN);                             \
}

Is this what you meant?

Zhiwei

Then use a final outermost wrapper to select one of the 4 functions based on
env->vxrm.
The outermost wrapper could look like

     switch (env->vxrm) {
     case 0:  somefunc(some, args, 0); break;
     case 1:  somefunc(some, args, 1); break;
     case 2:  somefunc(some, args, 2); break;
     default: somefunc(some, args, 3); break;
     }

so that somefunc (and its subroutines) are expanded with a constant, and we
switch on that constant at the outermost level.


r~




reply via email to

[Prev in Thread] Current Thread [Next in Thread]