Re: [Qemu-devel] [PATCH v6 05/26] tcg: Add generic vector expanders

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v6 05/26] tcg: Add generic vector expanders

From:	Kirill Batuzov
Subject:	Re: [Qemu-devel] [PATCH v6 05/26] tcg: Add generic vector expanders
Date:	Wed, 6 Dec 2017 13:21:16 +0300 (MSK)
User-agent:	Alpine 2.20 (DEB 67 2015-01-07)

On Tue, 21 Nov 2017, Richard Henderson wrote:

> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> new file mode 100644
> index 0000000000..925c293f9c
> --- /dev/null
> +++ b/tcg/tcg-op-gvec.c

<...>

> +/* Set OPRSZ bytes at DOFS to replications of IN or IN_C.  */
> +static void do_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
> +                       uint32_t maxsz, TCGv_i32 in, uint32_t in_c,
> +                       void (*ool)(TCGv_ptr, TCGv_i32, TCGv_i32))
> +{
> +    TCGv_vec t_vec;
> +    uint32_t i;
> +
> +    if (TCG_TARGET_HAS_v256 && check_size_impl(oprsz, 32)) {
> +        t_vec = tcg_temp_new_vec(TCG_TYPE_V256);
> +    } else if (TCG_TARGET_HAS_v128 && check_size_impl(oprsz, 16)) {
> +        t_vec = tcg_temp_new_vec(TCG_TYPE_V128);
> +    } else if (TCG_TARGET_HAS_v64 && check_size_impl(oprsz, 8)) {
> +        t_vec = tcg_temp_new_vec(TCG_TYPE_V64);
> +    } else  {
> +        TCGv_i32 t_i32 = in ? in : tcg_const_i32(in_c);
> +
> +        if (check_size_impl(oprsz, 4)) {
> +            for (i = 0; i < oprsz; i += 4) {
> +                tcg_gen_st_i32(t_i32, cpu_env, dofs + i);
> +            }

You are ignoring VECE argument here and always duplicate a 32-bit value.
For this to be correct for smaller VECE, you need to prepare this 32-bit
value beforehand. The current code produces incorrect expansion:

IN: 
0x004005e8:  d2824682  movz     x2, #0x1234
0x004005ec:  4e010c41  dup      v1.16b, w2

OP:

 ---- 00000000004005e8 0000000000000000 0000000000000000
 movi_i64 x2,$0x1234

 ---- 00000000004005ec 0000000000000000 0000000000000000
 st_i32 x2,env,$0x8b0
 st_i32 x2,env,$0x8b4
 st_i32 x2,env,$0x8b8
 st_i32 x2,env,$0x8bc



> +            if (in == NULL) {
> +                tcg_temp_free_i32(t_i32);
> +            }
> +            goto done;
> +        } else {
> +            TCGv_ptr a0 = tcg_temp_new_ptr();
> +            TCGv_i32 desc = tcg_const_i32(simd_desc(oprsz, maxsz, 0));
> +
> +            tcg_gen_addi_ptr(a0, cpu_env, dofs);
> +            ool(a0, desc, t_i32);
> +
> +            tcg_temp_free_ptr(a0);
> +            tcg_temp_free_i32(desc);
> +            if (in == NULL) {
> +                tcg_temp_free_i32(t_i32);
> +            }
> +            return;
> +        }
> +    }
> +
> +    if (in) {
> +        tcg_gen_dup_i32_vec(vece, t_vec, in);
> +    } else {
> +        tcg_gen_dup32i_vec(t_vec, in_c);

May be it'll be better to call this function tcg_gen_dupi_i32_vec?
This will go along with current naming conventions like tcg_gen_addi_i32.

> +    }
> +
> +    i = 0;
> +    if (TCG_TARGET_HAS_v256) {
> +        for (; i + 32 <= oprsz; i += 32) {
> +            tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V256);
> +        }
> +    }
> +    if (TCG_TARGET_HAS_v128) {
> +        for (; i + 16 <= oprsz; i += 16) {
> +            tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V128);
> +        }
> +    }
> +    if (TCG_TARGET_HAS_v64) {
> +        for (; i < oprsz; i += 8) {
> +            tcg_gen_stl_vec(t_vec, cpu_env, dofs + i, TCG_TYPE_V64);
> +        }
> +    }
> +
> + done:
> +    tcg_debug_assert(i == oprsz);
> +    if (i < maxsz) {
> +        expand_clr(dofs + i, maxsz - i);
> +    }
> +}
> +

<...>
(the following is context)

> +void tcg_gen_gvec_dup_i32(unsigned vece, uint32_t dofs, uint32_t oprsz,
> +                          uint32_t maxsz, TCGv_i32 in)
> +{
> +    typedef void dup_fn(TCGv_ptr, TCGv_i32, TCGv_i32);
> +    static dup_fn * const fns[3] = {
> +        gen_helper_gvec_dup8,
> +        gen_helper_gvec_dup16,
> +        gen_helper_gvec_dup32
> +    };
> +
> +    check_size_align(oprsz, maxsz, dofs);
> +    tcg_debug_assert(vece <= MO_32);
> +    do_dup_i32(vece, dofs, oprsz, maxsz, in, 0, fns[vece]);
> +}
> +
> +void tcg_gen_gvec_dup_i64(unsigned vece, uint32_t dofs, uint32_t oprsz,
> +                          uint32_t maxsz, TCGv_i64 in)
> +{
> +    check_size_align(oprsz, maxsz, dofs);
> +    tcg_debug_assert(vece <= MO_64);
> +    if (vece <= MO_32) {
> +        /* This works for both host register sizes.  */
> +        tcg_gen_gvec_dup_i32(vece, dofs, oprsz, maxsz, (TCGv_i32)in);
> +    } else {
> +        do_dup_i64(vece, dofs, oprsz, maxsz, in, 0);
> +    }
> +}
> +
> +void tcg_gen_gvec_dup_mem(unsigned vece, uint32_t dofs, uint32_t aofs,
> +                          uint32_t oprsz, uint32_t maxsz)
> +{
> +    tcg_debug_assert(vece <= MO_64);
> +    if (vece <= MO_32) {
> +        TCGv_i32 in = tcg_temp_new_i32();
> +        switch (vece) {
> +        case MO_8:
> +            tcg_gen_ld8u_i32(in, cpu_env, aofs);
> +            break;
> +        case MO_16:
> +            tcg_gen_ld16u_i32(in, cpu_env, aofs);
> +            break;
> +        case MO_32:
> +            tcg_gen_ld_i32(in, cpu_env, aofs);
> +            break;
> +        }
> +        tcg_gen_gvec_dup_i32(vece, dofs, oprsz, maxsz, in);
> +        tcg_temp_free_i32(in);
> +    } else {
> +        TCGv_i64 in = tcg_temp_new_i64();
> +        tcg_gen_ld_i64(in, cpu_env, aofs);
> +        tcg_gen_gvec_dup_i64(MO_64, dofs, oprsz, maxsz, in);
> +        tcg_temp_free_i64(in);
> +    }
> +}
> +
> +void tcg_gen_gvec_dup64i(uint32_t dofs, uint32_t oprsz,
> +                         uint32_t maxsz, uint64_t x)
> +{
> +    check_size_align(oprsz, maxsz, dofs);
> +    do_dup_i64(MO_64, dofs, oprsz, maxsz, NULL, x);
> +}
> +
> +void tcg_gen_gvec_dup32i(uint32_t dofs, uint32_t oprsz,
> +                         uint32_t maxsz, uint32_t x)
> +{
> +    if (TCG_TARGET_REG_BITS == 64) {
> +        do_dup_i64(MO_64, dofs, oprsz, maxsz, NULL, deposit64(x, 32, 32, x));
> +    } else {
> +        do_dup_i32(MO_32, dofs, oprsz, maxsz, NULL, x, 
> gen_helper_gvec_dup32);
> +    }
> +}
> +
> +void tcg_gen_gvec_dup16i(uint32_t dofs, uint32_t oprsz,
> +                         uint32_t maxsz, uint16_t x)
> +{
> +    tcg_gen_gvec_dup32i(dofs, oprsz, maxsz, 0x00010001 * x);
> +}
> +
> +void tcg_gen_gvec_dup8i(uint32_t dofs, uint32_t oprsz,
> +                         uint32_t maxsz, uint8_t x)
> +{
> +    tcg_gen_gvec_dup32i(dofs, oprsz, maxsz, 0x01010101 * x);
> +}
> +

-- 
Kirill

[Prev in Thread]

Current Thread

[Next in Thread]

Re: [Qemu-devel] [PATCH v6 05/26] tcg: Add generic vector expanders, Kirill Batuzov <=
- Re: [Qemu-devel] [PATCH v6 05/26] tcg: Add generic vector expanders, Richard Henderson, 2017/12/08

Prev by Date: Re: [Qemu-devel] [PATCH 2.12] spapr: replace numa_get_node() with lookup in pc-dimm list
Next by Date: Re: [Qemu-devel] [PATCH v2 0/5] target/m68k: implement 680x0 FPU (part 3)
Previous by thread: [Qemu-devel] [PATCH v2 0/4] Rewrite TCP packet comparison in colo
Next by thread: Re: [Qemu-devel] [PATCH v6 05/26] tcg: Add generic vector expanders
Index(es):
- Date
- Thread