qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH v1 1/5] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL


From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH v1 1/5] s390x/tcg: Implement VECTOR FIND ANY ELEMENT EQUAL
Date: Thu, 23 May 2019 08:27:28 -0400
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.6.1

On 5/23/19 3:50 AM, David Hildenbrand wrote:
> /*
>  * Returns the number of bits composing one element.
>  */
> static uint8_t get_element_bits(uint8_t es)
> {
>     return (1 << es) * BITS_PER_BYTE;
> }
> 
> /*
>  * Returns the bitmask for a single element.
>  */
> static uint64_t get_single_element_mask(uint8_t es)
> {
>     return -1ull >> (64 - get_element_bits(es));
> }
> 
> /*
>  * Returns the bitmask for a single element (excluding the MSB).
>  */
> static uint64_t get_single_element_lsbs_mask(uint8_t es)
> {
>     return -1ull >> (65 - get_element_bits(es));
> }
> 
> /*
>  * Returns the bitmasks for multiple elements (excluding the MSBs).
>  */
> static uint64_t get_element_lsbs_mask(uint8_t es)
> {
>     return dup_const(es, get_single_element_lsbs_mask(es));
> }
> 
> static int vfae(void *v1, const void *v2, const void *v3, bool in,
>                 bool rt, bool zs, uint8_t es)
> {
>     const uint64_t mask = get_element_lsbs_mask(es);
>     const int bits = get_element_bits(es);
>     uint64_t a0, a1, b0, b1, e0, e1, t0, t1, z0, z1;
>     uint64_t first_zero = 16;
>     uint64_t first_equal;
>     int i;
> 
>     a0 = s390_vec_read_element64(v2, 0);
>     a1 = s390_vec_read_element64(v2, 1);
>     b0 = s390_vec_read_element64(v3, 0);
>     b1 = s390_vec_read_element64(v3, 1);
>     e0 = 0;
>     e1 = 0;
>     /* compare against equality with every other element */
>     for (i = 0; i < 64; i += bits) {
>         t0 = i ? rol64(b0, i) : b0;
>         t1 = i ? rol64(b1, i) : b1;
>         e0 |= zero_search(a0 ^ t0, mask);
>         e0 |= zero_search(a0 ^ t1, mask);
>         e1 |= zero_search(a1 ^ t0, mask);
>         e1 |= zero_search(a1 ^ t1, mask);
>     }

I don't see that this is doing what you want.  You're shifting one element of B
down, but not broadcasting it so that it is compared against every element of A.

I'd expect something like

        t0 = dup_const(es, b0 >> i);
        t1 = dup_const(es, b1 >> i);

(I also don't see what rol64 is getting you that a plain right shift doesn't.)


>     /* invert the result if requested - invert only the MSBs */
>     if (in) {
>         e0 = ~e0 & ~mask;
>         e1 = ~e1 & ~mask;
>     }
>     first_equal = match_index(e0, e1);
> 
>     if (zs) {
>         z0 = zero_search(a0, mask);
>         z1 = zero_search(a1, mask);
>         first_zero = match_index(z0, z1);
>     }
> 
>     if (rt) {
>         e0 = (e0 >> (bits - 1)) * get_single_element_mask(es);
>         e1 = (e1 >> (bits - 1)) * get_single_element_mask(es);
>         s390_vec_write_element64(v1, 0, e0);
>         s390_vec_write_element64(v1, 1, e1);
>     } else {
>         s390_vec_write_element64(v1, 0, MIN(first_equal, first_zero));
>         s390_vec_write_element64(v1, 1, 0);
>     }
> 
>     if (first_zero == 16 && first_equal == 16) {
>         return 3; /* no match */
>     } else if (first_zero == 16) {
>         return 1; /* matching elements, no match for zero */
>     } else if (first_equal < first_zero) {
>         return 2; /* matching elements before match for zero */
>     }
>     return 0; /* match for zero */
> }

The rest of this looks good.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]