[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reve
From: |
David Hildenbrand |
Subject: |
Re: [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements |
Date: |
Mon, 21 Mar 2022 12:45:06 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.6.2 |
On 08.03.22 02:53, Richard Henderson wrote:
> From: David Miller <dmiller423@gmail.com>
>
> Signed-off-by: David Miller <dmiller423@gmail.com>
> Message-Id: <20220307020327.3003-6-dmiller423@gmail.com>
> [rth: Split out elements (plural) from element (scalar)
> Use tcg little-endian memory ops, plus hswap and wswap.]
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/s390x/tcg/translate_vx.c.inc | 101 ++++++++++++++++++++++++++++
> target/s390x/tcg/insn-data.def | 4 ++
> 2 files changed, 105 insertions(+)
>
> diff --git a/target/s390x/tcg/translate_vx.c.inc
> b/target/s390x/tcg/translate_vx.c.inc
> index ac807122a3..9a82401d71 100644
> --- a/target/s390x/tcg/translate_vx.c.inc
> +++ b/target/s390x/tcg/translate_vx.c.inc
> @@ -457,6 +457,56 @@ static DisasJumpType op_vlrep(DisasContext *s, DisasOps
> *o)
> return DISAS_NEXT;
> }
>
> +static DisasJumpType op_vlbr(DisasContext *s, DisasOps *o)
> +{
> + const uint8_t es = get_field(s, m3);
> + TCGv_i64 t0, t1, tt;
> +
> + if (es < ES_16 || es > ES_128) {
> + gen_program_exception(s, PGM_SPECIFICATION);
> + return DISAS_NORETURN;
> + }
> +
> + t0 = tcg_temp_new_i64();
> + t1 = tcg_temp_new_i64();
> +
> + /* Begin with byte reversed doublewords... */
> + tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
> + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> + tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
> +
Would it make sense to just special-case ES_128, by loading them into
the proper t0/t1 right away?
if (es == ES_128) {
tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
goto write;
}
/* Begin with byte reversed doublewords... */
tcg_gen_qemu_ld_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
tcg_gen_qemu_ld_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
/*
* For 16 and 32-bit elements, the doubleword bswap also reversed
* the order of the elements. Perform a larger order swap to put
* them back into place.
*/
switch (es) {
...
}
write:
write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
> + /*
> + * For 16 and 32-bit elements, the doubleword bswap also reversed
> + * the order of the elements. Perform a larger order swap to put
> + * them back into place. For the 128-bit "element", finish the
> + * bswap by swapping the doublewords.
> + */
> + switch (es) {
> + case ES_16:
> + tcg_gen_hswap_i64(t0, t0);
> + tcg_gen_hswap_i64(t1, t1);
> + break;
> + case ES_32:
> + tcg_gen_wswap_i64(t0, t0);
> + tcg_gen_wswap_i64(t1, t1);
> + break;
> + case ES_64:
> + break;
> + case ES_128:
> + tt = t0, t0 = t1, t1 = tt;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + write_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
> + write_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
> +
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + return DISAS_NEXT;
> +}
> +
> static DisasJumpType op_vle(DisasContext *s, DisasOps *o)
> {
> const uint8_t es = s->insn->data;
> @@ -998,6 +1048,57 @@ static DisasJumpType op_vst(DisasContext *s, DisasOps
> *o)
> return DISAS_NEXT;
> }
>
> +static DisasJumpType op_vstbr(DisasContext *s, DisasOps *o)
> +{
> + const uint8_t es = get_field(s, m3);
> + TCGv_i64 t0, t1, tt;
> +
> + if (es < ES_16 || es > ES_128) {
> + gen_program_exception(s, PGM_SPECIFICATION);
> + return DISAS_NORETURN;
> + }
> +
> + /* Probe write access before actually modifying memory */
> + gen_helper_probe_write_access(cpu_env, o->addr1, tcg_constant_i64(16));
> +
> + t0 = tcg_temp_new_i64();
> + t1 = tcg_temp_new_i64();
> + read_vec_element_i64(t0, get_field(s, v1), 0, ES_64);
> + read_vec_element_i64(t1, get_field(s, v1), 1, ES_64);
Ditto — possibly just special-case on MO_128 directly.
> +
> + /*
> + * For 16 and 32-bit elements, the doubleword bswap below will
> + * reverse the order of the elements. Perform a larger order
> + * swap to put them back into place. For the 128-bit "element",
> + * finish the bswap by swapping the doublewords.
> + */
> + switch (es) {
> + case MO_16:
> + tcg_gen_hswap_i64(t0, t0);
> + tcg_gen_hswap_i64(t1, t1);
> + break;
> + case MO_32:
> + tcg_gen_wswap_i64(t0, t0);
> + tcg_gen_wswap_i64(t1, t1);
> + break;
> + case MO_64:
> + break;
> + case MO_128:
> + tt = t0, t0 = t1, t1 = tt;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + tcg_gen_qemu_st_i64(t0, o->addr1, get_mem_index(s), MO_LEUQ);
> + gen_addi_and_wrap_i64(s, o->addr1, o->addr1, 8);
> + tcg_gen_qemu_st_i64(t1, o->addr1, get_mem_index(s), MO_LEUQ);
> +
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + return DISAS_NEXT;
> +}
> +
> static DisasJumpType op_vste(DisasContext *s, DisasOps *o)
> {
> const uint8_t es = s->insn->data;
> diff --git a/target/s390x/tcg/insn-data.def b/target/s390x/tcg/insn-data.def
> index b524541a7d..ee6e1dc9e5 100644
> --- a/target/s390x/tcg/insn-data.def
> +++ b/target/s390x/tcg/insn-data.def
> @@ -1027,6 +1027,8 @@
> F(0xe756, VLR, VRR_a, V, 0, 0, 0, 0, vlr, 0, IF_VEC)
> /* VECTOR LOAD AND REPLICATE */
> F(0xe705, VLREP, VRX, V, la2, 0, 0, 0, vlrep, 0, IF_VEC)
> +/* VECTOR LOAD BYTE REVERSED ELEMENTS */
> + F(0xe606, VLBR, VRX, VE2, la2, 0, 0, 0, vlbr, 0, IF_VEC)
> /* VECTOR LOAD ELEMENT */
> E(0xe700, VLEB, VRX, V, la2, 0, 0, 0, vle, 0, ES_8, IF_VEC)
> E(0xe701, VLEH, VRX, V, la2, 0, 0, 0, vle, 0, ES_16, IF_VEC)
> @@ -1079,6 +1081,8 @@
> F(0xe75f, VSEG, VRR_a, V, 0, 0, 0, 0, vseg, 0, IF_VEC)
> /* VECTOR STORE */
> F(0xe70e, VST, VRX, V, la2, 0, 0, 0, vst, 0, IF_VEC)
> +/* VECTOR STORE BYTE REVERSED ELEMENTS */
> + F(0xe60e, VSTBR, VRX, VE2, la2, 0, 0, 0, vstbr, 0, IF_VEC)
> /* VECTOR STORE ELEMENT */
> E(0xe708, VSTEB, VRX, V, la2, 0, 0, 0, vste, 0, ES_8, IF_VEC)
> E(0xe709, VSTEH, VRX, V, la2, 0, 0, 0, vste, 0, ES_16, IF_VEC)
--
Thanks,
David / dhildenb
- Re: [PATCH v3 02/11] target/s390x: vxeh2: vector convert short/32b, (continued)
- [PATCH v3 03/11] target/s390x: vxeh2: vector string search, Richard Henderson, 2022/03/07
- [PATCH v3 04/11] target/s390x: vxeh2: Update for changes to vector shifts, Richard Henderson, 2022/03/07
- [PATCH v3 05/11] target/s390x: vxeh2: vector shift double by bit, Richard Henderson, 2022/03/07
- [PATCH v3 06/11] target/s390x: vxeh2: vector {load, store} elements reversed, Richard Henderson, 2022/03/07
- [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements, Richard Henderson, 2022/03/07
- Re: [PATCH v3 07/11] target/s390x: vxeh2: vector {load, store} byte reversed elements,
David Hildenbrand <=
- [PATCH v3 08/11] target/s390x: vxeh2: vector {load, store} byte reversed element, Richard Henderson, 2022/03/07
- [PATCH v3 09/11] target/s390x: add S390_FEAT_VECTOR_ENH2 to cpu max, Richard Henderson, 2022/03/07
- [PATCH v3 10/11] tests/tcg/s390x: Tests for Vector Enhancements Facility 2, Richard Henderson, 2022/03/07
- [PATCH v3 11/11] target/s390x: Fix writeback to v1 in helper_vstl, Richard Henderson, 2022/03/07
- Re: [PATCH v3 00/11] s390x/tcg: Implement Vector-Enhancements Facility 2, David Miller, 2022/03/07