[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 07/20] target/loongarch: Add fixed point arithmetic instruction translation
From: |
Song Gao |
Subject: |
Re: [PATCH 07/20] target/loongarch: Add fixed point arithmetic instruction translation |
Date: |
Fri, 2 Jul 2021 16:15:24 +0800 |
User-agent: |
Mozilla/5.0 (X11; Linux mips64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1 |
On 07/02/2021 04:31 AM, Philippe Mathieu-Daudé wrote:
> On 6/28/21 2:04 PM, Song Gao wrote:
>> This patch implements fixed point arithmetic instruction translation.
>>
>> This includes:
>> - ADD.{W/D}, SUB.{W/D}
>> - ADDI.{W/D}, ADDU16ID
>> - ALSL.{W[U]/D}
>> - LU12I.W, LU32I.D LU52I.D
>> - SLT[U], SLT[U]I
>> - PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
>> - AND, OR, NOR, XOR, ANDN, ORN
>> - MUL.{W/D}, MULH.{W[U]/D[U]}
>> - MULW.D.W[U]
>> - DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
>> - ANDI, ORI, XORI
>>
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>> target/loongarch/insns.decode | 89 ++++++++
>> target/loongarch/instmap.h | 53 +++++
>> target/loongarch/trans.inc.c | 367 +++++++++++++++++++++++++++++++++
>> target/loongarch/translate.c | 458 ++++++++++++++++++++++++++++++++++++++++++
>> 4 files changed, 967 insertions(+)
>> create mode 100644 target/loongarch/insns.decode
>> create mode 100644 target/loongarch/instmap.h
>> create mode 100644 target/loongarch/trans.inc.c
>
>> diff --git a/target/loongarch/instmap.h b/target/loongarch/instmap.h
>> new file mode 100644
>> index 0000000..8844333
>> --- /dev/null
>> +++ b/target/loongarch/instmap.h
>> @@ -0,0 +1,53 @@
>> +/*
>> + * LoongArch emulation for qemu: instruction opcode
>> + *
>> + * Copyright (c) 2021 Loongson Technology Corporation Limited
>> + *
>> + * SPDX-License-Identifier: LGPL-2.1+
>> + */
>> +
>> +#ifndef TARGET_LOONGARCH_INSTMAP_H
>> +#define TARGET_LOONGARCH_INSTMAP_H
>> +
>> +/* fixed point opcodes */
>> +enum {
>> + LA_OPC_ADD_W = (0x00020 << 15),
>> + LA_OPC_ADD_D = (0x00021 << 15),
>> + LA_OPC_SUB_W = (0x00022 << 15),
>> + LA_OPC_SUB_D = (0x00023 << 15),
>> + LA_OPC_SLT = (0x00024 << 15),
>> + LA_OPC_SLTU = (0x00025 << 15),
>> + LA_OPC_NOR = (0x00028 << 15),
>> + LA_OPC_AND = (0x00029 << 15),
>> + LA_OPC_OR = (0x0002A << 15),
>> + LA_OPC_XOR = (0x0002B << 15),
>> + LA_OPC_MUL_W = (0x00038 << 15),
>> + LA_OPC_MULH_W = (0x00039 << 15),
>> + LA_OPC_MULH_WU = (0x0003A << 15),
>> + LA_OPC_MUL_D = (0x0003B << 15),
>> + LA_OPC_MULH_D = (0x0003C << 15),
>> + LA_OPC_MULH_DU = (0x0003D << 15),
>> + LA_OPC_DIV_W = (0x00040 << 15),
>> + LA_OPC_MOD_W = (0x00041 << 15),
>> + LA_OPC_DIV_WU = (0x00042 << 15),
>> + LA_OPC_MOD_WU = (0x00043 << 15),
>> + LA_OPC_DIV_D = (0x00044 << 15),
>> + LA_OPC_MOD_D = (0x00045 << 15),
>> + LA_OPC_DIV_DU = (0x00046 << 15),
>> + LA_OPC_MOD_DU = (0x00047 << 15),
>> +
>> + LA_OPC_ALSL_W = (0x0002 << 17),
>> + LA_OPC_ALSL_D = (0x0016 << 17)
>> +
>> +};
>> +
>> +/* 12 bit immediate opcodes */
>> +enum {
>> + LA_OPC_SLTI = (0x008 << 22),
>> + LA_OPC_SLTIU = (0x009 << 22),
>> + LA_OPC_ADDI_W = (0x00A << 22),
>> + LA_OPC_ADDI_D = (0x00B << 22),
>> + LA_OPC_ANDI = (0x00D << 22),
>> + LA_OPC_ORI = (0x00E << 22),
>> + LA_OPC_XORI = (0x00F << 22)
>> +};
>
> Opcode definitions shouldn't be needed when using decodetree.
>
>> +/* loongarch mul and div */
>> +static void gen_loongarch_muldiv(DisasContext *ctx, int opc, int rd,
>> + int rj, int rk)
>> +{
>> + TCGv t0, t1;
>> +
>> + if (rd == 0) {
>> + /* Treat as NOP. */
>> + return;
>> + }
>> +
>> + t0 = tcg_temp_new();
>> + t1 = tcg_temp_new();
>> +
>> + gen_load_gpr(t0, rj);
>> + gen_load_gpr(t1, rk);
>> +
>> + switch (opc) {
>> + case LA_OPC_DIV_W:
>> + {
>> + TCGv t2 = tcg_temp_new();
>> + TCGv t3 = tcg_temp_new();
>> + tcg_gen_ext32s_tl(t0, t0);
>> + tcg_gen_ext32s_tl(t1, t1);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
>> + tcg_gen_and_tl(t2, t2, t3);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> + tcg_gen_or_tl(t2, t2, t3);
>> + tcg_gen_movi_tl(t3, 0);
>> + tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> + tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
>> + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MOD_W:
>> + {
>> + TCGv t2 = tcg_temp_new();
>> + TCGv t3 = tcg_temp_new();
>> + tcg_gen_ext32s_tl(t0, t0);
>> + tcg_gen_ext32s_tl(t1, t1);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
>> + tcg_gen_and_tl(t2, t2, t3);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> + tcg_gen_or_tl(t2, t2, t3);
>> + tcg_gen_movi_tl(t3, 0);
>> + tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> + tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
>> + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_DIV_WU:
>> + {
>> + TCGv t2 = tcg_const_tl(0);
>> + TCGv t3 = tcg_const_tl(1);
>> + tcg_gen_ext32u_tl(t0, t0);
>> + tcg_gen_ext32u_tl(t1, t1);
>> + tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> + tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
>> + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MOD_WU:
>> + {
>> + TCGv t2 = tcg_const_tl(0);
>> + TCGv t3 = tcg_const_tl(1);
>> + tcg_gen_ext32u_tl(t0, t0);
>> + tcg_gen_ext32u_tl(t1, t1);
>> + tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> + tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
>> + tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MUL_W:
>> + {
>> + TCGv_i32 t2 = tcg_temp_new_i32();
>> + TCGv_i32 t3 = tcg_temp_new_i32();
>> + tcg_gen_trunc_tl_i32(t2, t0);
>> + tcg_gen_trunc_tl_i32(t3, t1);
>> + tcg_gen_mul_i32(t2, t2, t3);
>> + tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
>> + tcg_temp_free_i32(t2);
>> + tcg_temp_free_i32(t3);
>> + }
>> + break;
>> + case LA_OPC_MULH_W:
>> + {
>> + TCGv_i32 t2 = tcg_temp_new_i32();
>> + TCGv_i32 t3 = tcg_temp_new_i32();
>> + tcg_gen_trunc_tl_i32(t2, t0);
>> + tcg_gen_trunc_tl_i32(t3, t1);
>> + tcg_gen_muls2_i32(t2, t3, t2, t3);
>> + tcg_gen_ext_i32_tl(cpu_gpr[rd], t3);
>> + tcg_temp_free_i32(t2);
>> + tcg_temp_free_i32(t3);
>> + }
>> + break;
>> + case LA_OPC_MULH_WU:
>> + {
>> + TCGv_i32 t2 = tcg_temp_new_i32();
>> + TCGv_i32 t3 = tcg_temp_new_i32();
>> + tcg_gen_trunc_tl_i32(t2, t0);
>> + tcg_gen_trunc_tl_i32(t3, t1);
>> + tcg_gen_mulu2_i32(t2, t3, t2, t3);
>> + tcg_gen_ext_i32_tl(cpu_gpr[rd], t3);
>> + tcg_temp_free_i32(t2);
>> + tcg_temp_free_i32(t3);
>> + }
>> + break;
>> + case LA_OPC_DIV_D:
>> + {
>> + TCGv t2 = tcg_temp_new();
>> + TCGv t3 = tcg_temp_new();
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
>> + tcg_gen_and_tl(t2, t2, t3);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> + tcg_gen_or_tl(t2, t2, t3);
>> + tcg_gen_movi_tl(t3, 0);
>> + tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> + tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MOD_D:
>> + {
>> + TCGv t2 = tcg_temp_new();
>> + TCGv t3 = tcg_temp_new();
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
>> + tcg_gen_and_tl(t2, t2, t3);
>> + tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> + tcg_gen_or_tl(t2, t2, t3);
>> + tcg_gen_movi_tl(t3, 0);
>> + tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> + tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_DIV_DU:
>> + {
>> + TCGv t2 = tcg_const_tl(0);
>> + TCGv t3 = tcg_const_tl(1);
>> + tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> + tcg_gen_divu_i64(cpu_gpr[rd], t0, t1);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MOD_DU:
>> + {
>> + TCGv t2 = tcg_const_tl(0);
>> + TCGv t3 = tcg_const_tl(1);
>> + tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> + tcg_gen_remu_i64(cpu_gpr[rd], t0, t1);
>> + tcg_temp_free(t3);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MUL_D:
>> + tcg_gen_mul_i64(cpu_gpr[rd], t0, t1);
>> + break;
>> + case LA_OPC_MULH_D:
>> + {
>> + TCGv t2 = tcg_temp_new();
>> + tcg_gen_muls2_i64(t2, cpu_gpr[rd], t0, t1);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + case LA_OPC_MULH_DU:
>> + {
>> + TCGv t2 = tcg_temp_new();
>> + tcg_gen_mulu2_i64(t2, cpu_gpr[rd], t0, t1);
>> + tcg_temp_free(t2);
>> + }
>> + break;
>> + default:
>> + generate_exception_end(ctx, EXCP_INE);
>> + goto out;
>> + }
>> + out:
>> + tcg_temp_free(t0);
>> + tcg_temp_free(t1);
>> +
>> + return;
>> +}
>
>> +static bool trans_mul_w(DisasContext *ctx, arg_mul_w *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_MUL_W, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mulh_w(DisasContext *ctx, arg_mulh_w *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_MULH_W, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mulh_wu(DisasContext *ctx, arg_mulh_wu *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_MULH_WU, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mul_d(DisasContext *ctx, arg_mul_d *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_MUL_D, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mulh_d(DisasContext *ctx, arg_mulh_d *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_MULH_D, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mulh_du(DisasContext *ctx, arg_mulh_du *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_MULH_DU, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>
>> +static bool trans_div_w(DisasContext *ctx, arg_div_w *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_DIV_W, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mod_w(DisasContext *ctx, arg_mod_w *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_MOD_W, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_div_wu(DisasContext *ctx, arg_div_wu *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_DIV_WU, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mod_wu(DisasContext *ctx, arg_mod_wu *a)
>> +{
>> + gen_loongarch_muldiv(ctx, LA_OPC_MOD_WU, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_div_d(DisasContext *ctx, arg_div_d *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_DIV_D, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mod_d(DisasContext *ctx, arg_mod_d *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_MOD_D, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_div_du(DisasContext *ctx, arg_div_du *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_DIV_DU, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>> +
>> +static bool trans_mod_du(DisasContext *ctx, arg_mod_du *a)
>> +{
>> + check_loongarch_64(ctx);
>> + gen_loongarch_muldiv(ctx, LA_OPC_MOD_DU, a->rd, a->rj, a->rk);
>> + return true;
>> +}
>
> It seems you are missing what decodetree is for... You should inline
> each opcode code from gen_loongarch_muldiv in the opcode handler.
>
> Don't take MIPS as an example =)
>
Hi, Philippe,
I'm not sure I understand correctly. Here is an example of my modification:
static bool trans_xxx(DisasContext *ctx, arg_mul_w *a)
{
gen_loongarch_muldiv(ctx, a->rd, a->rj, a->rk);
return true;
}
...
static void gen_loongarch_muldiv(DisasContext *ctx, int rd,
int rj, int rk)
{
TCGv t0, t1;
if (rd == 0) {
/* Treat as NOP. */
return;
}
t0 = tcg_temp_new();
t1 = tcg_temp_new();
gen_load_gpr(t0, rj);
gen_load_gpr(t1, rk);
switch (ctx->opcode) {
case xxx_opcode:
/* translate xxx */
...
}
Is that right?
Have a nice weekend!
Song Gao