qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 07/20] target/loongarch: Add fixed point arithmetic instruction translation


From: Song Gao
Subject: Re: [PATCH 07/20] target/loongarch: Add fixed point arithmetic instruction translation
Date: Fri, 2 Jul 2021 16:15:24 +0800
User-agent: Mozilla/5.0 (X11; Linux mips64; rv:52.0) Gecko/20100101 Thunderbird/52.9.1

 
On 07/02/2021 04:31 AM, Philippe Mathieu-Daudé wrote:
> On 6/28/21 2:04 PM, Song Gao wrote:
>> This patch implements fixed point arithmetic instruction translation.
>>
>> This includes:
>> - ADD.{W/D}, SUB.{W/D}
>> - ADDI.{W/D}, ADDU16I.D
>> - ALSL.{W[U]/D}
>> - LU12I.W, LU32I.D, LU52I.D
>> - SLT[U], SLT[U]I
>> - PCADDI, PCADDU12I, PCADDU18I, PCALAU12I
>> - AND, OR, NOR, XOR, ANDN, ORN
>> - MUL.{W/D}, MULH.{W[U]/D[U]}
>> - MULW.D.W[U]
>> - DIV.{W[U]/D[U]}, MOD.{W[U]/D[U]}
>> - ANDI, ORI, XORI
>>
>> Signed-off-by: Song Gao <gaosong@loongson.cn>
>> ---
>>  target/loongarch/insns.decode |  89 ++++++++
>>  target/loongarch/instmap.h    |  53 +++++
>>  target/loongarch/trans.inc.c  | 367 +++++++++++++++++++++++++++++++++
>>  target/loongarch/translate.c  | 458 ++++++++++++++++++++++++++++++++++++++++++
>>  4 files changed, 967 insertions(+)
>>  create mode 100644 target/loongarch/insns.decode
>>  create mode 100644 target/loongarch/instmap.h
>>  create mode 100644 target/loongarch/trans.inc.c
> 
>> diff --git a/target/loongarch/instmap.h b/target/loongarch/instmap.h
>> new file mode 100644
>> index 0000000..8844333
>> --- /dev/null
>> +++ b/target/loongarch/instmap.h
>> @@ -0,0 +1,53 @@
>> +/*
>> + * LoongArch emulation for qemu: instruction opcode
>> + *
>> + * Copyright (c) 2021 Loongson Technology Corporation Limited
>> + *
>> + * SPDX-License-Identifier: LGPL-2.1+
>> + */
>> +
>> +#ifndef TARGET_LOONGARCH_INSTMAP_H
>> +#define TARGET_LOONGARCH_INSTMAP_H
>> +
>> +/* fixed point opcodes */
>> +enum {
>> +    LA_OPC_ADD_W     = (0x00020 << 15),
>> +    LA_OPC_ADD_D     = (0x00021 << 15),
>> +    LA_OPC_SUB_W     = (0x00022 << 15),
>> +    LA_OPC_SUB_D     = (0x00023 << 15),
>> +    LA_OPC_SLT       = (0x00024 << 15),
>> +    LA_OPC_SLTU      = (0x00025 << 15),
>> +    LA_OPC_NOR       = (0x00028 << 15),
>> +    LA_OPC_AND       = (0x00029 << 15),
>> +    LA_OPC_OR        = (0x0002A << 15),
>> +    LA_OPC_XOR       = (0x0002B << 15),
>> +    LA_OPC_MUL_W     = (0x00038 << 15),
>> +    LA_OPC_MULH_W    = (0x00039 << 15),
>> +    LA_OPC_MULH_WU   = (0x0003A << 15),
>> +    LA_OPC_MUL_D     = (0x0003B << 15),
>> +    LA_OPC_MULH_D    = (0x0003C << 15),
>> +    LA_OPC_MULH_DU   = (0x0003D << 15),
>> +    LA_OPC_DIV_W     = (0x00040 << 15),
>> +    LA_OPC_MOD_W     = (0x00041 << 15),
>> +    LA_OPC_DIV_WU    = (0x00042 << 15),
>> +    LA_OPC_MOD_WU    = (0x00043 << 15),
>> +    LA_OPC_DIV_D     = (0x00044 << 15),
>> +    LA_OPC_MOD_D     = (0x00045 << 15),
>> +    LA_OPC_DIV_DU    = (0x00046 << 15),
>> +    LA_OPC_MOD_DU    = (0x00047 << 15),
>> +
>> +    LA_OPC_ALSL_W    = (0x0002 << 17),
>> +    LA_OPC_ALSL_D    = (0x0016 << 17)
>> +
>> +};
>> +
>> +/* 12 bit immediate opcodes */
>> +enum {
>> +    LA_OPC_SLTI      = (0x008 << 22),
>> +    LA_OPC_SLTIU     = (0x009 << 22),
>> +    LA_OPC_ADDI_W    = (0x00A << 22),
>> +    LA_OPC_ADDI_D    = (0x00B << 22),
>> +    LA_OPC_ANDI      = (0x00D << 22),
>> +    LA_OPC_ORI       = (0x00E << 22),
>> +    LA_OPC_XORI      = (0x00F << 22)
>> +};
> 
> Opcode definitions shouldn't be needed when using decodetree.
> 
>> +/* loongarch mul and div */
>> +static void gen_loongarch_muldiv(DisasContext *ctx, int opc, int rd,
>> +                                 int rj, int rk)
>> +{
>> +    TCGv t0, t1;
>> +
>> +    if (rd == 0) {
>> +        /* Treat as NOP. */
>> +        return;
>> +    }
>> +
>> +    t0 = tcg_temp_new();
>> +    t1 = tcg_temp_new();
>> +
>> +    gen_load_gpr(t0, rj);
>> +    gen_load_gpr(t1, rk);
>> +
>> +    switch (opc) {
>> +    case LA_OPC_DIV_W:
>> +        {
>> +            TCGv t2 = tcg_temp_new();
>> +            TCGv t3 = tcg_temp_new();
>> +            tcg_gen_ext32s_tl(t0, t0);
>> +            tcg_gen_ext32s_tl(t1, t1);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
>> +            tcg_gen_and_tl(t2, t2, t3);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> +            tcg_gen_or_tl(t2, t2, t3);
>> +            tcg_gen_movi_tl(t3, 0);
>> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> +            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
>> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MOD_W:
>> +        {
>> +            TCGv t2 = tcg_temp_new();
>> +            TCGv t3 = tcg_temp_new();
>> +            tcg_gen_ext32s_tl(t0, t0);
>> +            tcg_gen_ext32s_tl(t1, t1);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, INT_MIN);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1);
>> +            tcg_gen_and_tl(t2, t2, t3);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> +            tcg_gen_or_tl(t2, t2, t3);
>> +            tcg_gen_movi_tl(t3, 0);
>> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> +            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
>> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_DIV_WU:
>> +        {
>> +            TCGv t2 = tcg_const_tl(0);
>> +            TCGv t3 = tcg_const_tl(1);
>> +            tcg_gen_ext32u_tl(t0, t0);
>> +            tcg_gen_ext32u_tl(t1, t1);
>> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> +            tcg_gen_divu_tl(cpu_gpr[rd], t0, t1);
>> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MOD_WU:
>> +        {
>> +            TCGv t2 = tcg_const_tl(0);
>> +            TCGv t3 = tcg_const_tl(1);
>> +            tcg_gen_ext32u_tl(t0, t0);
>> +            tcg_gen_ext32u_tl(t1, t1);
>> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> +            tcg_gen_remu_tl(cpu_gpr[rd], t0, t1);
>> +            tcg_gen_ext32s_tl(cpu_gpr[rd], cpu_gpr[rd]);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MUL_W:
>> +        {
>> +            TCGv_i32 t2 = tcg_temp_new_i32();
>> +            TCGv_i32 t3 = tcg_temp_new_i32();
>> +            tcg_gen_trunc_tl_i32(t2, t0);
>> +            tcg_gen_trunc_tl_i32(t3, t1);
>> +            tcg_gen_mul_i32(t2, t2, t3);
>> +            tcg_gen_ext_i32_tl(cpu_gpr[rd], t2);
>> +            tcg_temp_free_i32(t2);
>> +            tcg_temp_free_i32(t3);
>> +        }
>> +        break;
>> +    case LA_OPC_MULH_W:
>> +        {
>> +            TCGv_i32 t2 = tcg_temp_new_i32();
>> +            TCGv_i32 t3 = tcg_temp_new_i32();
>> +            tcg_gen_trunc_tl_i32(t2, t0);
>> +            tcg_gen_trunc_tl_i32(t3, t1);
>> +            tcg_gen_muls2_i32(t2, t3, t2, t3);
>> +            tcg_gen_ext_i32_tl(cpu_gpr[rd], t3);
>> +            tcg_temp_free_i32(t2);
>> +            tcg_temp_free_i32(t3);
>> +        }
>> +        break;
>> +    case LA_OPC_MULH_WU:
>> +        {
>> +            TCGv_i32 t2 = tcg_temp_new_i32();
>> +            TCGv_i32 t3 = tcg_temp_new_i32();
>> +            tcg_gen_trunc_tl_i32(t2, t0);
>> +            tcg_gen_trunc_tl_i32(t3, t1);
>> +            tcg_gen_mulu2_i32(t2, t3, t2, t3);
>> +            tcg_gen_ext_i32_tl(cpu_gpr[rd], t3);
>> +            tcg_temp_free_i32(t2);
>> +            tcg_temp_free_i32(t3);
>> +        }
>> +        break;
>> +    case LA_OPC_DIV_D:
>> +        {
>> +            TCGv t2 = tcg_temp_new();
>> +            TCGv t3 = tcg_temp_new();
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
>> +            tcg_gen_and_tl(t2, t2, t3);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> +            tcg_gen_or_tl(t2, t2, t3);
>> +            tcg_gen_movi_tl(t3, 0);
>> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> +            tcg_gen_div_tl(cpu_gpr[rd], t0, t1);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MOD_D:
>> +        {
>> +            TCGv t2 = tcg_temp_new();
>> +            TCGv t3 = tcg_temp_new();
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t2, t0, -1LL << 63);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, -1LL);
>> +            tcg_gen_and_tl(t2, t2, t3);
>> +            tcg_gen_setcondi_tl(TCG_COND_EQ, t3, t1, 0);
>> +            tcg_gen_or_tl(t2, t2, t3);
>> +            tcg_gen_movi_tl(t3, 0);
>> +            tcg_gen_movcond_tl(TCG_COND_NE, t1, t2, t3, t2, t1);
>> +            tcg_gen_rem_tl(cpu_gpr[rd], t0, t1);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_DIV_DU:
>> +        {
>> +            TCGv t2 = tcg_const_tl(0);
>> +            TCGv t3 = tcg_const_tl(1);
>> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> +            tcg_gen_divu_i64(cpu_gpr[rd], t0, t1);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MOD_DU:
>> +        {
>> +            TCGv t2 = tcg_const_tl(0);
>> +            TCGv t3 = tcg_const_tl(1);
>> +            tcg_gen_movcond_tl(TCG_COND_EQ, t1, t1, t2, t3, t1);
>> +            tcg_gen_remu_i64(cpu_gpr[rd], t0, t1);
>> +            tcg_temp_free(t3);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MUL_D:
>> +        tcg_gen_mul_i64(cpu_gpr[rd], t0, t1);
>> +        break;
>> +    case LA_OPC_MULH_D:
>> +        {
>> +            TCGv t2 = tcg_temp_new();
>> +            tcg_gen_muls2_i64(t2, cpu_gpr[rd], t0, t1);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    case LA_OPC_MULH_DU:
>> +        {
>> +            TCGv t2 = tcg_temp_new();
>> +            tcg_gen_mulu2_i64(t2, cpu_gpr[rd], t0, t1);
>> +            tcg_temp_free(t2);
>> +        }
>> +        break;
>> +    default:
>> +        generate_exception_end(ctx, EXCP_INE);
>> +        goto out;
>> +    }
>> + out:
>> +    tcg_temp_free(t0);
>> +    tcg_temp_free(t1);
>> +
>> +    return;
>> +}
> 
>> +static bool trans_mul_w(DisasContext *ctx, arg_mul_w *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MUL_W, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mulh_w(DisasContext *ctx, arg_mulh_w *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_W, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mulh_wu(DisasContext *ctx, arg_mulh_wu *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_WU, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mul_d(DisasContext *ctx, arg_mul_d *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MUL_D, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mulh_d(DisasContext *ctx, arg_mulh_d *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_D, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mulh_du(DisasContext *ctx, arg_mulh_du *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MULH_DU, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
> 
>> +static bool trans_div_w(DisasContext *ctx, arg_div_w *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_W, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mod_w(DisasContext *ctx, arg_mod_w *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_W, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_div_wu(DisasContext *ctx, arg_div_wu *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_WU, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mod_wu(DisasContext *ctx, arg_mod_wu *a)
>> +{
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_WU, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_div_d(DisasContext *ctx, arg_div_d *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_D, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mod_d(DisasContext *ctx, arg_mod_d *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_D, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_div_du(DisasContext *ctx, arg_div_du *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_DIV_DU, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
>> +
>> +static bool trans_mod_du(DisasContext *ctx, arg_mod_du *a)
>> +{
>> +    check_loongarch_64(ctx);
>> +    gen_loongarch_muldiv(ctx, LA_OPC_MOD_DU, a->rd, a->rj, a->rk);
>> +    return true;
>> +}
> 
> It seems you are missing what decodetree is for... You should inline
> each opcode code from gen_loongarch_muldiv in the opcode handler.
> 
> Don't take MIPS as an example =)
> 
Hi, Philippe,

I'm not sure I understand correctly.  Here is an example of my modification:

    static bool trans_xxx(DisasContext *ctx, arg_mul_w *a)
    {
        gen_loongarch_muldiv(ctx, a->rd, a->rj, a->rk);
        return true;
    }
    ...

    static void gen_loongarch_muldiv(DisasContext *ctx, int rd,
                                     int rj, int rk)
    {
        TCGv t0, t1;

        if (rd == 0) {
            /* Treat as NOP. */
            return;
        }

        t0 = tcg_temp_new();
        t1 = tcg_temp_new();

        gen_load_gpr(t0, rj);
        gen_load_gpr(t1, rk);

        switch (ctx->opcode) {
        case  xxx_opcode:
             /* translate  xxx  */
        ...

    }

Is that right?
    

Have a nice weekend!
Song Gao




reply via email to

[Prev in Thread] Current Thread [Next in Thread]