qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH RFC v4 01/12] target/rx: TCG translation


From: Yoshinori Sato
Subject: Re: [Qemu-devel] [PATCH RFC v4 01/12] target/rx: TCG translation
Date: Mon, 25 Mar 2019 18:38:46 +0900
User-agent: Wanderlust/2.15.9 (Almost Unreal) SEMI-EPG/1.14.7 (Harue) FLIM/1.14.9 (Gojō) APEL/10.8 EasyPG/1.0.0 Emacs/25.1 (x86_64-pc-linux-gnu) MULE/6.0 (HANACHIRUSATO)

On Thu, 21 Mar 2019 14:40:11 +0900,
Richard Henderson wrote:
> 
> On 3/20/19 7:15 AM, Yoshinori Sato wrote:
> > +/* [ri, rb] */
> > +static inline void rx_gen_regindex(DisasContext *ctx, TCGv mem,
> 
> Please drop all of the inline markers.
> Let the compiler choose which are profitable to inline.
>

OK.

> 
> > +/* load source operand */
> > +static inline TCGv rx_load_source(DisasContext *ctx, TCGv mem,
> > +                                  int ld, int mi, int rs)
> > +{
> > +    TCGv addr;
> > +    if (ld < 3) {
> > +        switch (mi) {
> > +        case 0: /* dsp[rs].b */
> > +        case 1: /* dsp[rs].w */
> > +        case 2: /* dsp[rs].l */
> > +            addr = rx_index_addr(ctx, mem, ld, mi, rs);
> > +            rx_gen_ld(mi, mem, addr);
> > +            break;
> > +        case 3: /* dsp[rs].uw */
> > +        case 4: /* dsp[rs].ub */
> > +            addr = rx_index_addr(ctx, mem, ld, 4 - mi, rs);
> > +            rx_gen_ldu(4 - mi, mem, addr);
> 
> Note that the mi_to_mop helper applies here.
> 
>     TCGMemOp mop = mi_to_mop(mi);
>     addr = rx_index_addr(ctx, mem, ld, mop & MO_SIZE, rs);
>     tcg_gen_ld_i32(mem, addr, 0, mop);

OK.

> 
> > +        tcg_gen_not_i32(dc->temp, dc->temp);
> > +        tcg_gen_and_i32(dc->temp, dc->temp, cpu_psw_z);
> 
>     tcg_gen_andc_i32(dc->temp, cpu_psw_z, dc->temp);

OK.

> > +typedef void (*ldstfn)(unsigned int sz, TCGv val, TCGv mem);
> > +static inline void MOV_prrp(ldstfn ldst, int ad, int sz, int rs, int rd)
> > +{
> > +    TCGv temp;
> > +    if (rs == rd) {
> > +        temp = tcg_temp_new();
> > +        tcg_gen_mov_i32(temp, cpu_regs[rs]);
> > +    } else {
> > +        temp = cpu_regs[rs];
> > +    }
> > +    if (ad == 1) {
> > +        tcg_gen_subi_i32(cpu_regs[rd], cpu_regs[rd], 1 << sz);
> 
> Does this really decrement first?
> This doesn't match the behaviour described for PUSH wrt SP...
> 
> > +    }
> > +    ldst(sz, temp, cpu_regs[rd]);
> > +    if (ad == 0 && rs != rd) {
> > +        tcg_gen_addi_i32(cpu_regs[rd], cpu_regs[rd], 1 << sz);
> > +    }
> > +    if (rs == rd) {
> > +        tcg_temp_free(temp);
> > +    }
> > +}
> ...
> > +/* pop rd */
> > +static bool trans_POP(DisasContext *ctx, arg_POP *a)
> > +{
> > +    rx_gen_ld(MO_32, cpu_regs[a->rd], cpu_regs[0]);
> > +    if (a->rd != 0) {
> > +        tcg_gen_addi_i32(cpu_regs[0], cpu_regs[0], 4);
> > +    }
> > +    return true;
> 
> Perhaps as
> 
>   MOV_prrp(ctx, 0, 2, 0, a->rd);
> 
> ?

Yes. The pre-decrement and post-increment addressing have been rewritten.
And POP rd is converted to "mov [r0+], rd".
> 
> > +static inline TCGMemOp mi_to_mop(unsigned mi)
> > +{
> > +    static const TCGMemOp mop[5] = { MO_SB, MO_SW, MO_UL, MO_UW, MO_UB };
> > +    tcg_debug_assert(mi < 5);
> > +    return mop[mi];
> > +}
> > +
> > +/* xchg dsp[rs].<mi>,rd */
> > +static bool trans_XCHG_mr(DisasContext *ctx, arg_XCHG_mr *a)
> > +{
> > +    TCGv mem, addr;
> > +    mem = tcg_temp_new();
> > +    switch (a->mi) {
> > +    case 0: /* dsp[rs].b */
> > +    case 1: /* dsp[rs].w */
> > +    case 2: /* dsp[rs].l */
> > +        addr = rx_index_addr(ctx, mem, a->ld, a->mi, a->rs);
> > +        break;
> > +    case 3: /* dsp[rs].uw */
> > +    case 4: /* dsp[rs].ub */
> > +        addr = rx_index_addr(ctx, mem, a->ld, 4 - a->mi, a->rs);
> 
>   addr = rx_index_addr(ctx, mem, a->ld, mop & MO_SIZE, a->rs);

OK.

> > +typedef void (*logicfn)(TCGv ret, TCGv arg1, TCGv arg2);
> > +static inline void gen_logic_op(logicfn opr, TCGv ret, TCGv arg1, TCGv 
> > arg2)
> > +{
> > +    opr(cpu_psw_z, arg1, arg2);
> > +    tcg_gen_mov_i32(cpu_psw_s, cpu_psw_z);
> > +    if (ret) {
> > +        tcg_gen_mov_i32(ret, cpu_psw_z);
> > +    }
> > +}
> 
> If you here define
> 
> static void rx_gen_and_i32(TCGv ret, TCGv arg1, TCGv arg2)
> {
>     gen_logic_op(tcg_gen_and_i32, ret, arg1, arg2);
> }
> 
> static void rx_gen_tst_i32(TCGv ret, TCGv arg1, TCGv arg2)
> {
>     gen_logic_op(tcg_gen_and_i32, NULL, arg1, arg2);
> }
> 
> etc for OR and XOR, then suddenly we have a consistent interface for all of 
> the
> arithmetic and logic operations -- add, sub, etc included.
> 
> Which means that we can then do
> 
> static bool rx_gen_irr(DisasContext *ctx, arg_rri *a, logicfn opr)
> {
>     TCGv_i32 imm = tcg_const_i32(a->imm);
>     opr(cpu_regs[a->rd], cpu_regs[a->rs2], imm);
>     tcg_temp_free_i32(imm);
>     return true;
> }
> 
> static bool rx_gen_rrr(DisasContext *ctx, arg_rrr *a, logicfn opr)
> {
>     opr(cpu_regs[a->rd], cpu_regs[a->rs2], cpu_regs[a->rs]);
>     return true;
> }
> 
> static bool rx_gen_mr(DisasContext *ctx, arg_rm *a, logicfn opr)
> {
>     TCGv mem = tcg_temp_new();
>     TCGv val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs);
>     opr(cpu_regs[a->rd], cpu_regs[a->rd], val);
>     tcg_temp_free(mem);
>     return true;
> }
> 
> static bool trans_AND_mr(DisasContext *ctx, arg_AND_mr *a)
> {
>     return rx_gen_mr(ctx, a, tcg_gen_and_i32);
> }
> 
> static bool trans_ADD_mr(DisasContext *ctx, arg_AND_mr *a)
> {
>     return rx_gen_mr(ctx, a, rx_gen_add_i32);
> }
> 
> etc.
> 
> > static bool trans_SBB_mr(DisasContext *ctx, arg_SBB_mr *a)
> > {
> >     TCGv val, mem;
> >     mem = tcg_temp_new();
> >     val = rx_load_source(ctx, mem, a->ld, MO_32, a->rs);

OK.
All arithmetic and logic operations have been unified.

> The "Note only mi==2 allowed" means that you need
> 
>     if (a->mi != 2) {
>         return false;
>     }
> 
> here.  There are a couple more of these.

OK.

> > +/* ret = arg1 + arg2 + psw_c */
> > +static void rx_gen_adc_i32(TCGv ret, TCGv arg1, TCGv arg2)
> > +{
> > +    TCGv cf, z;
> > +    cf = tcg_temp_new();
> > +    z = tcg_const_i32(0);
> > +    tcg_gen_mov_i32(cf, cpu_psw_c);
> > +    tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, arg1, z, arg2, z);
> > +    tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, cpu_psw_s, cpu_psw_c, cf, z);
> 
> Note that you don't need CF if you consume psw_c right away:
> 
>     tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, arg1, z, cpu_psw_c, z);
>     tcg_gen_add2_i32(cpu_psw_s, cpu_psw_c, cpu_psw_s, cpu_psw_c, arg2, z);
> 
> You did forget to free CF, btw.

OK.

> > +/* adc dsp[rs], rd */
> > +static bool trans_ADC_mr(DisasContext *ctx, arg_ADC_mr *a)
> > +{
> 
> a->mi != 2.

OK.

> > +/* emul #imm, rd */
> > +static bool trans_EMUL_ir(DisasContext *ctx, arg_EMUL_ir *a)
> > +{
> > +    TCGv imm = tcg_const_i32(a->imm);
> > +    tcg_gen_muls2_i32(cpu_regs[a->rd], cpu_regs[a->rd + 1],
> > +                      cpu_regs[a->rd], imm);
> > +    tcg_temp_free(imm);
> > +    return true;
> > +}
> > +
> > +/* emul rs, rd */
> > +/* emul dsp[rs], rd */
> > +static bool trans_EMUL_mr(DisasContext *ctx, arg_EMUL_mr *a)
> > +{
> > +    TCGv val, mem;
> > +    mem = tcg_temp_new();
> > +    val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs);
> > +    tcg_gen_muls2_i32(cpu_regs[a->rd], cpu_regs[a->rd + 1],
> > +                      cpu_regs[a->rd], val);
> 
> Both of these need to check for rd == 15 and return false.

Specifying 15 for rd does not result in an invalid instruction.
I changed it to output a log message instead.

> > +static bool trans_EMULU_ir(DisasContext *ctx, arg_EMULU_ir *a)
> > +{
> > +    TCGv imm = tcg_const_i32(a->imm);
> > +    if (a->rd > 14) {
> > +        g_assert_not_reached();
> > +    }
> 
> You cannot make this assertion, since this is under control of the guest.  You
> need to return false instead, so that illegal instruction is signaled.  (And
> move the tcg_const_i32 below, so that you don't leak that temp.)

Likewise.

> > +static bool trans_EMULU_mr(DisasContext *ctx, arg_EMULU_mr *a)
> > +{
> > +    TCGv val, mem;
> > +    if (a->rd > 14) {
> > +        g_assert_not_reached();
> > +    }
> 
> Likewise.

OK.

> 
> > +/* divu rs, rd */
> > +/* divu dsp[rs], rd */
> > +static bool trans_DIVU_mr(DisasContext *ctx, arg_DIVU_mr *a)
> > +{
> > +    TCGv val, mem;
> > +    mem = tcg_temp_new();
> > +    val = rx_load_source(ctx, mem, a->ld, a->mi, a->rs);
> > +    gen_helper_divu(cpu_regs[a->rd], cpu_env, cpu_regs[a->rd], val);
> > +    tcg_temp_free(mem);
> > +    return true;
> > +}
> 
> You can use a shim function like
> 
> static void rx_gen_divu(TCGv ret, TCGv arg1, TCGv arg2)
> {
>     gen_helper_divu(ret, cpu_env, arg1, arg2);
> }
> 
> to allow you to use the other rx_gen_* helpers above.

OK.
It now uses the unified arithmetic operation helpers.

> > +
> > +
> > +/* shll #imm:5, rd */
> > +/* shll #imm:5, rs2, rd */
> > +static bool trans_SHLL_irr(DisasContext *ctx, arg_SHLL_irr *a)
> > +{
> > +    TCGv tmp;
> > +    tmp = tcg_temp_new();
> > +    if (a->imm) {
> > +        tcg_gen_sari_i32(cpu_psw_c, cpu_regs[a->rs2], 32 - a->imm);
> > +        tcg_gen_shli_i32(cpu_regs[a->rd], cpu_regs[a->rs2], a->imm);
> > +        tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_psw_o, cpu_psw_c, 0);
> > +        tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_psw_c, 0xffffffff);
> > +        tcg_gen_or_i32(cpu_psw_o, cpu_psw_o, tmp);
> > +        tcg_gen_setcondi_i32(TCG_COND_NE, cpu_psw_c, cpu_psw_c, 0);
> > +    } else {
> > +        tcg_gen_movi_i32(cpu_psw_c, 0);
> > +        tcg_gen_movi_i32(cpu_psw_o, 0);
> 
> Missing
> 
>     tcg_gen_mov_i32(cpu_regs[a->rd], cpu_regs[a->rs2]);
> 
> here.

OK.

> > +    }> +    tcg_gen_mov_i32(cpu_psw_z, cpu_regs[a->rd]);
> > +    tcg_gen_mov_i32(cpu_psw_s, cpu_regs[a->rd]);
> > +    return true;
> > +}
> > +
> > +/* shll rs, rd */
> > +static bool trans_SHLL_rr(DisasContext *ctx, arg_SHLL_rr *a)
> > +{
> > +    TCGLabel *l1, *l2;
> > +    TCGv count, tmp;
> > +
> > +    l1 = gen_new_label();
> > +    l2 = gen_new_label();
> > +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_regs[a->rs], 0, l1);
> > +    count = tcg_const_i32(32);
> > +    tmp = tcg_temp_new();
> > +    tcg_gen_sub_i32(count, count, cpu_regs[a->rs]);
> > +    tcg_gen_sar_i32(cpu_psw_c, cpu_regs[a->rd], count);
> > +    tcg_gen_shl_i32(cpu_regs[a->rd], cpu_regs[a->rd], cpu_regs[a->rs]);
> > +    tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_psw_o, cpu_psw_c, 0);
> > +    tcg_gen_setcondi_i32(TCG_COND_EQ, tmp, cpu_psw_c, 0xffffffff);
> > +    tcg_gen_or_i32(cpu_psw_o, cpu_psw_o, tmp);
> > +    tcg_gen_setcondi_i32(TCG_COND_NE, cpu_psw_c, cpu_psw_c, 0);
> > +    tcg_gen_br(l2);
> > +    gen_set_label(l1);
> > +    tcg_gen_movi_i32(cpu_psw_c, 0);
> > +    tcg_gen_movi_i32(cpu_psw_o, 0);
> 
> Likewise.

As this is a two-operand, rd does not change when the shift count is zero.

> > +    gen_set_label(l2);
> > +    tcg_gen_mov_i32(cpu_psw_z, cpu_regs[a->rd]);
> > +    tcg_gen_mov_i32(cpu_psw_s, cpu_regs[a->rd]);
> > +    tcg_temp_free(count);
> > +    tcg_temp_free(tmp);
> > +    return true;
> > +}
> > +
> > +static inline void shiftr_imm(uint32_t rd, uint32_t rs, uint32_t imm,
> > +                              unsigned int alith)
> > +{
> > +    static void (* const gen_sXri[])(TCGv ret, TCGv arg1, int arg2) = {
> > +        tcg_gen_shri_i32, tcg_gen_sari_i32,
> > +    };
> > +    tcg_debug_assert(alith < 2);
> > +    if (imm) {
> > +        gen_sXri[alith](cpu_regs[rd], cpu_regs[rs], imm - 1);
> > +        tcg_gen_andi_i32(cpu_psw_c, cpu_regs[rd], 0x00000001);
> > +        gen_sXri[alith](cpu_regs[rd], cpu_regs[rd], 1);
> > +    } else {
> > +        tcg_gen_movi_i32(cpu_psw_c, 0);
> 
> Likewise.

OK.

> 
> > +    }
> > +    tcg_gen_movi_i32(cpu_psw_o, 0);
> > +    tcg_gen_mov_i32(cpu_psw_z, cpu_regs[rd]);
> > +    tcg_gen_mov_i32(cpu_psw_s, cpu_regs[rd]);
> > +}
> > +
> > +static inline void shiftr_reg(uint32_t rd, uint32_t rs, unsigned int alith)
> > +{
> > +    TCGLabel *skipz, *done;
> > +    TCGv count;
> > +    static void (* const gen_sXri[])(TCGv ret, TCGv arg1, int arg2) = {
> > +        tcg_gen_shri_i32, tcg_gen_sari_i32,
> > +    };
> > +    static void (* const gen_sXr[])(TCGv ret, TCGv arg1, TCGv arg2) = {
> > +        tcg_gen_shr_i32, tcg_gen_sar_i32,
> > +    };
> > +    tcg_debug_assert(alith < 2);
> > +    skipz = gen_new_label();
> > +    done = gen_new_label();
> > +    count = tcg_temp_new();
> > +    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_regs[rs], 0, skipz);
> > +    tcg_gen_subi_i32(count, cpu_regs[rs], 1);
> > +    gen_sXr[alith](cpu_regs[rd], cpu_regs[rd], count);
> > +    tcg_gen_andi_i32(cpu_psw_c, cpu_regs[rd], 0x00000001);
> > +    gen_sXri[alith](cpu_regs[rd], cpu_regs[rd], 1);
> > +    tcg_gen_br(done);
> > +    gen_set_label(skipz);
> > +    tcg_gen_movi_i32(cpu_psw_c, 0);
> 
> Likewise.

Same as for SHLL rs, rd.

> > +/* revw rs, rd */
> > +static bool trans_REVW(DisasContext *ctx, arg_REVW *a)
> > +{
> > +    TCGv hi, lo;
> > +
> > +    hi = tcg_temp_new();
> > +    lo = tcg_temp_new();
> > +    tcg_gen_shri_i32(hi, cpu_regs[a->rs], 16);
> > +    tcg_gen_bswap16_i32(hi, hi);
> > +    tcg_gen_shli_i32(hi, hi, 16);
> > +    tcg_gen_bswap16_i32(lo, cpu_regs[a->rs]);
> 
> Previous comment re bswap16 requires zero-extension still applies.

Sorry. Fixed.

> > +/* conditional branch helper */
> > +static void rx_bcnd_main(DisasContext *ctx, int cd, int dst)
> > +{
> > +    TCGv z, t, f;
> > +    DisasCompare dc;
> > +    switch (cd) {
> > +    case 0 ... 13:
> > +        dc.temp = tcg_temp_new();
> > +        z = tcg_const_i32(0);
> > +        t = tcg_const_i32(ctx->pc + dst);
> > +        f = tcg_const_i32(ctx->base.pc_next);
> > +        psw_cond(&dc, cd);
> > +        tcg_gen_movcond_i32(dc.cond, cpu_pc, dc.value, z, t, f);
> 
>     lab_true = gen_new_label();
>     tcg_gen_brcondi_i32(dc.cond, dc.value, lab_true);
>     gen_goto_tb(ctx, 0, ctx->base.pc_next);
>     gen_set_label(lab_true);
>     gen_goto_tb(ctx, 1, ctx->pc + dst);

OK.

> > +        tcg_temp_free(t);
> > +        tcg_temp_free(f);
> > +        tcg_temp_free(dc.temp);
> > +        tcg_temp_free(z);
> > +        break;
> > +    case 14:
> > +        /* always true case */
> > +        tcg_gen_movi_i32(cpu_pc, ctx->pc + dst);
> 
>     gen_goto_tb(ctx, 0, ctx->pc + dst);

OK.

> > +        break;
> > +    case 15:
> > +        /* always false case */
> > +        tcg_gen_movi_i32(cpu_pc, ctx->base.pc_next);

OK.

> No need to do anything here; just return.
> 
> > +#define MULMAC(op)                                            \
> > +    do {                                                      \
> > +        TCGv regs = tcg_const_i32(a->rs << 4 | a->rs2);       \
> 
> I really don't like passing register numbers to helpers as immediates.

This part rewrite TCG.

> > +/* mulhi rs,rs2 */
> > +static bool trans_MULHI(DisasContext *ctx, arg_MULHI *a)
> > +{
> > +    MULMAC(mulhi);
> 
>     tcg_gen_ext_i32_i64(tmp1, cpu_regs[a->rs]);
>     tcg_gen_ext_i32_i64(tmp2, cpu_regs[a->rs2]);
>     tcg_gen_sari_i64(tmp1, tmp1, 16);
>     tcg_gen_andi_i64(tmp2, tmp2, ~0xffff);
>     tcg_gen_mul_i64(cpu_acc, tmp1, tmp2);

OK.

> > +static bool trans_MULLO(DisasContext *ctx, arg_MULLO *a)
> > +{
> > +    MULMAC(mullo);
> 
>     tcg_gen_extu_i32_i64(tmp1, cpu_regs[a->rs]);
>     tcg_gen_extu_i32_i64(tmp2, cpu_regs[a->rs2]);
>     tcg_gen_ext16s_i64(tmp1, tmp1);
>     tcg_gen_ext16s_i64(tmp2, tmp2);
>     tcg_gen_mul_i64(tmp1, tmp1, tmp2);
>     tcg_gen_shli_i64(cpu_acc, tmp1, 16);

OK.

> > +static bool trans_MACHI(DisasContext *ctx, arg_MACHI *a)
> > +{
> > +    MULMAC(machi);
> 
>     tcg_gen_ext_i32_i64(tmp1, cpu_regs[a->rs]);
>     tcg_gen_ext_i32_i64(tmp2, cpu_regs[a->rs2]);
>     tcg_gen_sari_i64(tmp1, tmp1, 16);
>     tcg_gen_andi_i64(tmp2, tmp2, ~0xffff);
>     tcg_gen_mul_i64(tmp1, tmp1, tmp2);
>     tcg_gen_add_i64(cpu_acc, cpu_acc, tmp1);

OK.

> > +static bool trans_MACLO(DisasContext *ctx, arg_MACLO *a)
> > +{
> > +    MULMAC(maclo);
> 
>     tcg_gen_extu_i32_i64(tmp1, cpu_regs[a->rs]);
>     tcg_gen_extu_i32_i64(tmp2, cpu_regs[a->rs2]);
>     tcg_gen_ext16s_i64(tmp1, tmp1);
>     tcg_gen_ext16s_i64(tmp2, tmp2);
>     tcg_gen_mul_i64(tmp1, tmp1, tmp2);
>     tcg_gen_shli_i64(tmp1, tmp1, 16);
>     tcg_gen_add_i64(cpu_acc, cpu_acc, tmp1);
>

OK.

> 
> > +/* sat rd */
> > +static bool trans_SAT(DisasContext *ctx, arg_SAT *a)
> > +{
> > +    TCGv rd = tcg_const_i32(a->rd);
> > +    gen_helper_sat(cpu_env, rd);
> 
>     tcg_gen_sari_i32(tmp, cpu_psw_s, 31);
>     tcg_gen_xori_i32(tmp, tmp, 0x80000000);
>     tcg_gen_movcond_i32(TCG_COND_LT, cpu_regs[a->rd],
>                         cpu_psw_o, zero,
>                         temp, cpu_regs[a->rd]);
>

OK.

> > +static inline void rx_bclrm(TCGv mem, TCGv mask)
> > +{
> > +    TCGv val;
> > +    val = tcg_temp_new();
> > +    rx_gen_ld(MO_8, val, mem);
> > +    tcg_gen_not_i32(mask, mask);
> > +    tcg_gen_and_i32(val, val, mask);
> 
>     tcg_gen_andc_i32(val, val, mask);

OK.

> > +static inline void rx_bclrr(TCGv reg, TCGv mask)
> > +{
> > +    tcg_gen_not_i32(mask, mask);
> > +    tcg_gen_and_i32(reg, reg, mask);
> 
> Likewise.

OK.

> > +    static bool cat3(trans_, name, _rr)(DisasContext *ctx,              \
> > +                                        cat3(arg_, name, _rr) * a)      \
> > +    {                                                                   \
> > +        TCGv mask;                                                      \
> > +        mask = tcg_const_i32(1);                                        \
> > +        tcg_gen_shl_i32(mask, mask, cpu_regs[a->rs]);                   \
> 
> This shift needs to be masked; only the low bits of cpu_regs[a->rs] are used.

OK. Added mask.

> > +        cat3(rx_, op, r)(cpu_regs[a->rd], mask);                        \
> > +        tcg_temp_free(mask);                                            \
> > +        return true;                                                    \
> > +    }                                                                   \
> > +    static bool cat3(trans_, name, _rm)(DisasContext *ctx,              \
> > +                                        cat3(arg_, name, _rm) * a)      \
> > +    {                                                                   \
> > +        TCGv mask, mem, addr;                                           \
> > +        mask = tcg_const_i32(1);                                        \
> > +        tcg_gen_shl_i32(mask, mask, cpu_regs[a->rd]);                   \
> 
> Likewise.

OK.

> 
> > +        mem = tcg_temp_new();                                           \
> > +        addr = rx_index_addr(ctx, mem, a->ld, MO_8, a->rs);             \
> > +        cat3(rx_, op, m)(addr, mask);                                   \
> > +        tcg_temp_free(mem);                                             \
> > +        tcg_temp_free(mask);                                            \
> > +        return true;                                                    \
> > +    }
> > +
> > +BITOP(BSET, bset)
> > +BITOP(BCLR, bclr)
> > +BITOP(BTST, btst)
> > +BITOP(BNOT, bnot)
> > +
> > +static inline void bmcnd_op(TCGv val, TCGCond cond, int pos)
> > +{
> > +    TCGv bit;
> > +    DisasCompare dc;
> > +    dc.temp = tcg_temp_new();
> > +    bit = tcg_temp_new();
> > +    psw_cond(&dc, cond);
> > +    tcg_gen_andi_i32(val, val, ~(1 << pos));
> > +    tcg_gen_setcondi_i32(dc.cond, bit, dc.value, 0);
> > +    tcg_gen_shli_i32(bit, bit, pos);
> > +    tcg_gen_or_i32(val, val, bit);
> 
> 
>    tcg_gen_setcondi_i32(dc.cond, bit, dc.value, 0);
>    tcg_gen_deposit_i32(val, val, bit, pos, 1);

OK.

> > +static void rx_tr_init_disas_context(DisasContextBase *dcbase, CPUState 
> > *cs)
> > +{
> 
> This is where you should initialize ctx->env.
> 
> > +void restore_state_to_opc(CPURXState *env, TranslationBlock *tb,
> > +                          target_ulong *data)
> > +{
> > +    env->pc = data[0];
> > +    env->psw = data[1];
> > +    rx_cpu_unpack_psw(env, 1);
> 
> You should not be doing this unpack and restore,
> as all psw bits are properly computed now.

OK. removed it.

> > +# Note that sz=3 overlaps SMOVF
> > +# RPMA.b
> > +RMPA               0111 1111 1000 1100                     sz=0
> > +# RPMA.w
> > +RMPA               0111 1111 1000 1101                     sz=1
> > +# RPMA.l
> > +RMPA               0111 1111 1000 1110                     sz=2
> 
> # SMOVF
> # RMPA.<bwl>
> {
>   SMOVF         0111 1111 1000 1111
>   RMPA          0111 1111 1000 11 sz:2
> }
> 
> > +# Note that sz=3 overlaps SMOVB
> > +# SSTR.b
> ...
> > +# Note that sz=3 overlaps SCMPU
> > +# SUNTIL.b
> ...
> > +# Note that sz=3 overlaps SMOVU
> > +# SWHILE.b
> 
> Likewise.

OK. I had overlooked those.

> 
> r~
> 

Thanks.

-- 
Yoshinori Sato



reply via email to

[Prev in Thread] Current Thread [Next in Thread]