qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH moxie 3/5] Moxie target code


From: Richard Henderson
Subject: Re: [Qemu-devel] [PATCH moxie 3/5] Moxie target code
Date: Thu, 14 Feb 2013 15:19:00 -0800
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130110 Thunderbird/17.0.2

On 02/13/2013 02:26 PM, Anthony Green wrote:
> +typedef struct CPUMoxieState {
> +
> +
> +  uint32_t flags;               /* general execution flags */
> +  uint32_t gregs[16];           /* general registers */
> +  uint32_t sregs[256];          /* special registers */
> +  uint32_t pc;
> +  uint32_t cc;                /* condition codes */
> +
> +  uint32_t t0;
> +  uint32_t t1;
> +  uint32_t t2;

Are these t[0-2] used anywhere?  I couldn't immediately see.
Such named temporaries were legacy on a couple of targets,
so depending on when you copied this from, e.g. sparc, you
might have gotten these unnecessarily.

> +/* This is the state at translation time.  */
> +typedef struct DisasContext {
> +  struct TranslationBlock *tb;
> +  target_ulong pc, saved_pc;
> +  uint32_t opcode;
> +  uint32_t fp_status;
> +  /* Routine used to access memory */
> +  int memidx;
> +  uint32_t hflags, saved_hflags;
> +  int bstate;
> +  target_ulong btarget;
> +  void *last_T0_store;
> +  int last_T0_gpr;
> +  int singlestep_enabled;
> +} DisasContext;
> +
> +enum {
> +  BS_NONE     = 0, /* We go out of the TB without reaching a branch or an
> +                    * exception condition */
> +  BS_STOP     = 1, /* We want to stop translation for any reason */
> +  BS_BRANCH   = 2, /* We reached a branch condition     */
> +  BS_EXCP     = 3, /* We reached an exception condition */
> +};
> +
> +static TCGv cpu_pc;
> +static TCGv cpu_gregs[16];
> +static TCGv cpu_sregs[256];

You're slowing down the compiler by allocating registers for all of
these SREGS, where they're only used in move to/from insns, and in the
SWI insn.  You'll be better off issuing explicit tcg_gen_ld/st_i32 in
those cases.

> +/* The code generator doesn't like lots of temporaries, so maintain our own
> +   cache for reuse within a function.  */
> +#define MAX_TEMPS 8
> +static int num_temps;
> +static TCGv temps[MAX_TEMPS];

The code generator does fine with quite a few temporaries.  The biggest
problem being that you're never freeing them.  You don't need a local
cache, just use tcg_temp_free_i32 in dead_tmp.

FYI, there are some debugging hooks that might help:
tcg_clear_temp_count, tcg_check_temp_count.  Placing these around e.g.
each individual insn can make sure that you're not leaking.

> +/* Create a new temporary and set it to the value of a CPU register.  */
> +static inline TCGv load_reg(DisasContext *s, int reg)
> +{
> +  TCGv tmp = new_tmp();
> +  tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUMoxieState, gregs[reg]));
> +  return tmp;
> +}

This function won't work, since there's no synchronization between
cpu_env memory and tcg global registers.  On the good side, it's
actually unused.

Please delete all the dead code.  E.g. by not marking any functions
inline, and letting the compiler work out what should be inlined, and
warning about unused.

> +            case 0x00: /* beq */
> +              {
> +                int l1 = gen_new_label();
> +                tcg_gen_brcondi_i32 (TCG_COND_EQ, cc, CC_EQ, l1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x01: /* bne */
> +              {
> +                int l1 = gen_new_label();
> +                tcg_gen_brcondi_i32 (TCG_COND_NE, cc, CC_EQ, l1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x02: /* blt */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_LT);
> +                tcg_gen_brcondi_i32 (TCG_COND_EQ, t1, CC_LT, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x03: /* bgt */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_GT);
> +                tcg_gen_brcondi_i32 (TCG_COND_EQ, t1, CC_GT, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x04: /* bltu */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_LTU);
> +                tcg_gen_brcondi_i32 (TCG_COND_EQ, t1, CC_LTU, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x05: /* bgtu */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_GTU);
> +                tcg_gen_brcondi_i32 (TCG_COND_EQ, t1, CC_GTU, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x06: /* bge */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_GT|CC_EQ);
> +                tcg_gen_brcondi_i32 (TCG_COND_GT, t1, 0, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x07: /* ble */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_LT|CC_EQ);
> +                tcg_gen_brcondi_i32 (TCG_COND_GT, t1, 0, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x08: /* bgeu */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_GTU|CC_EQ);
> +                tcg_gen_brcondi_i32 (TCG_COND_GT, t1, 0, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;
> +            case 0x09: /* bleu */
> +              {
> +                int l1 = gen_new_label();
> +                TCGv t1 = new_tmp();
> +                tcg_gen_andi_i32(t1, cc, CC_LTU|CC_EQ);
> +                tcg_gen_brcondi_i32 (TCG_COND_GT, t1, 0, l1);
> +                dead_tmp(t1);
> +                gen_goto_tb(env, ctx, 1, ctx->pc+2);
> +                gen_set_label(l1);
> +                gen_goto_tb(env, ctx, 0, INST2OFFSET(opcode)+ctx->pc+2);
> +                ctx->bstate = BS_BRANCH;
> +              }
> +              break;

Consider a subroutine for all this?

> +            default:
> +              printf("* 0x%x\tForm 3 ********** 0x%x\n", ctx->pc, inst);

No raw printfs please.

> +            case 0x00: /* inc */
> +              {
> +                int a = (opcode >> 8) & 0xf;
> +                unsigned int v = (opcode & 0xff);
> +
> +                TCGv t1 = new_tmp();
> +                tcg_gen_addi_i32(t1, REG(a), v);
> +                tcg_gen_mov_i32(REG(a), t1);
> +                dead_tmp(t1);
> +              }

Note that there's no need for temporary allocation here.
Just perform the entire operation with tcg_gen_addi_i32.
Same with dec.

> +        case 0x03: /* jsra */
> +          {
> +            /* Load the stack pointer into T0.  */
> +            TCGv t1 = new_tmp();
> +            TCGv t2 = new_tmp();
> +
> +            tcg_gen_movi_i32(t1, ctx->pc+6);
> +
> +            /* Make space for the static chain and return address.  */
> +            tcg_gen_subi_i32(t2, REG(1), 8);
> +            tcg_gen_mov_i32(REG(1), t2);
> +            tcg_gen_qemu_st32(t1, REG(1), ctx->memidx);
> +
> +            /* Push the current frame pointer.  */
> +            tcg_gen_subi_i32(t2, REG(1), 4);
> +            tcg_gen_mov_i32(REG(1), t2);
> +            tcg_gen_qemu_st32(REG(0), REG(1), ctx->memidx);

There are two exceptions that can be taken here, for the two stores.
Are you certain that REG(1) should be updated before both are handled?
Should the write to REG(1) be delayed until after the second store?

> +        case 0x04: /* ret */
> +          {
> +            TCGv t1 = new_tmp();
> +
> +            /* The new $sp is the old $fp.  */
> +            tcg_gen_mov_i32(REG(1), REG(0));
> +
> +            /* Pop the frame pointer.  */
> +            tcg_gen_qemu_ld32u(REG(0), REG(1), ctx->memidx);
> +            tcg_gen_addi_i32(t1, REG(1), 4);
> +            tcg_gen_mov_i32(REG(1), t1);
> +
> +
> +            /* Pop the return address and skip over the static chain
> +               slot.  */
> +            tcg_gen_qemu_ld32u(cpu_pc, REG(1), ctx->memidx);
> +            tcg_gen_addi_i32(t1, REG(1), 8);
> +            tcg_gen_mov_i32(REG(1), t1);

Similarly, should any global variable be updated before the second load?
Same comments apply to JSR and SWI.

> +        case 0x05: /* add.l */
> +          {
> +            int a = (opcode >> 4) & 0xf;
> +            int b = opcode & 0xf;
> +
> +            TCGv result = new_tmp();
> +            tcg_gen_add_i32(result, REG(a), REG(b));
> +            tcg_gen_mov_i32(REG(a), result);
> +            dead_tmp(result);

I'll quit mentioning useless temporaries.  Suffice that there are lots.

> +        case 0x0e: /* cmp */
> +          {
> +            int a  = (opcode >> 4) & 0xf;
> +            int b  = opcode & 0xf;
> +
> +            int label_equal = gen_new_label();
> +            int label_done = gen_new_label();
> +
> +            /* Clear CC */
> +            tcg_gen_movi_i32(cc, 0);
> +
> +            tcg_gen_brcond_i32(TCG_COND_EQ, REG(a), REG(b), label_equal);
> +
> +#define CMPTEST(t,CODE)                                         \
> +            {                                                   \
> +              int lyes = gen_new_label();                       \
> +              int lno = gen_new_label();                        \
> +              TCGv t1 = new_tmp();                              \
> +              tcg_gen_brcond_i32(t, REG(a), REG(b), lyes);      \
> +              tcg_gen_br(lno);                                  \
> +              gen_set_label(lyes);                              \
> +              tcg_gen_ori_i32(t1, cc, CODE);                    \
> +              tcg_gen_mov_i32(cc, t1);                          \
> +              gen_set_label(lno);                               \
> +              dead_tmp(t1);                                     \

Consider making use of tcg_setcond_i32 here.  It's probably new since
you wrote all this in the first place.

> +        case 0x31: /* div.l */
> +          {
> +            int a = (opcode >> 4) & 0xf;
> +            int b = opcode & 0xf;
> +
> +            TCGv result = new_tmp();
> +            tcg_gen_div_i32(result, REG(a), REG(b));
> +            tcg_gen_mov_i32(REG(a), result);
> +            dead_tmp(result);
> +          }
> +          break;

No divide by zero check?  If not generated by moxie, you'll still want
to care for it being generated by the host.  And sometimes working
around the extra INT_MIN / -1 exception you'll get from an i386 host.
Given all those, it's sometimes easier to do this in a helper.

> +#ifdef DEBUG_DISAS
> +  if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
> +    qemu_log("------------------------------------------------\n");
> +    /* FIXME: This may print out stale hflags from env... */
> +    /* cpu_dump_state(env, logfile, fprintf, 0); */
> +  }
> +#endif

Remove this.  Proper TB_CPU logging is done in generic code.

Consider placing a tcg_gen_debug_insn_start at the beginning of decode_opc.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]