From: Claudio Fontana
Subject: Re: [Qemu-devel] [PATCH v4 04/33] tcg-aarch64: Hoist common argument loads in tcg_out_op
Date: Mon, 16 Sep 2013 09:42:17 +0200
User-agent: Mozilla/5.0 (Windows NT 6.1; rv:17.0) Gecko/20130801 Thunderbird/17.0.8

Hello Richard,

On 14.09.2013 23:54, Richard Henderson wrote:
> This reduces the code size of the function significantly.
> 
> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  tcg/aarch64/tcg-target.c | 95 +++++++++++++++++++++++++-----------------------
>  1 file changed, 50 insertions(+), 45 deletions(-)
> 
> diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
> index 8f19b50..8f5814d 100644
> --- a/tcg/aarch64/tcg-target.c
> +++ b/tcg/aarch64/tcg-target.c
> @@ -1113,15 +1113,22 @@ static inline void tcg_out_load_pair(TCGContext *s, TCGReg addr,
>  }
>  
>  static void tcg_out_op(TCGContext *s, TCGOpcode opc,
> -                       const TCGArg *args, const int *const_args)
> +                       const TCGArg args[TCG_MAX_OP_ARGS],
> +                       const int const_args[TCG_MAX_OP_ARGS])
>  {
>      /* 99% of the time, we can signal the use of extension registers
>         by looking to see if the opcode handles 64-bit data.  */
>      TCGType ext = (tcg_op_defs[opc].flags & TCG_OPF_64BIT) != 0;
>  
> +    /* Hoist the loads of the most common arguments.  */
> +    TCGArg a0 = args[0];
> +    TCGArg a1 = args[1];
> +    TCGArg a2 = args[2];
> +    int c2 = const_args[2];
> +

Either hoist all of them or none (i.e. add c0 and c1 as well); I would expect the
compiler not to generate code for the paths that don't use c[n].

Btw, if the compiler generates bloated code without this, we should notify the 
projects working on gcc for aarch64.
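
For concreteness, the fully hoisted variant I have in mind would look something
like this (untested sketch; the c0/c1 names just follow the a0..a2/c2 convention
from the patch):

    /* Hoist the loads of all common arguments; the compiler should
       dead-code-eliminate the ones a given opcode does not use.  */
    TCGArg a0 = args[0];
    TCGArg a1 = args[1];
    TCGArg a2 = args[2];
    int c0 = const_args[0];
    int c1 = const_args[1];
    int c2 = const_args[2];

Then e.g. the INDEX_op_call case below could test c0 instead of const_args[0].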

>      switch (opc) {
>      case INDEX_op_exit_tb:
> -        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, args[0]);
> +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
>          tcg_out_goto(s, (tcg_target_long)tb_ret_addr);
>          break;
>  
> @@ -1130,23 +1137,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>  #error "USE_DIRECT_JUMP required for aarch64"
>  #endif
>          assert(s->tb_jmp_offset != NULL); /* consistency for USE_DIRECT_JUMP */
> -        s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
> +        s->tb_jmp_offset[a0] = s->code_ptr - s->code_buf;
>          /* actual branch destination will be patched by
>             aarch64_tb_set_jmp_target later, beware retranslation. */
>          tcg_out_goto_noaddr(s);
> -        s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
> +        s->tb_next_offset[a0] = s->code_ptr - s->code_buf;
>          break;
>  
>      case INDEX_op_call:
>          if (const_args[0]) {
> -            tcg_out_call(s, args[0]);
> +            tcg_out_call(s, a0);
>          } else {
> -            tcg_out_callr(s, args[0]);
> +            tcg_out_callr(s, a0);
>          }
>          break;
>  
>      case INDEX_op_br:
> -        tcg_out_goto_label(s, args[0]);
> +        tcg_out_goto_label(s, a0);
>          break;
>  
>      case INDEX_op_ld_i32:
> @@ -1169,97 +1176,95 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>      case INDEX_op_st16_i64:
>      case INDEX_op_st32_i64:
>          tcg_out_ldst(s, aarch64_ldst_get_data(opc), aarch64_ldst_get_type(opc),
> -                     args[0], args[1], args[2]);
> +                     a0, a1, a2);
>          break;
>  
>      case INDEX_op_add_i64:
>      case INDEX_op_add_i32:
> -        tcg_out_arith(s, ARITH_ADD, ext, args[0], args[1], args[2], 0);
> +        tcg_out_arith(s, ARITH_ADD, ext, a0, a1, a2, 0);
>          break;
>  
>      case INDEX_op_sub_i64:
>      case INDEX_op_sub_i32:
> -        tcg_out_arith(s, ARITH_SUB, ext, args[0], args[1], args[2], 0);
> +        tcg_out_arith(s, ARITH_SUB, ext, a0, a1, a2, 0);
>          break;
>  
>      case INDEX_op_and_i64:
>      case INDEX_op_and_i32:
> -        tcg_out_arith(s, ARITH_AND, ext, args[0], args[1], args[2], 0);
> +        tcg_out_arith(s, ARITH_AND, ext, a0, a1, a2, 0);
>          break;
>  
>      case INDEX_op_or_i64:
>      case INDEX_op_or_i32:
> -        tcg_out_arith(s, ARITH_OR, ext, args[0], args[1], args[2], 0);
> +        tcg_out_arith(s, ARITH_OR, ext, a0, a1, a2, 0);
>          break;
>  
>      case INDEX_op_xor_i64:
>      case INDEX_op_xor_i32:
> -        tcg_out_arith(s, ARITH_XOR, ext, args[0], args[1], args[2], 0);
> +        tcg_out_arith(s, ARITH_XOR, ext, a0, a1, a2, 0);
>          break;
>  
>      case INDEX_op_mul_i64:
>      case INDEX_op_mul_i32:
> -        tcg_out_mul(s, ext, args[0], args[1], args[2]);
> +        tcg_out_mul(s, ext, a0, a1, a2);
>          break;
>  
>      case INDEX_op_shl_i64:
>      case INDEX_op_shl_i32:
> -        if (const_args[2]) {    /* LSL / UBFM Wd, Wn, (32 - m) */
> -            tcg_out_shl(s, ext, args[0], args[1], args[2]);
> +        if (c2) {    /* LSL / UBFM Wd, Wn, (32 - m) */
> +            tcg_out_shl(s, ext, a0, a1, a2);
>          } else {                /* LSL / LSLV */
> -            tcg_out_shiftrot_reg(s, SRR_SHL, ext, args[0], args[1], args[2]);
> +            tcg_out_shiftrot_reg(s, SRR_SHL, ext, a0, a1, a2);
>          }
>          break;
>  
>      case INDEX_op_shr_i64:
>      case INDEX_op_shr_i32:
> -        if (const_args[2]) {    /* LSR / UBFM Wd, Wn, m, 31 */
> -            tcg_out_shr(s, ext, args[0], args[1], args[2]);
> +        if (c2) {    /* LSR / UBFM Wd, Wn, m, 31 */
> +            tcg_out_shr(s, ext, a0, a1, a2);
>          } else {                /* LSR / LSRV */
> -            tcg_out_shiftrot_reg(s, SRR_SHR, ext, args[0], args[1], args[2]);
> +            tcg_out_shiftrot_reg(s, SRR_SHR, ext, a0, a1, a2);
>          }
>          break;
>  
>      case INDEX_op_sar_i64:
>      case INDEX_op_sar_i32:
> -        if (const_args[2]) {    /* ASR / SBFM Wd, Wn, m, 31 */
> -            tcg_out_sar(s, ext, args[0], args[1], args[2]);
> +        if (c2) {    /* ASR / SBFM Wd, Wn, m, 31 */
> +            tcg_out_sar(s, ext, a0, a1, a2);
>          } else {                /* ASR / ASRV */
> -            tcg_out_shiftrot_reg(s, SRR_SAR, ext, args[0], args[1], args[2]);
> +            tcg_out_shiftrot_reg(s, SRR_SAR, ext, a0, a1, a2);
>          }
>          break;
>  
>      case INDEX_op_rotr_i64:
>      case INDEX_op_rotr_i32:
> -        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, m */
> -            tcg_out_rotr(s, ext, args[0], args[1], args[2]);
> +        if (c2) {    /* ROR / EXTR Wd, Wm, Wm, m */
> +            tcg_out_rotr(s, ext, a0, a1, a2);
>          } else {                /* ROR / RORV */
> -            tcg_out_shiftrot_reg(s, SRR_ROR, ext, args[0], args[1], args[2]);
> +            tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, a2);
>          }
>          break;
>  
>      case INDEX_op_rotl_i64:
>      case INDEX_op_rotl_i32:     /* same as rotate right by (32 - m) */
> -        if (const_args[2]) {    /* ROR / EXTR Wd, Wm, Wm, 32 - m */
> -            tcg_out_rotl(s, ext, args[0], args[1], args[2]);
> +        if (c2) {    /* ROR / EXTR Wd, Wm, Wm, 32 - m */
> +            tcg_out_rotl(s, ext, a0, a1, a2);
>          } else {
> -            tcg_out_arith(s, ARITH_SUB, 0,
> -                          TCG_REG_TMP, TCG_REG_XZR, args[2], 0);
> -            tcg_out_shiftrot_reg(s, SRR_ROR, ext,
> -                                 args[0], args[1], TCG_REG_TMP);
> +            tcg_out_arith(s, ARITH_SUB, 0, TCG_REG_TMP, TCG_REG_XZR, a2, 0);
> +            tcg_out_shiftrot_reg(s, SRR_ROR, ext, a0, a1, TCG_REG_TMP);
>          }
>          break;
>  
>      case INDEX_op_brcond_i64:
> -    case INDEX_op_brcond_i32: /* CMP 0, 1, cond(2), label 3 */
> -        tcg_out_cmp(s, ext, args[0], args[1], 0);
> -        tcg_out_goto_label_cond(s, args[2], args[3]);
> +    case INDEX_op_brcond_i32:
> +        tcg_out_cmp(s, ext, a0, a1, 0);
> +        tcg_out_goto_label_cond(s, a2, args[3]);
>          break;
>  
>      case INDEX_op_setcond_i64:
>      case INDEX_op_setcond_i32:
> -        tcg_out_cmp(s, ext, args[1], args[2], 0);
> -        tcg_out_cset(s, 0, args[0], args[3]);
> +        tcg_out_cmp(s, ext, a1, a2, 0);
> +        tcg_out_cset(s, 0, a0, args[3]);
>          break;
>  
>      case INDEX_op_qemu_ld8u:
> @@ -1305,34 +1310,34 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          /* FALLTHRU */
>      case INDEX_op_bswap64_i64:
>      case INDEX_op_bswap32_i32:
> -        tcg_out_rev(s, ext, args[0], args[1]);
> +        tcg_out_rev(s, ext, a0, a1);
>          break;
>      case INDEX_op_bswap16_i64:
>      case INDEX_op_bswap16_i32:
> -        tcg_out_rev16(s, 0, args[0], args[1]);
> +        tcg_out_rev16(s, 0, a0, a1);
>          break;
>  
>      case INDEX_op_ext8s_i64:
>      case INDEX_op_ext8s_i32:
> -        tcg_out_sxt(s, ext, 0, args[0], args[1]);
> +        tcg_out_sxt(s, ext, 0, a0, a1);
>          break;
>      case INDEX_op_ext16s_i64:
>      case INDEX_op_ext16s_i32:
> -        tcg_out_sxt(s, ext, 1, args[0], args[1]);
> +        tcg_out_sxt(s, ext, 1, a0, a1);
>          break;
>      case INDEX_op_ext32s_i64:
> -        tcg_out_sxt(s, 1, 2, args[0], args[1]);
> +        tcg_out_sxt(s, 1, 2, a0, a1);
>          break;
>      case INDEX_op_ext8u_i64:
>      case INDEX_op_ext8u_i32:
> -        tcg_out_uxt(s, 0, args[0], args[1]);
> +        tcg_out_uxt(s, 0, a0, a1);
>          break;
>      case INDEX_op_ext16u_i64:
>      case INDEX_op_ext16u_i32:
> -        tcg_out_uxt(s, 1, args[0], args[1]);
> +        tcg_out_uxt(s, 1, a0, a1);
>          break;
>      case INDEX_op_ext32u_i64:
> -        tcg_out_movr(s, 0, args[0], args[1]);
> +        tcg_out_movr(s, 0, a0, a1);
>          break;
>  
>      case INDEX_op_mov_i64:
> 

Claudio




