qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 03/22] tcg-i386: Tidy ext8u and ext16u operations.


From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH 03/22] tcg-i386: Tidy ext8u and ext16u operations.
Date: Wed, 19 May 2010 08:47:13 +0200
User-agent: Mutt/1.5.20 (2009-06-14)

Hi,

On Tue, Apr 13, 2010 at 03:59:20PM -0700, Richard Henderson wrote:
> Define OPC_MOVZBL and OPC_MOVZWL.  Factor opcode emission to
> separate functions.  Don't restrict the input register to the
> low 4 "q" registers; emit an AND instead if needed.

I am fine with the cleanup part, but I don't know what to think about
the change to the constraints.

The reg allocator is able to issue a move if needed, so the only
improvement this patch brings is for doing an ext8u on both "q" registers.

OTOH the reg allocator knows about this situation and will try to avoid
it during allocation. Cheating the reg allocator might have some
adverse effects, especially after your patch "Allocate
call-saved registers first". I am thinking of the scenario where the
value is in memory (which is likely to be the case given the limited
number of registers): it will likely be loaded into an "r" register (they
now have top priority), and then ext8u will be called, which will
issue "mov" + "and" instructions instead of a single "movzbl" instruction.

If there are still cases to optimize, I think it should be done in the
reg allocator instead, so that it benefits all ops and all targets. I
have started to experiment with that together with Laurent Desnogues, and
I have a few more ideas on how to improve it, but unfortunately I don't
have time to code them.

All of that is purely theoretical. Do you know how it behaves in
practice?

> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  tcg/i386/tcg-target.c |   68 ++++++++++++++++++++++++++++++------------------
>  1 files changed, 42 insertions(+), 26 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 359f81b..2cc1191 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -161,6 +161,11 @@ static inline int tcg_target_const_match(tcg_target_long 
> val,
>          return 0;
>  }
>  
> +#define P_EXT   0x100 /* 0x0f opcode prefix */
> +
> +#define OPC_MOVZBL   (0xb6 | P_EXT)
> +#define OPC_MOVZWL   (0xb7 | P_EXT)
> +
>  #define ARITH_ADD 0
>  #define ARITH_OR  1
>  #define ARITH_ADC 2
> @@ -194,8 +199,6 @@ static inline int tcg_target_const_match(tcg_target_long 
> val,
>  #define JCC_JLE 0xe
>  #define JCC_JG  0xf
>  
> -#define P_EXT   0x100 /* 0x0f opcode prefix */
> -
>  static const uint8_t tcg_cond_to_jcc[10] = {
>      [TCG_COND_EQ] = JCC_JE,
>      [TCG_COND_NE] = JCC_JNE,
> @@ -288,6 +291,27 @@ static inline void tcg_out_st(TCGContext *s, TCGType 
> type, int arg,
>      tcg_out_modrm_offset(s, 0x89, arg, arg1, arg2);
>  }
>  
> +static void tcg_out_ext8u(TCGContext *s, int dest, int src)
> +{
> +    if (src >= 4) {
> +        tcg_out_mov(s, dest, src);
> +        if (dest >= 4) {
> +            tcg_out_modrm(s, 0x81, ARITH_AND, dest);
> +            tcg_out32(s, 0xff);
> +            return;
> +        }
> +        src = dest;
> +    }
> +    /* movzbl */
> +    tcg_out_modrm(s, OPC_MOVZBL, dest, src);
> +}
> +
> +static inline void tcg_out_ext16u(TCGContext *s, int dest, int src)
> +{
> +    /* movzwl */
> +    tcg_out_modrm(s, OPC_MOVZWL, dest, src);
> +}
> +
>  static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, 
> int cf)
>  {
>      if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == 
> -1))) {
> @@ -300,11 +324,9 @@ static inline void tgen_arithi(TCGContext *s, int c, int 
> r0, int32_t val, int cf
>          tcg_out_modrm(s, 0x83, c, r0);
>          tcg_out8(s, val);
>      } else if (c == ARITH_AND && val == 0xffu && r0 < 4) {
> -        /* movzbl */
> -        tcg_out_modrm(s, 0xb6 | P_EXT, r0, r0);
> +        tcg_out_ext8u(s, r0, r0);
>      } else if (c == ARITH_AND && val == 0xffffu) {
> -        /* movzwl */
> -        tcg_out_modrm(s, 0xb7 | P_EXT, r0, r0);
> +        tcg_out_ext16u(s, r0, r0);
>      } else {
>          tcg_out_modrm(s, 0x81, c, r0);
>          tcg_out32(s, val);
> @@ -645,12 +667,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>          tcg_out_modrm(s, 0xbf | P_EXT, data_reg, TCG_REG_EAX);
>          break;
>      case 0:
> -        /* movzbl */
> -        tcg_out_modrm(s, 0xb6 | P_EXT, data_reg, TCG_REG_EAX);
> +        tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
>          break;
>      case 1:
> -        /* movzwl */
> -        tcg_out_modrm(s, 0xb7 | P_EXT, data_reg, TCG_REG_EAX);
> +        tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
>          break;
>      case 2:
>      default:
> @@ -690,7 +710,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>      switch(opc) {
>      case 0:
>          /* movzbl */
> -        tcg_out_modrm_offset(s, 0xb6 | P_EXT, data_reg, r0, GUEST_BASE);
> +        tcg_out_modrm_offset(s, OPC_MOVZBL, data_reg, r0, GUEST_BASE);
>          break;
>      case 0 | 4:
>          /* movsbl */
> @@ -698,7 +718,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>          break;
>      case 1:
>          /* movzwl */
> -        tcg_out_modrm_offset(s, 0xb7 | P_EXT, data_reg, r0, GUEST_BASE);
> +        tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
>          if (bswap) {
>              /* rolw $8, data_reg */
>              tcg_out8(s, 0x66); 
> @@ -850,12 +870,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      } else {
>          switch(opc) {
>          case 0:
> -            /* movzbl */
> -            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_EDX, data_reg);
> +            tcg_out_ext8u(s, TCG_REG_EDX, data_reg);
>              break;
>          case 1:
> -            /* movzwl */
> -            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_EDX, data_reg);
> +            tcg_out_ext16u(s, TCG_REG_EDX, data_reg);
>              break;
>          case 2:
>              tcg_out_mov(s, TCG_REG_EDX, data_reg);
> @@ -881,12 +899,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>          tcg_out_mov(s, TCG_REG_EDX, addr_reg2);
>          switch(opc) {
>          case 0:
> -            /* movzbl */
> -            tcg_out_modrm(s, 0xb6 | P_EXT, TCG_REG_ECX, data_reg);
> +            tcg_out_ext8u(s, TCG_REG_ECX, data_reg);
>              break;
>          case 1:
> -            /* movzwl */
> -            tcg_out_modrm(s, 0xb7 | P_EXT, TCG_REG_ECX, data_reg);
> +            tcg_out_ext16u(s, TCG_REG_ECX, data_reg);
>              break;
>          case 2:
>              tcg_out_mov(s, TCG_REG_ECX, data_reg);
> @@ -1022,7 +1038,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
> opc,
>          break;
>      case INDEX_op_ld8u_i32:
>          /* movzbl */
> -        tcg_out_modrm_offset(s, 0xb6 | P_EXT, args[0], args[1], args[2]);
> +        tcg_out_modrm_offset(s, OPC_MOVZBL, args[0], args[1], args[2]);
>          break;
>      case INDEX_op_ld8s_i32:
>          /* movsbl */
> @@ -1030,7 +1046,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
> opc,
>          break;
>      case INDEX_op_ld16u_i32:
>          /* movzwl */
> -        tcg_out_modrm_offset(s, 0xb7 | P_EXT, args[0], args[1], args[2]);
> +        tcg_out_modrm_offset(s, OPC_MOVZWL, args[0], args[1], args[2]);
>          break;
>      case INDEX_op_ld16s_i32:
>          /* movswl */
> @@ -1177,10 +1193,10 @@ static inline void tcg_out_op(TCGContext *s, 
> TCGOpcode opc,
>          tcg_out_modrm(s, 0xbf | P_EXT, args[0], args[1]);
>          break;
>      case INDEX_op_ext8u_i32:
> -        tcg_out_modrm(s, 0xb6 | P_EXT, args[0], args[1]);
> +        tcg_out_ext8u(s, args[0], args[1]);
>          break;
>      case INDEX_op_ext16u_i32:
> -        tcg_out_modrm(s, 0xb7 | P_EXT, args[0], args[1]);
> +        tcg_out_ext16u(s, args[0], args[1]);
>          break;
>  
>      case INDEX_op_setcond_i32:
> @@ -1275,8 +1291,8 @@ static const TCGTargetOpDef x86_op_defs[] = {
>  
>      { INDEX_op_ext8s_i32, { "r", "q" } },
>      { INDEX_op_ext16s_i32, { "r", "r" } },
> -    { INDEX_op_ext8u_i32, { "r", "q"} },
> -    { INDEX_op_ext16u_i32, { "r", "r"} },
> +    { INDEX_op_ext8u_i32, { "r", "r" } },
> +    { INDEX_op_ext16u_i32, { "r", "r" } },
>  
>      { INDEX_op_setcond_i32, { "q", "r", "ri" } },
>      { INDEX_op_setcond2_i32, { "r", "r", "r", "ri", "ri" } },
> -- 
> 1.6.6.1
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net



reply via email to

[Prev in Thread] Current Thread [Next in Thread]