qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.


From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.
Date: Mon, 19 Apr 2010 00:13:02 +0200
User-agent: Mutt/1.5.20 (2009-06-14)

On Tue, Apr 13, 2010 at 04:33:59PM -0700, Richard Henderson wrote:
> Define OPC_BSWAP.  Factor opcode emission to separate functions.
> Use bswap+shift to implement 16-bit swap instead of a rolw; this
> gets the proper zero-extension required by INDEX_op_bswap16_i32.

This is not required by INDEX_op_bswap16_i32. What is need is that the
value in the input register has the 16 upper bits set to 0. Considering
that, the rolw instruction is faster than bswap + shift.

> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  tcg/i386/tcg-target.c |   53 
> +++++++++++++++++++++++++------------------------
>  1 files changed, 27 insertions(+), 26 deletions(-)
> 
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 75b9915..0bafd00 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -163,6 +163,7 @@ static inline int tcg_target_const_match(tcg_target_long 
> val,
>  
>  #define P_EXT   0x100 /* 0x0f opcode prefix */
>  
> +#define OPC_BSWAP    (0xc8 | P_EXT)
>  #define OPC_MOVZBL   (0xb6 | P_EXT)
>  #define OPC_MOVZWL   (0xb7 | P_EXT)
>  #define OPC_MOVSBL   (0xbe | P_EXT)

> @@ -339,6 +340,22 @@ static inline void tcg_out_ext16s(TCGContext *s, int 
> dest, int src)
>      tcg_out_modrm(s, OPC_MOVSWL, dest, src);
>  }
>  
> +static inline void tcg_out_bswap32(TCGContext *s, int reg)
> +{
> +    tcg_out_opc(s, OPC_BSWAP + reg);
> +}
> +
> +static inline void tcg_out_bswap16(TCGContext *s, int reg, int sign)
> +{
> +    /* This swap+shift combination guarantees that the high part contains
> +       the sign or zero extension required.  It also doesn't suffer the
> +       problem of partial register stalls that using rolw does.  */
> +    tcg_out_bswap32(s, reg);
> +    /* shr $16, dest */
> +    tcg_out_modrm(s, 0xc1, (sign ? SHIFT_SAR : SHIFT_SHR), reg);
> +    tcg_out8(s, 16);
> +}
> +
>  static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, 
> int cf)
>  {
>      if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == 
> -1))) {
> @@ -745,31 +762,21 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>          /* movzwl */
>          tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
>          if (bswap) {
> -            /* rolw $8, data_reg */
> -            tcg_out8(s, 0x66); 
> -            tcg_out_modrm(s, 0xc1, 0, data_reg);
> -            tcg_out8(s, 8);
> +            tcg_out_bswap16(s, data_reg, 0);
>          }
>          break;
>      case 1 | 4:
>          /* movswl */
>          tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
>          if (bswap) {
> -            /* rolw $8, data_reg */
> -            tcg_out8(s, 0x66); 
> -            tcg_out_modrm(s, 0xc1, 0, data_reg);
> -            tcg_out8(s, 8);
> -
> -            /* movswl data_reg, data_reg */
> -            tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
> +            tcg_out_bswap16(s, data_reg, 1);
>          }
>          break;
>      case 2:
>          /* movl (r0), data_reg */
>          tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
>          if (bswap) {
> -            /* bswap */
> -            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
> +            tcg_out_bswap32(s, data_reg);
>          }
>          break;
>      case 3:
> @@ -786,11 +793,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
> *args,
>              tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4);
>          } else {
>              tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4);
> -            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
> +            tcg_out_bswap32(s, data_reg);
>  
>              tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE);
> -            /* bswap */
> -            tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT);
> +            tcg_out_bswap32(s, data_reg2);
>          }
>          break;
>      default:
> @@ -982,8 +988,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      case 2:
>          if (bswap) {
>              tcg_out_mov(s, r1, data_reg);
> -            /* bswap data_reg */
> -            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
> +            tcg_out_bswap32(s, r1);
>              data_reg = r1;
>          }
>          /* movl */
> @@ -992,12 +997,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
> *args,
>      case 3:
>          if (bswap) {
>              tcg_out_mov(s, r1, data_reg2);
> -            /* bswap data_reg */
> -            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
> +            tcg_out_bswap32(s, r1);
>              tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE);
>              tcg_out_mov(s, r1, data_reg);
> -            /* bswap data_reg */
> -            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
> +            tcg_out_bswap32(s, r1);
>              tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4);
>          } else {
>              tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
> @@ -1195,12 +1198,10 @@ static inline void tcg_out_op(TCGContext *s, 
> TCGOpcode opc,
>          break;
>  
>      case INDEX_op_bswap16_i32:
> -        tcg_out8(s, 0x66);
> -        tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
> -        tcg_out8(s, 8);
> +        tcg_out_bswap16(s, args[0], 0);
>          break;
>      case INDEX_op_bswap32_i32:
> -        tcg_out_opc(s, (0xc8 + args[0]) | P_EXT);
> +        tcg_out_bswap32(s, args[0]);
>          break;
>  
>      case INDEX_op_neg_i32:
> -- 
> 1.6.2.5
> 
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net




reply via email to

[Prev in Thread] Current Thread [Next in Thread]