qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 02/13] target/ppc: Use atomic load for LQ and LQARX


From: David Gibson
Subject: Re: [Qemu-devel] [PATCH 02/13] target/ppc: Use atomic load for LQ and LQARX
Date: Thu, 28 Jun 2018 13:49:55 +1000
User-agent: Mutt/1.10.0 (2018-05-17)

On Tue, Jun 26, 2018 at 09:19:10AM -0700, Richard Henderson wrote:
> Section 1.4 of the Power ISA v3.0B states that both of these
> instructions are single-copy atomic.  As we cannot (yet) issue
> 128-bit loads within TCG, use the generic helpers provided.
> 
> Since TCG cannot (yet) return a 128-bit value, add a slot within
> CPUPPCState for returning the high half of a 128-bit return value.
> This solution is preferred to the helper assigning to architectural
> registers directly, as it avoids clobbering all TCG live values.
> 
> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  target/ppc/cpu.h        |  3 ++
>  target/ppc/helper.h     |  5 +++
>  target/ppc/mem_helper.c | 20 ++++++++-
>  target/ppc/translate.c  | 93 ++++++++++++++++++++++++++++++-----------
>  4 files changed, 95 insertions(+), 26 deletions(-)
> 
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index c7f3fb6b73..973cf44cda 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -1015,6 +1015,9 @@ struct CPUPPCState {
>      /* Next instruction pointer */
>      target_ulong nip;
>  
> +    /* High part of 128-bit helper return.  */
> +    uint64_t retxh;
> +

Adding a temporary here is kind of gross.  I guess the helper
interface doesn't allow for 128-bit returns, but couldn't you pass a
register number into the helper and have it update the right GPR
without going through a temp?

>      int access_type; /* when a memory exception occurs, the access
>                          type is stored here */
>  
> diff --git a/target/ppc/helper.h b/target/ppc/helper.h
> index d751f0e219..3f451a5d7e 100644
> --- a/target/ppc/helper.h
> +++ b/target/ppc/helper.h
> @@ -799,3 +799,8 @@ DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
>  
>  DEF_HELPER_1(tbegin, void, env)
>  DEF_HELPER_FLAGS_1(fixup_thrm, TCG_CALL_NO_RWG, void, env)
> +
> +#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
> +DEF_HELPER_FLAGS_3(lq_le_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
> +DEF_HELPER_FLAGS_3(lq_be_parallel, TCG_CALL_NO_WG, i64, env, tl, i32)
> +#endif
> diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
> index a34e604db3..44a8f3445a 100644
> --- a/target/ppc/mem_helper.c
> +++ b/target/ppc/mem_helper.c
> @@ -21,9 +21,9 @@
>  #include "exec/exec-all.h"
>  #include "qemu/host-utils.h"
>  #include "exec/helper-proto.h"
> -
>  #include "helper_regs.h"
>  #include "exec/cpu_ldst.h"
> +#include "tcg.h"
>  #include "internal.h"
>  
>  //#define DEBUG_OP
> @@ -215,6 +215,24 @@ target_ulong helper_lscbx(CPUPPCState *env, target_ulong addr, uint32_t reg,
>      return i;
>  }
>  
> +#if defined(TARGET_PPC64) && defined(CONFIG_ATOMIC128)
> +uint64_t helper_lq_le_parallel(CPUPPCState *env, target_ulong addr,
> +                               uint32_t opidx)
> +{
> +    Int128 ret = helper_atomic_ldo_le_mmu(env, addr, opidx, GETPC());
> +    env->retxh = int128_gethi(ret);
> +    return int128_getlo(ret);
> +}
> +
> +uint64_t helper_lq_be_parallel(CPUPPCState *env, target_ulong addr,
> +                               uint32_t opidx)
> +{
> +    Int128 ret = helper_atomic_ldo_be_mmu(env, addr, opidx, GETPC());
> +    env->retxh = int128_gethi(ret);
> +    return int128_getlo(ret);
> +}
> +#endif
> +
>  /*****************************************************************************/
>  /* Altivec extension helpers */
>  #if defined(HOST_WORDS_BIGENDIAN)
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index 3a215a1dc6..0923cc24e3 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -2607,7 +2607,7 @@ static void gen_ld(DisasContext *ctx)
>  static void gen_lq(DisasContext *ctx)
>  {
>      int ra, rd;
> -    TCGv EA;
> +    TCGv EA, hi, lo;
>  
>      /* lq is a legal user mode instruction starting in ISA 2.07 */
>      bool legal_in_user_mode = (ctx->insns_flags2 & PPC2_LSQ_ISA207) != 0;
> @@ -2633,16 +2633,35 @@ static void gen_lq(DisasContext *ctx)
>      EA = tcg_temp_new();
>      gen_addr_imm_index(ctx, EA, 0x0F);
>  
> -    /* We only need to swap high and low halves. gen_qemu_ld64_i64 does
> -       necessary 64-bit byteswap already. */
> -    if (unlikely(ctx->le_mode)) {
> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);
> +    /* Note that the low part is always in RD+1, even in LE mode.  */
> +    lo = cpu_gpr[rd + 1];
> +    hi = cpu_gpr[rd];
> +
> +    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
> +#ifdef CONFIG_ATOMIC128
> +        TCGv_i32 oi = tcg_temp_new_i32();
> +        if (ctx->le_mode) {
> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ, ctx->mem_idx));
> +            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
> +        } else {
> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ, ctx->mem_idx));
> +            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
> +        }
> +        tcg_temp_free_i32(oi);
> +        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
> +#else
> +        /* Restart with exclusive lock.  */
> +        gen_helper_exit_atomic(cpu_env);
> +        ctx->base.is_jmp = DISAS_NORETURN;
> +#endif
> +    } else if (ctx->le_mode) {
> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ);
>          gen_addr_add(ctx, EA, EA, 8);
> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);
> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);
>      } else {
> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd], EA);
> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ);
>          gen_addr_add(ctx, EA, EA, 8);
> -        gen_qemu_ld64_i64(ctx, cpu_gpr[rd + 1], EA);
> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);
>      }
>      tcg_temp_free(EA);
>  }
> @@ -3236,9 +3255,8 @@ STCX(stdcx_, DEF_MEMOP(MO_Q))
>  /* lqarx */
>  static void gen_lqarx(DisasContext *ctx)
>  {
> -    TCGv EA;
>      int rd = rD(ctx->opcode);
> -    TCGv gpr1, gpr2;
> +    TCGv EA, hi, lo;
>  
>      if (unlikely((rd & 1) || (rd == rA(ctx->opcode)) ||
>                   (rd == rB(ctx->opcode)))) {
> @@ -3247,24 +3265,49 @@ static void gen_lqarx(DisasContext *ctx)
>      }
>  
>      gen_set_access_type(ctx, ACCESS_RES);
> -    EA = tcg_temp_local_new();
> +    EA = tcg_temp_new();
>      gen_addr_reg_index(ctx, EA);
> -    gen_check_align(ctx, EA, 15);
> -    if (unlikely(ctx->le_mode)) {
> -        gpr1 = cpu_gpr[rd+1];
> -        gpr2 = cpu_gpr[rd];
> -    } else {
> -        gpr1 = cpu_gpr[rd];
> -        gpr2 = cpu_gpr[rd+1];
> -    }
> -    tcg_gen_qemu_ld_i64(gpr1, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
> -    tcg_gen_mov_tl(cpu_reserve, EA);
> -    gen_addr_add(ctx, EA, EA, 8);
> -    tcg_gen_qemu_ld_i64(gpr2, EA, ctx->mem_idx, DEF_MEMOP(MO_Q));
>  
> -    tcg_gen_st_tl(gpr1, cpu_env, offsetof(CPUPPCState, reserve_val));
> -    tcg_gen_st_tl(gpr2, cpu_env, offsetof(CPUPPCState, reserve_val2));
> +    /* Note that the low part is always in RD+1, even in LE mode.  */
> +    lo = cpu_gpr[rd + 1];
> +    hi = cpu_gpr[rd];
> +
> +    if (tb_cflags(ctx->base.tb) & CF_PARALLEL) {
> +#ifdef CONFIG_ATOMIC128
> +        TCGv_i32 oi = tcg_temp_new_i32();
> +        if (ctx->le_mode) {
> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_LEQ | MO_ALIGN_16,
> +                                                ctx->mem_idx));
> +            gen_helper_lq_le_parallel(lo, cpu_env, EA, oi);
> +        } else {
> +            tcg_gen_movi_i32(oi, make_memop_idx(MO_BEQ | MO_ALIGN_16,
> +                                                ctx->mem_idx));
> +            gen_helper_lq_be_parallel(lo, cpu_env, EA, oi);
> +        }
> +        tcg_temp_free_i32(oi);
> +        tcg_gen_ld_i64(hi, cpu_env, offsetof(CPUPPCState, retxh));
> +#else
> +        /* Restart with exclusive lock.  */
> +        gen_helper_exit_atomic(cpu_env);
> +        ctx->base.is_jmp = DISAS_NORETURN;
> +        tcg_temp_free(EA);
> +        return;
> +#endif
> +    } else if (ctx->le_mode) {
> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_LEQ | MO_ALIGN_16);
> +        tcg_gen_mov_tl(cpu_reserve, EA);
> +        gen_addr_add(ctx, EA, EA, 8);
> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_LEQ);
> +    } else {
> +        tcg_gen_qemu_ld_i64(hi, EA, ctx->mem_idx, MO_BEQ | MO_ALIGN_16);
> +        tcg_gen_mov_tl(cpu_reserve, EA);
> +        gen_addr_add(ctx, EA, EA, 8);
> +        tcg_gen_qemu_ld_i64(lo, EA, ctx->mem_idx, MO_BEQ);
> +    }
>      tcg_temp_free(EA);
> +
> +    tcg_gen_st_tl(hi, cpu_env, offsetof(CPUPPCState, reserve_val));
> +    tcg_gen_st_tl(lo, cpu_env, offsetof(CPUPPCState, reserve_val2));
>  }
>  
>  /* stqcx. */

-- 
David Gibson                    | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
                                | _way_ _around_!
http://www.ozlabs.org/~dgibson

Attachment: signature.asc
Description: PGP signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]