[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG t
From: |
David Gibson |
Subject: |
Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes |
Date: |
Mon, 16 Nov 2015 16:00:16 +1100 |
User-agent: |
Mutt/1.5.23 (2015-06-09) |
On Wed, Nov 11, 2015 at 11:27:16AM +1100, Benjamin Herrenschmidt wrote:
> On ppc64 especially, we flush the tlb on any slbie or tlbie instruction.
>
> However, those instructions often come in bursts of 3 or more (context
> switch will favor a series of slbie's for example to an slbia if the
> SLB has less than a certain number of entries in it, and tlbie's can
> happen in a series, with PAPR, H_BULK_REMOVE can remove up to 4 entries
> at a time.
>
> Doing a tlb_flush() each time is a waste of time. We end up doing a memset
> of the whole TLB, reloading it for the next instruction, memset'ing again,
> etc...
>
> Those instructions don't have to take effect immediately. For slbie, they
> can wait for the next context synchronizing event. For tlbie, the next
> tlbsync.
>
> This implements batching by keeping a flag that indicates that we have a
> TLB in need of flushing. We check it on interrupts, rfi's, isync's and
> tlbsync and flush the TLB if needed.
>
> This reduces the number of tlb_flush() on a boot to a ubuntu installer
> first dialog screen from roughly 360K down to 36K.
>
> Signed-off-by: Benjamin Herrenschmidt <address@hidden>
> ---
> hw/ppc/spapr_hcall.c | 12 +++++++++---
> target-ppc/cpu.h | 2 ++
> target-ppc/excp_helper.c | 9 +++++++++
> target-ppc/helper.h | 1 +
> target-ppc/helper_regs.h | 13 +++++++++++++
> target-ppc/mmu-hash64.c | 12 +++---------
> target-ppc/mmu_helper.c | 9 ++++++++-
> target-ppc/translate.c | 39 ++++++++++++++++++++++++++++++++++++---
> 8 files changed, 81 insertions(+), 16 deletions(-)
>
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index cebceea..7e2cb4b 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -220,6 +220,7 @@ static target_ulong h_remove(PowerPCCPU *cpu,
> sPAPRMachineState *spapr,
>
> switch (ret) {
> case REMOVE_SUCCESS:
> + check_tlb_flush(env);
> return H_SUCCESS;
>
> case REMOVE_NOT_FOUND:
> @@ -257,6 +258,7 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu,
> sPAPRMachineState *spapr,
> target_ulong opcode, target_ulong *args)
> {
> CPUPPCState *env = &cpu->env;
> + target_ulong rc = H_SUCCESS;
> int i;
>
> for (i = 0; i < H_BULK_REMOVE_MAX_BATCH; i++) {
> @@ -290,14 +292,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu,
> sPAPRMachineState *spapr,
> break;
>
> case REMOVE_PARM:
> - return H_PARAMETER;
> + rc = H_PARAMETER;
> + goto exit;
>
> case REMOVE_HW:
> - return H_HARDWARE;
> + rc = H_HARDWARE;
> + goto exit;
> }
> }
> + exit:
> + check_tlb_flush(env);
>
> - return H_SUCCESS;
> + return rc;
> }
>
> static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index aaa7117..e6c43f9 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1013,6 +1013,8 @@ struct CPUPPCState {
> /* PowerPC 64 SLB area */
> ppc_slb_t slb[MAX_SLB_ENTRIES];
> int32_t slb_nr;
> + /* tcg TLB needs flush (deferred slb inval instruction typically) */
> + uint32_t tlb_need_flush;
> #endif
> /* segment registers */
> hwaddr htab_base;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index 3e39098..c1d6605 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -671,6 +671,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int
> excp_model, int excp)
> /* Reset exception state */
> cs->exception_index = POWERPC_EXCP_NONE;
> env->error_code = 0;
> +
> + /* Any interrupt is context synchronizing, check if TCG TLB
> + * needs a delayed flush on ppc64
> + */
> + check_tlb_flush(env);
> }
>
> void ppc_cpu_do_interrupt(CPUState *cs)
> @@ -692,6 +697,7 @@ static void ppc_hw_interrupt(CPUPPCState *env)
> __func__, env, env->pending_interrupts,
> cs->interrupt_request, (int)msr_me, (int)msr_ee);
> #endif
> +
> /* External reset */
> if (env->pending_interrupts & (1 << PPC_INTERRUPT_RESET)) {
> env->pending_interrupts &= ~(1 << PPC_INTERRUPT_RESET);
> @@ -896,6 +902,9 @@ static inline void do_rfi(CPUPPCState *env, target_ulong
> nip, target_ulong msr,
> * as rfi is always the last insn of a TB
> */
> cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +
> + /* Context synchronizing: check if TCG TLB needs flush */
> + check_tlb_flush(env);
> }
>
> void helper_rfi(CPUPPCState *env)
> diff --git a/target-ppc/helper.h b/target-ppc/helper.h
> index 869be15..ff2d50b 100644
> --- a/target-ppc/helper.h
> +++ b/target-ppc/helper.h
> @@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env)
> DEF_HELPER_1(rfid, void, env)
> DEF_HELPER_1(hrfid, void, env)
> #endif
> +DEF_HELPER_1(check_tlb_flush, void, env)
> #endif
>
> DEF_HELPER_3(lmw, void, env, tl, i32)
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index f7edd5b..57da931 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env,
> target_ulong value,
> return excp;
> }
>
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void check_tlb_flush(CPUPPCState *env)
> +{
> + CPUState *cs = CPU(ppc_env_get_cpu(env));
> + if (env->tlb_need_flush) {
> + env->tlb_need_flush = 0;
> + tlb_flush(cs, 1);
> + }
> +}
> +#else
> +static inline void check_tlb_flush(CPUPPCState *env) { }
> +#endif
> +
> #endif /* !defined(__HELPER_REGS_H__) */
> diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
> index 7df6ede..71e1d14 100644
> --- a/target-ppc/mmu-hash64.c
> +++ b/target-ppc/mmu-hash64.c
> @@ -97,10 +97,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf,
> CPUPPCState *env)
>
> void helper_slbia(CPUPPCState *env)
> {
> - PowerPCCPU *cpu = ppc_env_get_cpu(env);
> - int n, do_invalidate;
> + int n;
>
> - do_invalidate = 0;
> /* XXX: Warning: slbia never invalidates the first segment */
> for (n = 1; n < env->slb_nr; n++) {
> ppc_slb_t *slb = &env->slb[n];
> @@ -111,17 +109,13 @@ void helper_slbia(CPUPPCState *env)
> * and we still don't have a tlb_flush_mask(env, n, mask)
> * in QEMU, we just invalidate all TLBs
> */
> - do_invalidate = 1;
> + env->tlb_need_flush = true;
> }
> }
> - if (do_invalidate) {
> - tlb_flush(CPU(cpu), 1);
> - }
> }
>
> void helper_slbie(CPUPPCState *env, target_ulong addr)
> {
> - PowerPCCPU *cpu = ppc_env_get_cpu(env);
> ppc_slb_t *slb;
>
> slb = slb_lookup(env, addr);
> @@ -136,7 +130,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr)
> * and we still don't have a tlb_flush_mask(env, n, mask)
> * in QEMU, we just invalidate all TLBs
> */
> - tlb_flush(CPU(cpu), 1);
> + env->tlb_need_flush = true;
> }
> }
>
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index e52d0e5..54bc5d1 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -23,6 +23,7 @@
> #include "mmu-hash64.h"
> #include "mmu-hash32.h"
> #include "exec/cpu_ldst.h"
> +#include "helper_regs.h"
>
> //#define DEBUG_MMU
> //#define DEBUG_BATS
> @@ -1940,6 +1941,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> case POWERPC_MMU_2_03:
> case POWERPC_MMU_2_06:
> case POWERPC_MMU_2_07:
> + env->tlb_need_flush = 0;
> #endif /* defined(TARGET_PPC64) */
> tlb_flush(CPU(cpu), 1);
> break;
Any particular reason you're leaving this one as an immediate rather
than deferred flush?
> @@ -2019,7 +2021,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env,
> target_ulong addr)
> * and we still don't have a tlb_flush_mask(env, n, mask) in
> QEMU,
> * we just invalidate all TLBs
> */
> - tlb_flush(CPU(cpu), 1);
> + env->tlb_need_flush = 1;
> break;
> #endif /* defined(TARGET_PPC64) */
> default:
> @@ -2904,6 +2906,11 @@ void helper_booke206_tlbflush(CPUPPCState *env,
> target_ulong type)
> }
>
>
> +void helper_check_tlb_flush(CPUPPCState *env)
> +{
> + check_tlb_flush(env);
> +}
> +
>
> /*****************************************************************************/
>
> /* try to fill the TLB and return an exception if error. If retaddr is
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 6d9f252..e18d204 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -3299,9 +3299,32 @@ static void gen_eieio(DisasContext *ctx)
> {
> }
>
> +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +static inline void gen_check_tlb_flush(DisasContext *ctx)
> +{
> + TCGv_i32 t = tcg_temp_new_i32();
> + TCGLabel *l = gen_new_label();
> +
> + tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> + tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> + gen_helper_check_tlb_flush(cpu_env);
> + gen_set_label(l);
> + tcg_temp_free_i32(t);
> +}
> +#else
> +static inline void gen_check_tlb_flush(DisasContext *ctx) { }
> +#endif
> +
> /* isync */
> static void gen_isync(DisasContext *ctx)
> {
> + /*
> + * We need to check for a pending TLB flush. This can only happen in
> + * kernel mode however so check MSR_PR
> + */
> + if (!ctx->pr) {
> + gen_check_tlb_flush(ctx);
> + }
> gen_stop_exception(ctx);
> }
>
> @@ -3458,6 +3481,15 @@ STCX(stqcx_, 16);
> /* sync */
> static void gen_sync(DisasContext *ctx)
> {
> + uint32_t l = (ctx->opcode >> 21) & 3;
> +
> + /*
> + * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
> + * This can only happen in kernel mode however so check MSR_PR as well.
> + */
> + if (l == 2 && !ctx->pr) {
> + gen_check_tlb_flush(ctx);
> + }
> }
>
> /* wait */
> @@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx)
> gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);
> return;
> }
> - /* This has no effect: it should ensure that all previous
> - * tlbie have completed
> + /* tlbsync is a nop for server, ptesync handles delayed tlb flush,
> + * embedded however needs to deal with tlbsync. We don't try to be
> + * fancy and swallow the overhead of checking for both.
> */
> - gen_stop_exception(ctx);
> + gen_check_tlb_flush(ctx);
> #endif
> }
>
Should you be clearing the pending flush flag cpu_reset()?
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
signature.asc
Description: PGP signature
- Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/77] ppc: Use split I/D mmu modes to avoid flushes on interrupts, (continued)
[Qemu-devel] [PATCH 05/77] ppc: Update SPR definitions, Benjamin Herrenschmidt, 2015/11/10
[Qemu-devel] [PATCH 11/77] ppc: Create cpu_ppc_set_papr() helper, Benjamin Herrenschmidt, 2015/11/10
[Qemu-devel] [PATCH 12/77] ppc: Better figure out if processor has HV mode, Benjamin Herrenschmidt, 2015/11/10
[Qemu-devel] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes, Benjamin Herrenschmidt, 2015/11/10
- Re: [Qemu-devel] [Qemu-ppc] [PATCH 03/77] ppc: Do some batching of TCG tlb flushes,
David Gibson <=
[Qemu-devel] [PATCH 09/77] ppc: Fix do_rfi() for rfi emulation, Benjamin Herrenschmidt, 2015/11/10
[Qemu-devel] [PATCH 08/77] ppc: Add number of threads per core to the processor definition, Benjamin Herrenschmidt, 2015/11/10
[Qemu-devel] [PATCH 06/77] ppc: Add macros to register hypervisor mode SPRs, Benjamin Herrenschmidt, 2015/11/10