[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB
From: |
Nicholas Piggin |
Subject: |
Re: [PATCH 2/4] target/ppc: Add recording of taken branches to BHRB |
Date: |
Fri, 15 Sep 2023 11:02:48 +1000 |
On Wed Sep 13, 2023 at 6:24 AM AEST, Glenn Miles wrote:
> This commit continues adding support for the Branch History
> Rolling Buffer (BHRB) as is provided starting with the P8
> processor and continuing with its successors. This commit
> is limited to the recording and filtering of taken branches.
>
> The following changes were made:
>
> - Added a BHRB buffer for storing branch instruction and
> target addresses for taken branches
> - Renamed gen_update_cfar to gen_update_branch_history and
> added a 'target' parameter to hold the branch target
> address and 'inst_type' parameter to use for filtering
> - Added a combination of jit-time and run-time checks to
> gen_update_branch_history for determining if a branch
> should be recorded
> - Added TCG code to gen_update_branch_history that stores
> data to the BHRB and updates the BHRB offset.
> - Added BHRB resource initialization and reset functions
> - Enabled functionality for P8, P9 and P10 processors.
>
> Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
> ---
> target/ppc/cpu.h | 18 +++-
> target/ppc/cpu_init.c | 41 ++++++++-
> target/ppc/helper_regs.c | 32 +++++++
> target/ppc/helper_regs.h | 1 +
> target/ppc/power8-pmu.c | 2 +
> target/ppc/power8-pmu.h | 7 ++
> target/ppc/translate.c | 114 +++++++++++++++++++++++--
> target/ppc/translate/branch-impl.c.inc | 2 +-
> 8 files changed, 205 insertions(+), 12 deletions(-)
>
> diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
> index 20ae1466a5..bda1afb700 100644
> --- a/target/ppc/cpu.h
> +++ b/target/ppc/cpu.h
> @@ -454,8 +454,9 @@ FIELD(MSR, LE, MSR_LE, 1)
> #define MMCR2_UREG_MASK (MMCR2_FC1P0 | MMCR2_FC2P0 | MMCR2_FC3P0 | \
> MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
>
> -#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording Disable */
> -
> +#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording Disable */
Fold this tidying into patch 1.
> +#define MMCRA_IFM_MASK PPC_BITMASK(32, 33) /* BHRB Instruction Filtering */
> +#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
>
> #define MMCR1_EVT_SIZE 8
> /* extract64() does a right shift before extracting */
> @@ -682,6 +683,8 @@ enum {
> POWERPC_FLAG_SMT = 0x00400000,
> /* Using "LPAR per core" mode (as opposed to per-thread)
> */
> POWERPC_FLAG_SMT_1LPAR = 0x00800000,
> + /* Has BHRB */
> + POWERPC_FLAG_BHRB = 0x01000000,
> };
Interesting question of which patch to add different flags. I'm
strongly in add when you add code that uses them like this one,
but it's a matter of taste and not always practical to be an
absolute rule. I don't mind too much what others do, but maybe
this and the pcc->flags init should go in patch 1 since that's adding
flags that aren't yet used?
>
> /*
> @@ -1110,6 +1113,9 @@ DEXCR_ASPECT(PHIE, 6)
> #define PPC_CPU_OPCODES_LEN 0x40
> #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
>
> +#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
> +#define BHRB_MAX_NUM_ENTRIES (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
> +
> struct CPUArchState {
> /* Most commonly used resources during translated code execution first */
> target_ulong gpr[32]; /* general purpose registers */
> @@ -1196,6 +1202,14 @@ struct CPUArchState {
> int dcache_line_size;
> int icache_line_size;
>
> + /* Branch History Rolling Buffer (BHRB) resources */
> + target_ulong bhrb_num_entries;
> + target_ulong bhrb_base;
> + target_ulong bhrb_filter;
> + target_ulong bhrb_offset;
> + target_ulong bhrb_offset_mask;
> + uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
Put these under ifdef TARGET_PPC64?
> +
> /* These resources are used during exception processing */
> /* CPU model definition */
> target_ulong msr_mask;
> diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
> index 568f9c3b88..19d7505a73 100644
> --- a/target/ppc/cpu_init.c
> +++ b/target/ppc/cpu_init.c
> @@ -6100,6 +6100,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
> pcc->l1_icache_size = 0x8000;
> }
>
> +static void bhrb_init_state(CPUPPCState *env, target_long num_entries_log2)
> +{
> + if (env->flags & POWERPC_FLAG_BHRB) {
> + if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) {
> + num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2;
> + }
> + env->bhrb_num_entries = 1 << num_entries_log2;
> + env->bhrb_base = (target_long)&env->bhrb[0];
> + env->bhrb_offset_mask = (env->bhrb_num_entries * sizeof(uint64_t)) -
> 1;
> + }
> +}
> +
> +static void bhrb_reset_state(CPUPPCState *env)
> +{
> + if (env->flags & POWERPC_FLAG_BHRB) {
> + env->bhrb_offset = 0;
> + env->bhrb_filter = 0;
> + memset(env->bhrb, 0, sizeof(env->bhrb));
> + }
> +}
> +
> +#define POWER8_BHRB_ENTRIES_LOG2 5
> static void init_proc_POWER8(CPUPPCState *env)
> {
> /* Common Registers */
> @@ -6141,6 +6163,8 @@ static void init_proc_POWER8(CPUPPCState *env)
> env->dcache_line_size = 128;
> env->icache_line_size = 128;
>
> + bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
> +
> /* Allocate hardware IRQ controller */
> init_excp_POWER8(env);
> ppcPOWER7_irq_init(env_archcpu(env));
> @@ -6241,7 +6265,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
> pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
> POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
> POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> - POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
> + POWERPC_FLAG_VSX | POWERPC_FLAG_TM |
> + POWERPC_FLAG_BHRB;
> pcc->l1_dcache_size = 0x8000;
> pcc->l1_icache_size = 0x8000;
> }
> @@ -6265,6 +6290,7 @@ static struct ppc_radix_page_info
> POWER9_radix_page_info = {
> };
> #endif /* CONFIG_USER_ONLY */
>
> +#define POWER9_BHRB_ENTRIES_LOG2 5
> static void init_proc_POWER9(CPUPPCState *env)
> {
> /* Common Registers */
> @@ -6315,6 +6341,8 @@ static void init_proc_POWER9(CPUPPCState *env)
> env->dcache_line_size = 128;
> env->icache_line_size = 128;
>
> + bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
> +
> /* Allocate hardware IRQ controller */
> init_excp_POWER9(env);
> ppcPOWER9_irq_init(env_archcpu(env));
> @@ -6434,7 +6462,8 @@ POWERPC_FAMILY(POWER9)(ObjectClass *oc, void *data)
> pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
> POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
> POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> - POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
> + POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV |
> + POWERPC_FLAG_BHRB;
> pcc->l1_dcache_size = 0x8000;
> pcc->l1_icache_size = 0x8000;
> }
> @@ -6458,6 +6487,7 @@ static struct ppc_radix_page_info
> POWER10_radix_page_info = {
> };
> #endif /* !CONFIG_USER_ONLY */
>
> +#define POWER10_BHRB_ENTRIES_LOG2 5
> static void init_proc_POWER10(CPUPPCState *env)
> {
> /* Common Registers */
> @@ -6505,6 +6535,8 @@ static void init_proc_POWER10(CPUPPCState *env)
> env->dcache_line_size = 128;
> env->icache_line_size = 128;
>
> + bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
> +
> /* Allocate hardware IRQ controller */
> init_excp_POWER10(env);
> ppcPOWER9_irq_init(env_archcpu(env));
> @@ -6610,7 +6642,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
> pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
> POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
> POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
> - POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV;
> + POWERPC_FLAG_VSX | POWERPC_FLAG_TM | POWERPC_FLAG_SCV |
> + POWERPC_FLAG_BHRB;
> pcc->l1_dcache_size = 0x8000;
> pcc->l1_icache_size = 0x8000;
> }
> @@ -7178,6 +7211,8 @@ static void ppc_cpu_reset_hold(Object *obj)
> }
> env->spr[i] = spr->default_value;
> }
> +
> + bhrb_reset_state(env);
> }
>
> #ifndef CONFIG_USER_ONLY
> diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
> index 4ff054063d..68b27196d9 100644
> --- a/target/ppc/helper_regs.c
> +++ b/target/ppc/helper_regs.c
> @@ -752,3 +752,35 @@ void register_usprgh_sprs(CPUPPCState *env)
> &spr_read_ureg, SPR_NOACCESS,
> 0x00000000);
> }
> +
> +void hreg_bhrb_filter_update(CPUPPCState *env)
> +{
> + target_long ifm;
> +
> + if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
> + /* disable recording to BHRB */
> + env->bhrb_filter = BHRB_TYPE_NORECORD;
> + return;
> + }
> +
> + ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> MMCRA_IFM_SHIFT;
> + switch (ifm) {
> + case 0:
> + /* record all branches */
> + env->bhrb_filter = -1;
> + break;
> + case 1:
> + /* only record calls (LK = 1) */
> + env->bhrb_filter = BHRB_TYPE_CALL;
> + break;
> + case 2:
> + /* only record indirect branches */
> + env->bhrb_filter = BHRB_TYPE_INDIRECT;
> + break;
> + case 3:
> + /* only record conditional branches */
> + env->bhrb_filter = BHRB_TYPE_COND;
> + break;
> + }
> +}
> +
> diff --git a/target/ppc/helper_regs.h b/target/ppc/helper_regs.h
> index 8196c1346d..ab6aba1c24 100644
> --- a/target/ppc/helper_regs.h
> +++ b/target/ppc/helper_regs.h
> @@ -25,6 +25,7 @@ void hreg_compute_hflags(CPUPPCState *env);
> void hreg_update_pmu_hflags(CPUPPCState *env);
> void cpu_interrupt_exittb(CPUState *cs);
> int hreg_store_msr(CPUPPCState *env, target_ulong value, int alter_hv);
> +void hreg_bhrb_filter_update(CPUPPCState *env);
>
> #ifdef CONFIG_USER_ONLY
> static inline void check_tlb_flush(CPUPPCState *env, bool global) { }
> diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
> index 6f5d4e1256..18149a15b2 100644
> --- a/target/ppc/power8-pmu.c
> +++ b/target/ppc/power8-pmu.c
> @@ -261,6 +261,7 @@ void helper_store_mmcr0(CPUPPCState *env, target_ulong
> value)
> env->spr[SPR_POWER_MMCR0] = value;
>
> pmu_mmcr01a_updated(env);
> + hreg_bhrb_filter_update(env);
>
> /* Update cycle overflow timers with the current MMCR0 state */
> pmu_update_overflow_timers(env);
> @@ -280,6 +281,7 @@ void helper_store_mmcrA(CPUPPCState *env, uint64_t value)
> env->spr[SPR_POWER_MMCRA] = value;
>
> pmu_mmcr01a_updated(env);
> + hreg_bhrb_filter_update(env);
> }
>
Can these just be moved into pmu_mmcr01a_updated? AFAIKS they only
depend on MMCR0/A.
> target_ulong helper_read_pmc(CPUPPCState *env, uint32_t sprn)
> diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
> index 87fa8c9334..a887094045 100644
> --- a/target/ppc/power8-pmu.h
> +++ b/target/ppc/power8-pmu.h
> @@ -17,6 +17,13 @@
>
> #define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
>
> +#define BHRB_TYPE_NORECORD 0x00
> +#define BHRB_TYPE_CALL 0x01
> +#define BHRB_TYPE_INDIRECT 0x02
> +#define BHRB_TYPE_COND 0x04
> +#define BHRB_TYPE_OTHER 0x08
> +#define BHRB_TYPE_XL_FORM 0x10
> +
> void cpu_ppc_pmu_init(CPUPPCState *env);
> void pmu_mmcr01a_updated(CPUPPCState *env);
> #else
> diff --git a/target/ppc/translate.c b/target/ppc/translate.c
> index d93fbd4574..7824475f54 100644
> --- a/target/ppc/translate.c
> +++ b/target/ppc/translate.c
> @@ -177,6 +177,7 @@ struct DisasContext {
> #if defined(TARGET_PPC64)
> bool sf_mode;
> bool has_cfar;
> + bool has_bhrb;
> #endif
> bool fpu_enabled;
> bool altivec_enabled;
> @@ -4104,12 +4105,100 @@ static void gen_rvwinkle(DisasContext *ctx)
> }
> #endif /* #if defined(TARGET_PPC64) */
>
> -static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
> +
> +static inline void gen_update_branch_history(DisasContext *ctx,
> + target_ulong nip,
> + TCGv target,
> + target_long inst_type)
> {
> #if defined(TARGET_PPC64)
> + TCGv t0;
> + TCGv t1;
> + TCGv t2;
> + TCGv t3;
> + TCGLabel *no_update;
> +
> if (ctx->has_cfar) {
> tcg_gen_movi_tl(cpu_cfar, nip);
> }
> +
> + if (!ctx->has_bhrb || inst_type == BHRB_TYPE_NORECORD) {
> + return;
> + }
> +
> + /* ISA 3.1 adds the PMCRA[BRHBRD] and problem state checks */
> + if ((ctx->insns_flags2 & PPC2_ISA310) && (ctx->mmcra_bhrbrd ||
> !ctx->pr)) {
> + return;
> + }
> +
> + /* Check for BHRB "frozen" conditions */
> + if (ctx->mmcr0_fcpc) {
> + if (ctx->mmcr0_fcp) {
> + if ((ctx->hv) && (ctx->pr)) {
> + return;
> + }
> + } else if (!(ctx->hv) && (ctx->pr)) {
> + return;
> + }
> + } else if ((ctx->mmcr0_fcp) && (ctx->pr)) {
> + return;
> + }
> +
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + t2 = tcg_temp_new();
> + t3 = tcg_temp_new();
> + no_update = gen_new_label();
> +
> + /* check for bhrb filtering */
> + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_filter));
> + tcg_gen_andi_tl(t0, t0, inst_type);
> + tcg_gen_brcondi_tl(TCG_COND_EQ, t0, 0, no_update);
> +
> + /* load bhrb base address into t2 */
> + tcg_gen_ld_tl(t2, cpu_env, offsetof(CPUPPCState, bhrb_base));
> +
> + /* load current bhrb_offset into t0 */
> + tcg_gen_ld_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
> +
> + /* load a BHRB offset mask into t3 */
> + tcg_gen_ld_tl(t3, cpu_env, offsetof(CPUPPCState, bhrb_offset_mask));
> +
> + /* add t2 to t0 to get address of bhrb entry */
> + tcg_gen_add_tl(t1, t0, t2);
> +
> + /* store nip into bhrb at bhrb_offset */
> + tcg_gen_st_i64(tcg_constant_i64(nip), (TCGv_ptr)t1, 0);
> +
> + /* add 8 to current bhrb_offset */
> + tcg_gen_addi_tl(t0, t0, 8);
> +
> + /* apply offset mask */
> + tcg_gen_and_tl(t0, t0, t3);
For some cases where the value remains live for a time and not reused,
maybe use a more descriptive name? I know existing code is pretty lazy
with names (and I share some blame). t0,t2,t3 could be base, offset,
mask respectively, and t1 could be tmp that is used for filter and
address generation. Just a suggestion?
> +
> + if (inst_type & BHRB_TYPE_XL_FORM) {
> + /* Also record the target address for XL-Form branches */
> +
> + /* add t2 to t0 to get address of bhrb entry */
> + tcg_gen_add_tl(t1, t0, t2);
> +
> + /* Set the 'T' bit for target entries */
> + tcg_gen_ori_tl(t2, target, 0x2);
> +
> + /* Store target address in bhrb */
> + tcg_gen_st_tl(t2, (TCGv_ptr)t1, 0);
> +
> + /* add 8 to current bhrb_offset */
> + tcg_gen_addi_tl(t0, t0, 8);
> +
> + /* apply offset mask */
> + tcg_gen_and_tl(t0, t0, t3);
> + }
I would say this is bordering on warranting a new function since it
mostly duplicates previous block. You could have it take base, mask,
offset and value to store, and have it return new offset.
> +
> + /* save updated bhrb_offset for next time */
> + tcg_gen_st_tl(t0, cpu_env, offsetof(CPUPPCState, bhrb_offset));
> +
> + gen_set_label(no_update);
> #endif
> }
It all looks pretty good otherwise. I do worry about POWER8/9 which
do not have BHRB disable bit. How much do they slow down I wonder?
Thanks,
Nick
- [PATCH 0/4] Add BHRB Facility Support, Glenn Miles, 2023/09/12
- [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions, Glenn Miles, 2023/09/12
- [PATCH 3/4] target/ppc: Add clrbhrb and mfbhrbe instructions, Glenn Miles, 2023/09/12
- [PATCH 4/4] target/ppc: Add migration support for BHRB, Glenn Miles, 2023/09/12
- [PATCH 4/4] target/ppc: Add migration support for BHRB, Glenn Miles, 2023/09/12