[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 23/72] This commit continues adding support for the Branch History
|
From: |
Nicholas Piggin |
|
Subject: |
[PULL 23/72] This commit continues adding support for the Branch History Rolling Buffer (BHRB) as is provided starting with the P8 processor and continuing with its successors. This commit is limited to the recording and filtering of taken branches. |
|
Date: |
Fri, 24 May 2024 09:06:56 +1000 |
From: Glenn Miles <milesg@linux.vnet.ibm.com>
The following changes were made:
- Enabled functionality on P10 processors only due to
performance impact seen with P8 and P9 where it is not
disabled for non problem state branches.
- Added a BHRB buffer for storing branch instruction and
target addresses for taken branches
- Renamed gen_update_cfar to gen_update_branch_history and
added a 'target' parameter to hold the branch target
address and 'inst_type' parameter to use for filtering
- Added TCG code to gen_update_branch_history that stores
data to the BHRB and updates the BHRB offset.
- Added BHRB resource initialization and reset functions
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Glenn Miles <milesg@linux.vnet.ibm.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
target/ppc/cpu.h | 17 +++++
target/ppc/cpu_init.c | 37 +++++++++-
target/ppc/power8-pmu.c | 33 +++++++++
target/ppc/power8-pmu.h | 7 ++
target/ppc/translate.c | 97 ++++++++++++++++++++++++--
target/ppc/translate/branch-impl.c.inc | 2 +-
6 files changed, 185 insertions(+), 8 deletions(-)
diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 195d4be2b7..2f91d7dc33 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -548,6 +548,8 @@ FIELD(MSR, LE, MSR_LE, 1)
MMCR2_FC4P0 | MMCR2_FC5P0 | MMCR2_FC6P0)
#define MMCRA_BHRBRD PPC_BIT(26) /* BHRB Recording Disable */
+#define MMCRA_IFM_MASK PPC_BITMASK(32, 33) /* BHRB Instruction Filtering */
+#define MMCRA_IFM_SHIFT PPC_BIT_NR(33)
#define MMCR1_EVT_SIZE 8
/* extract64() does a right shift before extracting */
@@ -774,6 +776,8 @@ enum {
POWERPC_FLAG_SMT = 0x00400000,
/* Using "LPAR per core" mode (as opposed to per-thread) */
POWERPC_FLAG_SMT_1LPAR = 0x00800000,
+ /* Has BHRB */
+ POWERPC_FLAG_BHRB = 0x01000000,
};
/*
@@ -1215,6 +1219,9 @@ struct pnv_tod_tbst {
#define PPC_CPU_OPCODES_LEN 0x40
#define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
+#define BHRB_MAX_NUM_ENTRIES_LOG2 (5)
+#define BHRB_MAX_NUM_ENTRIES (1 << BHRB_MAX_NUM_ENTRIES_LOG2)
+
struct CPUArchState {
/* Most commonly used resources during translated code execution first */
target_ulong gpr[32]; /* general purpose registers */
@@ -1311,6 +1318,16 @@ struct CPUArchState {
int dcache_line_size;
int icache_line_size;
+#ifdef TARGET_PPC64
+ /* Branch History Rolling Buffer (BHRB) resources */
+ target_ulong bhrb_num_entries;
+ intptr_t bhrb_base;
+ target_ulong bhrb_filter;
+ target_ulong bhrb_offset;
+ target_ulong bhrb_offset_mask;
+ uint64_t bhrb[BHRB_MAX_NUM_ENTRIES];
+#endif
+
/* These resources are used during exception processing */
/* CPU model definition */
target_ulong msr_mask;
diff --git a/target/ppc/cpu_init.c b/target/ppc/cpu_init.c
index 366ea8568b..1ec84b5ddc 100644
--- a/target/ppc/cpu_init.c
+++ b/target/ppc/cpu_init.c
@@ -6142,6 +6142,28 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
pcc->l1_icache_size = 0x8000;
}
+static void bhrb_init_state(CPUPPCState *env, target_long num_entries_log2)
+{
+ if (env->flags & POWERPC_FLAG_BHRB) {
+ if (num_entries_log2 > BHRB_MAX_NUM_ENTRIES_LOG2) {
+ num_entries_log2 = BHRB_MAX_NUM_ENTRIES_LOG2;
+ }
+ env->bhrb_num_entries = 1 << num_entries_log2;
+ env->bhrb_base = (intptr_t)&env->bhrb[0];
+ env->bhrb_offset_mask = (env->bhrb_num_entries * sizeof(uint64_t)) - 1;
+ }
+}
+
+static void bhrb_reset_state(CPUPPCState *env)
+{
+ if (env->flags & POWERPC_FLAG_BHRB) {
+ env->bhrb_offset = 0;
+ env->bhrb_filter = 0;
+ memset(env->bhrb, 0, sizeof(env->bhrb));
+ }
+}
+
+#define POWER8_BHRB_ENTRIES_LOG2 5
static void init_proc_POWER8(CPUPPCState *env)
{
/* Common Registers */
@@ -6183,6 +6205,8 @@ static void init_proc_POWER8(CPUPPCState *env)
env->dcache_line_size = 128;
env->icache_line_size = 128;
+ bhrb_init_state(env, POWER8_BHRB_ENTRIES_LOG2);
+
/* Allocate hardware IRQ controller */
init_excp_POWER8(env);
ppcPOWER7_irq_init(env_archcpu(env));
@@ -6307,6 +6331,7 @@ static struct ppc_radix_page_info POWER9_radix_page_info
= {
};
#endif /* CONFIG_USER_ONLY */
+#define POWER9_BHRB_ENTRIES_LOG2 5
static void init_proc_POWER9(CPUPPCState *env)
{
/* Common Registers */
@@ -6357,6 +6382,8 @@ static void init_proc_POWER9(CPUPPCState *env)
env->dcache_line_size = 128;
env->icache_line_size = 128;
+ bhrb_init_state(env, POWER9_BHRB_ENTRIES_LOG2);
+
/* Allocate hardware IRQ controller */
init_excp_POWER9(env);
ppcPOWER9_irq_init(env_archcpu(env));
@@ -6497,6 +6524,7 @@ static struct ppc_radix_page_info POWER10_radix_page_info
= {
};
#endif /* !CONFIG_USER_ONLY */
+#define POWER10_BHRB_ENTRIES_LOG2 5
static void init_proc_POWER10(CPUPPCState *env)
{
/* Common Registers */
@@ -6546,6 +6574,8 @@ static void init_proc_POWER10(CPUPPCState *env)
env->dcache_line_size = 128;
env->icache_line_size = 128;
+ bhrb_init_state(env, POWER10_BHRB_ENTRIES_LOG2);
+
/* Allocate hardware IRQ controller */
init_excp_POWER10(env);
ppcPOWER9_irq_init(env_archcpu(env));
@@ -6650,7 +6680,8 @@ POWERPC_FAMILY(POWER10)(ObjectClass *oc, void *data)
pcc->flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
- POWERPC_FLAG_VSX | POWERPC_FLAG_SCV;
+ POWERPC_FLAG_VSX | POWERPC_FLAG_SCV |
+ POWERPC_FLAG_BHRB;
pcc->l1_dcache_size = 0x8000;
pcc->l1_icache_size = 0x8000;
}
@@ -7222,6 +7253,10 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType
type)
}
env->spr[i] = spr->default_value;
}
+
+#if defined(TARGET_PPC64)
+ bhrb_reset_state(env);
+#endif
}
#ifndef CONFIG_USER_ONLY
diff --git a/target/ppc/power8-pmu.c b/target/ppc/power8-pmu.c
index 6f5d4e1256..db9ee8e96b 100644
--- a/target/ppc/power8-pmu.c
+++ b/target/ppc/power8-pmu.c
@@ -82,6 +82,37 @@ static void pmu_update_summaries(CPUPPCState *env)
env->pmc_cyc_cnt = cyc_cnt;
}
+static void hreg_bhrb_filter_update(CPUPPCState *env)
+{
+ target_long ifm;
+
+ if (!(env->spr[SPR_POWER_MMCR0] & MMCR0_PMAE)) {
+ /* disable recording to BHRB */
+ env->bhrb_filter = BHRB_TYPE_NORECORD;
+ return;
+ }
+
+ ifm = (env->spr[SPR_POWER_MMCRA] & MMCRA_IFM_MASK) >> MMCRA_IFM_SHIFT;
+ switch (ifm) {
+ case 0:
+ /* record all branches */
+ env->bhrb_filter = -1;
+ break;
+ case 1:
+ /* only record calls (LK = 1) */
+ env->bhrb_filter = BHRB_TYPE_CALL;
+ break;
+ case 2:
+ /* only record indirect branches */
+ env->bhrb_filter = BHRB_TYPE_INDIRECT;
+ break;
+ case 3:
+ /* only record conditional branches */
+ env->bhrb_filter = BHRB_TYPE_COND;
+ break;
+ }
+}
+
void pmu_mmcr01a_updated(CPUPPCState *env)
{
PowerPCCPU *cpu = env_archcpu(env);
@@ -95,6 +126,8 @@ void pmu_mmcr01a_updated(CPUPPCState *env)
ppc_set_irq(cpu, PPC_INTERRUPT_PERFM, 0);
}
+ hreg_bhrb_filter_update(env);
+
/*
* Should this update overflow timers (if mmcr0 is updated) so they
* get set in cpu_post_load?
diff --git a/target/ppc/power8-pmu.h b/target/ppc/power8-pmu.h
index 87fa8c9334..3f79cfc45b 100644
--- a/target/ppc/power8-pmu.h
+++ b/target/ppc/power8-pmu.h
@@ -13,6 +13,13 @@
#ifndef POWER8_PMU_H
#define POWER8_PMU_H
+#define BHRB_TYPE_NORECORD 0x00
+#define BHRB_TYPE_CALL 0x01
+#define BHRB_TYPE_INDIRECT 0x02
+#define BHRB_TYPE_COND 0x04
+#define BHRB_TYPE_OTHER 0x08
+#define BHRB_TYPE_XL_FORM 0x10
+
#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
#define PMC_COUNTER_NEGATIVE_VAL 0x80000000UL
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index a85f596d65..8aa2439700 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -180,6 +180,7 @@ struct DisasContext {
#if defined(TARGET_PPC64)
bool sf_mode;
bool has_cfar;
+ bool has_bhrb;
#endif
bool fpu_enabled;
bool altivec_enabled;
@@ -3371,14 +3372,85 @@ static void gen_rvwinkle(DisasContext *ctx)
gen_exception_nip(ctx, EXCP_HLT, ctx->base.pc_next);
#endif /* defined(CONFIG_USER_ONLY) */
}
+
+static inline TCGv gen_write_bhrb(TCGv_ptr base, TCGv offset, TCGv mask, TCGv
value)
+{
+ TCGv_ptr tmp = tcg_temp_new_ptr();
+
+ /* add base and offset to get address of bhrb entry */
+ tcg_gen_add_ptr(tmp, base, (TCGv_ptr)offset);
+
+ /* store value into bhrb at bhrb_offset */
+ tcg_gen_st_i64(value, tmp, 0);
+
+ /* add 8 to current bhrb_offset */
+ tcg_gen_addi_tl(offset, offset, 8);
+
+ /* apply offset mask */
+ tcg_gen_and_tl(offset, offset, mask);
+
+ return offset;
+}
#endif /* #if defined(TARGET_PPC64) */
-static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
+static inline void gen_update_branch_history(DisasContext *ctx,
+ target_ulong nip,
+ TCGv target,
+ target_long inst_type)
{
#if defined(TARGET_PPC64)
+ TCGv_ptr base;
+ TCGv tmp;
+ TCGv offset;
+ TCGv mask;
+ TCGLabel *no_update;
+
if (ctx->has_cfar) {
tcg_gen_movi_tl(cpu_cfar, nip);
}
+
+ if (!ctx->has_bhrb ||
+ !ctx->bhrb_enable ||
+ inst_type == BHRB_TYPE_NORECORD) {
+ return;
+ }
+
+ tmp = tcg_temp_new();
+ no_update = gen_new_label();
+
+ /* check for bhrb filtering */
+ tcg_gen_ld_tl(tmp, tcg_env, offsetof(CPUPPCState, bhrb_filter));
+ tcg_gen_andi_tl(tmp, tmp, inst_type);
+ tcg_gen_brcondi_tl(TCG_COND_EQ, tmp, 0, no_update);
+
+ base = tcg_temp_new_ptr();
+ offset = tcg_temp_new();
+ mask = tcg_temp_new();
+
+ /* load bhrb base address */
+ tcg_gen_ld_ptr(base, tcg_env, offsetof(CPUPPCState, bhrb_base));
+
+ /* load current bhrb_offset */
+ tcg_gen_ld_tl(offset, tcg_env, offsetof(CPUPPCState, bhrb_offset));
+
+ /* load a BHRB offset mask */
+ tcg_gen_ld_tl(mask, tcg_env, offsetof(CPUPPCState, bhrb_offset_mask));
+
+ offset = gen_write_bhrb(base, offset, mask, tcg_constant_i64(nip));
+
+ /* Also record the target address for XL-Form branches */
+ if (inst_type & BHRB_TYPE_XL_FORM) {
+
+ /* Set the 'T' bit for target entries */
+ tcg_gen_ori_tl(tmp, target, 0x2);
+
+ offset = gen_write_bhrb(base, offset, mask, tmp);
+ }
+
+ /* save updated bhrb_offset for next time */
+ tcg_gen_st_tl(offset, tcg_env, offsetof(CPUPPCState, bhrb_offset));
+
+ gen_set_label(no_update);
#endif
}
@@ -3508,8 +3580,10 @@ static void gen_b(DisasContext *ctx)
}
if (LK(ctx->opcode)) {
gen_setlr(ctx, ctx->base.pc_next);
+ gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_CALL);
+ } else {
+ gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_OTHER);
}
- gen_update_cfar(ctx, ctx->cia);
gen_goto_tb(ctx, 0, target);
ctx->base.is_jmp = DISAS_NORETURN;
}
@@ -3524,6 +3598,7 @@ static void gen_bcond(DisasContext *ctx, int type)
uint32_t bo = BO(ctx->opcode);
TCGLabel *l1;
TCGv target;
+ target_long bhrb_type = BHRB_TYPE_OTHER;
if (type == BCOND_LR || type == BCOND_CTR || type == BCOND_TAR) {
target = tcg_temp_new();
@@ -3534,11 +3609,16 @@ static void gen_bcond(DisasContext *ctx, int type)
} else {
tcg_gen_mov_tl(target, cpu_lr);
}
+ if (!LK(ctx->opcode)) {
+ bhrb_type |= BHRB_TYPE_INDIRECT;
+ }
+ bhrb_type |= BHRB_TYPE_XL_FORM;
} else {
target = NULL;
}
if (LK(ctx->opcode)) {
gen_setlr(ctx, ctx->base.pc_next);
+ bhrb_type |= BHRB_TYPE_CALL;
}
l1 = gen_new_label();
if ((bo & 0x4) == 0) {
@@ -3589,6 +3669,7 @@ static void gen_bcond(DisasContext *ctx, int type)
tcg_gen_brcondi_tl(TCG_COND_EQ, temp, 0, l1);
}
}
+ bhrb_type |= BHRB_TYPE_COND;
}
if ((bo & 0x10) == 0) {
/* Test CR */
@@ -3603,8 +3684,11 @@ static void gen_bcond(DisasContext *ctx, int type)
tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
}
+ bhrb_type |= BHRB_TYPE_COND;
}
- gen_update_cfar(ctx, ctx->cia);
+
+ gen_update_branch_history(ctx, ctx->cia, target, bhrb_type);
+
if (type == BCOND_IM) {
target_ulong li = (target_long)((int16_t)(BD(ctx->opcode)));
if (likely(AA(ctx->opcode) == 0)) {
@@ -3720,7 +3804,7 @@ static void gen_rfi(DisasContext *ctx)
/* Restore CPU state */
CHK_SV(ctx);
translator_io_start(&ctx->base);
- gen_update_cfar(ctx, ctx->cia);
+ gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfi(tcg_env);
ctx->base.is_jmp = DISAS_EXIT;
#endif
@@ -3735,7 +3819,7 @@ static void gen_rfid(DisasContext *ctx)
/* Restore CPU state */
CHK_SV(ctx);
translator_io_start(&ctx->base);
- gen_update_cfar(ctx, ctx->cia);
+ gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfid(tcg_env);
ctx->base.is_jmp = DISAS_EXIT;
#endif
@@ -3750,7 +3834,7 @@ static void gen_rfscv(DisasContext *ctx)
/* Restore CPU state */
CHK_SV(ctx);
translator_io_start(&ctx->base);
- gen_update_cfar(ctx, ctx->cia);
+ gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfscv(tcg_env);
ctx->base.is_jmp = DISAS_EXIT;
#endif
@@ -6330,6 +6414,7 @@ static void ppc_tr_init_disas_context(DisasContextBase
*dcbase, CPUState *cs)
#if defined(TARGET_PPC64)
ctx->sf_mode = (hflags >> HFLAGS_64) & 1;
ctx->has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
+ ctx->has_bhrb = !!(env->flags & POWERPC_FLAG_BHRB);
#endif
ctx->lazy_tlb_flush = env->mmu_model == POWERPC_MMU_32B
|| env->mmu_model & POWERPC_MMU_64;
diff --git a/target/ppc/translate/branch-impl.c.inc
b/target/ppc/translate/branch-impl.c.inc
index fb0fcf30cc..9ade0c659a 100644
--- a/target/ppc/translate/branch-impl.c.inc
+++ b/target/ppc/translate/branch-impl.c.inc
@@ -17,7 +17,7 @@ static bool trans_RFEBB(DisasContext *ctx, arg_XL_s *arg)
REQUIRE_INSNS_FLAGS2(ctx, ISA207S);
translator_io_start(&ctx->base);
- gen_update_cfar(ctx, ctx->cia);
+ gen_update_branch_history(ctx, ctx->cia, NULL, BHRB_TYPE_NORECORD);
gen_helper_rfebb(tcg_env, cpu_gpr[arg->s]);
ctx->base.is_jmp = DISAS_CHAIN;
--
2.43.0
- [PULL 16/72] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree., (continued)
- [PULL 16/72] target/ppc: Move div/mod fixed-point insns (64 bits operands) to decodetree., Nicholas Piggin, 2024/05/23
- [PULL 18/72] target/ppc: Move logical fixed-point instructions to decodetree., Nicholas Piggin, 2024/05/23
- [PULL 17/72] target/ppc: Move cmp{rb, eqb}, tw[i], td[i], isel instructions to decodetree., Nicholas Piggin, 2024/05/23
- [PULL 20/72] target/ppc: Move VMX integer logical instructions to decodetree., Nicholas Piggin, 2024/05/23
- [PULL 21/72] target/ppc: Move VMX integer max/min instructions to decodetree., Nicholas Piggin, 2024/05/23
- [PULL 19/72] target/ppc: Move VMX storage access instructions to decodetree, Nicholas Piggin, 2024/05/23
- [PULL 24/72] Add support for the clrbhrb and mfbhrbe instructions., Nicholas Piggin, 2024/05/23
- [PULL 26/72] target/ppc: larx/stcx generation need only apply DEF_MEMOP() once, Nicholas Piggin, 2024/05/23
- [PULL 22/72] This commit is preparatory to the addition of Branch History Rolling Buffer (BHRB) functionality, which is being provided today starting with the P8 processor., Nicholas Piggin, 2024/05/23
- [PULL 28/72] target/ppc: Make checkstop actually stop the system, Nicholas Piggin, 2024/05/23
- [PULL 23/72] This commit continues adding support for the Branch History Rolling Buffer (BHRB) as is provided starting with the P8 processor and continuing with its successors. This commit is limited to the recording and filtering of taken branches.,
Nicholas Piggin <=
- [PULL 29/72] target/ppc: improve checkstop logging, Nicholas Piggin, 2024/05/23
- [PULL 27/72] target/ppc: Remove redundant MEMOP_GET_SIZE macro, Nicholas Piggin, 2024/05/23
- [PULL 32/72] target/ppc: Add PPR32 SPR, Nicholas Piggin, 2024/05/23
- [PULL 33/72] target/ppc: add helper to write per-LPAR SPRs, Nicholas Piggin, 2024/05/23
- [PULL 30/72] target/ppc: Implement attn instruction on BookS 64-bit processors, Nicholas Piggin, 2024/05/23
- [PULL 35/72] target/ppc: Add SMT support to PTCR SPR, Nicholas Piggin, 2024/05/23
- [PULL 34/72] target/ppc: Add SMT support to simple SPRs, Nicholas Piggin, 2024/05/23
- [PULL 25/72] Adds migration support for Branch History Rolling Buffer (BHRB) internal state., Nicholas Piggin, 2024/05/23
- [PULL 42/72] target/ppc/mmu_common.c: Remove unneeded local variable, Nicholas Piggin, 2024/05/23