[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 20/23] accel/tcg: allow plugin instrumentation to be disa
From: |
Aaron Lindsay |
Subject: |
Re: [PATCH v3 20/23] accel/tcg: allow plugin instrumentation to be disable via cflags |
Date: |
Wed, 17 Feb 2021 11:30:39 -0500 |
On Feb 13 13:03, Alex Bennée wrote:
> When icount is enabled and we recompile an MMIO access we end up
> double counting the instruction execution. To avoid this we introduce
> the CF_MEMI cflag which only allows memory instrumentation for the
> next TB (which won't yet have been counted). As this is part of the
> hashed compile flags we will only execute the generated TB while
> coming out of a cpu_io_recompile.
>
> While we are at it delete the old TODO. We might as well keep the
> translation handy as it's likely you will repeatedly hit it on each
> MMIO access.
>
> Reported-by: Aaron Lindsay <aaron@os.amperecomputing.com>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Message-Id: <20210210221053.18050-21-alex.bennee@linaro.org>
This resolves the issue for me - I'm now seeing one instruction callback
and one memory callback for both MMIO load and store instructions, as
expected.
Tested-by: Aaron Lindsay <aaron@os.amperecomputing.com>
Thanks!
-Aaron
>
> ---
> v3
> - s/CF_NOINSTR/CF_MEMI_ONY/
> - Limit instrumentation at API call sites instead of skipping altogether
> - clean-up commit log message
> ---
> include/exec/exec-all.h | 6 +++---
> include/exec/plugin-gen.h | 4 ++--
> include/qemu/plugin.h | 4 ++++
> accel/tcg/plugin-gen.c | 3 ++-
> accel/tcg/translate-all.c | 18 +++++++++---------
> accel/tcg/translator.c | 5 ++++-
> plugins/api.c | 36 +++++++++++++++++++++++++-----------
> 7 files changed, 49 insertions(+), 27 deletions(-)
>
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index e08179de34..77a2dc044d 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -454,14 +454,14 @@ struct TranslationBlock {
> uint32_t cflags; /* compile flags */
> #define CF_COUNT_MASK 0x00007fff
> #define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */
> +#define CF_MEMI_ONLY 0x00010000 /* Only instrument memory ops */
> #define CF_USE_ICOUNT 0x00020000
> #define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */
> #define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */
> #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
> #define CF_CLUSTER_SHIFT 24
> -/* cflags' mask for hashing/comparison */
> -#define CF_HASH_MASK \
> - (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL |
> CF_CLUSTER_MASK)
> +/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
> +#define CF_HASH_MASK (~CF_INVALID)
>
> /* Per-vCPU dynamic tracing state used to generate this TB */
> uint32_t trace_vcpu_dstate;
> diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
> index 4834a9e2f4..b1b72b5d90 100644
> --- a/include/exec/plugin-gen.h
> +++ b/include/exec/plugin-gen.h
> @@ -19,7 +19,7 @@ struct DisasContextBase;
>
> #ifdef CONFIG_PLUGIN
>
> -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb);
> +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool
> supress);
> void plugin_gen_tb_end(CPUState *cpu);
> void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
> void plugin_gen_insn_end(void);
> @@ -41,7 +41,7 @@ static inline void plugin_insn_append(const void *from,
> size_t size)
> #else /* !CONFIG_PLUGIN */
>
> static inline
> -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
> +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool
> supress)
> {
> return false;
> }
> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
> index 841deed79c..c5a79a89f0 100644
> --- a/include/qemu/plugin.h
> +++ b/include/qemu/plugin.h
> @@ -92,6 +92,7 @@ struct qemu_plugin_dyn_cb {
> };
> };
>
> +/* Internal context for instrumenting an instruction */
> struct qemu_plugin_insn {
> GByteArray *data;
> uint64_t vaddr;
> @@ -99,6 +100,7 @@ struct qemu_plugin_insn {
> GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES];
> bool calls_helpers;
> bool mem_helper;
> + bool mem_only;
> };
>
> /*
> @@ -128,6 +130,7 @@ static inline struct qemu_plugin_insn
> *qemu_plugin_insn_alloc(void)
> return insn;
> }
>
> +/* Internal context for this TranslationBlock */
> struct qemu_plugin_tb {
> GPtrArray *insns;
> size_t n;
> @@ -135,6 +138,7 @@ struct qemu_plugin_tb {
> uint64_t vaddr2;
> void *haddr1;
> void *haddr2;
> + bool mem_only;
> GArray *cbs[PLUGIN_N_CB_SUBTYPES];
> };
>
> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
> index 8a1bb801e0..c3dc3effe7 100644
> --- a/accel/tcg/plugin-gen.c
> +++ b/accel/tcg/plugin-gen.c
> @@ -842,7 +842,7 @@ static void plugin_gen_inject(const struct qemu_plugin_tb
> *plugin_tb)
> pr_ops();
> }
>
> -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
> +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool
> mem_only)
> {
> struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
> bool ret = false;
> @@ -855,6 +855,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const
> TranslationBlock *tb)
> ptb->vaddr2 = -1;
> get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
> ptb->haddr2 = NULL;
> + ptb->mem_only = mem_only;
>
> plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
> }
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 0666f9ef14..fdf88dc1c3 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -2399,7 +2399,8 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t
> retaddr)
> }
>
> #ifndef CONFIG_USER_ONLY
> -/* in deterministic execution mode, instructions doing device I/Os
> +/*
> + * In deterministic execution mode, instructions doing device I/Os
> * must be at the end of the TB.
> *
> * Called by softmmu_template.h, with iothread mutex not held.
> @@ -2430,19 +2431,18 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t
> retaddr)
> n = 2;
> }
>
> - /* Generate a new TB executing the I/O insn. */
> - cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
> + /*
> + * Exit the loop and potentially generate a new TB executing the
> + * just the I/O insns. We also limit instrumentation to memory
> + * operations only (which execute after completion) so we don't
> + * double instrument the instruction.
> + */
> + cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
>
> qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
> "cpu_io_recompile: rewound execution of TB to "
> TARGET_FMT_lx "\n", tb->pc);
>
> - /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
> - * the first in the TB) then we end up generating a whole new TB and
> - * repeating the fault, which is horribly inefficient.
> - * Better would be to execute just this insn uncached, or generate a
> - * second new TB.
> - */
> cpu_loop_exit_noexc(cpu);
> }
>
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index a49a794065..2dfc27102f 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -58,7 +58,8 @@ void translator_loop(const TranslatorOps *ops,
> DisasContextBase *db,
> ops->tb_start(db, cpu);
> tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */
>
> - plugin_enabled = plugin_gen_tb_start(cpu, tb);
> + plugin_enabled = plugin_gen_tb_start(cpu, tb,
> + tb_cflags(db->tb) & CF_MEMI_ONLY);
>
> while (true) {
> db->num_insns++;
> @@ -100,6 +101,8 @@ void translator_loop(const TranslatorOps *ops,
> DisasContextBase *db,
> gen_io_start();
> ops->translate_insn(db, cpu);
> } else {
> + /* we should only see CF_MEMI_ONLY for io_recompile */
> + tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY));
> ops->translate_insn(db, cpu);
> }
>
> diff --git a/plugins/api.c b/plugins/api.c
> index 5dc8e6f934..0b04380d57 100644
> --- a/plugins/api.c
> +++ b/plugins/api.c
> @@ -84,15 +84,19 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct
> qemu_plugin_tb *tb,
> enum qemu_plugin_cb_flags flags,
> void *udata)
> {
> - plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
> - cb, flags, udata);
> + if (!tb->mem_only) {
> + plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
> + cb, flags, udata);
> + }
> }
>
> void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
> enum qemu_plugin_op op,
> void *ptr, uint64_t imm)
> {
> - plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
> + if (!tb->mem_only) {
> + plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr,
> imm);
> + }
> }
>
> void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
> @@ -100,20 +104,27 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct
> qemu_plugin_insn *insn,
> enum qemu_plugin_cb_flags flags,
> void *udata)
> {
> -
> plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
> - cb, flags, udata);
> + if (!insn->mem_only) {
> +
> plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
> + cb, flags, udata);
> + }
> }
>
> void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn
> *insn,
> enum qemu_plugin_op op,
> void *ptr, uint64_t imm)
> {
> - plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
> - 0, op, ptr, imm);
> + if (!insn->mem_only) {
> +
> plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
> + 0, op, ptr, imm);
> + }
> }
>
>
> -
> +/*
> + * We always plant memory instrumentation because they don't finalise until
> + * after the operation has complete.
> + */
> void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
> qemu_plugin_vcpu_mem_cb_t cb,
> enum qemu_plugin_cb_flags flags,
> @@ -121,7 +132,7 @@ void qemu_plugin_register_vcpu_mem_cb(struct
> qemu_plugin_insn *insn,
> void *udata)
> {
> plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR],
> - cb, flags, rw, udata);
> + cb, flags, rw, udata);
> }
>
> void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
> @@ -130,7 +141,7 @@ void qemu_plugin_register_vcpu_mem_inline(struct
> qemu_plugin_insn *insn,
> uint64_t imm)
> {
> plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
> - rw, op, ptr, imm);
> + rw, op, ptr, imm);
> }
>
> void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
> @@ -181,10 +192,13 @@ uint64_t qemu_plugin_tb_vaddr(const struct
> qemu_plugin_tb *tb)
> struct qemu_plugin_insn *
> qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx)
> {
> + struct qemu_plugin_insn *insn;
> if (unlikely(idx >= tb->n)) {
> return NULL;
> }
> - return g_ptr_array_index(tb->insns, idx);
> + insn = g_ptr_array_index(tb->insns, idx);
> + insn->mem_only = tb->mem_only;
> + return insn;
> }
>
> /*
> --
> 2.20.1
>
- [PATCH v3 12/23] target/mips: Create mips_io_recompile_replay_branch, (continued)
- [PATCH v3 12/23] target/mips: Create mips_io_recompile_replay_branch, Alex Bennée, 2021/02/13
- [PATCH v3 08/23] contrib: Open brace '{' following struct go on the same line, Alex Bennée, 2021/02/13
- [PATCH v3 06/23] contrib: Add spaces around operator, Alex Bennée, 2021/02/13
- [PATCH v3 09/23] accel/tcg/plugin-gen: fix the call signature for inline callbacks, Alex Bennée, 2021/02/13
- [PATCH v3 05/23] contrib: Fix some code style problems, ERROR: "foo * bar" should be "foo *bar", Alex Bennée, 2021/02/13
- [PATCH v3 07/23] contrib: space required after that ',', Alex Bennée, 2021/02/13
- [PATCH v3 18/23] accel/tcg: re-factor non-RAM execution code, Alex Bennée, 2021/02/13
- [PATCH v3 14/23] tests/plugin: expand insn test to detect duplicate instructions, Alex Bennée, 2021/02/13
- [PATCH v3 20/23] accel/tcg: allow plugin instrumentation to be disable via cflags, Alex Bennée, 2021/02/13
- Re: [PATCH v3 20/23] accel/tcg: allow plugin instrumentation to be disable via cflags,
Aaron Lindsay <=
- [PATCH v3 11/23] accel/tcg: Create io_recompile_replay_branch hook, Alex Bennée, 2021/02/13
- [PATCH v3 19/23] accel/tcg: remove CF_NOCACHE and special cases, Alex Bennée, 2021/02/13
- [PATCH v3 13/23] target/sh4: Create superh_io_recompile_replay_branch, Alex Bennée, 2021/02/13
- [PATCH v3 22/23] tests/plugin: allow memory plugin to do both inline and callbacks, Alex Bennée, 2021/02/13
- [PATCH v3 21/23] tests/acceptance: add a new tests to detect counting errors, Alex Bennée, 2021/02/13
- [PATCH v3 23/23] tests/acceptance: add a memory callback check, Alex Bennée, 2021/02/13
- [PATCH v3 17/23] accel/tcg: cache single instruction TB on pending replay exception, Alex Bennée, 2021/02/13