qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 20/23] accel/tcg: allow plugin instrumentation to be disa


From: Aaron Lindsay
Subject: Re: [PATCH v3 20/23] accel/tcg: allow plugin instrumentation to be disable via cflags
Date: Wed, 17 Feb 2021 11:30:39 -0500

On Feb 13 13:03, Alex Bennée wrote:
> When icount is enabled and we recompile an MMIO access we end up
> double counting the instruction execution. To avoid this we introduce
> the CF_MEMI cflag which only allows memory instrumentation for the
> next TB (which won't yet have been counted). As this is part of the
> hashed compile flags we will only execute the generated TB while
> coming out of a cpu_io_recompile.
> 
> While we are at it delete the old TODO. We might as well keep the
> translation handy as it's likely you will repeatedly hit it on each
> MMIO access.
> 
> Reported-by: Aaron Lindsay <aaron@os.amperecomputing.com>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> Message-Id: <20210210221053.18050-21-alex.bennee@linaro.org>

This resolves the issue for me - I'm now seeing one instruction callback
and one memory callback for both MMIO load and store instructions, as
expected.

Tested-by: Aaron Lindsay <aaron@os.amperecomputing.com>

Thanks!

-Aaron

> 
> ---
> v3
>   - s/CF_NOINSTR/CF_MEMI_ONY/
>   - Limit instrumentation at API call sites instead of skipping altogether
>   - clean-up commit log message
> ---
>  include/exec/exec-all.h   |  6 +++---
>  include/exec/plugin-gen.h |  4 ++--
>  include/qemu/plugin.h     |  4 ++++
>  accel/tcg/plugin-gen.c    |  3 ++-
>  accel/tcg/translate-all.c | 18 +++++++++---------
>  accel/tcg/translator.c    |  5 ++++-
>  plugins/api.c             | 36 +++++++++++++++++++++++++-----------
>  7 files changed, 49 insertions(+), 27 deletions(-)
> 
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index e08179de34..77a2dc044d 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -454,14 +454,14 @@ struct TranslationBlock {
>      uint32_t cflags;    /* compile flags */
>  #define CF_COUNT_MASK  0x00007fff
>  #define CF_LAST_IO     0x00008000 /* Last insn may be an IO access.  */
> +#define CF_MEMI_ONLY   0x00010000 /* Only instrument memory ops */
>  #define CF_USE_ICOUNT  0x00020000
>  #define CF_INVALID     0x00040000 /* TB is stale. Set with @jmp_lock held */
>  #define CF_PARALLEL    0x00080000 /* Generate code for a parallel context */
>  #define CF_CLUSTER_MASK 0xff000000 /* Top 8 bits are cluster ID */
>  #define CF_CLUSTER_SHIFT 24
> -/* cflags' mask for hashing/comparison */
> -#define CF_HASH_MASK   \
> -    (CF_COUNT_MASK | CF_LAST_IO | CF_USE_ICOUNT | CF_PARALLEL | 
> CF_CLUSTER_MASK)
> +/* cflags' mask for hashing/comparison, basically ignore CF_INVALID */
> +#define CF_HASH_MASK   (~CF_INVALID)
>  
>      /* Per-vCPU dynamic tracing state used to generate this TB */
>      uint32_t trace_vcpu_dstate;
> diff --git a/include/exec/plugin-gen.h b/include/exec/plugin-gen.h
> index 4834a9e2f4..b1b72b5d90 100644
> --- a/include/exec/plugin-gen.h
> +++ b/include/exec/plugin-gen.h
> @@ -19,7 +19,7 @@ struct DisasContextBase;
>  
>  #ifdef CONFIG_PLUGIN
>  
> -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb);
> +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool 
> supress);
>  void plugin_gen_tb_end(CPUState *cpu);
>  void plugin_gen_insn_start(CPUState *cpu, const struct DisasContextBase *db);
>  void plugin_gen_insn_end(void);
> @@ -41,7 +41,7 @@ static inline void plugin_insn_append(const void *from, 
> size_t size)
>  #else /* !CONFIG_PLUGIN */
>  
>  static inline
> -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
> +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool 
> supress)
>  {
>      return false;
>  }
> diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
> index 841deed79c..c5a79a89f0 100644
> --- a/include/qemu/plugin.h
> +++ b/include/qemu/plugin.h
> @@ -92,6 +92,7 @@ struct qemu_plugin_dyn_cb {
>      };
>  };
>  
> +/* Internal context for instrumenting an instruction */
>  struct qemu_plugin_insn {
>      GByteArray *data;
>      uint64_t vaddr;
> @@ -99,6 +100,7 @@ struct qemu_plugin_insn {
>      GArray *cbs[PLUGIN_N_CB_TYPES][PLUGIN_N_CB_SUBTYPES];
>      bool calls_helpers;
>      bool mem_helper;
> +    bool mem_only;
>  };
>  
>  /*
> @@ -128,6 +130,7 @@ static inline struct qemu_plugin_insn 
> *qemu_plugin_insn_alloc(void)
>      return insn;
>  }
>  
> +/* Internal context for this TranslationBlock */
>  struct qemu_plugin_tb {
>      GPtrArray *insns;
>      size_t n;
> @@ -135,6 +138,7 @@ struct qemu_plugin_tb {
>      uint64_t vaddr2;
>      void *haddr1;
>      void *haddr2;
> +    bool mem_only;
>      GArray *cbs[PLUGIN_N_CB_SUBTYPES];
>  };
>  
> diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
> index 8a1bb801e0..c3dc3effe7 100644
> --- a/accel/tcg/plugin-gen.c
> +++ b/accel/tcg/plugin-gen.c
> @@ -842,7 +842,7 @@ static void plugin_gen_inject(const struct qemu_plugin_tb 
> *plugin_tb)
>      pr_ops();
>  }
>  
> -bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb)
> +bool plugin_gen_tb_start(CPUState *cpu, const TranslationBlock *tb, bool 
> mem_only)
>  {
>      struct qemu_plugin_tb *ptb = tcg_ctx->plugin_tb;
>      bool ret = false;
> @@ -855,6 +855,7 @@ bool plugin_gen_tb_start(CPUState *cpu, const 
> TranslationBlock *tb)
>          ptb->vaddr2 = -1;
>          get_page_addr_code_hostp(cpu->env_ptr, tb->pc, &ptb->haddr1);
>          ptb->haddr2 = NULL;
> +        ptb->mem_only = mem_only;
>  
>          plugin_gen_empty_callback(PLUGIN_GEN_FROM_TB);
>      }
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 0666f9ef14..fdf88dc1c3 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -2399,7 +2399,8 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t 
> retaddr)
>  }
>  
>  #ifndef CONFIG_USER_ONLY
> -/* in deterministic execution mode, instructions doing device I/Os
> +/*
> + * In deterministic execution mode, instructions doing device I/Os
>   * must be at the end of the TB.
>   *
>   * Called by softmmu_template.h, with iothread mutex not held.
> @@ -2430,19 +2431,18 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t 
> retaddr)
>          n = 2;
>      }
>  
> -    /* Generate a new TB executing the I/O insn.  */
> -    cpu->cflags_next_tb = curr_cflags() | CF_LAST_IO | n;
> +    /*
> +     * Exit the loop and potentially generate a new TB executing the
> +     * just the I/O insns. We also limit instrumentation to memory
> +     * operations only (which execute after completion) so we don't
> +     * double instrument the instruction.
> +     */
> +    cpu->cflags_next_tb = curr_cflags() | CF_MEMI_ONLY | CF_LAST_IO | n;
>  
>      qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
>                             "cpu_io_recompile: rewound execution of TB to "
>                             TARGET_FMT_lx "\n", tb->pc);
>  
> -    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
> -     * the first in the TB) then we end up generating a whole new TB and
> -     *  repeating the fault, which is horribly inefficient.
> -     *  Better would be to execute just this insn uncached, or generate a
> -     *  second new TB.
> -     */
>      cpu_loop_exit_noexc(cpu);
>  }
>  
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index a49a794065..2dfc27102f 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -58,7 +58,8 @@ void translator_loop(const TranslatorOps *ops, 
> DisasContextBase *db,
>      ops->tb_start(db, cpu);
>      tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
>  
> -    plugin_enabled = plugin_gen_tb_start(cpu, tb);
> +    plugin_enabled = plugin_gen_tb_start(cpu, tb,
> +                                         tb_cflags(db->tb) & CF_MEMI_ONLY);
>  
>      while (true) {
>          db->num_insns++;
> @@ -100,6 +101,8 @@ void translator_loop(const TranslatorOps *ops, 
> DisasContextBase *db,
>              gen_io_start();
>              ops->translate_insn(db, cpu);
>          } else {
> +            /* we should only see CF_MEMI_ONLY for io_recompile */
> +            tcg_debug_assert(!(tb_cflags(db->tb) & CF_MEMI_ONLY));
>              ops->translate_insn(db, cpu);
>          }
>  
> diff --git a/plugins/api.c b/plugins/api.c
> index 5dc8e6f934..0b04380d57 100644
> --- a/plugins/api.c
> +++ b/plugins/api.c
> @@ -84,15 +84,19 @@ void qemu_plugin_register_vcpu_tb_exec_cb(struct 
> qemu_plugin_tb *tb,
>                                            enum qemu_plugin_cb_flags flags,
>                                            void *udata)
>  {
> -    plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
> -                                  cb, flags, udata);
> +    if (!tb->mem_only) {
> +        plugin_register_dyn_cb__udata(&tb->cbs[PLUGIN_CB_REGULAR],
> +                                      cb, flags, udata);
> +    }
>  }
>  
>  void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
>                                                enum qemu_plugin_op op,
>                                                void *ptr, uint64_t imm)
>  {
> -    plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
> +    if (!tb->mem_only) {
> +        plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, 
> imm);
> +    }
>  }
>  
>  void qemu_plugin_register_vcpu_insn_exec_cb(struct qemu_plugin_insn *insn,
> @@ -100,20 +104,27 @@ void qemu_plugin_register_vcpu_insn_exec_cb(struct 
> qemu_plugin_insn *insn,
>                                              enum qemu_plugin_cb_flags flags,
>                                              void *udata)
>  {
> -    
> plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
> -        cb, flags, udata);
> +    if (!insn->mem_only) {
> +        
> plugin_register_dyn_cb__udata(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_REGULAR],
> +                                      cb, flags, udata);
> +    }
>  }
>  
>  void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn 
> *insn,
>                                                  enum qemu_plugin_op op,
>                                                  void *ptr, uint64_t imm)
>  {
> -    plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
> -                              0, op, ptr, imm);
> +    if (!insn->mem_only) {
> +        
> plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
> +                                  0, op, ptr, imm);
> +    }
>  }
>  
>  
> -
> +/*
> + * We always plant memory instrumentation because they don't finalise until
> + * after the operation has complete.
> + */
>  void qemu_plugin_register_vcpu_mem_cb(struct qemu_plugin_insn *insn,
>                                        qemu_plugin_vcpu_mem_cb_t cb,
>                                        enum qemu_plugin_cb_flags flags,
> @@ -121,7 +132,7 @@ void qemu_plugin_register_vcpu_mem_cb(struct 
> qemu_plugin_insn *insn,
>                                        void *udata)
>  {
>      plugin_register_vcpu_mem_cb(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_REGULAR],
> -                                cb, flags, rw, udata);
> +                                    cb, flags, rw, udata);
>  }
>  
>  void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
> @@ -130,7 +141,7 @@ void qemu_plugin_register_vcpu_mem_inline(struct 
> qemu_plugin_insn *insn,
>                                            uint64_t imm)
>  {
>      plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
> -        rw, op, ptr, imm);
> +                              rw, op, ptr, imm);
>  }
>  
>  void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
> @@ -181,10 +192,13 @@ uint64_t qemu_plugin_tb_vaddr(const struct 
> qemu_plugin_tb *tb)
>  struct qemu_plugin_insn *
>  qemu_plugin_tb_get_insn(const struct qemu_plugin_tb *tb, size_t idx)
>  {
> +    struct qemu_plugin_insn *insn;
>      if (unlikely(idx >= tb->n)) {
>          return NULL;
>      }
> -    return g_ptr_array_index(tb->insns, idx);
> +    insn = g_ptr_array_index(tb->insns, idx);
> +    insn->mem_only = tb->mem_only;
> +    return insn;
>  }
>  
>  /*
> -- 
> 2.20.1
> 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]